Show More
@@ -0,0 +1,149 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | /*-************************************* | |||
|
12 | * Dependencies | |||
|
13 | ***************************************/ | |||
|
14 | #include "zstd_compress_literals.h" | |||
|
15 | ||||
|
16 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
|
17 | { | |||
|
18 | BYTE* const ostart = (BYTE* const)dst; | |||
|
19 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |||
|
20 | ||||
|
21 | RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); | |||
|
22 | ||||
|
23 | switch(flSize) | |||
|
24 | { | |||
|
25 | case 1: /* 2 - 1 - 5 */ | |||
|
26 | ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); | |||
|
27 | break; | |||
|
28 | case 2: /* 2 - 2 - 12 */ | |||
|
29 | MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); | |||
|
30 | break; | |||
|
31 | case 3: /* 2 - 2 - 20 */ | |||
|
32 | MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); | |||
|
33 | break; | |||
|
34 | default: /* not necessary : flSize is {1,2,3} */ | |||
|
35 | assert(0); | |||
|
36 | } | |||
|
37 | ||||
|
38 | memcpy(ostart + flSize, src, srcSize); | |||
|
39 | return srcSize + flSize; | |||
|
40 | } | |||
|
41 | ||||
|
42 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
|
43 | { | |||
|
44 | BYTE* const ostart = (BYTE* const)dst; | |||
|
45 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |||
|
46 | ||||
|
47 | (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ | |||
|
48 | ||||
|
49 | switch(flSize) | |||
|
50 | { | |||
|
51 | case 1: /* 2 - 1 - 5 */ | |||
|
52 | ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); | |||
|
53 | break; | |||
|
54 | case 2: /* 2 - 2 - 12 */ | |||
|
55 | MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); | |||
|
56 | break; | |||
|
57 | case 3: /* 2 - 2 - 20 */ | |||
|
58 | MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); | |||
|
59 | break; | |||
|
60 | default: /* not necessary : flSize is {1,2,3} */ | |||
|
61 | assert(0); | |||
|
62 | } | |||
|
63 | ||||
|
64 | ostart[flSize] = *(const BYTE*)src; | |||
|
65 | return flSize+1; | |||
|
66 | } | |||
|
67 | ||||
|
68 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |||
|
69 | ZSTD_hufCTables_t* nextHuf, | |||
|
70 | ZSTD_strategy strategy, int disableLiteralCompression, | |||
|
71 | void* dst, size_t dstCapacity, | |||
|
72 | const void* src, size_t srcSize, | |||
|
73 | void* workspace, size_t wkspSize, | |||
|
74 | const int bmi2) | |||
|
75 | { | |||
|
76 | size_t const minGain = ZSTD_minGain(srcSize, strategy); | |||
|
77 | size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); | |||
|
78 | BYTE* const ostart = (BYTE*)dst; | |||
|
79 | U32 singleStream = srcSize < 256; | |||
|
80 | symbolEncodingType_e hType = set_compressed; | |||
|
81 | size_t cLitSize; | |||
|
82 | ||||
|
83 | DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", | |||
|
84 | disableLiteralCompression); | |||
|
85 | ||||
|
86 | /* Prepare nextEntropy assuming reusing the existing table */ | |||
|
87 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
|
88 | ||||
|
89 | if (disableLiteralCompression) | |||
|
90 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
|
91 | ||||
|
92 | /* small ? don't even attempt compression (speed opt) */ | |||
|
93 | # define COMPRESS_LITERALS_SIZE_MIN 63 | |||
|
94 | { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; | |||
|
95 | if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
|
96 | } | |||
|
97 | ||||
|
98 | RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); | |||
|
99 | { HUF_repeat repeat = prevHuf->repeatMode; | |||
|
100 | int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; | |||
|
101 | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; | |||
|
102 | cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |||
|
103 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) | |||
|
104 | : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |||
|
105 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); | |||
|
106 | if (repeat != HUF_repeat_none) { | |||
|
107 | /* reused the existing table */ | |||
|
108 | hType = set_repeat; | |||
|
109 | } | |||
|
110 | } | |||
|
111 | ||||
|
112 | if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { | |||
|
113 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
|
114 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
|
115 | } | |||
|
116 | if (cLitSize==1) { | |||
|
117 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
|
118 | return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); | |||
|
119 | } | |||
|
120 | ||||
|
121 | if (hType == set_compressed) { | |||
|
122 | /* using a newly constructed table */ | |||
|
123 | nextHuf->repeatMode = HUF_repeat_check; | |||
|
124 | } | |||
|
125 | ||||
|
126 | /* Build header */ | |||
|
127 | switch(lhSize) | |||
|
128 | { | |||
|
129 | case 3: /* 2 - 2 - 10 - 10 */ | |||
|
130 | { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); | |||
|
131 | MEM_writeLE24(ostart, lhc); | |||
|
132 | break; | |||
|
133 | } | |||
|
134 | case 4: /* 2 - 2 - 14 - 14 */ | |||
|
135 | { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); | |||
|
136 | MEM_writeLE32(ostart, lhc); | |||
|
137 | break; | |||
|
138 | } | |||
|
139 | case 5: /* 2 - 2 - 18 - 18 */ | |||
|
140 | { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); | |||
|
141 | MEM_writeLE32(ostart, lhc); | |||
|
142 | ostart[4] = (BYTE)(cLitSize >> 10); | |||
|
143 | break; | |||
|
144 | } | |||
|
145 | default: /* not possible : lhSize is {3,4,5} */ | |||
|
146 | assert(0); | |||
|
147 | } | |||
|
148 | return lhSize+cLitSize; | |||
|
149 | } |
@@ -0,0 +1,29 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | #ifndef ZSTD_COMPRESS_LITERALS_H | |||
|
12 | #define ZSTD_COMPRESS_LITERALS_H | |||
|
13 | ||||
|
14 | #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ | |||
|
15 | ||||
|
16 | ||||
|
17 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |||
|
18 | ||||
|
19 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |||
|
20 | ||||
|
21 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |||
|
22 | ZSTD_hufCTables_t* nextHuf, | |||
|
23 | ZSTD_strategy strategy, int disableLiteralCompression, | |||
|
24 | void* dst, size_t dstCapacity, | |||
|
25 | const void* src, size_t srcSize, | |||
|
26 | void* workspace, size_t wkspSize, | |||
|
27 | const int bmi2); | |||
|
28 | ||||
|
29 | #endif /* ZSTD_COMPRESS_LITERALS_H */ |
@@ -0,0 +1,415 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | /*-************************************* | |||
|
12 | * Dependencies | |||
|
13 | ***************************************/ | |||
|
14 | #include "zstd_compress_sequences.h" | |||
|
15 | ||||
|
/**
 * Lookup table of -log2(x / 256), in 1/256th of a bit, for x in [0, 256).
 *   entry[0] = 0
 *   entry[x] = floor(-log2(x / 256) * 256)   for x >= 1
 * The trailing comment on each row gives the index of its first entry.
 */
static unsigned const kInverseProbabilityLog256[256] = {
    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,  /*   0 */
    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,   /*  12 */
    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,   /*  24 */
    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,   /*  36 */
    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,   /*  48 */
    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,   /*  60 */
    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,   /*  72 */
    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,   /*  84 */
    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,   /*  96 */
    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,   /* 108 */
    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,   /* 120 */
    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,   /* 132 */
    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,   /* 144 */
    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,   /* 156 */
    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,   /* 168 */
    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,   /* 180 */
    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,    /* 192 */
    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,    /* 204 */
    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,    /* 216 */
    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,    /* 228 */
    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,     /* 240 */
    5,    4,    2,    1,                                                     /* 252 */
};
|
45 | ||||
|
46 | static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { | |||
|
47 | void const* ptr = ctable; | |||
|
48 | U16 const* u16ptr = (U16 const*)ptr; | |||
|
49 | U32 const maxSymbolValue = MEM_read16(u16ptr + 1); | |||
|
50 | return maxSymbolValue; | |||
|
51 | } | |||
|
52 | ||||
|
53 | /** | |||
|
54 | * Returns the cost in bytes of encoding the normalized count header. | |||
|
55 | * Returns an error if any of the helper functions return an error. | |||
|
56 | */ | |||
|
57 | static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, | |||
|
58 | size_t const nbSeq, unsigned const FSELog) | |||
|
59 | { | |||
|
60 | BYTE wksp[FSE_NCOUNTBOUND]; | |||
|
61 | S16 norm[MaxSeq + 1]; | |||
|
62 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |||
|
63 | FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); | |||
|
64 | return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); | |||
|
65 | } | |||
|
66 | ||||
|
67 | /** | |||
|
68 | * Returns the cost in bits of encoding the distribution described by count | |||
|
69 | * using the entropy bound. | |||
|
70 | */ | |||
|
71 | static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) | |||
|
72 | { | |||
|
73 | unsigned cost = 0; | |||
|
74 | unsigned s; | |||
|
75 | for (s = 0; s <= max; ++s) { | |||
|
76 | unsigned norm = (unsigned)((256 * count[s]) / total); | |||
|
77 | if (count[s] != 0 && norm == 0) | |||
|
78 | norm = 1; | |||
|
79 | assert(count[s] < total); | |||
|
80 | cost += count[s] * kInverseProbabilityLog256[norm]; | |||
|
81 | } | |||
|
82 | return cost >> 8; | |||
|
83 | } | |||
|
84 | ||||
|
85 | /** | |||
|
86 | * Returns the cost in bits of encoding the distribution in count using ctable. | |||
|
87 | * Returns an error if ctable cannot represent all the symbols in count. | |||
|
88 | */ | |||
|
89 | static size_t ZSTD_fseBitCost( | |||
|
90 | FSE_CTable const* ctable, | |||
|
91 | unsigned const* count, | |||
|
92 | unsigned const max) | |||
|
93 | { | |||
|
94 | unsigned const kAccuracyLog = 8; | |||
|
95 | size_t cost = 0; | |||
|
96 | unsigned s; | |||
|
97 | FSE_CState_t cstate; | |||
|
98 | FSE_initCState(&cstate, ctable); | |||
|
99 | RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, | |||
|
100 | "Repeat FSE_CTable has maxSymbolValue %u < %u", | |||
|
101 | ZSTD_getFSEMaxSymbolValue(ctable), max); | |||
|
102 | for (s = 0; s <= max; ++s) { | |||
|
103 | unsigned const tableLog = cstate.stateLog; | |||
|
104 | unsigned const badCost = (tableLog + 1) << kAccuracyLog; | |||
|
105 | unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); | |||
|
106 | if (count[s] == 0) | |||
|
107 | continue; | |||
|
108 | RETURN_ERROR_IF(bitCost >= badCost, GENERIC, | |||
|
109 | "Repeat FSE_CTable has Prob[%u] == 0", s); | |||
|
110 | cost += count[s] * bitCost; | |||
|
111 | } | |||
|
112 | return cost >> kAccuracyLog; | |||
|
113 | } | |||
|
114 | ||||
|
115 | /** | |||
|
116 | * Returns the cost in bits of encoding the distribution in count using the | |||
|
117 | * table described by norm. The max symbol support by norm is assumed >= max. | |||
|
118 | * norm must be valid for every symbol with non-zero probability in count. | |||
|
119 | */ | |||
|
120 | static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, | |||
|
121 | unsigned const* count, unsigned const max) | |||
|
122 | { | |||
|
123 | unsigned const shift = 8 - accuracyLog; | |||
|
124 | size_t cost = 0; | |||
|
125 | unsigned s; | |||
|
126 | assert(accuracyLog <= 8); | |||
|
127 | for (s = 0; s <= max; ++s) { | |||
|
128 | unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; | |||
|
129 | unsigned const norm256 = normAcc << shift; | |||
|
130 | assert(norm256 > 0); | |||
|
131 | assert(norm256 < 256); | |||
|
132 | cost += count[s] * kInverseProbabilityLog256[norm256]; | |||
|
133 | } | |||
|
134 | return cost >> 8; | |||
|
135 | } | |||
|
136 | ||||
|
137 | symbolEncodingType_e | |||
|
138 | ZSTD_selectEncodingType( | |||
|
139 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, | |||
|
140 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, | |||
|
141 | FSE_CTable const* prevCTable, | |||
|
142 | short const* defaultNorm, U32 defaultNormLog, | |||
|
143 | ZSTD_defaultPolicy_e const isDefaultAllowed, | |||
|
144 | ZSTD_strategy const strategy) | |||
|
145 | { | |||
|
146 | ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); | |||
|
147 | if (mostFrequent == nbSeq) { | |||
|
148 | *repeatMode = FSE_repeat_none; | |||
|
149 | if (isDefaultAllowed && nbSeq <= 2) { | |||
|
150 | /* Prefer set_basic over set_rle when there are 2 or less symbols, | |||
|
151 | * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. | |||
|
152 | * If basic encoding isn't possible, always choose RLE. | |||
|
153 | */ | |||
|
154 | DEBUGLOG(5, "Selected set_basic"); | |||
|
155 | return set_basic; | |||
|
156 | } | |||
|
157 | DEBUGLOG(5, "Selected set_rle"); | |||
|
158 | return set_rle; | |||
|
159 | } | |||
|
160 | if (strategy < ZSTD_lazy) { | |||
|
161 | if (isDefaultAllowed) { | |||
|
162 | size_t const staticFse_nbSeq_max = 1000; | |||
|
163 | size_t const mult = 10 - strategy; | |||
|
164 | size_t const baseLog = 3; | |||
|
165 | size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ | |||
|
166 | assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ | |||
|
167 | assert(mult <= 9 && mult >= 7); | |||
|
168 | if ( (*repeatMode == FSE_repeat_valid) | |||
|
169 | && (nbSeq < staticFse_nbSeq_max) ) { | |||
|
170 | DEBUGLOG(5, "Selected set_repeat"); | |||
|
171 | return set_repeat; | |||
|
172 | } | |||
|
173 | if ( (nbSeq < dynamicFse_nbSeq_min) | |||
|
174 | || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { | |||
|
175 | DEBUGLOG(5, "Selected set_basic"); | |||
|
176 | /* The format allows default tables to be repeated, but it isn't useful. | |||
|
177 | * When using simple heuristics to select encoding type, we don't want | |||
|
178 | * to confuse these tables with dictionaries. When running more careful | |||
|
179 | * analysis, we don't need to waste time checking both repeating tables | |||
|
180 | * and default tables. | |||
|
181 | */ | |||
|
182 | *repeatMode = FSE_repeat_none; | |||
|
183 | return set_basic; | |||
|
184 | } | |||
|
185 | } | |||
|
186 | } else { | |||
|
187 | size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); | |||
|
188 | size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); | |||
|
189 | size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); | |||
|
190 | size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); | |||
|
191 | ||||
|
192 | if (isDefaultAllowed) { | |||
|
193 | assert(!ZSTD_isError(basicCost)); | |||
|
194 | assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); | |||
|
195 | } | |||
|
196 | assert(!ZSTD_isError(NCountCost)); | |||
|
197 | assert(compressedCost < ERROR(maxCode)); | |||
|
198 | DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", | |||
|
199 | (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); | |||
|
200 | if (basicCost <= repeatCost && basicCost <= compressedCost) { | |||
|
201 | DEBUGLOG(5, "Selected set_basic"); | |||
|
202 | assert(isDefaultAllowed); | |||
|
203 | *repeatMode = FSE_repeat_none; | |||
|
204 | return set_basic; | |||
|
205 | } | |||
|
206 | if (repeatCost <= compressedCost) { | |||
|
207 | DEBUGLOG(5, "Selected set_repeat"); | |||
|
208 | assert(!ZSTD_isError(repeatCost)); | |||
|
209 | return set_repeat; | |||
|
210 | } | |||
|
211 | assert(compressedCost < basicCost && compressedCost < repeatCost); | |||
|
212 | } | |||
|
213 | DEBUGLOG(5, "Selected set_compressed"); | |||
|
214 | *repeatMode = FSE_repeat_check; | |||
|
215 | return set_compressed; | |||
|
216 | } | |||
|
217 | ||||
|
218 | size_t | |||
|
219 | ZSTD_buildCTable(void* dst, size_t dstCapacity, | |||
|
220 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, | |||
|
221 | unsigned* count, U32 max, | |||
|
222 | const BYTE* codeTable, size_t nbSeq, | |||
|
223 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, | |||
|
224 | const FSE_CTable* prevCTable, size_t prevCTableSize, | |||
|
225 | void* workspace, size_t workspaceSize) | |||
|
226 | { | |||
|
227 | BYTE* op = (BYTE*)dst; | |||
|
228 | const BYTE* const oend = op + dstCapacity; | |||
|
229 | DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); | |||
|
230 | ||||
|
231 | switch (type) { | |||
|
232 | case set_rle: | |||
|
233 | FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); | |||
|
234 | RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); | |||
|
235 | *op = codeTable[0]; | |||
|
236 | return 1; | |||
|
237 | case set_repeat: | |||
|
238 | memcpy(nextCTable, prevCTable, prevCTableSize); | |||
|
239 | return 0; | |||
|
240 | case set_basic: | |||
|
241 | FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ | |||
|
242 | return 0; | |||
|
243 | case set_compressed: { | |||
|
244 | S16 norm[MaxSeq + 1]; | |||
|
245 | size_t nbSeq_1 = nbSeq; | |||
|
246 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |||
|
247 | if (count[codeTable[nbSeq-1]] > 1) { | |||
|
248 | count[codeTable[nbSeq-1]]--; | |||
|
249 | nbSeq_1--; | |||
|
250 | } | |||
|
251 | assert(nbSeq_1 > 1); | |||
|
252 | FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); | |||
|
253 | { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ | |||
|
254 | FORWARD_IF_ERROR(NCountSize); | |||
|
255 | FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); | |||
|
256 | return NCountSize; | |||
|
257 | } | |||
|
258 | } | |||
|
259 | default: assert(0); RETURN_ERROR(GENERIC); | |||
|
260 | } | |||
|
261 | } | |||
|
262 | ||||
|
263 | FORCE_INLINE_TEMPLATE size_t | |||
|
264 | ZSTD_encodeSequences_body( | |||
|
265 | void* dst, size_t dstCapacity, | |||
|
266 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |||
|
267 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |||
|
268 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |||
|
269 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |||
|
270 | { | |||
|
271 | BIT_CStream_t blockStream; | |||
|
272 | FSE_CState_t stateMatchLength; | |||
|
273 | FSE_CState_t stateOffsetBits; | |||
|
274 | FSE_CState_t stateLitLength; | |||
|
275 | ||||
|
276 | RETURN_ERROR_IF( | |||
|
277 | ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), | |||
|
278 | dstSize_tooSmall, "not enough space remaining"); | |||
|
279 | DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", | |||
|
280 | (int)(blockStream.endPtr - blockStream.startPtr), | |||
|
281 | (unsigned)dstCapacity); | |||
|
282 | ||||
|
283 | /* first symbols */ | |||
|
284 | FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); | |||
|
285 | FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); | |||
|
286 | FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); | |||
|
287 | BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); | |||
|
288 | if (MEM_32bits()) BIT_flushBits(&blockStream); | |||
|
289 | BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); | |||
|
290 | if (MEM_32bits()) BIT_flushBits(&blockStream); | |||
|
291 | if (longOffsets) { | |||
|
292 | U32 const ofBits = ofCodeTable[nbSeq-1]; | |||
|
293 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |||
|
294 | if (extraBits) { | |||
|
295 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); | |||
|
296 | BIT_flushBits(&blockStream); | |||
|
297 | } | |||
|
298 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, | |||
|
299 | ofBits - extraBits); | |||
|
300 | } else { | |||
|
301 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); | |||
|
302 | } | |||
|
303 | BIT_flushBits(&blockStream); | |||
|
304 | ||||
|
305 | { size_t n; | |||
|
306 | for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ | |||
|
307 | BYTE const llCode = llCodeTable[n]; | |||
|
308 | BYTE const ofCode = ofCodeTable[n]; | |||
|
309 | BYTE const mlCode = mlCodeTable[n]; | |||
|
310 | U32 const llBits = LL_bits[llCode]; | |||
|
311 | U32 const ofBits = ofCode; | |||
|
312 | U32 const mlBits = ML_bits[mlCode]; | |||
|
313 | DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", | |||
|
314 | (unsigned)sequences[n].litLength, | |||
|
315 | (unsigned)sequences[n].matchLength + MINMATCH, | |||
|
316 | (unsigned)sequences[n].offset); | |||
|
317 | /* 32b*/ /* 64b*/ | |||
|
318 | /* (7)*/ /* (7)*/ | |||
|
319 | FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ | |||
|
320 | FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ | |||
|
321 | if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ | |||
|
322 | FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ | |||
|
323 | if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) | |||
|
324 | BIT_flushBits(&blockStream); /* (7)*/ | |||
|
325 | BIT_addBits(&blockStream, sequences[n].litLength, llBits); | |||
|
326 | if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); | |||
|
327 | BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); | |||
|
328 | if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); | |||
|
329 | if (longOffsets) { | |||
|
330 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |||
|
331 | if (extraBits) { | |||
|
332 | BIT_addBits(&blockStream, sequences[n].offset, extraBits); | |||
|
333 | BIT_flushBits(&blockStream); /* (7)*/ | |||
|
334 | } | |||
|
335 | BIT_addBits(&blockStream, sequences[n].offset >> extraBits, | |||
|
336 | ofBits - extraBits); /* 31 */ | |||
|
337 | } else { | |||
|
338 | BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ | |||
|
339 | } | |||
|
340 | BIT_flushBits(&blockStream); /* (7)*/ | |||
|
341 | DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); | |||
|
342 | } } | |||
|
343 | ||||
|
344 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); | |||
|
345 | FSE_flushCState(&blockStream, &stateMatchLength); | |||
|
346 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); | |||
|
347 | FSE_flushCState(&blockStream, &stateOffsetBits); | |||
|
348 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); | |||
|
349 | FSE_flushCState(&blockStream, &stateLitLength); | |||
|
350 | ||||
|
351 | { size_t const streamSize = BIT_closeCStream(&blockStream); | |||
|
352 | RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); | |||
|
353 | return streamSize; | |||
|
354 | } | |||
|
355 | } | |||
|
356 | ||||
|
357 | static size_t | |||
|
358 | ZSTD_encodeSequences_default( | |||
|
359 | void* dst, size_t dstCapacity, | |||
|
360 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |||
|
361 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |||
|
362 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |||
|
363 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |||
|
364 | { | |||
|
365 | return ZSTD_encodeSequences_body(dst, dstCapacity, | |||
|
366 | CTable_MatchLength, mlCodeTable, | |||
|
367 | CTable_OffsetBits, ofCodeTable, | |||
|
368 | CTable_LitLength, llCodeTable, | |||
|
369 | sequences, nbSeq, longOffsets); | |||
|
370 | } | |||
|
371 | ||||
|
372 | ||||
|
#if DYNAMIC_BMI2

/* ZSTD_encodeSequences_bmi2() :
 * Instance of ZSTD_encodeSequences_body() compiled with TARGET_ATTRIBUTE("bmi2").
 * Only defined when DYNAMIC_BMI2 is enabled.
 * Arguments and return value are forwarded unchanged. */
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    size_t const written = ZSTD_encodeSequences_body(
                                dst, dstCapacity,
                                CTable_MatchLength, mlCodeTable,
                                CTable_OffsetBits, ofCodeTable,
                                CTable_LitLength, llCodeTable,
                                sequences, nbSeq, longOffsets);
    return written;
}

#endif
|
391 | ||||
|
392 | size_t ZSTD_encodeSequences( | |||
|
393 | void* dst, size_t dstCapacity, | |||
|
394 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |||
|
395 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |||
|
396 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |||
|
397 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) | |||
|
398 | { | |||
|
399 | DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); | |||
|
400 | #if DYNAMIC_BMI2 | |||
|
401 | if (bmi2) { | |||
|
402 | return ZSTD_encodeSequences_bmi2(dst, dstCapacity, | |||
|
403 | CTable_MatchLength, mlCodeTable, | |||
|
404 | CTable_OffsetBits, ofCodeTable, | |||
|
405 | CTable_LitLength, llCodeTable, | |||
|
406 | sequences, nbSeq, longOffsets); | |||
|
407 | } | |||
|
408 | #endif | |||
|
409 | (void)bmi2; | |||
|
410 | return ZSTD_encodeSequences_default(dst, dstCapacity, | |||
|
411 | CTable_MatchLength, mlCodeTable, | |||
|
412 | CTable_OffsetBits, ofCodeTable, | |||
|
413 | CTable_LitLength, llCodeTable, | |||
|
414 | sequences, nbSeq, longOffsets); | |||
|
415 | } |
@@ -0,0 +1,47 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | #ifndef ZSTD_COMPRESS_SEQUENCES_H | |||
|
12 | #define ZSTD_COMPRESS_SEQUENCES_H | |||
|
13 | ||||
|
14 | #include "fse.h" /* FSE_repeat, FSE_CTable */ | |||
|
15 | #include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ | |||
|
16 | ||||
|
/* Tells ZSTD_selectEncodingType() whether the default table (set_basic) may be selected.
 * ZSTD_defaultDisallowed must remain 0 : the value is tested as a boolean. */
typedef enum {
    ZSTD_defaultDisallowed = 0,   /* set_basic must not be chosen */
    ZSTD_defaultAllowed = 1       /* set_basic is a valid choice */
} ZSTD_defaultPolicy_e;
|
21 | ||||
|
22 | symbolEncodingType_e | |||
|
23 | ZSTD_selectEncodingType( | |||
|
24 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, | |||
|
25 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, | |||
|
26 | FSE_CTable const* prevCTable, | |||
|
27 | short const* defaultNorm, U32 defaultNormLog, | |||
|
28 | ZSTD_defaultPolicy_e const isDefaultAllowed, | |||
|
29 | ZSTD_strategy const strategy); | |||
|
30 | ||||
|
31 | size_t | |||
|
32 | ZSTD_buildCTable(void* dst, size_t dstCapacity, | |||
|
33 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, | |||
|
34 | unsigned* count, U32 max, | |||
|
35 | const BYTE* codeTable, size_t nbSeq, | |||
|
36 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, | |||
|
37 | const FSE_CTable* prevCTable, size_t prevCTableSize, | |||
|
38 | void* workspace, size_t workspaceSize); | |||
|
39 | ||||
|
40 | size_t ZSTD_encodeSequences( | |||
|
41 | void* dst, size_t dstCapacity, | |||
|
42 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |||
|
43 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |||
|
44 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |||
|
45 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); | |||
|
46 | ||||
|
47 | #endif /* ZSTD_COMPRESS_SEQUENCES_H */ |
@@ -49,6 +49,10 b' contrib/python-zstandard/zstd/compress/h' | |||||
49 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
49 | contrib/python-zstandard/zstd/compress/huf_compress.c | |
50 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
50 | contrib/python-zstandard/zstd/compress/zstd_compress.c | |
51 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h |
|
51 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h | |
|
52 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.c | |||
|
53 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.h | |||
|
54 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c | |||
|
55 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h | |||
52 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c |
|
56 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c | |
53 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h |
|
57 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h | |
54 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
|
58 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
@@ -44,6 +44,7 b' Actions Blocking Release' | |||||
44 | zstd API. |
|
44 | zstd API. | |
45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. |
|
45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. | |
46 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. |
|
46 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. | |
|
47 | * Support ``ZSTD_c_literalCompressionMode`` compression parameter. | |||
47 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving |
|
48 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving | |
48 | compression parameters. |
|
49 | compression parameters. | |
49 | * Consider exposing ``ZSTDMT_toFlushNow()``. |
|
50 | * Consider exposing ``ZSTDMT_toFlushNow()``. | |
@@ -66,10 +67,39 b' Other Actions Not Blocking Release' | |||||
66 | * API for ensuring max memory ceiling isn't exceeded. |
|
67 | * API for ensuring max memory ceiling isn't exceeded. | |
67 | * Move off nose for testing. |
|
68 | * Move off nose for testing. | |
68 |
|
69 | |||
|
70 | 0.12.0 (released 2019-09-15) | |||
|
71 | ============================ | |||
|
72 | ||||
|
73 | Backwards Compatibility Notes | |||
|
74 | ----------------------------- | |||
|
75 | ||||
|
76 | * Support for Python 3.4 has been dropped since Python 3.4 is no longer | |||
|
77 | a supported Python version upstream. (But it will likely continue to | |||
|
78 | work until Python 2.7 support is dropped and we port to Python 3.5+ | |||
|
79 | APIs.) | |||
|
80 | ||||
|
81 | Bug Fixes | |||
|
82 | --------- | |||
|
83 | ||||
|
84 | * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91). | |||
|
85 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). | |||
|
86 | ||||
|
87 | Changes | |||
|
88 | ------- | |||
|
89 | ||||
|
90 | * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI). | |||
|
91 | * Switched to ``pytest`` for running tests (from ``nose``). | |||
|
92 | * Bundled zstandard library upgraded from 1.3.8 to 1.4.3. | |||
|
93 | ||||
|
94 | 0.11.1 (released 2019-05-14) | |||
|
95 | ============================ | |||
|
96 | ||||
|
97 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). | |||
|
98 | ||||
69 | 0.11.0 (released 2019-02-24) |
|
99 | 0.11.0 (released 2019-02-24) | |
70 | ============================ |
|
100 | ============================ | |
71 |
|
101 | |||
72 |
Backwards Compatibility No |
|
102 | Backwards Compatibility Notes | |
73 | ----------------------------- |
|
103 | ----------------------------- | |
74 |
|
104 | |||
75 | * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0`` |
|
105 | * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0`` |
@@ -15,7 +15,7 b' The canonical home for this project live' | |||||
15 | the author. For convenience, that repository is frequently synchronized to |
|
15 | the author. For convenience, that repository is frequently synchronized to | |
16 | https://github.com/indygreg/python-zstandard. |
|
16 | https://github.com/indygreg/python-zstandard. | |
17 |
|
17 | |||
18 |
| |ci-status| |
|
18 | | |ci-status| | |
19 |
|
19 | |||
20 | Requirements |
|
20 | Requirements | |
21 | ============ |
|
21 | ============ | |
@@ -1598,9 +1598,5 b' their work, consider donating some money' | |||||
1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
|
1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted | |
1599 | :alt: Donate via PayPal |
|
1599 | :alt: Donate via PayPal | |
1600 |
|
1600 | |||
1601 |
.. |ci-status| image:: https:// |
|
1601 | .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master | |
1602 | :target: https://travis-ci.org/indygreg/python-zstandard |
|
1602 | :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master | |
1603 |
|
||||
1604 | .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true |
|
|||
1605 | :target: https://ci.appveyor.com/project/indygreg/python-zstandard |
|
|||
1606 | :alt: Windows build status |
|
@@ -11,7 +11,7 b'' | |||||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) { |
|
13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) { | |
14 | size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value); |
|
14 | size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value); | |
15 | if (ZSTD_isError(zresult)) { |
|
15 | if (ZSTD_isError(zresult)) { | |
16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", |
|
16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", | |
17 | ZSTD_getErrorName(zresult)); |
|
17 | ZSTD_getErrorName(zresult)); | |
@@ -25,11 +25,11 b' int set_parameter(ZSTD_CCtx_params* para' | |||||
25 |
|
25 | |||
26 | #define TRY_COPY_PARAMETER(source, dest, param) { \ |
|
26 | #define TRY_COPY_PARAMETER(source, dest, param) { \ | |
27 | int result; \ |
|
27 | int result; \ | |
28 | size_t zresult = ZSTD_CCtxParam_getParameter(source, param, &result); \ |
|
28 | size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \ | |
29 | if (ZSTD_isError(zresult)) { \ |
|
29 | if (ZSTD_isError(zresult)) { \ | |
30 | return 1; \ |
|
30 | return 1; \ | |
31 | } \ |
|
31 | } \ | |
32 | zresult = ZSTD_CCtxParam_setParameter(dest, param, result); \ |
|
32 | zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \ | |
33 | if (ZSTD_isError(zresult)) { \ |
|
33 | if (ZSTD_isError(zresult)) { \ | |
34 | return 1; \ |
|
34 | return 1; \ | |
35 | } \ |
|
35 | } \ | |
@@ -78,7 +78,7 b' int reset_params(ZstdCompressionParamete' | |||||
78 | } |
|
78 | } | |
79 |
|
79 | |||
80 | #define TRY_GET_PARAMETER(params, param, value) { \ |
|
80 | #define TRY_GET_PARAMETER(params, param, value) { \ | |
81 | size_t zresult = ZSTD_CCtxParam_getParameter(params, param, value); \ |
|
81 | size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \ | |
82 | if (ZSTD_isError(zresult)) { \ |
|
82 | if (ZSTD_isError(zresult)) { \ | |
83 | PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \ |
|
83 | PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \ | |
84 | return 1; \ |
|
84 | return 1; \ | |
@@ -436,7 +436,7 b' static void ZstdCompressionParameters_de' | |||||
436 | int result; \ |
|
436 | int result; \ | |
437 | size_t zresult; \ |
|
437 | size_t zresult; \ | |
438 | ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \ |
|
438 | ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \ | |
439 | zresult = ZSTD_CCtxParam_getParameter(p->params, param, &result); \ |
|
439 | zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \ | |
440 | if (ZSTD_isError(zresult)) { \ |
|
440 | if (ZSTD_isError(zresult)) { \ | |
441 | PyErr_Format(ZstdError, "unable to get compression parameter: %s", \ |
|
441 | PyErr_Format(ZstdError, "unable to get compression parameter: %s", \ | |
442 | ZSTD_getErrorName(zresult)); \ |
|
442 | ZSTD_getErrorName(zresult)); \ |
@@ -653,6 +653,8 b' static PyObject* reader_seek(ZstdDecompr' | |||||
653 |
|
653 | |||
654 | readSize = PyBytes_GET_SIZE(readResult); |
|
654 | readSize = PyBytes_GET_SIZE(readResult); | |
655 |
|
655 | |||
|
656 | Py_CLEAR(readResult); | |||
|
657 | ||||
656 | /* Empty read means EOF. */ |
|
658 | /* Empty read means EOF. */ | |
657 | if (!readSize) { |
|
659 | if (!readSize) { | |
658 | break; |
|
660 | break; |
@@ -16,7 +16,7 b'' | |||||
16 | #include <zdict.h> |
|
16 | #include <zdict.h> | |
17 |
|
17 | |||
18 | /* Remember to change the string in zstandard/__init__ as well */ |
|
18 | /* Remember to change the string in zstandard/__init__ as well */ | |
19 |
#define PYTHON_ZSTANDARD_VERSION "0.1 |
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.12.0" | |
20 |
|
20 | |||
21 | typedef enum { |
|
21 | typedef enum { | |
22 | compressorobj_flush_finish, |
|
22 | compressorobj_flush_finish, |
@@ -29,6 +29,8 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
29 | 'compress/hist.c', |
|
29 | 'compress/hist.c', | |
30 | 'compress/huf_compress.c', |
|
30 | 'compress/huf_compress.c', | |
31 | 'compress/zstd_compress.c', |
|
31 | 'compress/zstd_compress.c', | |
|
32 | 'compress/zstd_compress_literals.c', | |||
|
33 | 'compress/zstd_compress_sequences.c', | |||
32 | 'compress/zstd_double_fast.c', |
|
34 | 'compress/zstd_double_fast.c', | |
33 | 'compress/zstd_fast.c', |
|
35 | 'compress/zstd_fast.c', | |
34 | 'compress/zstd_lazy.c', |
|
36 | 'compress/zstd_lazy.c', | |
@@ -119,7 +121,11 b' def preprocess(path):' | |||||
119 | os.close(fd) |
|
121 | os.close(fd) | |
120 |
|
122 | |||
121 | try: |
|
123 | try: | |
122 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE) |
|
124 | env = dict(os.environ) | |
|
125 | if getattr(compiler, '_paths', None): | |||
|
126 | env['PATH'] = compiler._paths | |||
|
127 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, | |||
|
128 | env=env) | |||
123 | output = process.communicate()[0] |
|
129 | output = process.communicate()[0] | |
124 | ret = process.poll() |
|
130 | ret = process.poll() | |
125 | if ret: |
|
131 | if ret: |
@@ -100,7 +100,6 b' setup(' | |||||
100 | 'License :: OSI Approved :: BSD License', |
|
100 | 'License :: OSI Approved :: BSD License', | |
101 | 'Programming Language :: C', |
|
101 | 'Programming Language :: C', | |
102 | 'Programming Language :: Python :: 2.7', |
|
102 | 'Programming Language :: Python :: 2.7', | |
103 | 'Programming Language :: Python :: 3.4', |
|
|||
104 | 'Programming Language :: Python :: 3.5', |
|
103 | 'Programming Language :: Python :: 3.5', | |
105 | 'Programming Language :: Python :: 3.6', |
|
104 | 'Programming Language :: Python :: 3.6', | |
106 | 'Programming Language :: Python :: 3.7', |
|
105 | 'Programming Language :: Python :: 3.7', |
@@ -22,6 +22,8 b" zstd_sources = ['zstd/%s' % p for p in (" | |||||
22 | 'compress/fse_compress.c', |
|
22 | 'compress/fse_compress.c', | |
23 | 'compress/hist.c', |
|
23 | 'compress/hist.c', | |
24 | 'compress/huf_compress.c', |
|
24 | 'compress/huf_compress.c', | |
|
25 | 'compress/zstd_compress_literals.c', | |||
|
26 | 'compress/zstd_compress_sequences.c', | |||
25 | 'compress/zstd_compress.c', |
|
27 | 'compress/zstd_compress.c', | |
26 | 'compress/zstd_double_fast.c', |
|
28 | 'compress/zstd_double_fast.c', | |
27 | 'compress/zstd_fast.c', |
|
29 | 'compress/zstd_fast.c', |
@@ -1038,7 +1038,7 b' class TestCompressor_stream_writer(unitt' | |||||
1038 | d = zstd.train_dictionary(8192, samples) |
|
1038 | d = zstd.train_dictionary(8192, samples) | |
1039 |
|
1039 | |||
1040 | h = hashlib.sha1(d.as_bytes()).hexdigest() |
|
1040 | h = hashlib.sha1(d.as_bytes()).hexdigest() | |
1041 | self.assertEqual(h, '88ca0d38332aff379d4ced166a51c280a7679aad') |
|
1041 | self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e') | |
1042 |
|
1042 | |||
1043 | buffer = NonClosingBytesIO() |
|
1043 | buffer = NonClosingBytesIO() | |
1044 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
1044 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) | |
@@ -1056,7 +1056,7 b' class TestCompressor_stream_writer(unitt' | |||||
1056 | self.assertFalse(params.has_checksum) |
|
1056 | self.assertFalse(params.has_checksum) | |
1057 |
|
1057 | |||
1058 | h = hashlib.sha1(compressed).hexdigest() |
|
1058 | h = hashlib.sha1(compressed).hexdigest() | |
1059 | self.assertEqual(h, '8703b4316f274d26697ea5dd480f29c08e85d940') |
|
1059 | self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06') | |
1060 |
|
1060 | |||
1061 | source = b'foo' + b'bar' + (b'foo' * 16384) |
|
1061 | source = b'foo' + b'bar' + (b'foo' * 16384) | |
1062 |
|
1062 | |||
@@ -1091,7 +1091,7 b' class TestCompressor_stream_writer(unitt' | |||||
1091 | self.assertFalse(params.has_checksum) |
|
1091 | self.assertFalse(params.has_checksum) | |
1092 |
|
1092 | |||
1093 | h = hashlib.sha1(compressed).hexdigest() |
|
1093 | h = hashlib.sha1(compressed).hexdigest() | |
1094 | self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef') |
|
1094 | self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b') | |
1095 |
|
1095 | |||
1096 | def test_write_checksum(self): |
|
1096 | def test_write_checksum(self): | |
1097 | no_checksum = NonClosingBytesIO() |
|
1097 | no_checksum = NonClosingBytesIO() |
@@ -100,7 +100,7 b' class TestCompressionParameters(unittest' | |||||
100 | strategy=zstd.STRATEGY_DFAST) |
|
100 | strategy=zstd.STRATEGY_DFAST) | |
101 |
|
101 | |||
102 | # 32-bit has slightly different values from 64-bit. |
|
102 | # 32-bit has slightly different values from 64-bit. | |
103 |
self.assertAlmostEqual(p.estimated_compression_context_size(), 1294 |
|
103 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, | |
104 | delta=250) |
|
104 | delta=250) | |
105 |
|
105 | |||
106 | def test_strategy(self): |
|
106 | def test_strategy(self): |
@@ -12,9 +12,9 b' from . common import (' | |||||
12 | @make_cffi |
|
12 | @make_cffi | |
13 | class TestModuleAttributes(unittest.TestCase): |
|
13 | class TestModuleAttributes(unittest.TestCase): | |
14 | def test_version(self): |
|
14 | def test_version(self): | |
15 |
self.assertEqual(zstd.ZSTD_VERSION, (1, |
|
15 | self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3)) | |
16 |
|
16 | |||
17 |
self.assertEqual(zstd.__version__, '0.1 |
|
17 | self.assertEqual(zstd.__version__, '0.12.0') | |
18 |
|
18 | |||
19 | def test_constants(self): |
|
19 | def test_constants(self): | |
20 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
20 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
@@ -7,6 +7,7 b' import zstandard as zstd' | |||||
7 | from . common import ( |
|
7 | from . common import ( | |
8 | generate_samples, |
|
8 | generate_samples, | |
9 | make_cffi, |
|
9 | make_cffi, | |
|
10 | random_input_data, | |||
10 | ) |
|
11 | ) | |
11 |
|
12 | |||
12 | if sys.version_info[0] >= 3: |
|
13 | if sys.version_info[0] >= 3: | |
@@ -29,7 +30,7 b' class TestTrainDictionary(unittest.TestC' | |||||
29 | zstd.train_dictionary(8192, [u'foo']) |
|
30 | zstd.train_dictionary(8192, [u'foo']) | |
30 |
|
31 | |||
31 | def test_no_params(self): |
|
32 | def test_no_params(self): | |
32 |
d = zstd.train_dictionary(8192, |
|
33 | d = zstd.train_dictionary(8192, random_input_data()) | |
33 | self.assertIsInstance(d.dict_id(), int_type) |
|
34 | self.assertIsInstance(d.dict_id(), int_type) | |
34 |
|
35 | |||
35 | # The dictionary ID may be different across platforms. |
|
36 | # The dictionary ID may be different across platforms. |
@@ -62,4 +62,4 b' else:' | |||||
62 | 'cext, or cffi' % _module_policy) |
|
62 | 'cext, or cffi' % _module_policy) | |
63 |
|
63 | |||
64 | # Keep this in sync with python-zstandard.h. |
|
64 | # Keep this in sync with python-zstandard.h. | |
65 |
__version__ = '0.1 |
|
65 | __version__ = '0.12.0' |
@@ -416,7 +416,7 b' def estimate_decompression_context_size(' | |||||
416 |
|
416 | |||
417 |
|
417 | |||
418 | def _set_compression_parameter(params, param, value): |
|
418 | def _set_compression_parameter(params, param, value): | |
419 | zresult = lib.ZSTD_CCtxParam_setParameter(params, param, value) |
|
419 | zresult = lib.ZSTD_CCtxParams_setParameter(params, param, value) | |
420 | if lib.ZSTD_isError(zresult): |
|
420 | if lib.ZSTD_isError(zresult): | |
421 | raise ZstdError('unable to set compression context parameter: %s' % |
|
421 | raise ZstdError('unable to set compression context parameter: %s' % | |
422 | _zstd_error(zresult)) |
|
422 | _zstd_error(zresult)) | |
@@ -425,7 +425,7 b' def _set_compression_parameter(params, p' | |||||
425 | def _get_compression_parameter(params, param): |
|
425 | def _get_compression_parameter(params, param): | |
426 | result = ffi.new('int *') |
|
426 | result = ffi.new('int *') | |
427 |
|
427 | |||
428 | zresult = lib.ZSTD_CCtxParam_getParameter(params, param, result) |
|
428 | zresult = lib.ZSTD_CCtxParams_getParameter(params, param, result) | |
429 | if lib.ZSTD_isError(zresult): |
|
429 | if lib.ZSTD_isError(zresult): | |
430 | raise ZstdError('unable to get compression context parameter: %s' % |
|
430 | raise ZstdError('unable to get compression context parameter: %s' % | |
431 | _zstd_error(zresult)) |
|
431 | _zstd_error(zresult)) |
@@ -210,7 +210,7 b' void zstd_module_init(PyObject* m) {' | |||||
210 | We detect this mismatch here and refuse to load the module if this |
|
210 | We detect this mismatch here and refuse to load the module if this | |
211 | scenario is detected. |
|
211 | scenario is detected. | |
212 | */ |
|
212 | */ | |
213 |
if (ZSTD_VERSION_NUMBER != 10 |
|
213 | if (ZSTD_VERSION_NUMBER != 10403 || ZSTD_versionNumber() != 10403) { | |
214 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
214 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); | |
215 | return; |
|
215 | return; | |
216 | } |
|
216 | } |
@@ -57,6 +57,8 b' extern "C" {' | |||||
57 | =========================================*/ |
|
57 | =========================================*/ | |
58 | #if defined(__BMI__) && defined(__GNUC__) |
|
58 | #if defined(__BMI__) && defined(__GNUC__) | |
59 | # include <immintrin.h> /* support for bextr (experimental) */ |
|
59 | # include <immintrin.h> /* support for bextr (experimental) */ | |
|
60 | #elif defined(__ICCARM__) | |||
|
61 | # include <intrinsics.h> | |||
60 | #endif |
|
62 | #endif | |
61 |
|
63 | |||
62 | #define STREAM_ACCUMULATOR_MIN_32 25 |
|
64 | #define STREAM_ACCUMULATOR_MIN_32 25 | |
@@ -163,6 +165,8 b' MEM_STATIC unsigned BIT_highbit32 (U32 v' | |||||
163 | return (unsigned) r; |
|
165 | return (unsigned) r; | |
164 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
|
166 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ | |
165 | return 31 - __builtin_clz (val); |
|
167 | return 31 - __builtin_clz (val); | |
|
168 | # elif defined(__ICCARM__) /* IAR Intrinsic */ | |||
|
169 | return 31 - __CLZ(val); | |||
166 | # else /* Software version */ |
|
170 | # else /* Software version */ | |
167 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, |
|
171 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, | |
168 | 11, 14, 16, 18, 22, 25, 3, 30, |
|
172 | 11, 14, 16, 18, 22, 25, 3, 30, |
@@ -23,7 +23,7 b'' | |||||
23 | # define INLINE_KEYWORD |
|
23 | # define INLINE_KEYWORD | |
24 | #endif |
|
24 | #endif | |
25 |
|
25 | |||
26 | #if defined(__GNUC__) |
|
26 | #if defined(__GNUC__) || defined(__ICCARM__) | |
27 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) |
|
27 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) | |
28 | #elif defined(_MSC_VER) |
|
28 | #elif defined(_MSC_VER) | |
29 | # define FORCE_INLINE_ATTR __forceinline |
|
29 | # define FORCE_INLINE_ATTR __forceinline | |
@@ -40,7 +40,7 b'' | |||||
40 |
|
40 | |||
41 | /** |
|
41 | /** | |
42 | * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant |
|
42 | * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant | |
43 |
* parameters. They must be inlined for the compiler to elimin |
|
43 | * parameters. They must be inlined for the compiler to eliminate the constant | |
44 | * branches. |
|
44 | * branches. | |
45 | */ |
|
45 | */ | |
46 | #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR |
|
46 | #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR | |
@@ -65,7 +65,7 b'' | |||||
65 | #ifdef _MSC_VER |
|
65 | #ifdef _MSC_VER | |
66 | # define FORCE_NOINLINE static __declspec(noinline) |
|
66 | # define FORCE_NOINLINE static __declspec(noinline) | |
67 | #else |
|
67 | #else | |
68 | # ifdef __GNUC__ |
|
68 | # if defined(__GNUC__) || defined(__ICCARM__) | |
69 | # define FORCE_NOINLINE static __attribute__((__noinline__)) |
|
69 | # define FORCE_NOINLINE static __attribute__((__noinline__)) | |
70 | # else |
|
70 | # else | |
71 | # define FORCE_NOINLINE static |
|
71 | # define FORCE_NOINLINE static | |
@@ -76,7 +76,7 b'' | |||||
76 | #ifndef __has_attribute |
|
76 | #ifndef __has_attribute | |
77 | #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ |
|
77 | #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ | |
78 | #endif |
|
78 | #endif | |
79 | #if defined(__GNUC__) |
|
79 | #if defined(__GNUC__) || defined(__ICCARM__) | |
80 | # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) |
|
80 | # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) | |
81 | #else |
|
81 | #else | |
82 | # define TARGET_ATTRIBUTE(target) |
|
82 | # define TARGET_ATTRIBUTE(target) | |
@@ -127,6 +127,13 b'' | |||||
127 | } \ |
|
127 | } \ | |
128 | } |
|
128 | } | |
129 |
|
129 | |||
|
130 | /* vectorization */ | |||
|
131 | #if !defined(__clang__) && defined(__GNUC__) | |||
|
132 | # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) | |||
|
133 | #else | |||
|
134 | # define DONT_VECTORIZE | |||
|
135 | #endif | |||
|
136 | ||||
130 | /* disable warnings */ |
|
137 | /* disable warnings */ | |
131 | #ifdef _MSC_VER /* Visual Studio */ |
|
138 | #ifdef _MSC_VER /* Visual Studio */ | |
132 | # include <intrin.h> /* For Visual 2005 */ |
|
139 | # include <intrin.h> /* For Visual 2005 */ |
@@ -358,7 +358,7 b' size_t FSE_decompress_wksp(void* dst, si' | |||||
358 | typedef enum { |
|
358 | typedef enum { | |
359 | FSE_repeat_none, /**< Cannot use the previous table */ |
|
359 | FSE_repeat_none, /**< Cannot use the previous table */ | |
360 | FSE_repeat_check, /**< Can use the previous table but it must be checked */ |
|
360 | FSE_repeat_check, /**< Can use the previous table but it must be checked */ | |
361 | FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */ |
|
361 | FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ | |
362 | } FSE_repeat; |
|
362 | } FSE_repeat; | |
363 |
|
363 | |||
364 | /* ***************************************** |
|
364 | /* ***************************************** |
@@ -102,7 +102,7 b' MEM_STATIC void MEM_check(void) { MEM_ST' | |||||
102 | #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ |
|
102 | #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ | |
103 | # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) |
|
103 | # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) | |
104 | # define MEM_FORCE_MEMORY_ACCESS 2 |
|
104 | # define MEM_FORCE_MEMORY_ACCESS 2 | |
105 | # elif defined(__INTEL_COMPILER) || defined(__GNUC__) |
|
105 | # elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) | |
106 | # define MEM_FORCE_MEMORY_ACCESS 1 |
|
106 | # define MEM_FORCE_MEMORY_ACCESS 1 | |
107 | # endif |
|
107 | # endif | |
108 | #endif |
|
108 | #endif |
@@ -14,8 +14,8 b'' | |||||
14 | * This file will hold wrapper for systems, which do not support pthreads |
|
14 | * This file will hold wrapper for systems, which do not support pthreads | |
15 | */ |
|
15 | */ | |
16 |
|
16 | |||
17 |
/* create fake symbol to avoid empty tr |
|
17 | /* create fake symbol to avoid empty translation unit warning */ | |
18 | int g_ZSTD_threading_useles_symbol; |
|
18 | int g_ZSTD_threading_useless_symbol; | |
19 |
|
19 | |||
20 | #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) |
|
20 | #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) | |
21 |
|
21 |
@@ -53,7 +53,8 b'' | |||||
53 | # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) |
|
53 | # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) | |
54 | # define XXH_FORCE_MEMORY_ACCESS 2 |
|
54 | # define XXH_FORCE_MEMORY_ACCESS 2 | |
55 | # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ |
|
55 | # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ | |
56 | (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) |
|
56 | (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ | |
|
57 | defined(__ICCARM__) | |||
57 | # define XXH_FORCE_MEMORY_ACCESS 1 |
|
58 | # define XXH_FORCE_MEMORY_ACCESS 1 | |
58 | # endif |
|
59 | # endif | |
59 | #endif |
|
60 | #endif | |
@@ -66,10 +67,10 b'' | |||||
66 | /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ |
|
67 | /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ | |
67 |
|
68 | |||
68 | /*!XXH_FORCE_NATIVE_FORMAT : |
|
69 | /*!XXH_FORCE_NATIVE_FORMAT : | |
69 |
* By default, xxHash library provides endian-independ |
|
70 | * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. | |
70 | * Results are therefore identical for little-endian and big-endian CPU. |
|
71 | * Results are therefore identical for little-endian and big-endian CPU. | |
71 | * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. |
|
72 | * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. | |
72 |
* Should endian-independ |
|
73 | * Should endian-independence be of no importance for your application, you may set the #define below to 1, | |
73 | * to improve speed for Big-endian CPU. |
|
74 | * to improve speed for Big-endian CPU. | |
74 | * This option has no impact on Little_Endian CPU. |
|
75 | * This option has no impact on Little_Endian CPU. | |
75 | */ |
|
76 | */ | |
@@ -120,7 +121,7 b' static void* XXH_memcpy(void* dest, cons' | |||||
120 | # define INLINE_KEYWORD |
|
121 | # define INLINE_KEYWORD | |
121 | #endif |
|
122 | #endif | |
122 |
|
123 | |||
123 | #if defined(__GNUC__) |
|
124 | #if defined(__GNUC__) || defined(__ICCARM__) | |
124 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) |
|
125 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) | |
125 | #elif defined(_MSC_VER) |
|
126 | #elif defined(_MSC_VER) | |
126 | # define FORCE_INLINE_ATTR __forceinline |
|
127 | # define FORCE_INLINE_ATTR __forceinline | |
@@ -206,7 +207,12 b' static U64 XXH_read64(const void* memPtr' | |||||
206 | # define XXH_rotl32(x,r) _rotl(x,r) |
|
207 | # define XXH_rotl32(x,r) _rotl(x,r) | |
207 | # define XXH_rotl64(x,r) _rotl64(x,r) |
|
208 | # define XXH_rotl64(x,r) _rotl64(x,r) | |
208 | #else |
|
209 | #else | |
|
210 | #if defined(__ICCARM__) | |||
|
211 | # include <intrinsics.h> | |||
|
212 | # define XXH_rotl32(x,r) __ROR(x,(32 - r)) | |||
|
213 | #else | |||
209 | # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) |
|
214 | # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) | |
|
215 | #endif | |||
210 | # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) |
|
216 | # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) | |
211 | #endif |
|
217 | #endif | |
212 |
|
218 |
@@ -34,7 +34,6 b'' | |||||
34 | #endif |
|
34 | #endif | |
35 | #include "xxhash.h" /* XXH_reset, update, digest */ |
|
35 | #include "xxhash.h" /* XXH_reset, update, digest */ | |
36 |
|
36 | |||
37 |
|
||||
38 | #if defined (__cplusplus) |
|
37 | #if defined (__cplusplus) | |
39 | extern "C" { |
|
38 | extern "C" { | |
40 | #endif |
|
39 | #endif | |
@@ -53,8 +52,50 b' extern "C" {' | |||||
53 | #undef MAX |
|
52 | #undef MAX | |
54 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) |
|
53 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) | |
55 | #define MAX(a,b) ((a)>(b) ? (a) : (b)) |
|
54 | #define MAX(a,b) ((a)>(b) ? (a) : (b)) | |
56 | #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ |
|
55 | ||
57 | #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ |
|
56 | /** | |
|
57 | * Return the specified error if the condition evaluates to true. | |||
|
58 | * | |||
|
59 | * In debug modes, prints additional information. | |||
|
60 | * In order to do that (particularly, printing the conditional that failed), | |||
|
61 | * this can't just wrap RETURN_ERROR(). | |||
|
62 | */ | |||
|
63 | #define RETURN_ERROR_IF(cond, err, ...) \ | |||
|
64 | if (cond) { \ | |||
|
65 | RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ | |||
|
66 | RAWLOG(3, ": " __VA_ARGS__); \ | |||
|
67 | RAWLOG(3, "\n"); \ | |||
|
68 | return ERROR(err); \ | |||
|
69 | } | |||
|
70 | ||||
|
71 | /** | |||
|
72 | * Unconditionally return the specified error. | |||
|
73 | * | |||
|
74 | * In debug modes, prints additional information. | |||
|
75 | */ | |||
|
76 | #define RETURN_ERROR(err, ...) \ | |||
|
77 | do { \ | |||
|
78 | RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ | |||
|
79 | RAWLOG(3, ": " __VA_ARGS__); \ | |||
|
80 | RAWLOG(3, "\n"); \ | |||
|
81 | return ERROR(err); \ | |||
|
82 | } while(0); | |||
|
83 | ||||
|
84 | /** | |||
|
85 | * If the provided expression evaluates to an error code, returns that error code. | |||
|
86 | * | |||
|
87 | * In debug modes, prints additional information. | |||
|
88 | */ | |||
|
89 | #define FORWARD_IF_ERROR(err, ...) \ | |||
|
90 | do { \ | |||
|
91 | size_t const err_code = (err); \ | |||
|
92 | if (ERR_isError(err_code)) { \ | |||
|
93 | RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ | |||
|
94 | RAWLOG(3, ": " __VA_ARGS__); \ | |||
|
95 | RAWLOG(3, "\n"); \ | |||
|
96 | return err_code; \ | |||
|
97 | } \ | |||
|
98 | } while(0); | |||
58 |
|
99 | |||
59 |
|
100 | |||
60 | /*-************************************* |
|
101 | /*-************************************* | |
@@ -151,19 +192,72 b' static const U32 OF_defaultNormLog = OF_' | |||||
151 | * Shared functions to include for inlining |
|
192 | * Shared functions to include for inlining | |
152 | *********************************************/ |
|
193 | *********************************************/ | |
153 | static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } |
|
194 | static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } | |
|
195 | ||||
154 | #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } |
|
196 | #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } | |
|
197 | static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } | |||
|
198 | #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } | |||
|
199 | ||||
|
200 | #define WILDCOPY_OVERLENGTH 8 | |||
|
201 | #define VECLEN 16 | |||
|
202 | ||||
|
203 | typedef enum { | |||
|
204 | ZSTD_no_overlap, | |||
|
205 | ZSTD_overlap_src_before_dst, | |||
|
206 | /* ZSTD_overlap_dst_before_src, */ | |||
|
207 | } ZSTD_overlap_e; | |||
155 |
|
208 | |||
156 | /*! ZSTD_wildcopy() : |
|
209 | /*! ZSTD_wildcopy() : | |
157 | * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ |
|
210 | * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ | |
158 | #define WILDCOPY_OVERLENGTH 8 |
|
211 | MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE | |
159 |
|
|
212 | void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) | |
160 | { |
|
213 | { | |
|
214 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; | |||
161 | const BYTE* ip = (const BYTE*)src; |
|
215 | const BYTE* ip = (const BYTE*)src; | |
162 | BYTE* op = (BYTE*)dst; |
|
216 | BYTE* op = (BYTE*)dst; | |
163 | BYTE* const oend = op + length; |
|
217 | BYTE* const oend = op + length; | |
164 | do |
|
218 | ||
165 | COPY8(op, ip) |
|
219 | assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); | |
166 | while (op < oend); |
|
220 | if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { | |
|
221 | do | |||
|
222 | COPY8(op, ip) | |||
|
223 | while (op < oend); | |||
|
224 | } | |||
|
225 | else { | |||
|
226 | if ((length & 8) == 0) | |||
|
227 | COPY8(op, ip); | |||
|
228 | do { | |||
|
229 | COPY16(op, ip); | |||
|
230 | } | |||
|
231 | while (op < oend); | |||
|
232 | } | |||
|
233 | } | |||
|
234 | ||||
|
235 | /*! ZSTD_wildcopy_16min() : | |||
|
236 | * same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */ | |||
|
237 | MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE | |||
|
238 | void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) | |||
|
239 | { | |||
|
240 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; | |||
|
241 | const BYTE* ip = (const BYTE*)src; | |||
|
242 | BYTE* op = (BYTE*)dst; | |||
|
243 | BYTE* const oend = op + length; | |||
|
244 | ||||
|
245 | assert(length >= 8); | |||
|
246 | assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); | |||
|
247 | ||||
|
248 | if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { | |||
|
249 | do | |||
|
250 | COPY8(op, ip) | |||
|
251 | while (op < oend); | |||
|
252 | } | |||
|
253 | else { | |||
|
254 | if ((length & 8) == 0) | |||
|
255 | COPY8(op, ip); | |||
|
256 | do { | |||
|
257 | COPY16(op, ip); | |||
|
258 | } | |||
|
259 | while (op < oend); | |||
|
260 | } | |||
167 | } |
|
261 | } | |
168 |
|
262 | |||
169 | MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ |
|
263 | MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ | |
@@ -200,6 +294,17 b' typedef struct {' | |||||
200 | U32 longLengthPos; |
|
294 | U32 longLengthPos; | |
201 | } seqStore_t; |
|
295 | } seqStore_t; | |
202 |
|
296 | |||
|
297 | /** | |||
|
298 | * Contains the compressed frame size and an upper-bound for the decompressed frame size. | |||
|
299 | * Note: before using `compressedSize`, check for errors using ZSTD_isError(). | |||
|
300 | * similarly, before using `decompressedBound`, check for errors using: | |||
|
301 | * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` | |||
|
302 | */ | |||
|
303 | typedef struct { | |||
|
304 | size_t compressedSize; | |||
|
305 | unsigned long long decompressedBound; | |||
|
306 | } ZSTD_frameSizeInfo; /* decompress & legacy */ | |||
|
307 | ||||
203 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ |
|
308 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ | |
204 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ |
|
309 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ | |
205 |
|
310 | |||
@@ -219,6 +324,8 b' MEM_STATIC U32 ZSTD_highbit32(U32 val) ' | |||||
219 | return (unsigned)r; |
|
324 | return (unsigned)r; | |
220 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ |
|
325 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ | |
221 | return 31 - __builtin_clz(val); |
|
326 | return 31 - __builtin_clz(val); | |
|
327 | # elif defined(__ICCARM__) /* IAR Intrinsic */ | |||
|
328 | return 31 - __CLZ(val); | |||
222 | # else /* Software version */ |
|
329 | # else /* Software version */ | |
223 | static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; |
|
330 | static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; | |
224 | U32 v = val; |
|
331 | U32 v = val; |
@@ -129,9 +129,9 b' size_t FSE_buildCTable_wksp(FSE_CTable* ' | |||||
129 | { U32 position = 0; |
|
129 | { U32 position = 0; | |
130 | U32 symbol; |
|
130 | U32 symbol; | |
131 | for (symbol=0; symbol<=maxSymbolValue; symbol++) { |
|
131 | for (symbol=0; symbol<=maxSymbolValue; symbol++) { | |
132 | int nbOccurences; |
|
132 | int nbOccurrences; | |
133 | int const freq = normalizedCounter[symbol]; |
|
133 | int const freq = normalizedCounter[symbol]; | |
134 | for (nbOccurences=0; nbOccurences<freq; nbOccurences++) { |
|
134 | for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) { | |
135 | tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; |
|
135 | tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; | |
136 | position = (position + step) & tableMask; |
|
136 | position = (position + step) & tableMask; | |
137 | while (position > highThreshold) |
|
137 | while (position > highThreshold) |
This diff has been collapsed as it changes many lines, (1558 lines changed) Show them Hide them | |||||
@@ -21,6 +21,8 b'' | |||||
21 | #define HUF_STATIC_LINKING_ONLY |
|
21 | #define HUF_STATIC_LINKING_ONLY | |
22 | #include "huf.h" |
|
22 | #include "huf.h" | |
23 | #include "zstd_compress_internal.h" |
|
23 | #include "zstd_compress_internal.h" | |
|
24 | #include "zstd_compress_sequences.h" | |||
|
25 | #include "zstd_compress_literals.h" | |||
24 | #include "zstd_fast.h" |
|
26 | #include "zstd_fast.h" | |
25 | #include "zstd_double_fast.h" |
|
27 | #include "zstd_double_fast.h" | |
26 | #include "zstd_lazy.h" |
|
28 | #include "zstd_lazy.h" | |
@@ -103,12 +105,31 b' ZSTD_CCtx* ZSTD_initStaticCCtx(void *wor' | |||||
103 | return cctx; |
|
105 | return cctx; | |
104 | } |
|
106 | } | |
105 |
|
107 | |||
|
108 | /** | |||
|
109 | * Clears and frees all of the dictionaries in the CCtx. | |||
|
110 | */ | |||
|
111 | static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) | |||
|
112 | { | |||
|
113 | ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); | |||
|
114 | ZSTD_freeCDict(cctx->localDict.cdict); | |||
|
115 | memset(&cctx->localDict, 0, sizeof(cctx->localDict)); | |||
|
116 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); | |||
|
117 | cctx->cdict = NULL; | |||
|
118 | } | |||
|
119 | ||||
|
120 | static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) | |||
|
121 | { | |||
|
122 | size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; | |||
|
123 | size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); | |||
|
124 | return bufferSize + cdictSize; | |||
|
125 | } | |||
|
126 | ||||
106 | static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) |
|
127 | static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) | |
107 | { |
|
128 | { | |
108 | assert(cctx != NULL); |
|
129 | assert(cctx != NULL); | |
109 | assert(cctx->staticSize == 0); |
|
130 | assert(cctx->staticSize == 0); | |
110 | ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; |
|
131 | ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; | |
111 | ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL; |
|
132 | ZSTD_clearAllDicts(cctx); | |
112 | #ifdef ZSTD_MULTITHREAD |
|
133 | #ifdef ZSTD_MULTITHREAD | |
113 | ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; |
|
134 | ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; | |
114 | #endif |
|
135 | #endif | |
@@ -117,7 +138,8 b' static void ZSTD_freeCCtxContent(ZSTD_CC' | |||||
117 | size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) |
|
138 | size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) | |
118 | { |
|
139 | { | |
119 | if (cctx==NULL) return 0; /* support free on NULL */ |
|
140 | if (cctx==NULL) return 0; /* support free on NULL */ | |
120 | if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */ |
|
141 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, | |
|
142 | "not compatible with static CCtx"); | |||
121 | ZSTD_freeCCtxContent(cctx); |
|
143 | ZSTD_freeCCtxContent(cctx); | |
122 | ZSTD_free(cctx, cctx->customMem); |
|
144 | ZSTD_free(cctx, cctx->customMem); | |
123 | return 0; |
|
145 | return 0; | |
@@ -139,7 +161,7 b' size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx*' | |||||
139 | { |
|
161 | { | |
140 | if (cctx==NULL) return 0; /* support sizeof on NULL */ |
|
162 | if (cctx==NULL) return 0; /* support sizeof on NULL */ | |
141 | return sizeof(*cctx) + cctx->workSpaceSize |
|
163 | return sizeof(*cctx) + cctx->workSpaceSize | |
142 |
+ ZSTD_sizeof_ |
|
164 | + ZSTD_sizeof_localDict(cctx->localDict) | |
143 | + ZSTD_sizeof_mtctx(cctx); |
|
165 | + ZSTD_sizeof_mtctx(cctx); | |
144 | } |
|
166 | } | |
145 |
|
167 | |||
@@ -195,7 +217,7 b' size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_p' | |||||
195 | } |
|
217 | } | |
196 |
|
218 | |||
197 | size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { |
|
219 | size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { | |
198 | if (!cctxParams) { return ERROR(GENERIC); } |
|
220 | RETURN_ERROR_IF(!cctxParams, GENERIC); | |
199 | memset(cctxParams, 0, sizeof(*cctxParams)); |
|
221 | memset(cctxParams, 0, sizeof(*cctxParams)); | |
200 | cctxParams->compressionLevel = compressionLevel; |
|
222 | cctxParams->compressionLevel = compressionLevel; | |
201 | cctxParams->fParams.contentSizeFlag = 1; |
|
223 | cctxParams->fParams.contentSizeFlag = 1; | |
@@ -204,8 +226,8 b' size_t ZSTD_CCtxParams_init(ZSTD_CCtx_pa' | |||||
204 |
|
226 | |||
205 | size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) |
|
227 | size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) | |
206 | { |
|
228 | { | |
207 | if (!cctxParams) { return ERROR(GENERIC); } |
|
229 | RETURN_ERROR_IF(!cctxParams, GENERIC); | |
208 |
|
|
230 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); | |
209 | memset(cctxParams, 0, sizeof(*cctxParams)); |
|
231 | memset(cctxParams, 0, sizeof(*cctxParams)); | |
210 | cctxParams->cParams = params.cParams; |
|
232 | cctxParams->cParams = params.cParams; | |
211 | cctxParams->fParams = params.fParams; |
|
233 | cctxParams->fParams = params.fParams; | |
@@ -359,6 +381,17 b' ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c' | |||||
359 | bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ |
|
381 | bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ | |
360 | return bounds; |
|
382 | return bounds; | |
361 |
|
383 | |||
|
384 | case ZSTD_c_literalCompressionMode: | |||
|
385 | ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); | |||
|
386 | bounds.lowerBound = ZSTD_lcm_auto; | |||
|
387 | bounds.upperBound = ZSTD_lcm_uncompressed; | |||
|
388 | return bounds; | |||
|
389 | ||||
|
390 | case ZSTD_c_targetCBlockSize: | |||
|
391 | bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; | |||
|
392 | bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; | |||
|
393 | return bounds; | |||
|
394 | ||||
362 | default: |
|
395 | default: | |
363 | { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; |
|
396 | { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; | |
364 | return boundError; |
|
397 | return boundError; | |
@@ -366,22 +399,22 b' ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c' | |||||
366 | } |
|
399 | } | |
367 | } |
|
400 | } | |
368 |
|
401 | |||
369 |
/* ZSTD_cParam_ |
|
402 | /* ZSTD_cParam_clampBounds: | |
370 | * @return 1 if value is within cParam bounds, |
|
403 | * Clamps the value into the bounded range. | |
371 | * 0 otherwise */ |
|
404 | */ | |
372 |
static |
|
405 | static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) | |
373 | { |
|
406 | { | |
374 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); |
|
407 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); | |
375 |
if (ZSTD_isError(bounds.error)) return |
|
408 | if (ZSTD_isError(bounds.error)) return bounds.error; | |
376 |
if (value < bounds.lowerBound) |
|
409 | if (*value < bounds.lowerBound) *value = bounds.lowerBound; | |
377 |
if (value > bounds.upperBound) |
|
410 | if (*value > bounds.upperBound) *value = bounds.upperBound; | |
378 |
return |
|
411 | return 0; | |
379 | } |
|
412 | } | |
380 |
|
413 | |||
381 |
#define BOUNDCHECK(cParam, val) { |
|
414 | #define BOUNDCHECK(cParam, val) { \ | |
382 |
|
|
415 | RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ | |
383 |
|
|
416 | parameter_outOfBound); \ | |
384 | } } |
|
417 | } | |
385 |
|
418 | |||
386 |
|
419 | |||
387 | static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) |
|
420 | static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) | |
@@ -413,6 +446,8 b' static int ZSTD_isUpdateAuthorized(ZSTD_' | |||||
413 | case ZSTD_c_ldmBucketSizeLog: |
|
446 | case ZSTD_c_ldmBucketSizeLog: | |
414 | case ZSTD_c_ldmHashRateLog: |
|
447 | case ZSTD_c_ldmHashRateLog: | |
415 | case ZSTD_c_forceAttachDict: |
|
448 | case ZSTD_c_forceAttachDict: | |
|
449 | case ZSTD_c_literalCompressionMode: | |||
|
450 | case ZSTD_c_targetCBlockSize: | |||
416 | default: |
|
451 | default: | |
417 | return 0; |
|
452 | return 0; | |
418 | } |
|
453 | } | |
@@ -425,18 +460,17 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*' | |||||
425 | if (ZSTD_isUpdateAuthorized(param)) { |
|
460 | if (ZSTD_isUpdateAuthorized(param)) { | |
426 | cctx->cParamsChanged = 1; |
|
461 | cctx->cParamsChanged = 1; | |
427 | } else { |
|
462 | } else { | |
428 |
|
|
463 | RETURN_ERROR(stage_wrong); | |
429 | } } |
|
464 | } } | |
430 |
|
465 | |||
431 | switch(param) |
|
466 | switch(param) | |
432 | { |
|
467 | { | |
433 |
case ZSTD_c_ |
|
468 | case ZSTD_c_nbWorkers: | |
434 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
469 | RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, | |
|
470 | "MT not compatible with static alloc"); | |||
|
471 | break; | |||
435 |
|
472 | |||
436 | case ZSTD_c_compressionLevel: |
|
473 | case ZSTD_c_compressionLevel: | |
437 | if (cctx->cdict) return ERROR(stage_wrong); |
|
|||
438 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
|||
439 |
|
||||
440 | case ZSTD_c_windowLog: |
|
474 | case ZSTD_c_windowLog: | |
441 | case ZSTD_c_hashLog: |
|
475 | case ZSTD_c_hashLog: | |
442 | case ZSTD_c_chainLog: |
|
476 | case ZSTD_c_chainLog: | |
@@ -444,49 +478,33 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*' | |||||
444 | case ZSTD_c_minMatch: |
|
478 | case ZSTD_c_minMatch: | |
445 | case ZSTD_c_targetLength: |
|
479 | case ZSTD_c_targetLength: | |
446 | case ZSTD_c_strategy: |
|
480 | case ZSTD_c_strategy: | |
447 | if (cctx->cdict) return ERROR(stage_wrong); |
|
481 | case ZSTD_c_ldmHashRateLog: | |
448 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
482 | case ZSTD_c_format: | |
449 |
|
||||
450 | case ZSTD_c_contentSizeFlag: |
|
483 | case ZSTD_c_contentSizeFlag: | |
451 | case ZSTD_c_checksumFlag: |
|
484 | case ZSTD_c_checksumFlag: | |
452 | case ZSTD_c_dictIDFlag: |
|
485 | case ZSTD_c_dictIDFlag: | |
453 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
486 | case ZSTD_c_forceMaxWindow: | |
454 |
|
||||
455 | case ZSTD_c_forceMaxWindow : /* Force back-references to remain < windowSize, |
|
|||
456 | * even when referencing into Dictionary content. |
|
|||
457 | * default : 0 when using a CDict, 1 when using a Prefix */ |
|
|||
458 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
|||
459 |
|
||||
460 | case ZSTD_c_forceAttachDict: |
|
487 | case ZSTD_c_forceAttachDict: | |
461 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
488 | case ZSTD_c_literalCompressionMode: | |
462 |
|
||||
463 | case ZSTD_c_nbWorkers: |
|
|||
464 | if ((value!=0) && cctx->staticSize) { |
|
|||
465 | return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ |
|
|||
466 | } |
|
|||
467 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
|||
468 |
|
||||
469 | case ZSTD_c_jobSize: |
|
489 | case ZSTD_c_jobSize: | |
470 | case ZSTD_c_overlapLog: |
|
490 | case ZSTD_c_overlapLog: | |
471 | case ZSTD_c_rsyncable: |
|
491 | case ZSTD_c_rsyncable: | |
472 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
|||
473 |
|
||||
474 | case ZSTD_c_enableLongDistanceMatching: |
|
492 | case ZSTD_c_enableLongDistanceMatching: | |
475 | case ZSTD_c_ldmHashLog: |
|
493 | case ZSTD_c_ldmHashLog: | |
476 | case ZSTD_c_ldmMinMatch: |
|
494 | case ZSTD_c_ldmMinMatch: | |
477 | case ZSTD_c_ldmBucketSizeLog: |
|
495 | case ZSTD_c_ldmBucketSizeLog: | |
478 |
case ZSTD_c_ |
|
496 | case ZSTD_c_targetCBlockSize: | |
479 | if (cctx->cdict) return ERROR(stage_wrong); |
|
497 | break; | |
480 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); |
|
498 | ||
481 |
|
499 | default: RETURN_ERROR(parameter_unsupported); | ||
482 | default: return ERROR(parameter_unsupported); |
|
|||
483 | } |
|
500 | } | |
|
501 | return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); | |||
484 | } |
|
502 | } | |
485 |
|
503 | |||
486 | size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, |
|
504 | size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, | |
487 | ZSTD_cParameter param, int value) |
|
505 | ZSTD_cParameter param, int value) | |
488 | { |
|
506 | { | |
489 | DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%i, %i)", (int)param, value); |
|
507 | DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); | |
490 | switch(param) |
|
508 | switch(param) | |
491 | { |
|
509 | { | |
492 | case ZSTD_c_format : |
|
510 | case ZSTD_c_format : | |
@@ -495,11 +513,9 b' size_t ZSTD_CCtxParam_setParameter(ZSTD_' | |||||
495 | return (size_t)CCtxParams->format; |
|
513 | return (size_t)CCtxParams->format; | |
496 |
|
514 | |||
497 | case ZSTD_c_compressionLevel : { |
|
515 | case ZSTD_c_compressionLevel : { | |
498 | int cLevel = value; |
|
516 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); | |
499 | if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); |
|
517 | if (value) { /* 0 : does not change current level */ | |
500 | if (cLevel < ZSTD_minCLevel()) cLevel = ZSTD_minCLevel(); |
|
518 | CCtxParams->compressionLevel = value; | |
501 | if (cLevel) { /* 0 : does not change current level */ |
|
|||
502 | CCtxParams->compressionLevel = cLevel; |
|
|||
503 | } |
|
519 | } | |
504 | if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; |
|
520 | if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; | |
505 | return 0; /* return type (size_t) cannot represent negative values */ |
|
521 | return 0; /* return type (size_t) cannot represent negative values */ | |
@@ -573,33 +589,55 b' size_t ZSTD_CCtxParam_setParameter(ZSTD_' | |||||
573 | return CCtxParams->attachDictPref; |
|
589 | return CCtxParams->attachDictPref; | |
574 | } |
|
590 | } | |
575 |
|
591 | |||
|
592 | case ZSTD_c_literalCompressionMode : { | |||
|
593 | const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; | |||
|
594 | BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); | |||
|
595 | CCtxParams->literalCompressionMode = lcm; | |||
|
596 | return CCtxParams->literalCompressionMode; | |||
|
597 | } | |||
|
598 | ||||
576 | case ZSTD_c_nbWorkers : |
|
599 | case ZSTD_c_nbWorkers : | |
577 | #ifndef ZSTD_MULTITHREAD |
|
600 | #ifndef ZSTD_MULTITHREAD | |
578 | if (value!=0) return ERROR(parameter_unsupported); |
|
601 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
579 | return 0; |
|
602 | return 0; | |
580 | #else |
|
603 | #else | |
581 | return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); |
|
604 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); | |
|
605 | CCtxParams->nbWorkers = value; | |||
|
606 | return CCtxParams->nbWorkers; | |||
582 | #endif |
|
607 | #endif | |
583 |
|
608 | |||
584 | case ZSTD_c_jobSize : |
|
609 | case ZSTD_c_jobSize : | |
585 | #ifndef ZSTD_MULTITHREAD |
|
610 | #ifndef ZSTD_MULTITHREAD | |
586 | return ERROR(parameter_unsupported); |
|
611 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
612 | return 0; | |||
587 | #else |
|
613 | #else | |
588 | return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); |
|
614 | /* Adjust to the minimum non-default value. */ | |
|
615 | if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) | |||
|
616 | value = ZSTDMT_JOBSIZE_MIN; | |||
|
617 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); | |||
|
618 | assert(value >= 0); | |||
|
619 | CCtxParams->jobSize = value; | |||
|
620 | return CCtxParams->jobSize; | |||
589 | #endif |
|
621 | #endif | |
590 |
|
622 | |||
591 | case ZSTD_c_overlapLog : |
|
623 | case ZSTD_c_overlapLog : | |
592 | #ifndef ZSTD_MULTITHREAD |
|
624 | #ifndef ZSTD_MULTITHREAD | |
593 | return ERROR(parameter_unsupported); |
|
625 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
626 | return 0; | |||
594 | #else |
|
627 | #else | |
595 | return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapLog, value); |
|
628 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); | |
|
629 | CCtxParams->overlapLog = value; | |||
|
630 | return CCtxParams->overlapLog; | |||
596 | #endif |
|
631 | #endif | |
597 |
|
632 | |||
598 | case ZSTD_c_rsyncable : |
|
633 | case ZSTD_c_rsyncable : | |
599 | #ifndef ZSTD_MULTITHREAD |
|
634 | #ifndef ZSTD_MULTITHREAD | |
600 | return ERROR(parameter_unsupported); |
|
635 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
636 | return 0; | |||
601 | #else |
|
637 | #else | |
602 | return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_rsyncable, value); |
|
638 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); | |
|
639 | CCtxParams->rsyncable = value; | |||
|
640 | return CCtxParams->rsyncable; | |||
603 | #endif |
|
641 | #endif | |
604 |
|
642 | |||
605 | case ZSTD_c_enableLongDistanceMatching : |
|
643 | case ZSTD_c_enableLongDistanceMatching : | |
@@ -625,21 +663,27 b' size_t ZSTD_CCtxParam_setParameter(ZSTD_' | |||||
625 | return CCtxParams->ldmParams.bucketSizeLog; |
|
663 | return CCtxParams->ldmParams.bucketSizeLog; | |
626 |
|
664 | |||
627 | case ZSTD_c_ldmHashRateLog : |
|
665 | case ZSTD_c_ldmHashRateLog : | |
628 |
|
|
666 | RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, | |
629 |
|
|
667 | parameter_outOfBound); | |
630 | CCtxParams->ldmParams.hashRateLog = value; |
|
668 | CCtxParams->ldmParams.hashRateLog = value; | |
631 | return CCtxParams->ldmParams.hashRateLog; |
|
669 | return CCtxParams->ldmParams.hashRateLog; | |
632 |
|
670 | |||
633 | default: return ERROR(parameter_unsupported); |
|
671 | case ZSTD_c_targetCBlockSize : | |
|
672 | if (value!=0) /* 0 ==> default */ | |||
|
673 | BOUNDCHECK(ZSTD_c_targetCBlockSize, value); | |||
|
674 | CCtxParams->targetCBlockSize = value; | |||
|
675 | return CCtxParams->targetCBlockSize; | |||
|
676 | ||||
|
677 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); | |||
634 | } |
|
678 | } | |
635 | } |
|
679 | } | |
636 |
|
680 | |||
637 | size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) |
|
681 | size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) | |
638 | { |
|
682 | { | |
639 | return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value); |
|
683 | return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); | |
640 | } |
|
684 | } | |
641 |
|
685 | |||
642 | size_t ZSTD_CCtxParam_getParameter( |
|
686 | size_t ZSTD_CCtxParams_getParameter( | |
643 | ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) |
|
687 | ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) | |
644 | { |
|
688 | { | |
645 | switch(param) |
|
689 | switch(param) | |
@@ -651,13 +695,13 b' size_t ZSTD_CCtxParam_getParameter(' | |||||
651 | *value = CCtxParams->compressionLevel; |
|
695 | *value = CCtxParams->compressionLevel; | |
652 | break; |
|
696 | break; | |
653 | case ZSTD_c_windowLog : |
|
697 | case ZSTD_c_windowLog : | |
654 | *value = CCtxParams->cParams.windowLog; |
|
698 | *value = (int)CCtxParams->cParams.windowLog; | |
655 | break; |
|
699 | break; | |
656 | case ZSTD_c_hashLog : |
|
700 | case ZSTD_c_hashLog : | |
657 | *value = CCtxParams->cParams.hashLog; |
|
701 | *value = (int)CCtxParams->cParams.hashLog; | |
658 | break; |
|
702 | break; | |
659 | case ZSTD_c_chainLog : |
|
703 | case ZSTD_c_chainLog : | |
660 | *value = CCtxParams->cParams.chainLog; |
|
704 | *value = (int)CCtxParams->cParams.chainLog; | |
661 | break; |
|
705 | break; | |
662 | case ZSTD_c_searchLog : |
|
706 | case ZSTD_c_searchLog : | |
663 | *value = CCtxParams->cParams.searchLog; |
|
707 | *value = CCtxParams->cParams.searchLog; | |
@@ -686,6 +730,9 b' size_t ZSTD_CCtxParam_getParameter(' | |||||
686 | case ZSTD_c_forceAttachDict : |
|
730 | case ZSTD_c_forceAttachDict : | |
687 | *value = CCtxParams->attachDictPref; |
|
731 | *value = CCtxParams->attachDictPref; | |
688 | break; |
|
732 | break; | |
|
733 | case ZSTD_c_literalCompressionMode : | |||
|
734 | *value = CCtxParams->literalCompressionMode; | |||
|
735 | break; | |||
689 | case ZSTD_c_nbWorkers : |
|
736 | case ZSTD_c_nbWorkers : | |
690 | #ifndef ZSTD_MULTITHREAD |
|
737 | #ifndef ZSTD_MULTITHREAD | |
691 | assert(CCtxParams->nbWorkers == 0); |
|
738 | assert(CCtxParams->nbWorkers == 0); | |
@@ -694,7 +741,7 b' size_t ZSTD_CCtxParam_getParameter(' | |||||
694 | break; |
|
741 | break; | |
695 | case ZSTD_c_jobSize : |
|
742 | case ZSTD_c_jobSize : | |
696 | #ifndef ZSTD_MULTITHREAD |
|
743 | #ifndef ZSTD_MULTITHREAD | |
697 |
|
|
744 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); | |
698 | #else |
|
745 | #else | |
699 | assert(CCtxParams->jobSize <= INT_MAX); |
|
746 | assert(CCtxParams->jobSize <= INT_MAX); | |
700 | *value = (int)CCtxParams->jobSize; |
|
747 | *value = (int)CCtxParams->jobSize; | |
@@ -702,14 +749,14 b' size_t ZSTD_CCtxParam_getParameter(' | |||||
702 | #endif |
|
749 | #endif | |
703 | case ZSTD_c_overlapLog : |
|
750 | case ZSTD_c_overlapLog : | |
704 | #ifndef ZSTD_MULTITHREAD |
|
751 | #ifndef ZSTD_MULTITHREAD | |
705 |
|
|
752 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); | |
706 | #else |
|
753 | #else | |
707 | *value = CCtxParams->overlapLog; |
|
754 | *value = CCtxParams->overlapLog; | |
708 | break; |
|
755 | break; | |
709 | #endif |
|
756 | #endif | |
710 | case ZSTD_c_rsyncable : |
|
757 | case ZSTD_c_rsyncable : | |
711 | #ifndef ZSTD_MULTITHREAD |
|
758 | #ifndef ZSTD_MULTITHREAD | |
712 |
|
|
759 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); | |
713 | #else |
|
760 | #else | |
714 | *value = CCtxParams->rsyncable; |
|
761 | *value = CCtxParams->rsyncable; | |
715 | break; |
|
762 | break; | |
@@ -729,7 +776,10 b' size_t ZSTD_CCtxParam_getParameter(' | |||||
729 | case ZSTD_c_ldmHashRateLog : |
|
776 | case ZSTD_c_ldmHashRateLog : | |
730 | *value = CCtxParams->ldmParams.hashRateLog; |
|
777 | *value = CCtxParams->ldmParams.hashRateLog; | |
731 | break; |
|
778 | break; | |
732 | default: return ERROR(parameter_unsupported); |
|
779 | case ZSTD_c_targetCBlockSize : | |
|
780 | *value = (int)CCtxParams->targetCBlockSize; | |||
|
781 | break; | |||
|
782 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); | |||
733 | } |
|
783 | } | |
734 | return 0; |
|
784 | return 0; | |
735 | } |
|
785 | } | |
@@ -745,8 +795,8 b' size_t ZSTD_CCtx_setParametersUsingCCtxP' | |||||
745 | ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) |
|
795 | ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) | |
746 | { |
|
796 | { | |
747 | DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); |
|
797 | DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); | |
748 |
|
|
798 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
749 |
|
|
799 | RETURN_ERROR_IF(cctx->cdict, stage_wrong); | |
750 |
|
800 | |||
751 | cctx->requestedParams = *params; |
|
801 | cctx->requestedParams = *params; | |
752 | return 0; |
|
802 | return 0; | |
@@ -755,33 +805,71 b' size_t ZSTD_CCtx_setParametersUsingCCtxP' | |||||
755 | ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) |
|
805 | ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) | |
756 | { |
|
806 | { | |
757 | DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); |
|
807 | DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); | |
758 |
|
|
808 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
759 | cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; |
|
809 | cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; | |
760 | return 0; |
|
810 | return 0; | |
761 | } |
|
811 | } | |
762 |
|
812 | |||
|
813 | /** | |||
|
814 | * Initializes the local dict using the requested parameters. | |||
|
815 | * NOTE: This does not use the pledged src size, because it may be used for more | |||
|
816 | * than one compression. | |||
|
817 | */ | |||
|
818 | static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) | |||
|
819 | { | |||
|
820 | ZSTD_localDict* const dl = &cctx->localDict; | |||
|
821 | ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( | |||
|
822 | &cctx->requestedParams, 0, dl->dictSize); | |||
|
823 | if (dl->dict == NULL) { | |||
|
824 | /* No local dictionary. */ | |||
|
825 | assert(dl->dictBuffer == NULL); | |||
|
826 | assert(dl->cdict == NULL); | |||
|
827 | assert(dl->dictSize == 0); | |||
|
828 | return 0; | |||
|
829 | } | |||
|
830 | if (dl->cdict != NULL) { | |||
|
831 | assert(cctx->cdict == dl->cdict); | |||
|
832 | /* Local dictionary already initialized. */ | |||
|
833 | return 0; | |||
|
834 | } | |||
|
835 | assert(dl->dictSize > 0); | |||
|
836 | assert(cctx->cdict == NULL); | |||
|
837 | assert(cctx->prefixDict.dict == NULL); | |||
|
838 | ||||
|
839 | dl->cdict = ZSTD_createCDict_advanced( | |||
|
840 | dl->dict, | |||
|
841 | dl->dictSize, | |||
|
842 | ZSTD_dlm_byRef, | |||
|
843 | dl->dictContentType, | |||
|
844 | cParams, | |||
|
845 | cctx->customMem); | |||
|
846 | RETURN_ERROR_IF(!dl->cdict, memory_allocation); | |||
|
847 | cctx->cdict = dl->cdict; | |||
|
848 | return 0; | |||
|
849 | } | |||
|
850 | ||||
763 | size_t ZSTD_CCtx_loadDictionary_advanced( |
|
851 | size_t ZSTD_CCtx_loadDictionary_advanced( | |
764 | ZSTD_CCtx* cctx, const void* dict, size_t dictSize, |
|
852 | ZSTD_CCtx* cctx, const void* dict, size_t dictSize, | |
765 | ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) |
|
853 | ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) | |
766 | { |
|
854 | { | |
767 |
|
|
855 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
768 | if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ |
|
856 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, | |
|
857 | "no malloc for static CCtx"); | |||
769 | DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); |
|
858 | DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); | |
770 |
ZSTD_ |
|
859 | ZSTD_clearAllDicts(cctx); /* in case one already exists */ | |
771 |
if (dict==NULL || dictSize==0) |
|
860 | if (dict == NULL || dictSize == 0) /* no dictionary mode */ | |
772 | cctx->cdictLocal = NULL; |
|
861 | return 0; | |
773 | cctx->cdict = NULL; |
|
862 | if (dictLoadMethod == ZSTD_dlm_byRef) { | |
|
863 | cctx->localDict.dict = dict; | |||
774 | } else { |
|
864 | } else { | |
775 | ZSTD_compressionParameters const cParams = |
|
865 | void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); | |
776 | ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); |
|
866 | RETURN_ERROR_IF(!dictBuffer, memory_allocation); | |
777 | cctx->cdictLocal = ZSTD_createCDict_advanced( |
|
867 | memcpy(dictBuffer, dict, dictSize); | |
778 | dict, dictSize, |
|
868 | cctx->localDict.dictBuffer = dictBuffer; | |
779 | dictLoadMethod, dictContentType, |
|
869 | cctx->localDict.dict = dictBuffer; | |
780 | cParams, cctx->customMem); |
|
|||
781 | cctx->cdict = cctx->cdictLocal; |
|
|||
782 | if (cctx->cdictLocal == NULL) |
|
|||
783 | return ERROR(memory_allocation); |
|
|||
784 | } |
|
870 | } | |
|
871 | cctx->localDict.dictSize = dictSize; | |||
|
872 | cctx->localDict.dictContentType = dictContentType; | |||
785 | return 0; |
|
873 | return 0; | |
786 | } |
|
874 | } | |
787 |
|
875 | |||
@@ -801,9 +889,10 b' ZSTDLIB_API size_t ZSTD_CCtx_loadDiction' | |||||
801 |
|
889 | |||
802 | size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) |
|
890 | size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) | |
803 | { |
|
891 | { | |
804 |
|
|
892 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
893 | /* Free the existing local cdict (if any) to save memory. */ | |||
|
894 | ZSTD_clearAllDicts(cctx); | |||
805 | cctx->cdict = cdict; |
|
895 | cctx->cdict = cdict; | |
806 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* exclusive */ |
|
|||
807 | return 0; |
|
896 | return 0; | |
808 | } |
|
897 | } | |
809 |
|
898 | |||
@@ -815,8 +904,8 b' size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cc' | |||||
815 | size_t ZSTD_CCtx_refPrefix_advanced( |
|
904 | size_t ZSTD_CCtx_refPrefix_advanced( | |
816 | ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) |
|
905 | ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) | |
817 | { |
|
906 | { | |
818 |
|
|
907 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
819 | cctx->cdict = NULL; /* prefix discards any prior cdict */ |
|
908 | ZSTD_clearAllDicts(cctx); | |
820 | cctx->prefixDict.dict = prefix; |
|
909 | cctx->prefixDict.dict = prefix; | |
821 | cctx->prefixDict.dictSize = prefixSize; |
|
910 | cctx->prefixDict.dictSize = prefixSize; | |
822 | cctx->prefixDict.dictContentType = dictContentType; |
|
911 | cctx->prefixDict.dictContentType = dictContentType; | |
@@ -834,8 +923,8 b' size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ' | |||||
834 | } |
|
923 | } | |
835 | if ( (reset == ZSTD_reset_parameters) |
|
924 | if ( (reset == ZSTD_reset_parameters) | |
836 | || (reset == ZSTD_reset_session_and_parameters) ) { |
|
925 | || (reset == ZSTD_reset_session_and_parameters) ) { | |
837 |
|
|
926 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
838 | cctx->cdict = NULL; |
|
927 | ZSTD_clearAllDicts(cctx); | |
839 | return ZSTD_CCtxParams_reset(&cctx->requestedParams); |
|
928 | return ZSTD_CCtxParams_reset(&cctx->requestedParams); | |
840 | } |
|
929 | } | |
841 | return 0; |
|
930 | return 0; | |
@@ -847,12 +936,12 b' size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ' | |||||
847 | @return : 0, or an error code if one value is beyond authorized range */ |
|
936 | @return : 0, or an error code if one value is beyond authorized range */ | |
848 | size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) |
|
937 | size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) | |
849 | { |
|
938 | { | |
850 | BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog); |
|
939 | BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); | |
851 | BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog); |
|
940 | BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); | |
852 | BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog); |
|
941 | BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); | |
853 | BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog); |
|
942 | BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); | |
854 | BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch); |
|
943 | BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); | |
855 | BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength); |
|
944 | BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); | |
856 | BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); |
|
945 | BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); | |
857 | return 0; |
|
946 | return 0; | |
858 | } |
|
947 | } | |
@@ -868,7 +957,7 b' ZSTD_clampCParams(ZSTD_compressionParame' | |||||
868 | if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ |
|
957 | if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ | |
869 | else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ |
|
958 | else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ | |
870 | } |
|
959 | } | |
871 |
# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, |
|
960 | # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) | |
872 | CLAMP(ZSTD_c_windowLog, cParams.windowLog); |
|
961 | CLAMP(ZSTD_c_windowLog, cParams.windowLog); | |
873 | CLAMP(ZSTD_c_chainLog, cParams.chainLog); |
|
962 | CLAMP(ZSTD_c_chainLog, cParams.chainLog); | |
874 | CLAMP(ZSTD_c_hashLog, cParams.hashLog); |
|
963 | CLAMP(ZSTD_c_hashLog, cParams.hashLog); | |
@@ -888,10 +977,11 b' static U32 ZSTD_cycleLog(U32 hashLog, ZS' | |||||
888 | } |
|
977 | } | |
889 |
|
978 | |||
890 | /** ZSTD_adjustCParams_internal() : |
|
979 | /** ZSTD_adjustCParams_internal() : | |
891 |
|
|
980 | * optimize `cPar` for a specified input (`srcSize` and `dictSize`). | |
892 |
|
|
981 | * mostly downsize to reduce memory consumption and initialization latency. | |
893 | Both `srcSize` and `dictSize` are optional (use 0 if unknown). |
|
982 | * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. | |
894 | Note : cPar is assumed validated. Use ZSTD_checkCParams() to ensure this condition. */ |
|
983 | * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention. | |
|
984 | * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ | |||
895 | static ZSTD_compressionParameters |
|
985 | static ZSTD_compressionParameters | |
896 | ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, |
|
986 | ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, | |
897 | unsigned long long srcSize, |
|
987 | unsigned long long srcSize, | |
@@ -901,7 +991,7 b' ZSTD_adjustCParams_internal(ZSTD_compres' | |||||
901 | static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); |
|
991 | static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); | |
902 | assert(ZSTD_checkCParams(cPar)==0); |
|
992 | assert(ZSTD_checkCParams(cPar)==0); | |
903 |
|
993 | |||
904 |
if (dictSize && (srcSize+1<2) /* |
|
994 | if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ ) | |
905 | srcSize = minSrcSize; /* presumed small when there is a dictionary */ |
|
995 | srcSize = minSrcSize; /* presumed small when there is a dictionary */ | |
906 | else if (srcSize == 0) |
|
996 | else if (srcSize == 0) | |
907 | srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ |
|
997 | srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ | |
@@ -922,7 +1012,7 b' ZSTD_adjustCParams_internal(ZSTD_compres' | |||||
922 | } |
|
1012 | } | |
923 |
|
1013 | |||
924 | if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) |
|
1014 | if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) | |
925 | cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ |
|
1015 | cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ | |
926 |
|
1016 | |||
927 | return cPar; |
|
1017 | return cPar; | |
928 | } |
|
1018 | } | |
@@ -932,7 +1022,7 b' ZSTD_adjustCParams(ZSTD_compressionParam' | |||||
932 | unsigned long long srcSize, |
|
1022 | unsigned long long srcSize, | |
933 | size_t dictSize) |
|
1023 | size_t dictSize) | |
934 | { |
|
1024 | { | |
935 | cPar = ZSTD_clampCParams(cPar); |
|
1025 | cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ | |
936 | return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); |
|
1026 | return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); | |
937 | } |
|
1027 | } | |
938 |
|
1028 | |||
@@ -973,8 +1063,7 b' ZSTD_sizeof_matchState(const ZSTD_compre' | |||||
973 |
|
1063 | |||
974 | size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) |
|
1064 | size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) | |
975 | { |
|
1065 | { | |
976 |
|
|
1066 | RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); | |
977 | if (params->nbWorkers > 0) { return ERROR(GENERIC); } |
|
|||
978 | { ZSTD_compressionParameters const cParams = |
|
1067 | { ZSTD_compressionParameters const cParams = | |
979 | ZSTD_getCParamsFromCCtxParams(params, 0, 0); |
|
1068 | ZSTD_getCParamsFromCCtxParams(params, 0, 0); | |
980 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); |
|
1069 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); | |
@@ -1022,10 +1111,12 b' size_t ZSTD_estimateCCtxSize(int compres' | |||||
1022 |
|
1111 | |||
1023 | size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) |
|
1112 | size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) | |
1024 | { |
|
1113 | { | |
1025 | if (params->nbWorkers > 0) { return ERROR(GENERIC); } |
|
1114 | RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); | |
1026 | { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); |
|
1115 | { ZSTD_compressionParameters const cParams = | |
1027 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); |
|
1116 | ZSTD_getCParamsFromCCtxParams(params, 0, 0); | |
1028 | size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; |
|
1117 | size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); | |
|
1118 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); | |||
|
1119 | size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; | |||
1029 | size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; |
|
1120 | size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; | |
1030 | size_t const streamingSize = inBuffSize + outBuffSize; |
|
1121 | size_t const streamingSize = inBuffSize + outBuffSize; | |
1031 |
|
1122 | |||
@@ -1197,15 +1288,14 b' static void ZSTD_reset_compressedBlockSt' | |||||
1197 | } |
|
1288 | } | |
1198 |
|
1289 | |||
1199 | /*! ZSTD_invalidateMatchState() |
|
1290 | /*! ZSTD_invalidateMatchState() | |
1200 | * Invalidate all the matches in the match finder tables. |
|
1291 | * Invalidate all the matches in the match finder tables. | |
1201 | * Requires nextSrc and base to be set (can be NULL). |
|
1292 | * Requires nextSrc and base to be set (can be NULL). | |
1202 | */ |
|
1293 | */ | |
1203 | static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) |
|
1294 | static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) | |
1204 | { |
|
1295 | { | |
1205 | ZSTD_window_clear(&ms->window); |
|
1296 | ZSTD_window_clear(&ms->window); | |
1206 |
|
1297 | |||
1207 | ms->nextToUpdate = ms->window.dictLimit; |
|
1298 | ms->nextToUpdate = ms->window.dictLimit; | |
1208 | ms->nextToUpdate3 = ms->window.dictLimit; |
|
|||
1209 | ms->loadedDictEnd = 0; |
|
1299 | ms->loadedDictEnd = 0; | |
1210 | ms->opt.litLengthSum = 0; /* force reset of btopt stats */ |
|
1300 | ms->opt.litLengthSum = 0; /* force reset of btopt stats */ | |
1211 | ms->dictMatchState = NULL; |
|
1301 | ms->dictMatchState = NULL; | |
@@ -1242,15 +1332,17 b' static size_t ZSTD_continueCCtx(ZSTD_CCt' | |||||
1242 |
|
1332 | |||
1243 | typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; |
|
1333 | typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; | |
1244 |
|
1334 | |||
|
1335 | typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; | |||
|
1336 | ||||
1245 | static void* |
|
1337 | static void* | |
1246 | ZSTD_reset_matchState(ZSTD_matchState_t* ms, |
|
1338 | ZSTD_reset_matchState(ZSTD_matchState_t* ms, | |
1247 | void* ptr, |
|
1339 | void* ptr, | |
1248 | const ZSTD_compressionParameters* cParams, |
|
1340 | const ZSTD_compressionParameters* cParams, | |
1249 |
ZSTD_compResetPolicy_e const crp, |
|
1341 | ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho) | |
1250 | { |
|
1342 | { | |
1251 | size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); |
|
1343 | size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); | |
1252 | size_t const hSize = ((size_t)1) << cParams->hashLog; |
|
1344 | size_t const hSize = ((size_t)1) << cParams->hashLog; | |
1253 | U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; |
|
1345 | U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; | |
1254 | size_t const h3Size = ((size_t)1) << hashLog3; |
|
1346 | size_t const h3Size = ((size_t)1) << hashLog3; | |
1255 | size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); |
|
1347 | size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); | |
1256 |
|
1348 | |||
@@ -1264,7 +1356,7 b' ZSTD_reset_matchState(ZSTD_matchState_t*' | |||||
1264 | ZSTD_invalidateMatchState(ms); |
|
1356 | ZSTD_invalidateMatchState(ms); | |
1265 |
|
1357 | |||
1266 | /* opt parser space */ |
|
1358 | /* opt parser space */ | |
1267 | if (forCCtx && (cParams->strategy >= ZSTD_btopt)) { |
|
1359 | if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { | |
1268 | DEBUGLOG(4, "reserving optimal parser space"); |
|
1360 | DEBUGLOG(4, "reserving optimal parser space"); | |
1269 | ms->opt.litFreq = (unsigned*)ptr; |
|
1361 | ms->opt.litFreq = (unsigned*)ptr; | |
1270 | ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); |
|
1362 | ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); | |
@@ -1292,6 +1384,19 b' ZSTD_reset_matchState(ZSTD_matchState_t*' | |||||
1292 | return ptr; |
|
1384 | return ptr; | |
1293 | } |
|
1385 | } | |
1294 |
|
1386 | |||
|
1387 | /* ZSTD_indexTooCloseToMax() : | |||
|
1388 | * minor optimization : prefer memset() rather than reduceIndex() | |||
|
1389 | * which is measurably slow in some circumstances (reported for Visual Studio). | |||
|
1390 | * Works when re-using a context for a lot of smallish inputs : | |||
|
1391 | * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, | |||
|
1392 | * memset() will be triggered before reduceIndex(). | |||
|
1393 | */ | |||
|
1394 | #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) | |||
|
1395 | static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) | |||
|
1396 | { | |||
|
1397 | return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); | |||
|
1398 | } | |||
|
1399 | ||||
1295 | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ |
|
1400 | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ | |
1296 | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large |
|
1401 | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large | |
1297 | * during at least this number of times, |
|
1402 | * during at least this number of times, | |
@@ -1303,7 +1408,7 b' ZSTD_reset_matchState(ZSTD_matchState_t*' | |||||
1303 | note : `params` are assumed fully validated at this stage */ |
|
1408 | note : `params` are assumed fully validated at this stage */ | |
1304 | static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, |
|
1409 | static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, | |
1305 | ZSTD_CCtx_params params, |
|
1410 | ZSTD_CCtx_params params, | |
1306 | U64 pledgedSrcSize, |
|
1411 | U64 const pledgedSrcSize, | |
1307 | ZSTD_compResetPolicy_e const crp, |
|
1412 | ZSTD_compResetPolicy_e const crp, | |
1308 | ZSTD_buffered_policy_e const zbuff) |
|
1413 | ZSTD_buffered_policy_e const zbuff) | |
1309 | { |
|
1414 | { | |
@@ -1315,13 +1420,21 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||||
1315 | if (ZSTD_equivalentParams(zc->appliedParams, params, |
|
1420 | if (ZSTD_equivalentParams(zc->appliedParams, params, | |
1316 | zc->inBuffSize, |
|
1421 | zc->inBuffSize, | |
1317 | zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, |
|
1422 | zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, | |
1318 | zbuff, pledgedSrcSize)) { |
|
1423 | zbuff, pledgedSrcSize) ) { | |
1319 |
DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode |
|
1424 | DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); | |
1320 | zc->appliedParams.cParams.windowLog, zc->blockSize); |
|
|||
1321 | zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ |
|
1425 | zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ | |
1322 | if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) |
|
1426 | if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { | |
|
1427 | DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", | |||
|
1428 | zc->appliedParams.cParams.windowLog, zc->blockSize); | |||
|
1429 | if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { | |||
|
1430 | /* prefer a reset, faster than a rescale */ | |||
|
1431 | ZSTD_reset_matchState(&zc->blockState.matchState, | |||
|
1432 | zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, | |||
|
1433 | &params.cParams, | |||
|
1434 | crp, ZSTD_resetTarget_CCtx); | |||
|
1435 | } | |||
1323 | return ZSTD_continueCCtx(zc, params, pledgedSrcSize); |
|
1436 | return ZSTD_continueCCtx(zc, params, pledgedSrcSize); | |
1324 | } } |
|
1437 | } } } | |
1325 | DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); |
|
1438 | DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); | |
1326 |
|
1439 | |||
1327 | if (params.ldmParams.enableLdm) { |
|
1440 | if (params.ldmParams.enableLdm) { | |
@@ -1364,16 +1477,16 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||||
1364 | DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); |
|
1477 | DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); | |
1365 |
|
1478 | |||
1366 | if (workSpaceTooSmall || workSpaceWasteful) { |
|
1479 | if (workSpaceTooSmall || workSpaceWasteful) { | |
1367 |
DEBUGLOG(4, " |
|
1480 | DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB", | |
1368 | zc->workSpaceSize >> 10, |
|
1481 | zc->workSpaceSize >> 10, | |
1369 | neededSpace >> 10); |
|
1482 | neededSpace >> 10); | |
1370 | /* static cctx : no resize, error out */ |
|
1483 | ||
1371 |
|
|
1484 | RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); | |
1372 |
|
1485 | |||
1373 | zc->workSpaceSize = 0; |
|
1486 | zc->workSpaceSize = 0; | |
1374 | ZSTD_free(zc->workSpace, zc->customMem); |
|
1487 | ZSTD_free(zc->workSpace, zc->customMem); | |
1375 | zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); |
|
1488 | zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); | |
1376 |
|
|
1489 | RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); | |
1377 | zc->workSpaceSize = neededSpace; |
|
1490 | zc->workSpaceSize = neededSpace; | |
1378 | zc->workSpaceOversizedDuration = 0; |
|
1491 | zc->workSpaceOversizedDuration = 0; | |
1379 |
|
1492 | |||
@@ -1406,7 +1519,10 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||||
1406 |
|
1519 | |||
1407 | ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); |
|
1520 | ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); | |
1408 |
|
1521 | |||
1409 | ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; |
|
1522 | ptr = ZSTD_reset_matchState(&zc->blockState.matchState, | |
|
1523 | zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, | |||
|
1524 | &params.cParams, | |||
|
1525 | crp, ZSTD_resetTarget_CCtx); | |||
1410 |
|
1526 | |||
1411 | /* ldm hash table */ |
|
1527 | /* ldm hash table */ | |
1412 | /* initialize bucketOffsets table later for pointer alignment */ |
|
1528 | /* initialize bucketOffsets table later for pointer alignment */ | |
@@ -1424,8 +1540,6 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||||
1424 | } |
|
1540 | } | |
1425 | assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ |
|
1541 | assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ | |
1426 |
|
1542 | |||
1427 | ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1); |
|
|||
1428 |
|
||||
1429 | /* sequences storage */ |
|
1543 | /* sequences storage */ | |
1430 | zc->seqStore.maxNbSeq = maxNbSeq; |
|
1544 | zc->seqStore.maxNbSeq = maxNbSeq; | |
1431 | zc->seqStore.sequencesStart = (seqDef*)ptr; |
|
1545 | zc->seqStore.sequencesStart = (seqDef*)ptr; | |
@@ -1502,15 +1616,14 b' static int ZSTD_shouldAttachDict(const Z' | |||||
1502 | * handled in _enforceMaxDist */ |
|
1616 | * handled in _enforceMaxDist */ | |
1503 | } |
|
1617 | } | |
1504 |
|
1618 | |||
1505 | static size_t ZSTD_resetCCtx_byAttachingCDict( |
|
1619 | static size_t | |
1506 | ZSTD_CCtx* cctx, |
|
1620 | ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, | |
1507 | const ZSTD_CDict* cdict, |
|
1621 | const ZSTD_CDict* cdict, | |
1508 | ZSTD_CCtx_params params, |
|
1622 | ZSTD_CCtx_params params, | |
1509 | U64 pledgedSrcSize, |
|
1623 | U64 pledgedSrcSize, | |
1510 | ZSTD_buffered_policy_e zbuff) |
|
1624 | ZSTD_buffered_policy_e zbuff) | |
1511 | { |
|
1625 | { | |
1512 | { |
|
1626 | { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; | |
1513 | const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; |
|
|||
1514 | unsigned const windowLog = params.cParams.windowLog; |
|
1627 | unsigned const windowLog = params.cParams.windowLog; | |
1515 | assert(windowLog != 0); |
|
1628 | assert(windowLog != 0); | |
1516 | /* Resize working context table params for input only, since the dict |
|
1629 | /* Resize working context table params for input only, since the dict | |
@@ -1522,8 +1635,7 b' static size_t ZSTD_resetCCtx_byAttaching' | |||||
1522 | assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); |
|
1635 | assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); | |
1523 | } |
|
1636 | } | |
1524 |
|
1637 | |||
1525 | { |
|
1638 | { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc | |
1526 | const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc |
|
|||
1527 | - cdict->matchState.window.base); |
|
1639 | - cdict->matchState.window.base); | |
1528 | const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; |
|
1640 | const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; | |
1529 | if (cdictLen == 0) { |
|
1641 | if (cdictLen == 0) { | |
@@ -1540,9 +1652,9 b' static size_t ZSTD_resetCCtx_byAttaching' | |||||
1540 | cctx->blockState.matchState.window.base + cdictEnd; |
|
1652 | cctx->blockState.matchState.window.base + cdictEnd; | |
1541 | ZSTD_window_clear(&cctx->blockState.matchState.window); |
|
1653 | ZSTD_window_clear(&cctx->blockState.matchState.window); | |
1542 | } |
|
1654 | } | |
|
1655 | /* loadedDictEnd is expressed within the referential of the active context */ | |||
1543 | cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; |
|
1656 | cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; | |
1544 |
|
|
1657 | } } | |
1545 | } |
|
|||
1546 |
|
1658 | |||
1547 | cctx->dictID = cdict->dictID; |
|
1659 | cctx->dictID = cdict->dictID; | |
1548 |
|
1660 | |||
@@ -1596,7 +1708,6 b' static size_t ZSTD_resetCCtx_byCopyingCD' | |||||
1596 | ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; |
|
1708 | ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; | |
1597 | dstMatchState->window = srcMatchState->window; |
|
1709 | dstMatchState->window = srcMatchState->window; | |
1598 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; |
|
1710 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; | |
1599 | dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; |
|
|||
1600 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; |
|
1711 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; | |
1601 | } |
|
1712 | } | |
1602 |
|
1713 | |||
@@ -1644,7 +1755,7 b' static size_t ZSTD_copyCCtx_internal(ZST' | |||||
1644 | ZSTD_buffered_policy_e zbuff) |
|
1755 | ZSTD_buffered_policy_e zbuff) | |
1645 | { |
|
1756 | { | |
1646 | DEBUGLOG(5, "ZSTD_copyCCtx_internal"); |
|
1757 | DEBUGLOG(5, "ZSTD_copyCCtx_internal"); | |
1647 |
|
|
1758 | RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); | |
1648 |
|
1759 | |||
1649 | memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); |
|
1760 | memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); | |
1650 | { ZSTD_CCtx_params params = dstCCtx->requestedParams; |
|
1761 | { ZSTD_CCtx_params params = dstCCtx->requestedParams; | |
@@ -1676,7 +1787,6 b' static size_t ZSTD_copyCCtx_internal(ZST' | |||||
1676 | ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; |
|
1787 | ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; | |
1677 | dstMatchState->window = srcMatchState->window; |
|
1788 | dstMatchState->window = srcMatchState->window; | |
1678 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; |
|
1789 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; | |
1679 | dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; |
|
|||
1680 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; |
|
1790 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; | |
1681 | } |
|
1791 | } | |
1682 | dstCCtx->dictID = srcCCtx->dictID; |
|
1792 | dstCCtx->dictID = srcCCtx->dictID; | |
@@ -1746,16 +1856,15 b' static void ZSTD_reduceTable_btlazy2(U32' | |||||
1746 |
|
1856 | |||
1747 | /*! ZSTD_reduceIndex() : |
|
1857 | /*! ZSTD_reduceIndex() : | |
1748 | * rescale all indexes to avoid future overflow (indexes are U32) */ |
|
1858 | * rescale all indexes to avoid future overflow (indexes are U32) */ | |
1749 |
static void ZSTD_reduceIndex (ZSTD_ |
|
1859 | static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) | |
1750 | { |
|
1860 | { | |
1751 | ZSTD_matchState_t* const ms = &zc->blockState.matchState; |
|
1861 | { U32 const hSize = (U32)1 << params->cParams.hashLog; | |
1752 | { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; |
|
|||
1753 | ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); |
|
1862 | ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); | |
1754 | } |
|
1863 | } | |
1755 |
|
1864 | |||
1756 |
if ( |
|
1865 | if (params->cParams.strategy != ZSTD_fast) { | |
1757 |
U32 const chainSize = (U32)1 << |
|
1866 | U32 const chainSize = (U32)1 << params->cParams.chainLog; | |
1758 |
if ( |
|
1867 | if (params->cParams.strategy == ZSTD_btlazy2) | |
1759 | ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); |
|
1868 | ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); | |
1760 | else |
|
1869 | else | |
1761 | ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); |
|
1870 | ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); | |
@@ -1777,161 +1886,13 b' static void ZSTD_reduceIndex (ZSTD_CCtx*' | |||||
1777 | static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) |
|
1886 | static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) | |
1778 | { |
|
1887 | { | |
1779 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); |
|
1888 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); | |
1780 |
|
|
1889 | RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, | |
|
1890 | dstSize_tooSmall); | |||
1781 | MEM_writeLE24(dst, cBlockHeader24); |
|
1891 | MEM_writeLE24(dst, cBlockHeader24); | |
1782 | memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); |
|
1892 | memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); | |
1783 | return ZSTD_blockHeaderSize + srcSize; |
|
1893 | return ZSTD_blockHeaderSize + srcSize; | |
1784 | } |
|
1894 | } | |
1785 |
|
1895 | |||
1786 | static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
|||
1787 | { |
|
|||
1788 | BYTE* const ostart = (BYTE* const)dst; |
|
|||
1789 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); |
|
|||
1790 |
|
||||
1791 | if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); |
|
|||
1792 |
|
||||
1793 | switch(flSize) |
|
|||
1794 | { |
|
|||
1795 | case 1: /* 2 - 1 - 5 */ |
|
|||
1796 | ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); |
|
|||
1797 | break; |
|
|||
1798 | case 2: /* 2 - 2 - 12 */ |
|
|||
1799 | MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); |
|
|||
1800 | break; |
|
|||
1801 | case 3: /* 2 - 2 - 20 */ |
|
|||
1802 | MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); |
|
|||
1803 | break; |
|
|||
1804 | default: /* not necessary : flSize is {1,2,3} */ |
|
|||
1805 | assert(0); |
|
|||
1806 | } |
|
|||
1807 |
|
||||
1808 | memcpy(ostart + flSize, src, srcSize); |
|
|||
1809 | return srcSize + flSize; |
|
|||
1810 | } |
|
|||
1811 |
|
||||
1812 | static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
|||
1813 | { |
|
|||
1814 | BYTE* const ostart = (BYTE* const)dst; |
|
|||
1815 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); |
|
|||
1816 |
|
||||
1817 | (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ |
|
|||
1818 |
|
||||
1819 | switch(flSize) |
|
|||
1820 | { |
|
|||
1821 | case 1: /* 2 - 1 - 5 */ |
|
|||
1822 | ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); |
|
|||
1823 | break; |
|
|||
1824 | case 2: /* 2 - 2 - 12 */ |
|
|||
1825 | MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); |
|
|||
1826 | break; |
|
|||
1827 | case 3: /* 2 - 2 - 20 */ |
|
|||
1828 | MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); |
|
|||
1829 | break; |
|
|||
1830 | default: /* not necessary : flSize is {1,2,3} */ |
|
|||
1831 | assert(0); |
|
|||
1832 | } |
|
|||
1833 |
|
||||
1834 | ostart[flSize] = *(const BYTE*)src; |
|
|||
1835 | return flSize+1; |
|
|||
1836 | } |
|
|||
1837 |
|
||||
1838 |
|
||||
1839 | /* ZSTD_minGain() : |
|
|||
1840 | * minimum compression required |
|
|||
1841 | * to generate a compress block or a compressed literals section. |
|
|||
1842 | * note : use same formula for both situations */ |
|
|||
1843 | static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) |
|
|||
1844 | { |
|
|||
1845 | U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; |
|
|||
1846 | ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); |
|
|||
1847 | assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); |
|
|||
1848 | return (srcSize >> minlog) + 2; |
|
|||
1849 | } |
|
|||
1850 |
|
||||
1851 | static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, |
|
|||
1852 | ZSTD_hufCTables_t* nextHuf, |
|
|||
1853 | ZSTD_strategy strategy, int disableLiteralCompression, |
|
|||
1854 | void* dst, size_t dstCapacity, |
|
|||
1855 | const void* src, size_t srcSize, |
|
|||
1856 | void* workspace, size_t wkspSize, |
|
|||
1857 | const int bmi2) |
|
|||
1858 | { |
|
|||
1859 | size_t const minGain = ZSTD_minGain(srcSize, strategy); |
|
|||
1860 | size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); |
|
|||
1861 | BYTE* const ostart = (BYTE*)dst; |
|
|||
1862 | U32 singleStream = srcSize < 256; |
|
|||
1863 | symbolEncodingType_e hType = set_compressed; |
|
|||
1864 | size_t cLitSize; |
|
|||
1865 |
|
||||
1866 | DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", |
|
|||
1867 | disableLiteralCompression); |
|
|||
1868 |
|
||||
1869 | /* Prepare nextEntropy assuming reusing the existing table */ |
|
|||
1870 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
|
|||
1871 |
|
||||
1872 | if (disableLiteralCompression) |
|
|||
1873 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
|
|||
1874 |
|
||||
1875 | /* small ? don't even attempt compression (speed opt) */ |
|
|||
1876 | # define COMPRESS_LITERALS_SIZE_MIN 63 |
|
|||
1877 | { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; |
|
|||
1878 | if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
|
|||
1879 | } |
|
|||
1880 |
|
||||
1881 | if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ |
|
|||
1882 | { HUF_repeat repeat = prevHuf->repeatMode; |
|
|||
1883 | int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; |
|
|||
1884 | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; |
|
|||
1885 | cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, |
|
|||
1886 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) |
|
|||
1887 | : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, |
|
|||
1888 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); |
|
|||
1889 | if (repeat != HUF_repeat_none) { |
|
|||
1890 | /* reused the existing table */ |
|
|||
1891 | hType = set_repeat; |
|
|||
1892 | } |
|
|||
1893 | } |
|
|||
1894 |
|
||||
1895 | if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { |
|
|||
1896 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
|
|||
1897 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); |
|
|||
1898 | } |
|
|||
1899 | if (cLitSize==1) { |
|
|||
1900 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
|
|||
1901 | return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); |
|
|||
1902 | } |
|
|||
1903 |
|
||||
1904 | if (hType == set_compressed) { |
|
|||
1905 | /* using a newly constructed table */ |
|
|||
1906 | nextHuf->repeatMode = HUF_repeat_check; |
|
|||
1907 | } |
|
|||
1908 |
|
||||
1909 | /* Build header */ |
|
|||
1910 | switch(lhSize) |
|
|||
1911 | { |
|
|||
1912 | case 3: /* 2 - 2 - 10 - 10 */ |
|
|||
1913 | { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); |
|
|||
1914 | MEM_writeLE24(ostart, lhc); |
|
|||
1915 | break; |
|
|||
1916 | } |
|
|||
1917 | case 4: /* 2 - 2 - 14 - 14 */ |
|
|||
1918 | { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); |
|
|||
1919 | MEM_writeLE32(ostart, lhc); |
|
|||
1920 | break; |
|
|||
1921 | } |
|
|||
1922 | case 5: /* 2 - 2 - 18 - 18 */ |
|
|||
1923 | { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); |
|
|||
1924 | MEM_writeLE32(ostart, lhc); |
|
|||
1925 | ostart[4] = (BYTE)(cLitSize >> 10); |
|
|||
1926 | break; |
|
|||
1927 | } |
|
|||
1928 | default: /* not possible : lhSize is {3,4,5} */ |
|
|||
1929 | assert(0); |
|
|||
1930 | } |
|
|||
1931 | return lhSize+cLitSize; |
|
|||
1932 | } |
|
|||
1933 |
|
||||
1934 |
|
||||
1935 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) |
|
1896 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) | |
1936 | { |
|
1897 | { | |
1937 | const seqDef* const sequences = seqStorePtr->sequencesStart; |
|
1898 | const seqDef* const sequences = seqStorePtr->sequencesStart; | |
@@ -1954,418 +1915,19 b' void ZSTD_seqToCodes(const seqStore_t* s' | |||||
1954 | mlCodeTable[seqStorePtr->longLengthPos] = MaxML; |
|
1915 | mlCodeTable[seqStorePtr->longLengthPos] = MaxML; | |
1955 | } |
|
1916 | } | |
1956 |
|
1917 | |||
1957 |
|
1918 | static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) | ||
1958 | /** |
|
|||
1959 | * -log2(x / 256) lookup table for x in [0, 256). |
|
|||
1960 | * If x == 0: Return 0 |
|
|||
1961 | * Else: Return floor(-log2(x / 256) * 256) |
|
|||
1962 | */ |
|
|||
1963 | static unsigned const kInverseProbabiltyLog256[256] = { |
|
|||
1964 | 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, |
|
|||
1965 | 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, |
|
|||
1966 | 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, |
|
|||
1967 | 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, |
|
|||
1968 | 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, |
|
|||
1969 | 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, |
|
|||
1970 | 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, |
|
|||
1971 | 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, |
|
|||
1972 | 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, |
|
|||
1973 | 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, |
|
|||
1974 | 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, |
|
|||
1975 | 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, |
|
|||
1976 | 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, |
|
|||
1977 | 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, |
|
|||
1978 | 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, |
|
|||
1979 | 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, |
|
|||
1980 | 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, |
|
|||
1981 | 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, |
|
|||
1982 | 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, |
|
|||
1983 | 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, |
|
|||
1984 | 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, |
|
|||
1985 | 5, 4, 2, 1, |
|
|||
1986 | }; |
|
|||
1987 |
|
||||
1988 |
|
||||
1989 | /** |
|
|||
1990 | * Returns the cost in bits of encoding the distribution described by count |
|
|||
1991 | * using the entropy bound. |
|
|||
1992 | */ |
|
|||
1993 | static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) |
|
|||
1994 | { |
|
|||
1995 | unsigned cost = 0; |
|
|||
1996 | unsigned s; |
|
|||
1997 | for (s = 0; s <= max; ++s) { |
|
|||
1998 | unsigned norm = (unsigned)((256 * count[s]) / total); |
|
|||
1999 | if (count[s] != 0 && norm == 0) |
|
|||
2000 | norm = 1; |
|
|||
2001 | assert(count[s] < total); |
|
|||
2002 | cost += count[s] * kInverseProbabiltyLog256[norm]; |
|
|||
2003 | } |
|
|||
2004 | return cost >> 8; |
|
|||
2005 | } |
|
|||
2006 |
|
||||
2007 |
|
||||
2008 | /** |
|
|||
2009 | * Returns the cost in bits of encoding the distribution in count using the |
|
|||
2010 | * table described by norm. The max symbol support by norm is assumed >= max. |
|
|||
2011 | * norm must be valid for every symbol with non-zero probability in count. |
|
|||
2012 | */ |
|
|||
2013 | static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, |
|
|||
2014 | unsigned const* count, unsigned const max) |
|
|||
2015 | { |
|
1919 | { | |
2016 | unsigned const shift = 8 - accuracyLog; |
|
1920 | switch (cctxParams->literalCompressionMode) { | |
2017 | size_t cost = 0; |
|
1921 | case ZSTD_lcm_huffman: | |
2018 | unsigned s; |
|
1922 | return 0; | |
2019 | assert(accuracyLog <= 8); |
|
1923 | case ZSTD_lcm_uncompressed: | |
2020 | for (s = 0; s <= max; ++s) { |
|
1924 | return 1; | |
2021 | unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; |
|
1925 | default: | |
2022 | unsigned const norm256 = normAcc << shift; |
|
1926 | assert(0 /* impossible: pre-validated */); | |
2023 | assert(norm256 > 0); |
|
1927 | /* fall-through */ | |
2024 | assert(norm256 < 256); |
|
1928 | case ZSTD_lcm_auto: | |
2025 | cost += count[s] * kInverseProbabiltyLog256[norm256]; |
|
1929 | return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); | |
2026 | } |
|
|||
2027 | return cost >> 8; |
|
|||
2028 | } |
|
|||
2029 |
|
||||
2030 |
|
||||
2031 | static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { |
|
|||
2032 | void const* ptr = ctable; |
|
|||
2033 | U16 const* u16ptr = (U16 const*)ptr; |
|
|||
2034 | U32 const maxSymbolValue = MEM_read16(u16ptr + 1); |
|
|||
2035 | return maxSymbolValue; |
|
|||
2036 | } |
|
|||
2037 |
|
||||
2038 |
|
||||
2039 | /** |
|
|||
2040 | * Returns the cost in bits of encoding the distribution in count using ctable. |
|
|||
2041 | * Returns an error if ctable cannot represent all the symbols in count. |
|
|||
2042 | */ |
|
|||
2043 | static size_t ZSTD_fseBitCost( |
|
|||
2044 | FSE_CTable const* ctable, |
|
|||
2045 | unsigned const* count, |
|
|||
2046 | unsigned const max) |
|
|||
2047 | { |
|
|||
2048 | unsigned const kAccuracyLog = 8; |
|
|||
2049 | size_t cost = 0; |
|
|||
2050 | unsigned s; |
|
|||
2051 | FSE_CState_t cstate; |
|
|||
2052 | FSE_initCState(&cstate, ctable); |
|
|||
2053 | if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { |
|
|||
2054 | DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", |
|
|||
2055 | ZSTD_getFSEMaxSymbolValue(ctable), max); |
|
|||
2056 | return ERROR(GENERIC); |
|
|||
2057 | } |
|
|||
2058 | for (s = 0; s <= max; ++s) { |
|
|||
2059 | unsigned const tableLog = cstate.stateLog; |
|
|||
2060 | unsigned const badCost = (tableLog + 1) << kAccuracyLog; |
|
|||
2061 | unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); |
|
|||
2062 | if (count[s] == 0) |
|
|||
2063 | continue; |
|
|||
2064 | if (bitCost >= badCost) { |
|
|||
2065 | DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); |
|
|||
2066 | return ERROR(GENERIC); |
|
|||
2067 | } |
|
|||
2068 | cost += count[s] * bitCost; |
|
|||
2069 | } |
|
|||
2070 | return cost >> kAccuracyLog; |
|
|||
2071 | } |
|
|||
2072 |
|
||||
2073 | /** |
|
|||
2074 | * Returns the cost in bytes of encoding the normalized count header. |
|
|||
2075 | * Returns an error if any of the helper functions return an error. |
|
|||
2076 | */ |
|
|||
2077 | static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, |
|
|||
2078 | size_t const nbSeq, unsigned const FSELog) |
|
|||
2079 | { |
|
|||
2080 | BYTE wksp[FSE_NCOUNTBOUND]; |
|
|||
2081 | S16 norm[MaxSeq + 1]; |
|
|||
2082 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); |
|
|||
2083 | CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); |
|
|||
2084 | return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); |
|
|||
2085 | } |
|
|||
2086 |
|
||||
2087 |
|
||||
2088 | typedef enum { |
|
|||
2089 | ZSTD_defaultDisallowed = 0, |
|
|||
2090 | ZSTD_defaultAllowed = 1 |
|
|||
2091 | } ZSTD_defaultPolicy_e; |
|
|||
2092 |
|
||||
2093 | MEM_STATIC symbolEncodingType_e |
|
|||
2094 | ZSTD_selectEncodingType( |
|
|||
2095 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, |
|
|||
2096 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, |
|
|||
2097 | FSE_CTable const* prevCTable, |
|
|||
2098 | short const* defaultNorm, U32 defaultNormLog, |
|
|||
2099 | ZSTD_defaultPolicy_e const isDefaultAllowed, |
|
|||
2100 | ZSTD_strategy const strategy) |
|
|||
2101 | { |
|
|||
2102 | ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); |
|
|||
2103 | if (mostFrequent == nbSeq) { |
|
|||
2104 | *repeatMode = FSE_repeat_none; |
|
|||
2105 | if (isDefaultAllowed && nbSeq <= 2) { |
|
|||
2106 | /* Prefer set_basic over set_rle when there are 2 or less symbols, |
|
|||
2107 | * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. |
|
|||
2108 | * If basic encoding isn't possible, always choose RLE. |
|
|||
2109 | */ |
|
|||
2110 | DEBUGLOG(5, "Selected set_basic"); |
|
|||
2111 | return set_basic; |
|
|||
2112 | } |
|
|||
2113 | DEBUGLOG(5, "Selected set_rle"); |
|
|||
2114 | return set_rle; |
|
|||
2115 | } |
|
1930 | } | |
2116 | if (strategy < ZSTD_lazy) { |
|
|||
2117 | if (isDefaultAllowed) { |
|
|||
2118 | size_t const staticFse_nbSeq_max = 1000; |
|
|||
2119 | size_t const mult = 10 - strategy; |
|
|||
2120 | size_t const baseLog = 3; |
|
|||
2121 | size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ |
|
|||
2122 | assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ |
|
|||
2123 | assert(mult <= 9 && mult >= 7); |
|
|||
2124 | if ( (*repeatMode == FSE_repeat_valid) |
|
|||
2125 | && (nbSeq < staticFse_nbSeq_max) ) { |
|
|||
2126 | DEBUGLOG(5, "Selected set_repeat"); |
|
|||
2127 | return set_repeat; |
|
|||
2128 | } |
|
|||
2129 | if ( (nbSeq < dynamicFse_nbSeq_min) |
|
|||
2130 | || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { |
|
|||
2131 | DEBUGLOG(5, "Selected set_basic"); |
|
|||
2132 | /* The format allows default tables to be repeated, but it isn't useful. |
|
|||
2133 | * When using simple heuristics to select encoding type, we don't want |
|
|||
2134 | * to confuse these tables with dictionaries. When running more careful |
|
|||
2135 | * analysis, we don't need to waste time checking both repeating tables |
|
|||
2136 | * and default tables. |
|
|||
2137 | */ |
|
|||
2138 | *repeatMode = FSE_repeat_none; |
|
|||
2139 | return set_basic; |
|
|||
2140 | } |
|
|||
2141 | } |
|
|||
2142 | } else { |
|
|||
2143 | size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); |
|
|||
2144 | size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); |
|
|||
2145 | size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); |
|
|||
2146 | size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); |
|
|||
2147 |
|
||||
2148 | if (isDefaultAllowed) { |
|
|||
2149 | assert(!ZSTD_isError(basicCost)); |
|
|||
2150 | assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); |
|
|||
2151 | } |
|
|||
2152 | assert(!ZSTD_isError(NCountCost)); |
|
|||
2153 | assert(compressedCost < ERROR(maxCode)); |
|
|||
2154 | DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", |
|
|||
2155 | (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); |
|
|||
2156 | if (basicCost <= repeatCost && basicCost <= compressedCost) { |
|
|||
2157 | DEBUGLOG(5, "Selected set_basic"); |
|
|||
2158 | assert(isDefaultAllowed); |
|
|||
2159 | *repeatMode = FSE_repeat_none; |
|
|||
2160 | return set_basic; |
|
|||
2161 | } |
|
|||
2162 | if (repeatCost <= compressedCost) { |
|
|||
2163 | DEBUGLOG(5, "Selected set_repeat"); |
|
|||
2164 | assert(!ZSTD_isError(repeatCost)); |
|
|||
2165 | return set_repeat; |
|
|||
2166 | } |
|
|||
2167 | assert(compressedCost < basicCost && compressedCost < repeatCost); |
|
|||
2168 | } |
|
|||
2169 | DEBUGLOG(5, "Selected set_compressed"); |
|
|||
2170 | *repeatMode = FSE_repeat_check; |
|
|||
2171 | return set_compressed; |
|
|||
2172 | } |
|
|||
2173 |
|
||||
2174 | MEM_STATIC size_t |
|
|||
2175 | ZSTD_buildCTable(void* dst, size_t dstCapacity, |
|
|||
2176 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, |
|
|||
2177 | unsigned* count, U32 max, |
|
|||
2178 | const BYTE* codeTable, size_t nbSeq, |
|
|||
2179 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, |
|
|||
2180 | const FSE_CTable* prevCTable, size_t prevCTableSize, |
|
|||
2181 | void* workspace, size_t workspaceSize) |
|
|||
2182 | { |
|
|||
2183 | BYTE* op = (BYTE*)dst; |
|
|||
2184 | const BYTE* const oend = op + dstCapacity; |
|
|||
2185 | DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); |
|
|||
2186 |
|
||||
2187 | switch (type) { |
|
|||
2188 | case set_rle: |
|
|||
2189 | CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max)); |
|
|||
2190 | if (dstCapacity==0) return ERROR(dstSize_tooSmall); |
|
|||
2191 | *op = codeTable[0]; |
|
|||
2192 | return 1; |
|
|||
2193 | case set_repeat: |
|
|||
2194 | memcpy(nextCTable, prevCTable, prevCTableSize); |
|
|||
2195 | return 0; |
|
|||
2196 | case set_basic: |
|
|||
2197 | CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ |
|
|||
2198 | return 0; |
|
|||
2199 | case set_compressed: { |
|
|||
2200 | S16 norm[MaxSeq + 1]; |
|
|||
2201 | size_t nbSeq_1 = nbSeq; |
|
|||
2202 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); |
|
|||
2203 | if (count[codeTable[nbSeq-1]] > 1) { |
|
|||
2204 | count[codeTable[nbSeq-1]]--; |
|
|||
2205 | nbSeq_1--; |
|
|||
2206 | } |
|
|||
2207 | assert(nbSeq_1 > 1); |
|
|||
2208 | CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); |
|
|||
2209 | { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ |
|
|||
2210 | if (FSE_isError(NCountSize)) return NCountSize; |
|
|||
2211 | CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); |
|
|||
2212 | return NCountSize; |
|
|||
2213 | } |
|
|||
2214 | } |
|
|||
2215 | default: return assert(0), ERROR(GENERIC); |
|
|||
2216 | } |
|
|||
2217 | } |
|
|||
2218 |
|
||||
2219 | FORCE_INLINE_TEMPLATE size_t |
|
|||
2220 | ZSTD_encodeSequences_body( |
|
|||
2221 | void* dst, size_t dstCapacity, |
|
|||
2222 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, |
|
|||
2223 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, |
|
|||
2224 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, |
|
|||
2225 | seqDef const* sequences, size_t nbSeq, int longOffsets) |
|
|||
2226 | { |
|
|||
2227 | BIT_CStream_t blockStream; |
|
|||
2228 | FSE_CState_t stateMatchLength; |
|
|||
2229 | FSE_CState_t stateOffsetBits; |
|
|||
2230 | FSE_CState_t stateLitLength; |
|
|||
2231 |
|
||||
2232 | CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */ |
|
|||
2233 | DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", |
|
|||
2234 | (int)(blockStream.endPtr - blockStream.startPtr), |
|
|||
2235 | (unsigned)dstCapacity); |
|
|||
2236 |
|
||||
2237 | /* first symbols */ |
|
|||
2238 | FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); |
|
|||
2239 | FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); |
|
|||
2240 | FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); |
|
|||
2241 | BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); |
|
|||
2242 | if (MEM_32bits()) BIT_flushBits(&blockStream); |
|
|||
2243 | BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); |
|
|||
2244 | if (MEM_32bits()) BIT_flushBits(&blockStream); |
|
|||
2245 | if (longOffsets) { |
|
|||
2246 | U32 const ofBits = ofCodeTable[nbSeq-1]; |
|
|||
2247 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); |
|
|||
2248 | if (extraBits) { |
|
|||
2249 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); |
|
|||
2250 | BIT_flushBits(&blockStream); |
|
|||
2251 | } |
|
|||
2252 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, |
|
|||
2253 | ofBits - extraBits); |
|
|||
2254 | } else { |
|
|||
2255 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); |
|
|||
2256 | } |
|
|||
2257 | BIT_flushBits(&blockStream); |
|
|||
2258 |
|
||||
2259 | { size_t n; |
|
|||
2260 | for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ |
|
|||
2261 | BYTE const llCode = llCodeTable[n]; |
|
|||
2262 | BYTE const ofCode = ofCodeTable[n]; |
|
|||
2263 | BYTE const mlCode = mlCodeTable[n]; |
|
|||
2264 | U32 const llBits = LL_bits[llCode]; |
|
|||
2265 | U32 const ofBits = ofCode; |
|
|||
2266 | U32 const mlBits = ML_bits[mlCode]; |
|
|||
2267 | DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", |
|
|||
2268 | (unsigned)sequences[n].litLength, |
|
|||
2269 | (unsigned)sequences[n].matchLength + MINMATCH, |
|
|||
2270 | (unsigned)sequences[n].offset); |
|
|||
2271 | /* 32b*/ /* 64b*/ |
|
|||
2272 | /* (7)*/ /* (7)*/ |
|
|||
2273 | FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ |
|
|||
2274 | FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ |
|
|||
2275 | if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ |
|
|||
2276 | FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ |
|
|||
2277 | if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) |
|
|||
2278 | BIT_flushBits(&blockStream); /* (7)*/ |
|
|||
2279 | BIT_addBits(&blockStream, sequences[n].litLength, llBits); |
|
|||
2280 | if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); |
|
|||
2281 | BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); |
|
|||
2282 | if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); |
|
|||
2283 | if (longOffsets) { |
|
|||
2284 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); |
|
|||
2285 | if (extraBits) { |
|
|||
2286 | BIT_addBits(&blockStream, sequences[n].offset, extraBits); |
|
|||
2287 | BIT_flushBits(&blockStream); /* (7)*/ |
|
|||
2288 | } |
|
|||
2289 | BIT_addBits(&blockStream, sequences[n].offset >> extraBits, |
|
|||
2290 | ofBits - extraBits); /* 31 */ |
|
|||
2291 | } else { |
|
|||
2292 | BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ |
|
|||
2293 | } |
|
|||
2294 | BIT_flushBits(&blockStream); /* (7)*/ |
|
|||
2295 | DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); |
|
|||
2296 | } } |
|
|||
2297 |
|
||||
2298 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); |
|
|||
2299 | FSE_flushCState(&blockStream, &stateMatchLength); |
|
|||
2300 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); |
|
|||
2301 | FSE_flushCState(&blockStream, &stateOffsetBits); |
|
|||
2302 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); |
|
|||
2303 | FSE_flushCState(&blockStream, &stateLitLength); |
|
|||
2304 |
|
||||
2305 | { size_t const streamSize = BIT_closeCStream(&blockStream); |
|
|||
2306 | if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ |
|
|||
2307 | return streamSize; |
|
|||
2308 | } |
|
|||
2309 | } |
|
|||
2310 |
|
||||
2311 | static size_t |
|
|||
2312 | ZSTD_encodeSequences_default( |
|
|||
2313 | void* dst, size_t dstCapacity, |
|
|||
2314 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, |
|
|||
2315 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, |
|
|||
2316 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, |
|
|||
2317 | seqDef const* sequences, size_t nbSeq, int longOffsets) |
|
|||
2318 | { |
|
|||
2319 | return ZSTD_encodeSequences_body(dst, dstCapacity, |
|
|||
2320 | CTable_MatchLength, mlCodeTable, |
|
|||
2321 | CTable_OffsetBits, ofCodeTable, |
|
|||
2322 | CTable_LitLength, llCodeTable, |
|
|||
2323 | sequences, nbSeq, longOffsets); |
|
|||
2324 | } |
|
|||
2325 |
|
||||
2326 |
|
||||
2327 | #if DYNAMIC_BMI2 |
|
|||
2328 |
|
||||
2329 | static TARGET_ATTRIBUTE("bmi2") size_t |
|
|||
2330 | ZSTD_encodeSequences_bmi2( |
|
|||
2331 | void* dst, size_t dstCapacity, |
|
|||
2332 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, |
|
|||
2333 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, |
|
|||
2334 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, |
|
|||
2335 | seqDef const* sequences, size_t nbSeq, int longOffsets) |
|
|||
2336 | { |
|
|||
2337 | return ZSTD_encodeSequences_body(dst, dstCapacity, |
|
|||
2338 | CTable_MatchLength, mlCodeTable, |
|
|||
2339 | CTable_OffsetBits, ofCodeTable, |
|
|||
2340 | CTable_LitLength, llCodeTable, |
|
|||
2341 | sequences, nbSeq, longOffsets); |
|
|||
2342 | } |
|
|||
2343 |
|
||||
2344 | #endif |
|
|||
2345 |
|
||||
2346 | static size_t ZSTD_encodeSequences( |
|
|||
2347 | void* dst, size_t dstCapacity, |
|
|||
2348 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, |
|
|||
2349 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, |
|
|||
2350 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, |
|
|||
2351 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) |
|
|||
2352 | { |
|
|||
2353 | DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); |
|
|||
2354 | #if DYNAMIC_BMI2 |
|
|||
2355 | if (bmi2) { |
|
|||
2356 | return ZSTD_encodeSequences_bmi2(dst, dstCapacity, |
|
|||
2357 | CTable_MatchLength, mlCodeTable, |
|
|||
2358 | CTable_OffsetBits, ofCodeTable, |
|
|||
2359 | CTable_LitLength, llCodeTable, |
|
|||
2360 | sequences, nbSeq, longOffsets); |
|
|||
2361 | } |
|
|||
2362 | #endif |
|
|||
2363 | (void)bmi2; |
|
|||
2364 | return ZSTD_encodeSequences_default(dst, dstCapacity, |
|
|||
2365 | CTable_MatchLength, mlCodeTable, |
|
|||
2366 | CTable_OffsetBits, ofCodeTable, |
|
|||
2367 | CTable_LitLength, llCodeTable, |
|
|||
2368 | sequences, nbSeq, longOffsets); |
|
|||
2369 | } |
|
1931 | } | |
2370 |
|
1932 | |||
2371 | /* ZSTD_compressSequences_internal(): |
|
1933 | /* ZSTD_compressSequences_internal(): | |
@@ -2393,46 +1955,48 b' ZSTD_compressSequences_internal(seqStore' | |||||
2393 | BYTE* const ostart = (BYTE*)dst; |
|
1955 | BYTE* const ostart = (BYTE*)dst; | |
2394 | BYTE* const oend = ostart + dstCapacity; |
|
1956 | BYTE* const oend = ostart + dstCapacity; | |
2395 | BYTE* op = ostart; |
|
1957 | BYTE* op = ostart; | |
2396 | size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; |
|
1958 | size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); | |
2397 | BYTE* seqHead; |
|
1959 | BYTE* seqHead; | |
2398 | BYTE* lastNCount = NULL; |
|
1960 | BYTE* lastNCount = NULL; | |
2399 |
|
1961 | |||
|
1962 | DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); | |||
2400 | ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); |
|
1963 | ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); | |
2401 | DEBUGLOG(5, "ZSTD_compressSequences_internal"); |
|
|||
2402 |
|
1964 | |||
2403 | /* Compress literals */ |
|
1965 | /* Compress literals */ | |
2404 | { const BYTE* const literals = seqStorePtr->litStart; |
|
1966 | { const BYTE* const literals = seqStorePtr->litStart; | |
2405 | size_t const litSize = seqStorePtr->lit - literals; |
|
1967 | size_t const litSize = (size_t)(seqStorePtr->lit - literals); | |
2406 | int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); |
|
|||
2407 | size_t const cSize = ZSTD_compressLiterals( |
|
1968 | size_t const cSize = ZSTD_compressLiterals( | |
2408 | &prevEntropy->huf, &nextEntropy->huf, |
|
1969 | &prevEntropy->huf, &nextEntropy->huf, | |
2409 |
cctxParams->cParams.strategy, |
|
1970 | cctxParams->cParams.strategy, | |
|
1971 | ZSTD_disableLiteralsCompression(cctxParams), | |||
2410 | op, dstCapacity, |
|
1972 | op, dstCapacity, | |
2411 | literals, litSize, |
|
1973 | literals, litSize, | |
2412 | workspace, wkspSize, |
|
1974 | workspace, wkspSize, | |
2413 | bmi2); |
|
1975 | bmi2); | |
2414 | if (ZSTD_isError(cSize)) |
|
1976 | FORWARD_IF_ERROR(cSize); | |
2415 | return cSize; |
|
|||
2416 | assert(cSize <= dstCapacity); |
|
1977 | assert(cSize <= dstCapacity); | |
2417 | op += cSize; |
|
1978 | op += cSize; | |
2418 | } |
|
1979 | } | |
2419 |
|
1980 | |||
2420 | /* Sequences Header */ |
|
1981 | /* Sequences Header */ | |
2421 |
|
|
1982 | RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, | |
|
1983 | dstSize_tooSmall); | |||
2422 | if (nbSeq < 0x7F) |
|
1984 | if (nbSeq < 0x7F) | |
2423 | *op++ = (BYTE)nbSeq; |
|
1985 | *op++ = (BYTE)nbSeq; | |
2424 | else if (nbSeq < LONGNBSEQ) |
|
1986 | else if (nbSeq < LONGNBSEQ) | |
2425 | op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; |
|
1987 | op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; | |
2426 | else |
|
1988 | else | |
2427 | op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; |
|
1989 | op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; | |
|
1990 | assert(op <= oend); | |||
2428 | if (nbSeq==0) { |
|
1991 | if (nbSeq==0) { | |
2429 | /* Copy the old tables over as if we repeated them */ |
|
1992 | /* Copy the old tables over as if we repeated them */ | |
2430 | memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); |
|
1993 | memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); | |
2431 | return op - ostart; |
|
1994 | return (size_t)(op - ostart); | |
2432 | } |
|
1995 | } | |
2433 |
|
1996 | |||
2434 | /* seqHead : flags for FSE encoding type */ |
|
1997 | /* seqHead : flags for FSE encoding type */ | |
2435 | seqHead = op++; |
|
1998 | seqHead = op++; | |
|
1999 | assert(op <= oend); | |||
2436 |
|
2000 | |||
2437 | /* convert length/distances into codes */ |
|
2001 | /* convert length/distances into codes */ | |
2438 | ZSTD_seqToCodes(seqStorePtr); |
|
2002 | ZSTD_seqToCodes(seqStorePtr); | |
@@ -2448,14 +2012,15 b' ZSTD_compressSequences_internal(seqStore' | |||||
2448 | ZSTD_defaultAllowed, strategy); |
|
2012 | ZSTD_defaultAllowed, strategy); | |
2449 | assert(set_basic < set_compressed && set_rle < set_compressed); |
|
2013 | assert(set_basic < set_compressed && set_rle < set_compressed); | |
2450 | assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2014 | assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ | |
2451 | { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, |
|
2015 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, | |
2452 | count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, |
|
2016 | count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, | |
2453 | prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), |
|
2017 | prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), | |
2454 | workspace, wkspSize); |
|
2018 | workspace, wkspSize); | |
2455 | if (ZSTD_isError(countSize)) return countSize; |
|
2019 | FORWARD_IF_ERROR(countSize); | |
2456 | if (LLtype == set_compressed) |
|
2020 | if (LLtype == set_compressed) | |
2457 | lastNCount = op; |
|
2021 | lastNCount = op; | |
2458 | op += countSize; |
|
2022 | op += countSize; | |
|
2023 | assert(op <= oend); | |||
2459 | } } |
|
2024 | } } | |
2460 | /* build CTable for Offsets */ |
|
2025 | /* build CTable for Offsets */ | |
2461 | { unsigned max = MaxOff; |
|
2026 | { unsigned max = MaxOff; | |
@@ -2470,14 +2035,15 b' ZSTD_compressSequences_internal(seqStore' | |||||
2470 | OF_defaultNorm, OF_defaultNormLog, |
|
2035 | OF_defaultNorm, OF_defaultNormLog, | |
2471 | defaultPolicy, strategy); |
|
2036 | defaultPolicy, strategy); | |
2472 | assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2037 | assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ | |
2473 | { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, |
|
2038 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, | |
2474 | count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, |
|
2039 | count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, | |
2475 | prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), |
|
2040 | prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), | |
2476 | workspace, wkspSize); |
|
2041 | workspace, wkspSize); | |
2477 | if (ZSTD_isError(countSize)) return countSize; |
|
2042 | FORWARD_IF_ERROR(countSize); | |
2478 | if (Offtype == set_compressed) |
|
2043 | if (Offtype == set_compressed) | |
2479 | lastNCount = op; |
|
2044 | lastNCount = op; | |
2480 | op += countSize; |
|
2045 | op += countSize; | |
|
2046 | assert(op <= oend); | |||
2481 | } } |
|
2047 | } } | |
2482 | /* build CTable for MatchLengths */ |
|
2048 | /* build CTable for MatchLengths */ | |
2483 | { unsigned max = MaxML; |
|
2049 | { unsigned max = MaxML; | |
@@ -2490,29 +2056,31 b' ZSTD_compressSequences_internal(seqStore' | |||||
2490 | ML_defaultNorm, ML_defaultNormLog, |
|
2056 | ML_defaultNorm, ML_defaultNormLog, | |
2491 | ZSTD_defaultAllowed, strategy); |
|
2057 | ZSTD_defaultAllowed, strategy); | |
2492 | assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2058 | assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ | |
2493 | { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, |
|
2059 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, | |
2494 | count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, |
|
2060 | count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, | |
2495 | prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), |
|
2061 | prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), | |
2496 | workspace, wkspSize); |
|
2062 | workspace, wkspSize); | |
2497 | if (ZSTD_isError(countSize)) return countSize; |
|
2063 | FORWARD_IF_ERROR(countSize); | |
2498 | if (MLtype == set_compressed) |
|
2064 | if (MLtype == set_compressed) | |
2499 | lastNCount = op; |
|
2065 | lastNCount = op; | |
2500 | op += countSize; |
|
2066 | op += countSize; | |
|
2067 | assert(op <= oend); | |||
2501 | } } |
|
2068 | } } | |
2502 |
|
2069 | |||
2503 | *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); |
|
2070 | *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); | |
2504 |
|
2071 | |||
2505 | { size_t const bitstreamSize = ZSTD_encodeSequences( |
|
2072 | { size_t const bitstreamSize = ZSTD_encodeSequences( | |
2506 | op, oend - op, |
|
2073 | op, (size_t)(oend - op), | |
2507 | CTable_MatchLength, mlCodeTable, |
|
2074 | CTable_MatchLength, mlCodeTable, | |
2508 | CTable_OffsetBits, ofCodeTable, |
|
2075 | CTable_OffsetBits, ofCodeTable, | |
2509 | CTable_LitLength, llCodeTable, |
|
2076 | CTable_LitLength, llCodeTable, | |
2510 | sequences, nbSeq, |
|
2077 | sequences, nbSeq, | |
2511 | longOffsets, bmi2); |
|
2078 | longOffsets, bmi2); | |
2512 | if (ZSTD_isError(bitstreamSize)) return bitstreamSize; |
|
2079 | FORWARD_IF_ERROR(bitstreamSize); | |
2513 | op += bitstreamSize; |
|
2080 | op += bitstreamSize; | |
|
2081 | assert(op <= oend); | |||
2514 | /* zstd versions <= 1.3.4 mistakenly report corruption when |
|
2082 | /* zstd versions <= 1.3.4 mistakenly report corruption when | |
2515 |
* FSE_readNCount() rec |
|
2083 | * FSE_readNCount() receives a buffer < 4 bytes. | |
2516 | * Fixed by https://github.com/facebook/zstd/pull/1146. |
|
2084 | * Fixed by https://github.com/facebook/zstd/pull/1146. | |
2517 | * This can happen when the last set_compressed table present is 2 |
|
2085 | * This can happen when the last set_compressed table present is 2 | |
2518 | * bytes and the bitstream is only one byte. |
|
2086 | * bytes and the bitstream is only one byte. | |
@@ -2529,7 +2097,7 b' ZSTD_compressSequences_internal(seqStore' | |||||
2529 | } |
|
2097 | } | |
2530 |
|
2098 | |||
2531 | DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); |
|
2099 | DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); | |
2532 | return op - ostart; |
|
2100 | return (size_t)(op - ostart); | |
2533 | } |
|
2101 | } | |
2534 |
|
2102 | |||
2535 | MEM_STATIC size_t |
|
2103 | MEM_STATIC size_t | |
@@ -2552,7 +2120,7 b' ZSTD_compressSequences(seqStore_t* seqSt' | |||||
2552 | */ |
|
2120 | */ | |
2553 | if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) |
|
2121 | if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) | |
2554 | return 0; /* block not compressed */ |
|
2122 | return 0; /* block not compressed */ | |
2555 | if (ZSTD_isError(cSize)) return cSize; |
|
2123 | FORWARD_IF_ERROR(cSize); | |
2556 |
|
2124 | |||
2557 | /* Check compressibility */ |
|
2125 | /* Check compressibility */ | |
2558 | { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); |
|
2126 | { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); | |
@@ -2622,27 +2190,24 b' void ZSTD_resetSeqStore(seqStore_t* ssPt' | |||||
2622 | ssPtr->longLengthID = 0; |
|
2190 | ssPtr->longLengthID = 0; | |
2623 | } |
|
2191 | } | |
2624 |
|
2192 | |||
2625 | static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, |
|
2193 | typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; | |
2626 | void* dst, size_t dstCapacity, |
|
2194 | ||
2627 | const void* src, size_t srcSize) |
|
2195 | static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) | |
2628 | { |
|
2196 | { | |
2629 | ZSTD_matchState_t* const ms = &zc->blockState.matchState; |
|
2197 | ZSTD_matchState_t* const ms = &zc->blockState.matchState; | |
2630 | size_t cSize; |
|
2198 | DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); | |
2631 | DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", |
|
|||
2632 | (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate); |
|
|||
2633 | assert(srcSize <= ZSTD_BLOCKSIZE_MAX); |
|
2199 | assert(srcSize <= ZSTD_BLOCKSIZE_MAX); | |
2634 |
|
||||
2635 | /* Assert that we have correctly flushed the ctx params into the ms's copy */ |
|
2200 | /* Assert that we have correctly flushed the ctx params into the ms's copy */ | |
2636 | ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); |
|
2201 | ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); | |
2637 |
|
||||
2638 | if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { |
|
2202 | if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { | |
2639 | ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); |
|
2203 | ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); | |
2640 | cSize = 0; |
|
2204 | return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ | |
2641 | goto out; /* don't even attempt compression below a certain srcSize */ |
|
|||
2642 | } |
|
2205 | } | |
2643 | ZSTD_resetSeqStore(&(zc->seqStore)); |
|
2206 | ZSTD_resetSeqStore(&(zc->seqStore)); | |
2644 |
|
|
2207 | /* required for optimal parser to read stats from dictionary */ | |
2645 |
|
2208 | ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; | ||
|
2209 | /* tell the optimal parser how we expect to compress literals */ | |||
|
2210 | ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; | |||
2646 | /* a gap between an attached dict and the current window is not safe, |
|
2211 | /* a gap between an attached dict and the current window is not safe, | |
2647 | * they must remain adjacent, |
|
2212 | * they must remain adjacent, | |
2648 | * and when that stops being the case, the dict must be unset */ |
|
2213 | * and when that stops being the case, the dict must be unset */ | |
@@ -2679,7 +2244,7 b' static size_t ZSTD_compressBlock_interna' | |||||
2679 | ldmSeqStore.seq = zc->ldmSequences; |
|
2244 | ldmSeqStore.seq = zc->ldmSequences; | |
2680 | ldmSeqStore.capacity = zc->maxNbLdmSequences; |
|
2245 | ldmSeqStore.capacity = zc->maxNbLdmSequences; | |
2681 | /* Updates ldmSeqStore.size */ |
|
2246 | /* Updates ldmSeqStore.size */ | |
2682 |
|
|
2247 | FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, | |
2683 | &zc->appliedParams.ldmParams, |
|
2248 | &zc->appliedParams.ldmParams, | |
2684 | src, srcSize)); |
|
2249 | src, srcSize)); | |
2685 | /* Updates ldmSeqStore.pos */ |
|
2250 | /* Updates ldmSeqStore.pos */ | |
@@ -2696,6 +2261,22 b' static size_t ZSTD_compressBlock_interna' | |||||
2696 | { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; |
|
2261 | { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; | |
2697 | ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); |
|
2262 | ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); | |
2698 | } } |
|
2263 | } } | |
|
2264 | return ZSTDbss_compress; | |||
|
2265 | } | |||
|
2266 | ||||
|
2267 | static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, | |||
|
2268 | void* dst, size_t dstCapacity, | |||
|
2269 | const void* src, size_t srcSize) | |||
|
2270 | { | |||
|
2271 | size_t cSize; | |||
|
2272 | DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", | |||
|
2273 | (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, | |||
|
2274 | (unsigned)zc->blockState.matchState.nextToUpdate); | |||
|
2275 | ||||
|
2276 | { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); | |||
|
2277 | FORWARD_IF_ERROR(bss); | |||
|
2278 | if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } | |||
|
2279 | } | |||
2699 |
|
2280 | |||
2700 | /* encode sequences and literals */ |
|
2281 | /* encode sequences and literals */ | |
2701 | cSize = ZSTD_compressSequences(&zc->seqStore, |
|
2282 | cSize = ZSTD_compressSequences(&zc->seqStore, | |
@@ -2724,6 +2305,25 b' out:' | |||||
2724 | } |
|
2305 | } | |
2725 |
|
2306 | |||
2726 |
|
2307 | |||
|
2308 | static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend) | |||
|
2309 | { | |||
|
2310 | if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { | |||
|
2311 | U32 const maxDist = (U32)1 << params->cParams.windowLog; | |||
|
2312 | U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); | |||
|
2313 | U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); | |||
|
2314 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); | |||
|
2315 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); | |||
|
2316 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); | |||
|
2317 | ZSTD_reduceIndex(ms, params, correction); | |||
|
2318 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; | |||
|
2319 | else ms->nextToUpdate -= correction; | |||
|
2320 | /* invalidate dictionaries on overflow correction */ | |||
|
2321 | ms->loadedDictEnd = 0; | |||
|
2322 | ms->dictMatchState = NULL; | |||
|
2323 | } | |||
|
2324 | } | |||
|
2325 | ||||
|
2326 | ||||
2727 | /*! ZSTD_compress_frameChunk() : |
|
2327 | /*! ZSTD_compress_frameChunk() : | |
2728 | * Compress a chunk of data into one or multiple blocks. |
|
2328 | * Compress a chunk of data into one or multiple blocks. | |
2729 | * All blocks will be terminated, all input will be consumed. |
|
2329 | * All blocks will be terminated, all input will be consumed. | |
@@ -2742,7 +2342,7 b' static size_t ZSTD_compress_frameChunk (' | |||||
2742 | BYTE* const ostart = (BYTE*)dst; |
|
2342 | BYTE* const ostart = (BYTE*)dst; | |
2743 | BYTE* op = ostart; |
|
2343 | BYTE* op = ostart; | |
2744 | U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; |
|
2344 | U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; | |
2745 |
assert(cctx->appliedParams.cParams.windowLog <= |
|
2345 | assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); | |
2746 |
|
2346 | |||
2747 | DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); |
|
2347 | DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); | |
2748 | if (cctx->appliedParams.fParams.checksumFlag && srcSize) |
|
2348 | if (cctx->appliedParams.fParams.checksumFlag && srcSize) | |
@@ -2752,33 +2352,25 b' static size_t ZSTD_compress_frameChunk (' | |||||
2752 | ZSTD_matchState_t* const ms = &cctx->blockState.matchState; |
|
2352 | ZSTD_matchState_t* const ms = &cctx->blockState.matchState; | |
2753 | U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); |
|
2353 | U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); | |
2754 |
|
2354 | |||
2755 |
|
|
2355 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, | |
2756 | return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ |
|
2356 | dstSize_tooSmall, | |
|
2357 | "not enough space to store compressed block"); | |||
2757 | if (remaining < blockSize) blockSize = remaining; |
|
2358 | if (remaining < blockSize) blockSize = remaining; | |
2758 |
|
2359 | |||
2759 |
|
|
2360 | ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize); | |
2760 | U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); |
|
2361 | ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); | |
2761 | U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); |
|
2362 | ||
2762 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); |
|
2363 | /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ | |
2763 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); |
|
|||
2764 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); |
|
|||
2765 | ZSTD_reduceIndex(cctx, correction); |
|
|||
2766 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; |
|
|||
2767 | else ms->nextToUpdate -= correction; |
|
|||
2768 | ms->loadedDictEnd = 0; |
|
|||
2769 | ms->dictMatchState = NULL; |
|
|||
2770 | } |
|
|||
2771 | ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); |
|
|||
2772 | if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; |
|
2364 | if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; | |
2773 |
|
2365 | |||
2774 | { size_t cSize = ZSTD_compressBlock_internal(cctx, |
|
2366 | { size_t cSize = ZSTD_compressBlock_internal(cctx, | |
2775 | op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, |
|
2367 | op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, | |
2776 | ip, blockSize); |
|
2368 | ip, blockSize); | |
2777 | if (ZSTD_isError(cSize)) return cSize; |
|
2369 | FORWARD_IF_ERROR(cSize); | |
2778 |
|
2370 | |||
2779 | if (cSize == 0) { /* block is not compressible */ |
|
2371 | if (cSize == 0) { /* block is not compressible */ | |
2780 | cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); |
|
2372 | cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); | |
2781 | if (ZSTD_isError(cSize)) return cSize; |
|
2373 | FORWARD_IF_ERROR(cSize); | |
2782 | } else { |
|
2374 | } else { | |
2783 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); |
|
2375 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); | |
2784 | MEM_writeLE24(op, cBlockHeader24); |
|
2376 | MEM_writeLE24(op, cBlockHeader24); | |
@@ -2796,7 +2388,7 b' static size_t ZSTD_compress_frameChunk (' | |||||
2796 | } } |
|
2388 | } } | |
2797 |
|
2389 | |||
2798 | if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; |
|
2390 | if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; | |
2799 | return op-ostart; |
|
2391 | return (size_t)(op-ostart); | |
2800 | } |
|
2392 | } | |
2801 |
|
2393 | |||
2802 |
|
2394 | |||
@@ -2811,11 +2403,11 b' static size_t ZSTD_writeFrameHeader(void' | |||||
2811 | BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); |
|
2403 | BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); | |
2812 | U32 const fcsCode = params.fParams.contentSizeFlag ? |
|
2404 | U32 const fcsCode = params.fParams.contentSizeFlag ? | |
2813 | (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ |
|
2405 | (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ | |
2814 | BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); |
|
2406 | BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); | |
2815 | size_t pos=0; |
|
2407 | size_t pos=0; | |
2816 |
|
2408 | |||
2817 | assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); |
|
2409 | assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); | |
2818 |
|
|
2410 | RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); | |
2819 | DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", |
|
2411 | DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", | |
2820 | !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); |
|
2412 | !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); | |
2821 |
|
2413 | |||
@@ -2823,7 +2415,7 b' static size_t ZSTD_writeFrameHeader(void' | |||||
2823 | MEM_writeLE32(dst, ZSTD_MAGICNUMBER); |
|
2415 | MEM_writeLE32(dst, ZSTD_MAGICNUMBER); | |
2824 | pos = 4; |
|
2416 | pos = 4; | |
2825 | } |
|
2417 | } | |
2826 | op[pos++] = frameHeaderDecriptionByte; |
|
2418 | op[pos++] = frameHeaderDescriptionByte; | |
2827 | if (!singleSegment) op[pos++] = windowLogByte; |
|
2419 | if (!singleSegment) op[pos++] = windowLogByte; | |
2828 | switch(dictIDSizeCode) |
|
2420 | switch(dictIDSizeCode) | |
2829 | { |
|
2421 | { | |
@@ -2847,11 +2439,11 b' static size_t ZSTD_writeFrameHeader(void' | |||||
2847 | /* ZSTD_writeLastEmptyBlock() : |
|
2439 | /* ZSTD_writeLastEmptyBlock() : | |
2848 | * output an empty Block with end-of-frame mark to complete a frame |
|
2440 | * output an empty Block with end-of-frame mark to complete a frame | |
2849 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) |
|
2441 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) | |
2850 | * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize) |
|
2442 | * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) | |
2851 | */ |
|
2443 | */ | |
2852 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) |
|
2444 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) | |
2853 | { |
|
2445 | { | |
2854 |
|
|
2446 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall); | |
2855 | { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ |
|
2447 | { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ | |
2856 | MEM_writeLE24(dst, cBlockHeader24); |
|
2448 | MEM_writeLE24(dst, cBlockHeader24); | |
2857 | return ZSTD_blockHeaderSize; |
|
2449 | return ZSTD_blockHeaderSize; | |
@@ -2860,10 +2452,9 b' size_t ZSTD_writeLastEmptyBlock(void* ds' | |||||
2860 |
|
2452 | |||
2861 | size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) |
|
2453 | size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) | |
2862 | { |
|
2454 | { | |
2863 |
|
|
2455 | RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong); | |
2864 | return ERROR(stage_wrong); |
|
2456 | RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, | |
2865 | if (cctx->appliedParams.ldmParams.enableLdm) |
|
2457 | parameter_unsupported); | |
2866 | return ERROR(parameter_unsupported); |
|
|||
2867 | cctx->externSeqStore.seq = seq; |
|
2458 | cctx->externSeqStore.seq = seq; | |
2868 | cctx->externSeqStore.size = nbSeq; |
|
2459 | cctx->externSeqStore.size = nbSeq; | |
2869 | cctx->externSeqStore.capacity = nbSeq; |
|
2460 | cctx->externSeqStore.capacity = nbSeq; | |
@@ -2882,12 +2473,14 b' static size_t ZSTD_compressContinue_inte' | |||||
2882 |
|
2473 | |||
2883 | DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", |
|
2474 | DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", | |
2884 | cctx->stage, (unsigned)srcSize); |
|
2475 | cctx->stage, (unsigned)srcSize); | |
2885 | if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ |
|
2476 | RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, | |
|
2477 | "missing init (ZSTD_compressBegin)"); | |||
2886 |
|
2478 | |||
2887 | if (frame && (cctx->stage==ZSTDcs_init)) { |
|
2479 | if (frame && (cctx->stage==ZSTDcs_init)) { | |
2888 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, |
|
2480 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, | |
2889 | cctx->pledgedSrcSizePlusOne-1, cctx->dictID); |
|
2481 | cctx->pledgedSrcSizePlusOne-1, cctx->dictID); | |
2890 | if (ZSTD_isError(fhSize)) return fhSize; |
|
2482 | FORWARD_IF_ERROR(fhSize); | |
|
2483 | assert(fhSize <= dstCapacity); | |||
2891 | dstCapacity -= fhSize; |
|
2484 | dstCapacity -= fhSize; | |
2892 | dst = (char*)dst + fhSize; |
|
2485 | dst = (char*)dst + fhSize; | |
2893 | cctx->stage = ZSTDcs_ongoing; |
|
2486 | cctx->stage = ZSTDcs_ongoing; | |
@@ -2904,35 +2497,25 b' static size_t ZSTD_compressContinue_inte' | |||||
2904 |
|
2497 | |||
2905 | if (!frame) { |
|
2498 | if (!frame) { | |
2906 | /* overflow check and correction for block mode */ |
|
2499 | /* overflow check and correction for block mode */ | |
2907 |
|
|
2500 | ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); | |
2908 | U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); |
|
|||
2909 | U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src); |
|
|||
2910 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); |
|
|||
2911 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); |
|
|||
2912 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); |
|
|||
2913 | ZSTD_reduceIndex(cctx, correction); |
|
|||
2914 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; |
|
|||
2915 | else ms->nextToUpdate -= correction; |
|
|||
2916 | ms->loadedDictEnd = 0; |
|
|||
2917 | ms->dictMatchState = NULL; |
|
|||
2918 | } |
|
|||
2919 | } |
|
2501 | } | |
2920 |
|
2502 | |||
2921 | DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); |
|
2503 | DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); | |
2922 | { size_t const cSize = frame ? |
|
2504 | { size_t const cSize = frame ? | |
2923 | ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : |
|
2505 | ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : | |
2924 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); |
|
2506 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); | |
2925 | if (ZSTD_isError(cSize)) return cSize; |
|
2507 | FORWARD_IF_ERROR(cSize); | |
2926 | cctx->consumedSrcSize += srcSize; |
|
2508 | cctx->consumedSrcSize += srcSize; | |
2927 | cctx->producedCSize += (cSize + fhSize); |
|
2509 | cctx->producedCSize += (cSize + fhSize); | |
2928 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); |
|
2510 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); | |
2929 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ |
|
2511 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ | |
2930 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); |
|
2512 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); | |
2931 | if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) { |
|
2513 | RETURN_ERROR_IF( | |
2932 | DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u", |
|
2514 | cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, | |
2933 | (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); |
|
2515 | srcSize_wrong, | |
2934 | return ERROR(srcSize_wrong); |
|
2516 | "error : pledgedSrcSize = %u, while realSrcSize >= %u", | |
2935 | } |
|
2517 | (unsigned)cctx->pledgedSrcSizePlusOne-1, | |
|
2518 | (unsigned)cctx->consumedSrcSize); | |||
2936 | } |
|
2519 | } | |
2937 | return cSize + fhSize; |
|
2520 | return cSize + fhSize; | |
2938 | } |
|
2521 | } | |
@@ -2956,8 +2539,9 b' size_t ZSTD_getBlockSize(const ZSTD_CCtx' | |||||
2956 |
|
2539 | |||
2957 | size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
2540 | size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
2958 | { |
|
2541 | { | |
2959 | size_t const blockSizeMax = ZSTD_getBlockSize(cctx); |
|
2542 | DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); | |
2960 | if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); |
|
2543 | { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); | |
|
2544 | RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); } | |||
2961 |
|
2545 | |||
2962 | return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); |
|
2546 | return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); | |
2963 | } |
|
2547 | } | |
@@ -2970,7 +2554,7 b' static size_t ZSTD_loadDictionaryContent' | |||||
2970 | const void* src, size_t srcSize, |
|
2554 | const void* src, size_t srcSize, | |
2971 | ZSTD_dictTableLoadMethod_e dtlm) |
|
2555 | ZSTD_dictTableLoadMethod_e dtlm) | |
2972 | { |
|
2556 | { | |
2973 |
const BYTE* |
|
2557 | const BYTE* ip = (const BYTE*) src; | |
2974 | const BYTE* const iend = ip + srcSize; |
|
2558 | const BYTE* const iend = ip + srcSize; | |
2975 |
|
2559 | |||
2976 | ZSTD_window_update(&ms->window, src, srcSize); |
|
2560 | ZSTD_window_update(&ms->window, src, srcSize); | |
@@ -2981,32 +2565,42 b' static size_t ZSTD_loadDictionaryContent' | |||||
2981 |
|
2565 | |||
2982 | if (srcSize <= HASH_READ_SIZE) return 0; |
|
2566 | if (srcSize <= HASH_READ_SIZE) return 0; | |
2983 |
|
2567 | |||
2984 | switch(params->cParams.strategy) |
|
2568 | while (iend - ip > HASH_READ_SIZE) { | |
2985 | { |
|
2569 | size_t const remaining = (size_t)(iend - ip); | |
2986 | case ZSTD_fast: |
|
2570 | size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); | |
2987 | ZSTD_fillHashTable(ms, iend, dtlm); |
|
2571 | const BYTE* const ichunk = ip + chunk; | |
2988 | break; |
|
2572 | ||
2989 | case ZSTD_dfast: |
|
2573 | ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); | |
2990 | ZSTD_fillDoubleHashTable(ms, iend, dtlm); |
|
2574 | ||
2991 | break; |
|
2575 | switch(params->cParams.strategy) | |
2992 |
|
2576 | { | ||
2993 |
case ZSTD_ |
|
2577 | case ZSTD_fast: | |
2994 | case ZSTD_lazy: |
|
2578 | ZSTD_fillHashTable(ms, ichunk, dtlm); | |
2995 | case ZSTD_lazy2: |
|
2579 | break; | |
2996 | if (srcSize >= HASH_READ_SIZE) |
|
2580 | case ZSTD_dfast: | |
2997 | ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); |
|
2581 | ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); | |
2998 | break; |
|
2582 | break; | |
2999 |
|
2583 | |||
3000 | case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ |
|
2584 | case ZSTD_greedy: | |
3001 |
case ZSTD_ |
|
2585 | case ZSTD_lazy: | |
3002 |
case ZSTD_ |
|
2586 | case ZSTD_lazy2: | |
3003 | case ZSTD_btultra2: |
|
2587 | if (chunk >= HASH_READ_SIZE) | |
3004 | if (srcSize >= HASH_READ_SIZE) |
|
2588 | ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); | |
3005 | ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); |
|
2589 | break; | |
3006 | break; |
|
2590 | ||
3007 |
|
2591 | case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ | ||
3008 | default: |
|
2592 | case ZSTD_btopt: | |
3009 | assert(0); /* not possible : not a valid strategy id */ |
|
2593 | case ZSTD_btultra: | |
|
2594 | case ZSTD_btultra2: | |||
|
2595 | if (chunk >= HASH_READ_SIZE) | |||
|
2596 | ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); | |||
|
2597 | break; | |||
|
2598 | ||||
|
2599 | default: | |||
|
2600 | assert(0); /* not possible : not a valid strategy id */ | |||
|
2601 | } | |||
|
2602 | ||||
|
2603 | ip = ichunk; | |||
3010 | } |
|
2604 | } | |
3011 |
|
2605 | |||
3012 | ms->nextToUpdate = (U32)(iend - ms->window.base); |
|
2606 | ms->nextToUpdate = (U32)(iend - ms->window.base); | |
@@ -3020,9 +2614,9 b' static size_t ZSTD_loadDictionaryContent' | |||||
3020 | NOTE: This behavior is not standard and could be improved in the future. */ |
|
2614 | NOTE: This behavior is not standard and could be improved in the future. */ | |
3021 | static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { |
|
2615 | static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { | |
3022 | U32 s; |
|
2616 | U32 s; | |
3023 |
|
|
2617 | RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); | |
3024 | for (s = 0; s <= maxSymbolValue; ++s) { |
|
2618 | for (s = 0; s <= maxSymbolValue; ++s) { | |
3025 |
|
|
2619 | RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); | |
3026 | } |
|
2620 | } | |
3027 | return 0; |
|
2621 | return 0; | |
3028 | } |
|
2622 | } | |
@@ -3060,53 +2654,56 b' static size_t ZSTD_loadZstdDictionary(ZS' | |||||
3060 |
|
2654 | |||
3061 | { unsigned maxSymbolValue = 255; |
|
2655 | { unsigned maxSymbolValue = 255; | |
3062 | size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); |
|
2656 | size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); | |
3063 |
|
|
2657 | RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); | |
3064 |
|
|
2658 | RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); | |
3065 | dictPtr += hufHeaderSize; |
|
2659 | dictPtr += hufHeaderSize; | |
3066 | } |
|
2660 | } | |
3067 |
|
2661 | |||
3068 | { unsigned offcodeLog; |
|
2662 | { unsigned offcodeLog; | |
3069 | size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); |
|
2663 | size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); | |
3070 |
|
|
2664 | RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); | |
3071 |
|
|
2665 | RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); | |
3072 | /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ |
|
2666 | /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ | |
3073 | /* fill all offset symbols to avoid garbage at end of table */ |
|
2667 | /* fill all offset symbols to avoid garbage at end of table */ | |
3074 | CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, |
|
2668 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( | |
3075 | offcodeNCount, MaxOff, offcodeLog, |
|
2669 | bs->entropy.fse.offcodeCTable, | |
3076 | workspace, HUF_WORKSPACE_SIZE), |
|
2670 | offcodeNCount, MaxOff, offcodeLog, | |
3077 | dictionary_corrupted); |
|
2671 | workspace, HUF_WORKSPACE_SIZE)), | |
|
2672 | dictionary_corrupted); | |||
3078 | dictPtr += offcodeHeaderSize; |
|
2673 | dictPtr += offcodeHeaderSize; | |
3079 | } |
|
2674 | } | |
3080 |
|
2675 | |||
3081 | { short matchlengthNCount[MaxML+1]; |
|
2676 | { short matchlengthNCount[MaxML+1]; | |
3082 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; |
|
2677 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; | |
3083 | size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); |
|
2678 | size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); | |
3084 |
|
|
2679 | RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); | |
3085 |
|
|
2680 | RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); | |
3086 | /* Every match length code must have non-zero probability */ |
|
2681 | /* Every match length code must have non-zero probability */ | |
3087 |
|
|
2682 | FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); | |
3088 | CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, |
|
2683 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( | |
3089 | matchlengthNCount, matchlengthMaxValue, matchlengthLog, |
|
2684 | bs->entropy.fse.matchlengthCTable, | |
3090 | workspace, HUF_WORKSPACE_SIZE), |
|
2685 | matchlengthNCount, matchlengthMaxValue, matchlengthLog, | |
3091 | dictionary_corrupted); |
|
2686 | workspace, HUF_WORKSPACE_SIZE)), | |
|
2687 | dictionary_corrupted); | |||
3092 | dictPtr += matchlengthHeaderSize; |
|
2688 | dictPtr += matchlengthHeaderSize; | |
3093 | } |
|
2689 | } | |
3094 |
|
2690 | |||
3095 | { short litlengthNCount[MaxLL+1]; |
|
2691 | { short litlengthNCount[MaxLL+1]; | |
3096 | unsigned litlengthMaxValue = MaxLL, litlengthLog; |
|
2692 | unsigned litlengthMaxValue = MaxLL, litlengthLog; | |
3097 | size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); |
|
2693 | size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); | |
3098 |
|
|
2694 | RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); | |
3099 |
|
|
2695 | RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); | |
3100 | /* Every literal length code must have non-zero probability */ |
|
2696 | /* Every literal length code must have non-zero probability */ | |
3101 |
|
|
2697 | FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); | |
3102 | CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, |
|
2698 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( | |
3103 | litlengthNCount, litlengthMaxValue, litlengthLog, |
|
2699 | bs->entropy.fse.litlengthCTable, | |
3104 | workspace, HUF_WORKSPACE_SIZE), |
|
2700 | litlengthNCount, litlengthMaxValue, litlengthLog, | |
3105 | dictionary_corrupted); |
|
2701 | workspace, HUF_WORKSPACE_SIZE)), | |
|
2702 | dictionary_corrupted); | |||
3106 | dictPtr += litlengthHeaderSize; |
|
2703 | dictPtr += litlengthHeaderSize; | |
3107 | } |
|
2704 | } | |
3108 |
|
2705 | |||
3109 |
|
|
2706 | RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); | |
3110 | bs->rep[0] = MEM_readLE32(dictPtr+0); |
|
2707 | bs->rep[0] = MEM_readLE32(dictPtr+0); | |
3111 | bs->rep[1] = MEM_readLE32(dictPtr+4); |
|
2708 | bs->rep[1] = MEM_readLE32(dictPtr+4); | |
3112 | bs->rep[2] = MEM_readLE32(dictPtr+8); |
|
2709 | bs->rep[2] = MEM_readLE32(dictPtr+8); | |
@@ -3119,19 +2716,19 b' static size_t ZSTD_loadZstdDictionary(ZS' | |||||
3119 | offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ |
|
2716 | offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ | |
3120 | } |
|
2717 | } | |
3121 | /* All offset values <= dictContentSize + 128 KB must be representable */ |
|
2718 | /* All offset values <= dictContentSize + 128 KB must be representable */ | |
3122 |
|
|
2719 | FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); | |
3123 | /* All repCodes must be <= dictContentSize and != 0*/ |
|
2720 | /* All repCodes must be <= dictContentSize and != 0*/ | |
3124 | { U32 u; |
|
2721 | { U32 u; | |
3125 | for (u=0; u<3; u++) { |
|
2722 | for (u=0; u<3; u++) { | |
3126 |
|
|
2723 | RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); | |
3127 |
|
|
2724 | RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); | |
3128 | } } |
|
2725 | } } | |
3129 |
|
2726 | |||
3130 | bs->entropy.huf.repeatMode = HUF_repeat_valid; |
|
2727 | bs->entropy.huf.repeatMode = HUF_repeat_valid; | |
3131 | bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; |
|
2728 | bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; | |
3132 | bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; |
|
2729 | bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; | |
3133 | bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; |
|
2730 | bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; | |
3134 |
|
|
2731 | FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); | |
3135 | return dictID; |
|
2732 | return dictID; | |
3136 | } |
|
2733 | } | |
3137 | } |
|
2734 | } | |
@@ -3161,8 +2758,7 b' ZSTD_compress_insertDictionary(ZSTD_comp' | |||||
3161 | DEBUGLOG(4, "raw content dictionary detected"); |
|
2758 | DEBUGLOG(4, "raw content dictionary detected"); | |
3162 | return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); |
|
2759 | return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); | |
3163 | } |
|
2760 | } | |
3164 |
|
|
2761 | RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); | |
3165 | return ERROR(dictionary_wrong); |
|
|||
3166 | assert(0); /* impossible */ |
|
2762 | assert(0); /* impossible */ | |
3167 | } |
|
2763 | } | |
3168 |
|
2764 | |||
@@ -3189,14 +2785,13 b' static size_t ZSTD_compressBegin_interna' | |||||
3189 | return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); |
|
2785 | return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); | |
3190 | } |
|
2786 | } | |
3191 |
|
2787 | |||
3192 |
|
|
2788 | FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
3193 | ZSTDcrp_continue, zbuff) ); |
|
2789 | ZSTDcrp_continue, zbuff) ); | |
3194 | { |
|
2790 | { size_t const dictID = ZSTD_compress_insertDictionary( | |
3195 | size_t const dictID = ZSTD_compress_insertDictionary( |
|
|||
3196 | cctx->blockState.prevCBlock, &cctx->blockState.matchState, |
|
2791 | cctx->blockState.prevCBlock, &cctx->blockState.matchState, | |
3197 | ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); |
|
2792 | ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); | |
3198 | if (ZSTD_isError(dictID)) return dictID; |
|
2793 | FORWARD_IF_ERROR(dictID); | |
3199 |
assert(dictID <= |
|
2794 | assert(dictID <= UINT_MAX); | |
3200 | cctx->dictID = (U32)dictID; |
|
2795 | cctx->dictID = (U32)dictID; | |
3201 | } |
|
2796 | } | |
3202 | return 0; |
|
2797 | return 0; | |
@@ -3212,7 +2807,7 b' size_t ZSTD_compressBegin_advanced_inter' | |||||
3212 | { |
|
2807 | { | |
3213 | DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); |
|
2808 | DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); | |
3214 | /* compression parameters verification and optimization */ |
|
2809 | /* compression parameters verification and optimization */ | |
3215 |
|
|
2810 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); | |
3216 | return ZSTD_compressBegin_internal(cctx, |
|
2811 | return ZSTD_compressBegin_internal(cctx, | |
3217 | dict, dictSize, dictContentType, dtlm, |
|
2812 | dict, dictSize, dictContentType, dtlm, | |
3218 | cdict, |
|
2813 | cdict, | |
@@ -3260,12 +2855,12 b' static size_t ZSTD_writeEpilogue(ZSTD_CC' | |||||
3260 | size_t fhSize = 0; |
|
2855 | size_t fhSize = 0; | |
3261 |
|
2856 | |||
3262 | DEBUGLOG(4, "ZSTD_writeEpilogue"); |
|
2857 | DEBUGLOG(4, "ZSTD_writeEpilogue"); | |
3263 |
|
|
2858 | RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); | |
3264 |
|
2859 | |||
3265 | /* special case : empty frame */ |
|
2860 | /* special case : empty frame */ | |
3266 | if (cctx->stage == ZSTDcs_init) { |
|
2861 | if (cctx->stage == ZSTDcs_init) { | |
3267 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); |
|
2862 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); | |
3268 | if (ZSTD_isError(fhSize)) return fhSize; |
|
2863 | FORWARD_IF_ERROR(fhSize); | |
3269 | dstCapacity -= fhSize; |
|
2864 | dstCapacity -= fhSize; | |
3270 | op += fhSize; |
|
2865 | op += fhSize; | |
3271 | cctx->stage = ZSTDcs_ongoing; |
|
2866 | cctx->stage = ZSTDcs_ongoing; | |
@@ -3274,7 +2869,7 b' static size_t ZSTD_writeEpilogue(ZSTD_CC' | |||||
3274 | if (cctx->stage != ZSTDcs_ending) { |
|
2869 | if (cctx->stage != ZSTDcs_ending) { | |
3275 | /* write one last empty block, make it the "last" block */ |
|
2870 | /* write one last empty block, make it the "last" block */ | |
3276 | U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; |
|
2871 | U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; | |
3277 |
|
|
2872 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); | |
3278 | MEM_writeLE32(op, cBlockHeader24); |
|
2873 | MEM_writeLE32(op, cBlockHeader24); | |
3279 | op += ZSTD_blockHeaderSize; |
|
2874 | op += ZSTD_blockHeaderSize; | |
3280 | dstCapacity -= ZSTD_blockHeaderSize; |
|
2875 | dstCapacity -= ZSTD_blockHeaderSize; | |
@@ -3282,7 +2877,7 b' static size_t ZSTD_writeEpilogue(ZSTD_CC' | |||||
3282 |
|
2877 | |||
3283 | if (cctx->appliedParams.fParams.checksumFlag) { |
|
2878 | if (cctx->appliedParams.fParams.checksumFlag) { | |
3284 | U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); |
|
2879 | U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); | |
3285 |
|
|
2880 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); | |
3286 | DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); |
|
2881 | DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); | |
3287 | MEM_writeLE32(op, checksum); |
|
2882 | MEM_writeLE32(op, checksum); | |
3288 | op += 4; |
|
2883 | op += 4; | |
@@ -3300,18 +2895,20 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx' | |||||
3300 | size_t const cSize = ZSTD_compressContinue_internal(cctx, |
|
2895 | size_t const cSize = ZSTD_compressContinue_internal(cctx, | |
3301 | dst, dstCapacity, src, srcSize, |
|
2896 | dst, dstCapacity, src, srcSize, | |
3302 | 1 /* frame mode */, 1 /* last chunk */); |
|
2897 | 1 /* frame mode */, 1 /* last chunk */); | |
3303 | if (ZSTD_isError(cSize)) return cSize; |
|
2898 | FORWARD_IF_ERROR(cSize); | |
3304 | endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); |
|
2899 | endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); | |
3305 | if (ZSTD_isError(endResult)) return endResult; |
|
2900 | FORWARD_IF_ERROR(endResult); | |
3306 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); |
|
2901 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); | |
3307 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ |
|
2902 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ | |
3308 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); |
|
2903 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); | |
3309 | DEBUGLOG(4, "end of frame : controlling src size"); |
|
2904 | DEBUGLOG(4, "end of frame : controlling src size"); | |
3310 | if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) { |
|
2905 | RETURN_ERROR_IF( | |
3311 | DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u", |
|
2906 | cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, | |
3312 | (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); |
|
2907 | srcSize_wrong, | |
3313 | return ERROR(srcSize_wrong); |
|
2908 | "error : pledgedSrcSize = %u, while realSrcSize = %u", | |
3314 | } } |
|
2909 | (unsigned)cctx->pledgedSrcSizePlusOne-1, | |
|
2910 | (unsigned)cctx->consumedSrcSize); | |||
|
2911 | } | |||
3315 | return cSize + endResult; |
|
2912 | return cSize + endResult; | |
3316 | } |
|
2913 | } | |
3317 |
|
2914 | |||
@@ -3339,7 +2936,7 b' size_t ZSTD_compress_advanced (ZSTD_CCtx' | |||||
3339 | ZSTD_parameters params) |
|
2936 | ZSTD_parameters params) | |
3340 | { |
|
2937 | { | |
3341 | DEBUGLOG(4, "ZSTD_compress_advanced"); |
|
2938 | DEBUGLOG(4, "ZSTD_compress_advanced"); | |
3342 |
|
|
2939 | FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); | |
3343 | return ZSTD_compress_internal(cctx, |
|
2940 | return ZSTD_compress_internal(cctx, | |
3344 | dst, dstCapacity, |
|
2941 | dst, dstCapacity, | |
3345 | src, srcSize, |
|
2942 | src, srcSize, | |
@@ -3356,7 +2953,7 b' size_t ZSTD_compress_advanced_internal(' | |||||
3356 | ZSTD_CCtx_params params) |
|
2953 | ZSTD_CCtx_params params) | |
3357 | { |
|
2954 | { | |
3358 | DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); |
|
2955 | DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); | |
3359 |
|
|
2956 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, | |
3360 | dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, |
|
2957 | dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, | |
3361 | params, srcSize, ZSTDb_not_buffered) ); |
|
2958 | params, srcSize, ZSTDb_not_buffered) ); | |
3362 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); |
|
2959 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); | |
@@ -3440,17 +3037,17 b' static size_t ZSTD_initCDict_internal(' | |||||
3440 | void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); |
|
3037 | void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); | |
3441 | cdict->dictBuffer = internalBuffer; |
|
3038 | cdict->dictBuffer = internalBuffer; | |
3442 | cdict->dictContent = internalBuffer; |
|
3039 | cdict->dictContent = internalBuffer; | |
3443 |
|
|
3040 | RETURN_ERROR_IF(!internalBuffer, memory_allocation); | |
3444 | memcpy(internalBuffer, dictBuffer, dictSize); |
|
3041 | memcpy(internalBuffer, dictBuffer, dictSize); | |
3445 | } |
|
3042 | } | |
3446 | cdict->dictContentSize = dictSize; |
|
3043 | cdict->dictContentSize = dictSize; | |
3447 |
|
3044 | |||
3448 | /* Reset the state to no dictionary */ |
|
3045 | /* Reset the state to no dictionary */ | |
3449 | ZSTD_reset_compressedBlockState(&cdict->cBlockState); |
|
3046 | ZSTD_reset_compressedBlockState(&cdict->cBlockState); | |
3450 | { void* const end = ZSTD_reset_matchState( |
|
3047 | { void* const end = ZSTD_reset_matchState(&cdict->matchState, | |
3451 | &cdict->matchState, |
|
3048 | (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, | |
3452 | (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, |
|
3049 | &cParams, | |
3453 |
|
|
3050 | ZSTDcrp_continue, ZSTD_resetTarget_CDict); | |
3454 | assert(end == (char*)cdict->workspace + cdict->workspaceSize); |
|
3051 | assert(end == (char*)cdict->workspace + cdict->workspaceSize); | |
3455 | (void)end; |
|
3052 | (void)end; | |
3456 | } |
|
3053 | } | |
@@ -3466,7 +3063,7 b' static size_t ZSTD_initCDict_internal(' | |||||
3466 | &cdict->cBlockState, &cdict->matchState, ¶ms, |
|
3063 | &cdict->cBlockState, &cdict->matchState, ¶ms, | |
3467 | cdict->dictContent, cdict->dictContentSize, |
|
3064 | cdict->dictContent, cdict->dictContentSize, | |
3468 | dictContentType, ZSTD_dtlm_full, cdict->workspace); |
|
3065 | dictContentType, ZSTD_dtlm_full, cdict->workspace); | |
3469 | if (ZSTD_isError(dictID)) return dictID; |
|
3066 | FORWARD_IF_ERROR(dictID); | |
3470 | assert(dictID <= (size_t)(U32)-1); |
|
3067 | assert(dictID <= (size_t)(U32)-1); | |
3471 | cdict->dictID = (U32)dictID; |
|
3068 | cdict->dictID = (U32)dictID; | |
3472 | } |
|
3069 | } | |
@@ -3596,7 +3193,7 b' size_t ZSTD_compressBegin_usingCDict_adv' | |||||
3596 | ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) |
|
3193 | ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) | |
3597 | { |
|
3194 | { | |
3598 | DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); |
|
3195 | DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); | |
3599 |
|
|
3196 | RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); | |
3600 | { ZSTD_CCtx_params params = cctx->requestedParams; |
|
3197 | { ZSTD_CCtx_params params = cctx->requestedParams; | |
3601 | params.cParams = ZSTD_getCParamsFromCDict(cdict); |
|
3198 | params.cParams = ZSTD_getCParamsFromCDict(cdict); | |
3602 | /* Increase window log to fit the entire dictionary and source if the |
|
3199 | /* Increase window log to fit the entire dictionary and source if the | |
@@ -3632,7 +3229,7 b' size_t ZSTD_compress_usingCDict_advanced' | |||||
3632 | const void* src, size_t srcSize, |
|
3229 | const void* src, size_t srcSize, | |
3633 | const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) |
|
3230 | const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) | |
3634 | { |
|
3231 | { | |
3635 |
|
|
3232 | FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ | |
3636 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); |
|
3233 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); | |
3637 | } |
|
3234 | } | |
3638 |
|
3235 | |||
@@ -3700,7 +3297,7 b' static size_t ZSTD_resetCStream_internal' | |||||
3700 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); |
|
3297 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); | |
3701 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
|
3298 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ | |
3702 |
|
3299 | |||
3703 |
|
|
3300 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, | |
3704 | dict, dictSize, dictContentType, ZSTD_dtlm_fast, |
|
3301 | dict, dictSize, dictContentType, ZSTD_dtlm_fast, | |
3705 | cdict, |
|
3302 | cdict, | |
3706 | params, pledgedSrcSize, |
|
3303 | params, pledgedSrcSize, | |
@@ -3718,13 +3315,17 b' static size_t ZSTD_resetCStream_internal' | |||||
3718 |
|
3315 | |||
3719 | /* ZSTD_resetCStream(): |
|
3316 | /* ZSTD_resetCStream(): | |
3720 | * pledgedSrcSize == 0 means "unknown" */ |
|
3317 | * pledgedSrcSize == 0 means "unknown" */ | |
3721 |
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long p |
|
3318 | size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) | |
3722 | { |
|
3319 | { | |
3723 | ZSTD_CCtx_params params = zcs->requestedParams; |
|
3320 | /* temporary : 0 interpreted as "unknown" during transition period. | |
|
3321 | * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. | |||
|
3322 | * 0 will be interpreted as "empty" in the future. | |||
|
3323 | */ | |||
|
3324 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; | |||
3724 | DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); |
|
3325 | DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); | |
3725 | if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
3326 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
3726 | params.fParams.contentSizeFlag = 1; |
|
3327 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
3727 | return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); |
|
3328 | return 0; | |
3728 | } |
|
3329 | } | |
3729 |
|
3330 | |||
3730 | /*! ZSTD_initCStream_internal() : |
|
3331 | /*! ZSTD_initCStream_internal() : | |
@@ -3736,32 +3337,18 b' size_t ZSTD_initCStream_internal(ZSTD_CS' | |||||
3736 | ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) |
|
3337 | ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) | |
3737 | { |
|
3338 | { | |
3738 | DEBUGLOG(4, "ZSTD_initCStream_internal"); |
|
3339 | DEBUGLOG(4, "ZSTD_initCStream_internal"); | |
3739 | params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); |
|
3340 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3341 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |||
3740 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); |
|
3342 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); | |
|
3343 | zcs->requestedParams = params; | |||
3741 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
|
3344 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ | |
3742 |
|
3345 | if (dict) { | ||
3743 | if (dict && dictSize >= 8) { |
|
3346 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); | |
3744 | DEBUGLOG(4, "loading dictionary of size %u", (unsigned)dictSize); |
|
|||
3745 | if (zcs->staticSize) { /* static CCtx : never uses malloc */ |
|
|||
3746 | /* incompatible with internal cdict creation */ |
|
|||
3747 | return ERROR(memory_allocation); |
|
|||
3748 | } |
|
|||
3749 | ZSTD_freeCDict(zcs->cdictLocal); |
|
|||
3750 | zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, |
|
|||
3751 | ZSTD_dlm_byCopy, ZSTD_dct_auto, |
|
|||
3752 | params.cParams, zcs->customMem); |
|
|||
3753 | zcs->cdict = zcs->cdictLocal; |
|
|||
3754 | if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); |
|
|||
3755 | } else { |
|
3347 | } else { | |
3756 | if (cdict) { |
|
3348 | /* Dictionary is cleared if !cdict */ | |
3757 | params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ |
|
3349 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); | |
3758 | } |
|
|||
3759 | ZSTD_freeCDict(zcs->cdictLocal); |
|
|||
3760 | zcs->cdictLocal = NULL; |
|
|||
3761 | zcs->cdict = cdict; |
|
|||
3762 | } |
|
3350 | } | |
3763 |
|
3351 | return 0; | ||
3764 | return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); |
|
|||
3765 | } |
|
3352 | } | |
3766 |
|
3353 | |||
3767 | /* ZSTD_initCStream_usingCDict_advanced() : |
|
3354 | /* ZSTD_initCStream_usingCDict_advanced() : | |
@@ -3772,22 +3359,20 b' size_t ZSTD_initCStream_usingCDict_advan' | |||||
3772 | unsigned long long pledgedSrcSize) |
|
3359 | unsigned long long pledgedSrcSize) | |
3773 | { |
|
3360 | { | |
3774 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); |
|
3361 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); | |
3775 | if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ |
|
3362 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
3776 | { ZSTD_CCtx_params params = zcs->requestedParams; |
|
3363 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
3777 | params.cParams = ZSTD_getCParamsFromCDict(cdict); |
|
3364 | zcs->requestedParams.fParams = fParams; | |
3778 | params.fParams = fParams; |
|
3365 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); | |
3779 | return ZSTD_initCStream_internal(zcs, |
|
3366 | return 0; | |
3780 | NULL, 0, cdict, |
|
|||
3781 | params, pledgedSrcSize); |
|
|||
3782 | } |
|
|||
3783 | } |
|
3367 | } | |
3784 |
|
3368 | |||
3785 | /* note : cdict must outlive compression session */ |
|
3369 | /* note : cdict must outlive compression session */ | |
3786 | size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) |
|
3370 | size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) | |
3787 | { |
|
3371 | { | |
3788 | ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; |
|
|||
3789 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); |
|
3372 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); | |
3790 | return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ |
|
3373 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3374 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); | |||
|
3375 | return 0; | |||
3791 | } |
|
3376 | } | |
3792 |
|
3377 | |||
3793 |
|
3378 | |||
@@ -3797,33 +3382,53 b' size_t ZSTD_initCStream_usingCDict(ZSTD_' | |||||
3797 | * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ |
|
3382 | * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ | |
3798 | size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, |
|
3383 | size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, | |
3799 | const void* dict, size_t dictSize, |
|
3384 | const void* dict, size_t dictSize, | |
3800 |
ZSTD_parameters params, unsigned long long p |
|
3385 | ZSTD_parameters params, unsigned long long pss) | |
3801 | { |
|
3386 | { | |
3802 | DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", |
|
3387 | /* for compatibility with older programs relying on this behavior. | |
3803 | (unsigned)pledgedSrcSize, params.fParams.contentSizeFlag); |
|
3388 | * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. | |
3804 | CHECK_F( ZSTD_checkCParams(params.cParams) ); |
|
3389 | * This line will be removed in the future. | |
3805 | if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ |
|
3390 | */ | |
|
3391 | U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; | |||
|
3392 | DEBUGLOG(4, "ZSTD_initCStream_advanced"); | |||
|
3393 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |||
|
3394 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |||
|
3395 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); | |||
3806 | zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); |
|
3396 | zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); | |
3807 | return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, zcs->requestedParams, pledgedSrcSize); |
|
3397 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); | |
|
3398 | return 0; | |||
3808 | } |
|
3399 | } | |
3809 |
|
3400 | |||
3810 | size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) |
|
3401 | size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) | |
3811 | { |
|
3402 | { | |
3812 | ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel); |
|
3403 | DEBUGLOG(4, "ZSTD_initCStream_usingDict"); | |
3813 | return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, zcs->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN); |
|
3404 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3405 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); | |||
|
3406 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); | |||
|
3407 | return 0; | |||
3814 | } |
|
3408 | } | |
3815 |
|
3409 | |||
3816 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) |
|
3410 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) | |
3817 | { |
|
3411 | { | |
3818 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */ |
|
3412 | /* temporary : 0 interpreted as "unknown" during transition period. | |
3819 | ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel); |
|
3413 | * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. | |
3820 | return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, zcs->requestedParams, pledgedSrcSize); |
|
3414 | * 0 will be interpreted as "empty" in the future. | |
|
3415 | */ | |||
|
3416 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; | |||
|
3417 | DEBUGLOG(4, "ZSTD_initCStream_srcSize"); | |||
|
3418 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |||
|
3419 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); | |||
|
3420 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); | |||
|
3421 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |||
|
3422 | return 0; | |||
3821 | } |
|
3423 | } | |
3822 |
|
3424 | |||
3823 | size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) |
|
3425 | size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) | |
3824 | { |
|
3426 | { | |
3825 | DEBUGLOG(4, "ZSTD_initCStream"); |
|
3427 | DEBUGLOG(4, "ZSTD_initCStream"); | |
3826 | return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); |
|
3428 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3429 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); | |||
|
3430 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); | |||
|
3431 | return 0; | |||
3827 | } |
|
3432 | } | |
3828 |
|
3433 | |||
3829 | /*====== Compression ======*/ |
|
3434 | /*====== Compression ======*/ | |
@@ -3847,10 +3452,10 b' static size_t ZSTD_limitCopy(void* dst, ' | |||||
3847 | * internal function for all *compressStream*() variants |
|
3452 | * internal function for all *compressStream*() variants | |
3848 | * non-static, because can be called from zstdmt_compress.c |
|
3453 | * non-static, because can be called from zstdmt_compress.c | |
3849 | * @return : hint size for next input */ |
|
3454 | * @return : hint size for next input */ | |
3850 | size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, |
|
3455 | static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, | |
3851 | ZSTD_outBuffer* output, |
|
3456 | ZSTD_outBuffer* output, | |
3852 | ZSTD_inBuffer* input, |
|
3457 | ZSTD_inBuffer* input, | |
3853 | ZSTD_EndDirective const flushMode) |
|
3458 | ZSTD_EndDirective const flushMode) | |
3854 | { |
|
3459 | { | |
3855 | const char* const istart = (const char*)input->src; |
|
3460 | const char* const istart = (const char*)input->src; | |
3856 | const char* const iend = istart + input->size; |
|
3461 | const char* const iend = istart + input->size; | |
@@ -3873,8 +3478,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||||
3873 | switch(zcs->streamStage) |
|
3478 | switch(zcs->streamStage) | |
3874 | { |
|
3479 | { | |
3875 | case zcss_init: |
|
3480 | case zcss_init: | |
3876 |
|
|
3481 | RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); | |
3877 | return ERROR(init_missing); |
|
|||
3878 |
|
3482 | |||
3879 | case zcss_load: |
|
3483 | case zcss_load: | |
3880 | if ( (flushMode == ZSTD_e_end) |
|
3484 | if ( (flushMode == ZSTD_e_end) | |
@@ -3884,7 +3488,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||||
3884 | size_t const cSize = ZSTD_compressEnd(zcs, |
|
3488 | size_t const cSize = ZSTD_compressEnd(zcs, | |
3885 | op, oend-op, ip, iend-ip); |
|
3489 | op, oend-op, ip, iend-ip); | |
3886 | DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); |
|
3490 | DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); | |
3887 | if (ZSTD_isError(cSize)) return cSize; |
|
3491 | FORWARD_IF_ERROR(cSize); | |
3888 | ip = iend; |
|
3492 | ip = iend; | |
3889 | op += cSize; |
|
3493 | op += cSize; | |
3890 | zcs->frameEnded = 1; |
|
3494 | zcs->frameEnded = 1; | |
@@ -3925,7 +3529,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||||
3925 | zcs->inBuff + zcs->inToCompress, iSize) : |
|
3529 | zcs->inBuff + zcs->inToCompress, iSize) : | |
3926 | ZSTD_compressContinue(zcs, cDst, oSize, |
|
3530 | ZSTD_compressContinue(zcs, cDst, oSize, | |
3927 | zcs->inBuff + zcs->inToCompress, iSize); |
|
3531 | zcs->inBuff + zcs->inToCompress, iSize); | |
3928 | if (ZSTD_isError(cSize)) return cSize; |
|
3532 | FORWARD_IF_ERROR(cSize); | |
3929 | zcs->frameEnded = lastBlock; |
|
3533 | zcs->frameEnded = lastBlock; | |
3930 | /* prepare next block */ |
|
3534 | /* prepare next block */ | |
3931 | zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; |
|
3535 | zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; | |
@@ -3953,7 +3557,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||||
3953 | case zcss_flush: |
|
3557 | case zcss_flush: | |
3954 | DEBUGLOG(5, "flush stage"); |
|
3558 | DEBUGLOG(5, "flush stage"); | |
3955 | { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; |
|
3559 | { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; | |
3956 | size_t const flushed = ZSTD_limitCopy(op, oend-op, |
|
3560 | size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), | |
3957 | zcs->outBuff + zcs->outBuffFlushedSize, toFlush); |
|
3561 | zcs->outBuff + zcs->outBuffFlushedSize, toFlush); | |
3958 | DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", |
|
3562 | DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", | |
3959 | (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); |
|
3563 | (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); | |
@@ -4001,7 +3605,7 b' static size_t ZSTD_nextInputSizeHint_MTo' | |||||
4001 |
|
3605 | |||
4002 | size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) |
|
3606 | size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) | |
4003 | { |
|
3607 | { | |
4004 |
|
|
3608 | FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); | |
4005 | return ZSTD_nextInputSizeHint_MTorST(zcs); |
|
3609 | return ZSTD_nextInputSizeHint_MTorST(zcs); | |
4006 | } |
|
3610 | } | |
4007 |
|
3611 | |||
@@ -4013,14 +3617,15 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||||
4013 | { |
|
3617 | { | |
4014 | DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); |
|
3618 | DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); | |
4015 | /* check conditions */ |
|
3619 | /* check conditions */ | |
4016 |
|
|
3620 | RETURN_ERROR_IF(output->pos > output->size, GENERIC); | |
4017 |
|
|
3621 | RETURN_ERROR_IF(input->pos > input->size, GENERIC); | |
4018 | assert(cctx!=NULL); |
|
3622 | assert(cctx!=NULL); | |
4019 |
|
3623 | |||
4020 | /* transparent initialization stage */ |
|
3624 | /* transparent initialization stage */ | |
4021 | if (cctx->streamStage == zcss_init) { |
|
3625 | if (cctx->streamStage == zcss_init) { | |
4022 | ZSTD_CCtx_params params = cctx->requestedParams; |
|
3626 | ZSTD_CCtx_params params = cctx->requestedParams; | |
4023 | ZSTD_prefixDict const prefixDict = cctx->prefixDict; |
|
3627 | ZSTD_prefixDict const prefixDict = cctx->prefixDict; | |
|
3628 | FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */ | |||
4024 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ |
|
3629 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ | |
4025 | assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ |
|
3630 | assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ | |
4026 | DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); |
|
3631 | DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); | |
@@ -4039,11 +3644,11 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||||
4039 | DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", |
|
3644 | DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", | |
4040 | params.nbWorkers); |
|
3645 | params.nbWorkers); | |
4041 | cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); |
|
3646 | cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); | |
4042 |
|
|
3647 | RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); | |
4043 | } |
|
3648 | } | |
4044 | /* mt compression */ |
|
3649 | /* mt compression */ | |
4045 | DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); |
|
3650 | DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); | |
4046 |
|
|
3651 | FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( | |
4047 | cctx->mtctx, |
|
3652 | cctx->mtctx, | |
4048 | prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, |
|
3653 | prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, | |
4049 | cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); |
|
3654 | cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); | |
@@ -4051,7 +3656,7 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||||
4051 | cctx->appliedParams.nbWorkers = params.nbWorkers; |
|
3656 | cctx->appliedParams.nbWorkers = params.nbWorkers; | |
4052 | } else |
|
3657 | } else | |
4053 | #endif |
|
3658 | #endif | |
4054 |
{ |
|
3659 | { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, | |
4055 | prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, |
|
3660 | prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, | |
4056 | cctx->cdict, |
|
3661 | cctx->cdict, | |
4057 | params, cctx->pledgedSrcSizePlusOne-1) ); |
|
3662 | params, cctx->pledgedSrcSizePlusOne-1) ); | |
@@ -4063,20 +3668,30 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||||
4063 | /* compression stage */ |
|
3668 | /* compression stage */ | |
4064 | #ifdef ZSTD_MULTITHREAD |
|
3669 | #ifdef ZSTD_MULTITHREAD | |
4065 | if (cctx->appliedParams.nbWorkers > 0) { |
|
3670 | if (cctx->appliedParams.nbWorkers > 0) { | |
|
3671 | int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); | |||
|
3672 | size_t flushMin; | |||
|
3673 | assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); | |||
4066 | if (cctx->cParamsChanged) { |
|
3674 | if (cctx->cParamsChanged) { | |
4067 | ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); |
|
3675 | ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); | |
4068 | cctx->cParamsChanged = 0; |
|
3676 | cctx->cParamsChanged = 0; | |
4069 | } |
|
3677 | } | |
4070 | { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); |
|
3678 | do { | |
|
3679 | flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); | |||
4071 | if ( ZSTD_isError(flushMin) |
|
3680 | if ( ZSTD_isError(flushMin) | |
4072 | || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ |
|
3681 | || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ | |
4073 | ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); |
|
3682 | ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); | |
4074 | } |
|
3683 | } | |
4075 | DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); |
|
3684 | FORWARD_IF_ERROR(flushMin); | |
4076 | return flushMin; |
|
3685 | } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); | |
4077 | } } |
|
3686 | DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); | |
|
3687 | /* Either we don't require maximum forward progress, we've finished the | |||
|
3688 | * flush, or we are out of output space. | |||
|
3689 | */ | |||
|
3690 | assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); | |||
|
3691 | return flushMin; | |||
|
3692 | } | |||
4078 | #endif |
|
3693 | #endif | |
4079 |
|
|
3694 | FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); | |
4080 | DEBUGLOG(5, "completed ZSTD_compressStream2"); |
|
3695 | DEBUGLOG(5, "completed ZSTD_compressStream2"); | |
4081 | return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ |
|
3696 | return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ | |
4082 | } |
|
3697 | } | |
@@ -4107,10 +3722,10 b' size_t ZSTD_compress2(ZSTD_CCtx* cctx,' | |||||
4107 | dst, dstCapacity, &oPos, |
|
3722 | dst, dstCapacity, &oPos, | |
4108 | src, srcSize, &iPos, |
|
3723 | src, srcSize, &iPos, | |
4109 | ZSTD_e_end); |
|
3724 | ZSTD_e_end); | |
4110 | if (ZSTD_isError(result)) return result; |
|
3725 | FORWARD_IF_ERROR(result); | |
4111 | if (result != 0) { /* compression not completed, due to lack of output space */ |
|
3726 | if (result != 0) { /* compression not completed, due to lack of output space */ | |
4112 | assert(oPos == dstCapacity); |
|
3727 | assert(oPos == dstCapacity); | |
4113 |
|
|
3728 | RETURN_ERROR(dstSize_tooSmall); | |
4114 | } |
|
3729 | } | |
4115 | assert(iPos == srcSize); /* all input is expected consumed */ |
|
3730 | assert(iPos == srcSize); /* all input is expected consumed */ | |
4116 | return oPos; |
|
3731 | return oPos; | |
@@ -4132,11 +3747,11 b' size_t ZSTD_endStream(ZSTD_CStream* zcs,' | |||||
4132 | { |
|
3747 | { | |
4133 | ZSTD_inBuffer input = { NULL, 0, 0 }; |
|
3748 | ZSTD_inBuffer input = { NULL, 0, 0 }; | |
4134 | size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); |
|
3749 | size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); | |
4135 |
|
|
3750 | FORWARD_IF_ERROR( remainingToFlush ); | |
4136 | if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ |
|
3751 | if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ | |
4137 | /* single thread mode : attempt to calculate remaining to flush more precisely */ |
|
3752 | /* single thread mode : attempt to calculate remaining to flush more precisely */ | |
4138 | { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; |
|
3753 | { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; | |
4139 | size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; |
|
3754 | size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); | |
4140 | size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; |
|
3755 | size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; | |
4141 | DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); |
|
3756 | DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); | |
4142 | return toFlush; |
|
3757 | return toFlush; | |
@@ -4151,7 +3766,7 b' int ZSTD_maxCLevel(void) { return ZSTD_M' | |||||
4151 | int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } |
|
3766 | int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } | |
4152 |
|
3767 | |||
4153 | static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { |
|
3768 | static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { | |
4154 | { /* "default" - guarantees a monotonically increasing memory budget */ |
|
3769 | { /* "default" - for any srcSize > 256 KB */ | |
4155 | /* W, C, H, S, L, TL, strat */ |
|
3770 | /* W, C, H, S, L, TL, strat */ | |
4156 | { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ |
|
3771 | { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ | |
4157 | { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ |
|
3772 | { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ | |
@@ -4258,13 +3873,13 b' static const ZSTD_compressionParameters ' | |||||
4258 | }; |
|
3873 | }; | |
4259 |
|
3874 | |||
4260 | /*! ZSTD_getCParams() : |
|
3875 | /*! ZSTD_getCParams() : | |
4261 |
* |
|
3876 | * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. | |
4262 |
* |
|
3877 | * Size values are optional, provide 0 if not known or unused */ | |
4263 | ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) |
|
3878 | ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) | |
4264 | { |
|
3879 | { | |
4265 | size_t const addedSize = srcSizeHint ? 0 : 500; |
|
3880 | size_t const addedSize = srcSizeHint ? 0 : 500; | |
4266 |
U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : |
|
3881 | U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ | |
4267 |
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); |
|
3882 | U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); | |
4268 | int row = compressionLevel; |
|
3883 | int row = compressionLevel; | |
4269 | DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); |
|
3884 | DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); | |
4270 | if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ |
|
3885 | if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ | |
@@ -4272,13 +3887,14 b' ZSTD_compressionParameters ZSTD_getCPara' | |||||
4272 | if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; |
|
3887 | if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; | |
4273 | { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; |
|
3888 | { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; | |
4274 | if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ |
|
3889 | if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ | |
4275 | return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); |
|
3890 | return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ | |
4276 | } |
|
3891 | } | |
4277 | } |
|
3892 | } | |
4278 |
|
3893 | |||
4279 | /*! ZSTD_getParams() : |
|
3894 | /*! ZSTD_getParams() : | |
4280 | * same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). |
|
3895 | * same idea as ZSTD_getCParams() | |
4281 | * All fields of `ZSTD_frameParameters` are set to default (0) */ |
|
3896 | * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). | |
|
3897 | * Fields of `ZSTD_frameParameters` are set to default values */ | |||
4282 | ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { |
|
3898 | ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { | |
4283 | ZSTD_parameters params; |
|
3899 | ZSTD_parameters params; | |
4284 | ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); |
|
3900 | ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); |
@@ -33,13 +33,13 b' extern "C" {' | |||||
33 | ***************************************/ |
|
33 | ***************************************/ | |
34 | #define kSearchStrength 8 |
|
34 | #define kSearchStrength 8 | |
35 | #define HASH_READ_SIZE 8 |
|
35 | #define HASH_READ_SIZE 8 | |
36 |
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 |
|
36 | #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". | |
37 | It could be confused for a real successor at index "1", if sorted as larger than its predecessor. |
|
37 | It could be confused for a real successor at index "1", if sorted as larger than its predecessor. | |
38 | It's not a big deal though : candidate will just be sorted again. |
|
38 | It's not a big deal though : candidate will just be sorted again. | |
39 |
Addition |
|
39 | Additionally, candidate position 1 will be lost. | |
40 | But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. |
|
40 | But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. | |
41 |
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mis |
|
41 | The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. | |
42 |
|
|
42 | This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ | |
43 |
|
43 | |||
44 |
|
44 | |||
45 | /*-************************************* |
|
45 | /*-************************************* | |
@@ -55,6 +55,14 b' typedef struct ZSTD_prefixDict_s {' | |||||
55 | } ZSTD_prefixDict; |
|
55 | } ZSTD_prefixDict; | |
56 |
|
56 | |||
57 | typedef struct { |
|
57 | typedef struct { | |
|
58 | void* dictBuffer; | |||
|
59 | void const* dict; | |||
|
60 | size_t dictSize; | |||
|
61 | ZSTD_dictContentType_e dictContentType; | |||
|
62 | ZSTD_CDict* cdict; | |||
|
63 | } ZSTD_localDict; | |||
|
64 | ||||
|
65 | typedef struct { | |||
58 | U32 CTable[HUF_CTABLE_SIZE_U32(255)]; |
|
66 | U32 CTable[HUF_CTABLE_SIZE_U32(255)]; | |
59 | HUF_repeat repeatMode; |
|
67 | HUF_repeat repeatMode; | |
60 | } ZSTD_hufCTables_t; |
|
68 | } ZSTD_hufCTables_t; | |
@@ -107,6 +115,7 b' typedef struct {' | |||||
107 | U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ |
|
115 | U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ | |
108 | ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ |
|
116 | ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ | |
109 | const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ |
|
117 | const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ | |
|
118 | ZSTD_literalCompressionMode_e literalCompressionMode; | |||
110 | } optState_t; |
|
119 | } optState_t; | |
111 |
|
120 | |||
112 | typedef struct { |
|
121 | typedef struct { | |
@@ -119,21 +128,26 b' typedef struct {' | |||||
119 | BYTE const* base; /* All regular indexes relative to this position */ |
|
128 | BYTE const* base; /* All regular indexes relative to this position */ | |
120 | BYTE const* dictBase; /* extDict indexes relative to this position */ |
|
129 | BYTE const* dictBase; /* extDict indexes relative to this position */ | |
121 | U32 dictLimit; /* below that point, need extDict */ |
|
130 | U32 dictLimit; /* below that point, need extDict */ | |
122 | U32 lowLimit; /* below that point, no more data */ |
|
131 | U32 lowLimit; /* below that point, no more valid data */ | |
123 | } ZSTD_window_t; |
|
132 | } ZSTD_window_t; | |
124 |
|
133 | |||
125 | typedef struct ZSTD_matchState_t ZSTD_matchState_t; |
|
134 | typedef struct ZSTD_matchState_t ZSTD_matchState_t; | |
126 | struct ZSTD_matchState_t { |
|
135 | struct ZSTD_matchState_t { | |
127 | ZSTD_window_t window; /* State for window round buffer management */ |
|
136 | ZSTD_window_t window; /* State for window round buffer management */ | |
128 |
U32 loadedDictEnd; /* index of end of dictionary |
|
137 | U32 loadedDictEnd; /* index of end of dictionary, within context's referential. | |
|
138 | * When loadedDictEnd != 0, a dictionary is in use, and still valid. | |||
|
139 | * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. | |||
|
140 | * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). | |||
|
141 | * When dict referential is copied into active context (i.e. not attached), | |||
|
142 | * loadedDictEnd == dictSize, since referential starts from zero. | |||
|
143 | */ | |||
129 | U32 nextToUpdate; /* index from which to continue table update */ |
|
144 | U32 nextToUpdate; /* index from which to continue table update */ | |
130 | U32 nextToUpdate3; /* index from which to continue table update */ |
|
145 | U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ | |
131 | U32 hashLog3; /* dispatch table : larger == faster, more memory */ |
|
|||
132 | U32* hashTable; |
|
146 | U32* hashTable; | |
133 | U32* hashTable3; |
|
147 | U32* hashTable3; | |
134 | U32* chainTable; |
|
148 | U32* chainTable; | |
135 | optState_t opt; /* optimal parser state */ |
|
149 | optState_t opt; /* optimal parser state */ | |
136 |
const ZSTD_matchState_t |
|
150 | const ZSTD_matchState_t* dictMatchState; | |
137 | ZSTD_compressionParameters cParams; |
|
151 | ZSTD_compressionParameters cParams; | |
138 | }; |
|
152 | }; | |
139 |
|
153 | |||
@@ -186,8 +200,12 b' struct ZSTD_CCtx_params_s {' | |||||
186 | int compressionLevel; |
|
200 | int compressionLevel; | |
187 | int forceWindow; /* force back-references to respect limit of |
|
201 | int forceWindow; /* force back-references to respect limit of | |
188 | * 1<<wLog, even for dictionary */ |
|
202 | * 1<<wLog, even for dictionary */ | |
|
203 | size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. | |||
|
204 | * No target when targetCBlockSize == 0. | |||
|
205 | * There is no guarantee on compressed block size */ | |||
189 |
|
206 | |||
190 | ZSTD_dictAttachPref_e attachDictPref; |
|
207 | ZSTD_dictAttachPref_e attachDictPref; | |
|
208 | ZSTD_literalCompressionMode_e literalCompressionMode; | |||
191 |
|
209 | |||
192 | /* Multithreading: used to pass parameters to mtctx */ |
|
210 | /* Multithreading: used to pass parameters to mtctx */ | |
193 | int nbWorkers; |
|
211 | int nbWorkers; | |
@@ -243,7 +261,7 b' struct ZSTD_CCtx_s {' | |||||
243 | U32 frameEnded; |
|
261 | U32 frameEnded; | |
244 |
|
262 | |||
245 | /* Dictionary */ |
|
263 | /* Dictionary */ | |
246 |
ZSTD_ |
|
264 | ZSTD_localDict localDict; | |
247 | const ZSTD_CDict* cdict; |
|
265 | const ZSTD_CDict* cdict; | |
248 | ZSTD_prefixDict prefixDict; /* single-usage dictionary */ |
|
266 | ZSTD_prefixDict prefixDict; /* single-usage dictionary */ | |
249 |
|
267 | |||
@@ -295,6 +313,30 b' MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)' | |||||
295 | return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; |
|
313 | return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; | |
296 | } |
|
314 | } | |
297 |
|
315 | |||
|
316 | /* ZSTD_cParam_withinBounds: | |||
|
317 | * @return 1 if value is within cParam bounds, | |||
|
318 | * 0 otherwise */ | |||
|
319 | MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) | |||
|
320 | { | |||
|
321 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); | |||
|
322 | if (ZSTD_isError(bounds.error)) return 0; | |||
|
323 | if (value < bounds.lowerBound) return 0; | |||
|
324 | if (value > bounds.upperBound) return 0; | |||
|
325 | return 1; | |||
|
326 | } | |||
|
327 | ||||
|
328 | /* ZSTD_minGain() : | |||
|
329 | * minimum compression required | |||
|
330 | * to generate a compress block or a compressed literals section. | |||
|
331 | * note : use same formula for both situations */ | |||
|
332 | MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) | |||
|
333 | { | |||
|
334 | U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; | |||
|
335 | ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); | |||
|
336 | assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); | |||
|
337 | return (srcSize >> minlog) + 2; | |||
|
338 | } | |||
|
339 | ||||
298 | /*! ZSTD_storeSeq() : |
|
340 | /*! ZSTD_storeSeq() : | |
299 | * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. |
|
341 | * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. | |
300 | * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). |
|
342 | * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). | |
@@ -314,7 +356,7 b' MEM_STATIC void ZSTD_storeSeq(seqStore_t' | |||||
314 | /* copy Literals */ |
|
356 | /* copy Literals */ | |
315 | assert(seqStorePtr->maxNbLit <= 128 KB); |
|
357 | assert(seqStorePtr->maxNbLit <= 128 KB); | |
316 | assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); |
|
358 | assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); | |
317 | ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); |
|
359 | ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap); | |
318 | seqStorePtr->lit += litLength; |
|
360 | seqStorePtr->lit += litLength; | |
319 |
|
361 | |||
320 | /* literal Length */ |
|
362 | /* literal Length */ | |
@@ -554,6 +596,9 b' MEM_STATIC U64 ZSTD_rollingHash_rotate(U' | |||||
554 | /*-************************************* |
|
596 | /*-************************************* | |
555 | * Round buffer management |
|
597 | * Round buffer management | |
556 | ***************************************/ |
|
598 | ***************************************/ | |
|
599 | #if (ZSTD_WINDOWLOG_MAX_64 > 31) | |||
|
600 | # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" | |||
|
601 | #endif | |||
557 | /* Max current allowed */ |
|
602 | /* Max current allowed */ | |
558 | #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) |
|
603 | #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) | |
559 | /* Maximum chunk size before overflow correction needs to be called again */ |
|
604 | /* Maximum chunk size before overflow correction needs to be called again */ | |
@@ -665,31 +710,49 b' MEM_STATIC U32 ZSTD_window_correctOverfl' | |||||
665 | * Updates lowLimit so that: |
|
710 | * Updates lowLimit so that: | |
666 | * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd |
|
711 | * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd | |
667 | * |
|
712 | * | |
668 | * This allows a simple check that index >= lowLimit to see if index is valid. |
|
713 | * It ensures index is valid as long as index >= lowLimit. | |
669 |
* This must be called before a block compression call |
|
714 | * This must be called before a block compression call. | |
670 | * source end. |
|
715 | * | |
|
716 | * loadedDictEnd is only defined if a dictionary is in use for current compression. | |||
|
717 | * As the name implies, loadedDictEnd represents the index at end of dictionary. | |||
|
718 | * The value lies within context's referential, it can be directly compared to blockEndIdx. | |||
671 | * |
|
719 | * | |
672 | * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. |
|
720 | * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. | |
673 | * This is because dictionaries are allowed to be referenced as long as the last |
|
721 | * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. | |
674 | * byte of the dictionary is in the window, but once they are out of range, |
|
722 | * This is because dictionaries are allowed to be referenced fully | |
675 | * they cannot be referenced. If loadedDictEndPtr is NULL, we use |
|
723 | * as long as the last byte of the dictionary is in the window. | |
676 | * loadedDictEnd == 0. |
|
724 | * Once input has progressed beyond window size, dictionary cannot be referenced anymore. | |
677 | * |
|
725 | * | |
678 |
* In normal dict mode, the dict is between lowLimit and dictLimit. |
|
726 | * In normal dict mode, the dictionary lies between lowLimit and dictLimit. | |
679 |
* dictMatchState mode, lowLimit and dictLimit are the same, |
|
727 | * In dictMatchState mode, lowLimit and dictLimit are the same, | |
680 | * is below them. forceWindow and dictMatchState are therefore incompatible. |
|
728 | * and the dictionary is below them. | |
|
729 | * forceWindow and dictMatchState are therefore incompatible. | |||
681 | */ |
|
730 | */ | |
682 | MEM_STATIC void |
|
731 | MEM_STATIC void | |
683 | ZSTD_window_enforceMaxDist(ZSTD_window_t* window, |
|
732 | ZSTD_window_enforceMaxDist(ZSTD_window_t* window, | |
684 |
|
|
733 | const void* blockEnd, | |
685 | U32 maxDist, |
|
734 | U32 maxDist, | |
686 | U32* loadedDictEndPtr, |
|
735 | U32* loadedDictEndPtr, | |
687 | const ZSTD_matchState_t** dictMatchStatePtr) |
|
736 | const ZSTD_matchState_t** dictMatchStatePtr) | |
688 | { |
|
737 | { | |
689 |
U32 const blockEndIdx = (U32)((BYTE const*) |
|
738 | U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); | |
690 | U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; |
|
739 | U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; | |
691 | DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", |
|
740 | DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", | |
692 | (unsigned)blockEndIdx, (unsigned)maxDist); |
|
741 | (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); | |
|
742 | ||||
|
743 | /* - When there is no dictionary : loadedDictEnd == 0. | |||
|
744 | In which case, the test (blockEndIdx > maxDist) is merely to avoid | |||
|
745 | overflowing next operation `newLowLimit = blockEndIdx - maxDist`. | |||
|
746 | - When there is a standard dictionary : | |||
|
747 | Index referential is copied from the dictionary, | |||
|
748 | which means it starts from 0. | |||
|
749 | In which case, loadedDictEnd == dictSize, | |||
|
750 | and it makes sense to compare `blockEndIdx > maxDist + dictSize` | |||
|
751 | since `blockEndIdx` also starts from zero. | |||
|
752 | - When there is an attached dictionary : | |||
|
753 | loadedDictEnd is expressed within the referential of the context, | |||
|
754 | so it can be directly compared against blockEndIdx. | |||
|
755 | */ | |||
693 | if (blockEndIdx > maxDist + loadedDictEnd) { |
|
756 | if (blockEndIdx > maxDist + loadedDictEnd) { | |
694 | U32 const newLowLimit = blockEndIdx - maxDist; |
|
757 | U32 const newLowLimit = blockEndIdx - maxDist; | |
695 | if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; |
|
758 | if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; | |
@@ -698,11 +761,45 b' ZSTD_window_enforceMaxDist(ZSTD_window_t' | |||||
698 | (unsigned)window->dictLimit, (unsigned)window->lowLimit); |
|
761 | (unsigned)window->dictLimit, (unsigned)window->lowLimit); | |
699 | window->dictLimit = window->lowLimit; |
|
762 | window->dictLimit = window->lowLimit; | |
700 | } |
|
763 | } | |
701 | if (loadedDictEndPtr) |
|
764 | /* On reaching window size, dictionaries are invalidated */ | |
|
765 | if (loadedDictEndPtr) *loadedDictEndPtr = 0; | |||
|
766 | if (dictMatchStatePtr) *dictMatchStatePtr = NULL; | |||
|
767 | } | |||
|
768 | } | |||
|
769 | ||||
|
770 | /* Similar to ZSTD_window_enforceMaxDist(), | |||
|
771 | * but only invalidates dictionary | |||
|
772 | * when input progresses beyond window size. | |||
|
773 | * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) | |||
|
774 | * loadedDictEnd uses same referential as window->base | |||
|
775 | * maxDist is the window size */ | |||
|
776 | MEM_STATIC void | |||
|
777 | ZSTD_checkDictValidity(const ZSTD_window_t* window, | |||
|
778 | const void* blockEnd, | |||
|
779 | U32 maxDist, | |||
|
780 | U32* loadedDictEndPtr, | |||
|
781 | const ZSTD_matchState_t** dictMatchStatePtr) | |||
|
782 | { | |||
|
783 | assert(loadedDictEndPtr != NULL); | |||
|
784 | assert(dictMatchStatePtr != NULL); | |||
|
785 | { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); | |||
|
786 | U32 const loadedDictEnd = *loadedDictEndPtr; | |||
|
787 | DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", | |||
|
788 | (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); | |||
|
789 | assert(blockEndIdx >= loadedDictEnd); | |||
|
790 | ||||
|
791 | if (blockEndIdx > loadedDictEnd + maxDist) { | |||
|
792 | /* On reaching window size, dictionaries are invalidated. | |||
|
793 | * For simplification, if window size is reached anywhere within next block, | |||
|
794 | * the dictionary is invalidated for the full block. | |||
|
795 | */ | |||
|
796 | DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); | |||
702 | *loadedDictEndPtr = 0; |
|
797 | *loadedDictEndPtr = 0; | |
703 | if (dictMatchStatePtr) |
|
|||
704 | *dictMatchStatePtr = NULL; |
|
798 | *dictMatchStatePtr = NULL; | |
705 | } |
|
799 | } else { | |
|
800 | if (*loadedDictEndPtr != 0) { | |||
|
801 | DEBUGLOG(6, "dictionary considered valid for current block"); | |||
|
802 | } } } | |||
706 | } |
|
803 | } | |
707 |
|
804 | |||
708 | /** |
|
805 | /** | |
@@ -744,6 +841,17 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w' | |||||
744 | return contiguous; |
|
841 | return contiguous; | |
745 | } |
|
842 | } | |
746 |
|
843 | |||
|
844 | MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) | |||
|
845 | { | |||
|
846 | U32 const maxDistance = 1U << windowLog; | |||
|
847 | U32 const lowestValid = ms->window.lowLimit; | |||
|
848 | U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; | |||
|
849 | U32 const isDictionary = (ms->loadedDictEnd != 0); | |||
|
850 | U32 const matchLowest = isDictionary ? lowestValid : withinWindow; | |||
|
851 | return matchLowest; | |||
|
852 | } | |||
|
853 | ||||
|
854 | ||||
747 |
|
855 | |||
748 | /* debug functions */ |
|
856 | /* debug functions */ | |
749 | #if (DEBUGLEVEL>=2) |
|
857 | #if (DEBUGLEVEL>=2) | |
@@ -806,13 +914,6 b' size_t ZSTD_initCStream_internal(ZSTD_CS' | |||||
806 |
|
914 | |||
807 | void ZSTD_resetSeqStore(seqStore_t* ssPtr); |
|
915 | void ZSTD_resetSeqStore(seqStore_t* ssPtr); | |
808 |
|
916 | |||
809 | /*! ZSTD_compressStream_generic() : |
|
|||
810 | * Private use only. To be called from zstdmt_compress.c in single-thread mode. */ |
|
|||
811 | size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, |
|
|||
812 | ZSTD_outBuffer* output, |
|
|||
813 | ZSTD_inBuffer* input, |
|
|||
814 | ZSTD_EndDirective const flushMode); |
|
|||
815 |
|
||||
816 | /*! ZSTD_getCParamsFromCDict() : |
|
917 | /*! ZSTD_getCParamsFromCDict() : | |
817 | * as the name implies */ |
|
918 | * as the name implies */ | |
818 | ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); |
|
919 | ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); | |
@@ -839,7 +940,7 b' size_t ZSTD_compress_advanced_internal(Z' | |||||
839 | /* ZSTD_writeLastEmptyBlock() : |
|
940 | /* ZSTD_writeLastEmptyBlock() : | |
840 | * output an empty Block with end-of-frame mark to complete a frame |
|
941 | * output an empty Block with end-of-frame mark to complete a frame | |
841 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) |
|
942 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) | |
842 | * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize) |
|
943 | * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) | |
843 | */ |
|
944 | */ | |
844 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); |
|
945 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); | |
845 |
|
946 |
@@ -43,8 +43,7 b' void ZSTD_fillDoubleHashTable(ZSTD_match' | |||||
43 | /* Only load extra positions for ZSTD_dtlm_full */ |
|
43 | /* Only load extra positions for ZSTD_dtlm_full */ | |
44 | if (dtlm == ZSTD_dtlm_fast) |
|
44 | if (dtlm == ZSTD_dtlm_fast) | |
45 | break; |
|
45 | break; | |
46 | } |
|
46 | } } | |
47 | } |
|
|||
48 | } |
|
47 | } | |
49 |
|
48 | |||
50 |
|
49 | |||
@@ -63,7 +62,11 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
63 | const BYTE* const istart = (const BYTE*)src; |
|
62 | const BYTE* const istart = (const BYTE*)src; | |
64 | const BYTE* ip = istart; |
|
63 | const BYTE* ip = istart; | |
65 | const BYTE* anchor = istart; |
|
64 | const BYTE* anchor = istart; | |
66 | const U32 prefixLowestIndex = ms->window.dictLimit; |
|
65 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |
|
66 | const U32 lowestValid = ms->window.dictLimit; | |||
|
67 | const U32 maxDistance = 1U << cParams->windowLog; | |||
|
68 | /* presumes that, if there is a dictionary, it must be using Attach mode */ | |||
|
69 | const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; | |||
67 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
|
70 | const BYTE* const prefixLowest = base + prefixLowestIndex; | |
68 | const BYTE* const iend = istart + srcSize; |
|
71 | const BYTE* const iend = istart + srcSize; | |
69 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
|
72 | const BYTE* const ilimit = iend - HASH_READ_SIZE; | |
@@ -95,8 +98,15 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
95 | dictCParams->chainLog : hBitsS; |
|
98 | dictCParams->chainLog : hBitsS; | |
96 | const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); |
|
99 | const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); | |
97 |
|
100 | |||
|
101 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); | |||
|
102 | ||||
98 | assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); |
|
103 | assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); | |
99 |
|
104 | |||
|
105 | /* if a dictionary is attached, it must be within window range */ | |||
|
106 | if (dictMode == ZSTD_dictMatchState) { | |||
|
107 | assert(lowestValid + maxDistance >= endIndex); | |||
|
108 | } | |||
|
109 | ||||
100 | /* init */ |
|
110 | /* init */ | |
101 | ip += (dictAndPrefixLength == 0); |
|
111 | ip += (dictAndPrefixLength == 0); | |
102 | if (dictMode == ZSTD_noDict) { |
|
112 | if (dictMode == ZSTD_noDict) { | |
@@ -138,7 +148,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
138 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; |
|
148 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; | |
139 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; |
|
149 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; | |
140 | ip++; |
|
150 | ip++; | |
141 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); |
|
151 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
142 | goto _match_stored; |
|
152 | goto _match_stored; | |
143 | } |
|
153 | } | |
144 |
|
154 | |||
@@ -147,7 +157,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
147 | && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { |
|
157 | && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { | |
148 | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; |
|
158 | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; | |
149 | ip++; |
|
159 | ip++; | |
150 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); |
|
160 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
151 | goto _match_stored; |
|
161 | goto _match_stored; | |
152 | } |
|
162 | } | |
153 |
|
163 | |||
@@ -170,8 +180,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
170 | offset = (U32)(current - dictMatchIndexL - dictIndexDelta); |
|
180 | offset = (U32)(current - dictMatchIndexL - dictIndexDelta); | |
171 | while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ |
|
181 | while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ | |
172 | goto _match_found; |
|
182 | goto _match_found; | |
173 |
|
|
183 | } } | |
174 | } |
|
|||
175 |
|
184 | |||
176 | if (matchIndexS > prefixLowestIndex) { |
|
185 | if (matchIndexS > prefixLowestIndex) { | |
177 | /* check prefix short match */ |
|
186 | /* check prefix short match */ | |
@@ -186,16 +195,14 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
186 |
|
195 | |||
187 | if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { |
|
196 | if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { | |
188 | goto _search_next_long; |
|
197 | goto _search_next_long; | |
189 |
|
|
198 | } } | |
190 | } |
|
|||
191 |
|
199 | |||
192 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
|
200 | ip += ((ip-anchor) >> kSearchStrength) + 1; | |
193 | continue; |
|
201 | continue; | |
194 |
|
202 | |||
195 | _search_next_long: |
|
203 | _search_next_long: | |
196 |
|
204 | |||
197 | { |
|
205 | { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |
198 | size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); |
|
|||
199 | size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); |
|
206 | size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); | |
200 | U32 const matchIndexL3 = hashLong[hl3]; |
|
207 | U32 const matchIndexL3 = hashLong[hl3]; | |
201 | const BYTE* matchL3 = base + matchIndexL3; |
|
208 | const BYTE* matchL3 = base + matchIndexL3; | |
@@ -221,9 +228,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
221 | offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); |
|
228 | offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); | |
222 | while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ |
|
229 | while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ | |
223 | goto _match_found; |
|
230 | goto _match_found; | |
224 | } |
|
231 | } } } | |
225 | } |
|
|||
226 | } |
|
|||
227 |
|
232 | |||
228 | /* if no long +1 match, explore the short match we found */ |
|
233 | /* if no long +1 match, explore the short match we found */ | |
229 | if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { |
|
234 | if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { | |
@@ -242,7 +247,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
242 | offset_2 = offset_1; |
|
247 | offset_2 = offset_1; | |
243 | offset_1 = offset; |
|
248 | offset_1 = offset; | |
244 |
|
249 | |||
245 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
|
250 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
246 |
|
251 | |||
247 | _match_stored: |
|
252 | _match_stored: | |
248 | /* match found */ |
|
253 | /* match found */ | |
@@ -250,11 +255,14 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
250 | anchor = ip; |
|
255 | anchor = ip; | |
251 |
|
256 | |||
252 | if (ip <= ilimit) { |
|
257 | if (ip <= ilimit) { | |
253 | /* Fill Table */ |
|
258 | /* Complementary insertion */ | |
254 | hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = |
|
259 | /* done after iLimit test, as candidates could be > iend-8 */ | |
255 | hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ |
|
260 | { U32 const indexToInsert = current+2; | |
256 |
hashLong[ZSTD_hashPtr( |
|
261 | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |
257 |
hash |
|
262 | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |
|
263 | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
|
264 | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |||
|
265 | } | |||
258 |
|
266 | |||
259 | /* check immediate repcode */ |
|
267 | /* check immediate repcode */ | |
260 | if (dictMode == ZSTD_dictMatchState) { |
|
268 | if (dictMode == ZSTD_dictMatchState) { | |
@@ -278,8 +286,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
278 | continue; |
|
286 | continue; | |
279 | } |
|
287 | } | |
280 | break; |
|
288 | break; | |
281 |
|
|
289 | } } | |
282 | } |
|
|||
283 |
|
290 | |||
284 | if (dictMode == ZSTD_noDict) { |
|
291 | if (dictMode == ZSTD_noDict) { | |
285 | while ( (ip <= ilimit) |
|
292 | while ( (ip <= ilimit) | |
@@ -294,14 +301,15 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||||
294 | ip += rLength; |
|
301 | ip += rLength; | |
295 | anchor = ip; |
|
302 | anchor = ip; | |
296 | continue; /* faster when present ... (?) */ |
|
303 | continue; /* faster when present ... (?) */ | |
297 |
|
|
304 | } } } | |
|
305 | } /* while (ip < ilimit) */ | |||
298 |
|
306 | |||
299 | /* save reps for next block */ |
|
307 | /* save reps for next block */ | |
300 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
|
308 | rep[0] = offset_1 ? offset_1 : offsetSaved; | |
301 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
|
309 | rep[1] = offset_2 ? offset_2 : offsetSaved; | |
302 |
|
310 | |||
303 | /* Return the last literals size */ |
|
311 | /* Return the last literals size */ | |
304 | return iend - anchor; |
|
312 | return (size_t)(iend - anchor); | |
305 | } |
|
313 | } | |
306 |
|
314 | |||
307 |
|
315 | |||
@@ -360,10 +368,13 b' static size_t ZSTD_compressBlock_doubleF' | |||||
360 | const BYTE* anchor = istart; |
|
368 | const BYTE* anchor = istart; | |
361 | const BYTE* const iend = istart + srcSize; |
|
369 | const BYTE* const iend = istart + srcSize; | |
362 | const BYTE* const ilimit = iend - 8; |
|
370 | const BYTE* const ilimit = iend - 8; | |
363 | const U32 prefixStartIndex = ms->window.dictLimit; |
|
|||
364 | const BYTE* const base = ms->window.base; |
|
371 | const BYTE* const base = ms->window.base; | |
|
372 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
|
373 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |||
|
374 | const U32 dictStartIndex = lowLimit; | |||
|
375 | const U32 dictLimit = ms->window.dictLimit; | |||
|
376 | const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; | |||
365 | const BYTE* const prefixStart = base + prefixStartIndex; |
|
377 | const BYTE* const prefixStart = base + prefixStartIndex; | |
366 | const U32 dictStartIndex = ms->window.lowLimit; |
|
|||
367 | const BYTE* const dictBase = ms->window.dictBase; |
|
378 | const BYTE* const dictBase = ms->window.dictBase; | |
368 | const BYTE* const dictStart = dictBase + dictStartIndex; |
|
379 | const BYTE* const dictStart = dictBase + dictStartIndex; | |
369 | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
|
380 | const BYTE* const dictEnd = dictBase + prefixStartIndex; | |
@@ -371,6 +382,10 b' static size_t ZSTD_compressBlock_doubleF' | |||||
371 |
|
382 | |||
372 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); |
|
383 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); | |
373 |
|
384 | |||
|
385 | /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ | |||
|
386 | if (prefixStartIndex == dictStartIndex) | |||
|
387 | return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); | |||
|
388 | ||||
374 | /* Search Loop */ |
|
389 | /* Search Loop */ | |
375 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
|
390 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |
376 | const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); |
|
391 | const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); | |
@@ -396,7 +411,7 b' static size_t ZSTD_compressBlock_doubleF' | |||||
396 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
411 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |
397 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
412 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; | |
398 | ip++; |
|
413 | ip++; | |
399 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); |
|
414 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
400 | } else { |
|
415 | } else { | |
401 | if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { |
|
416 | if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { | |
402 | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; |
|
417 | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; | |
@@ -407,7 +422,7 b' static size_t ZSTD_compressBlock_doubleF' | |||||
407 | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
|
422 | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ | |
408 | offset_2 = offset_1; |
|
423 | offset_2 = offset_1; | |
409 | offset_1 = offset; |
|
424 | offset_1 = offset; | |
410 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
|
425 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
411 |
|
426 | |||
412 | } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { |
|
427 | } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { | |
413 | size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); |
|
428 | size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |
@@ -432,23 +447,27 b' static size_t ZSTD_compressBlock_doubleF' | |||||
432 | } |
|
447 | } | |
433 | offset_2 = offset_1; |
|
448 | offset_2 = offset_1; | |
434 | offset_1 = offset; |
|
449 | offset_1 = offset; | |
435 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
|
450 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
436 |
|
451 | |||
437 | } else { |
|
452 | } else { | |
438 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
|
453 | ip += ((ip-anchor) >> kSearchStrength) + 1; | |
439 | continue; |
|
454 | continue; | |
440 | } } |
|
455 | } } | |
441 |
|
456 | |||
442 | /* found a match : store it */ |
|
457 | /* move to next sequence start */ | |
443 | ip += mLength; |
|
458 | ip += mLength; | |
444 | anchor = ip; |
|
459 | anchor = ip; | |
445 |
|
460 | |||
446 | if (ip <= ilimit) { |
|
461 | if (ip <= ilimit) { | |
447 | /* Fill Table */ |
|
462 | /* Complementary insertion */ | |
448 | hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; |
|
463 | /* done after iLimit test, as candidates could be > iend-8 */ | |
449 | hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; |
|
464 | { U32 const indexToInsert = current+2; | |
450 |
|
|
465 | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |
451 | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); |
|
466 | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |
|
467 | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
|
468 | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |||
|
469 | } | |||
|
470 | ||||
452 | /* check immediate repcode */ |
|
471 | /* check immediate repcode */ | |
453 | while (ip <= ilimit) { |
|
472 | while (ip <= ilimit) { | |
454 | U32 const current2 = (U32)(ip-base); |
|
473 | U32 const current2 = (U32)(ip-base); | |
@@ -475,7 +494,7 b' static size_t ZSTD_compressBlock_doubleF' | |||||
475 | rep[1] = offset_2; |
|
494 | rep[1] = offset_2; | |
476 |
|
495 | |||
477 | /* Return the last literals size */ |
|
496 | /* Return the last literals size */ | |
478 | return iend - anchor; |
|
497 | return (size_t)(iend - anchor); | |
479 | } |
|
498 | } | |
480 |
|
499 | |||
481 |
|
500 |
@@ -13,7 +13,8 b'' | |||||
13 |
|
13 | |||
14 |
|
14 | |||
15 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, |
|
15 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, | |
16 |
void const |
|
16 | const void* const end, | |
|
17 | ZSTD_dictTableLoadMethod_e dtlm) | |||
17 | { |
|
18 | { | |
18 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
|
19 | const ZSTD_compressionParameters* const cParams = &ms->cParams; | |
19 | U32* const hashTable = ms->hashTable; |
|
20 | U32* const hashTable = ms->hashTable; | |
@@ -41,11 +42,164 b' void ZSTD_fillHashTable(ZSTD_matchState_' | |||||
41 | } } } } |
|
42 | } } } } | |
42 | } |
|
43 | } | |
43 |
|
44 | |||
|
45 | ||||
44 | FORCE_INLINE_TEMPLATE |
|
46 | FORCE_INLINE_TEMPLATE | |
45 | size_t ZSTD_compressBlock_fast_generic( |
|
47 | size_t ZSTD_compressBlock_fast_generic( | |
46 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
48 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
47 | void const* src, size_t srcSize, |
|
49 | void const* src, size_t srcSize, | |
48 | U32 const mls, ZSTD_dictMode_e const dictMode) |
|
50 | U32 const mls) | |
|
51 | { | |||
|
52 | const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
|
53 | U32* const hashTable = ms->hashTable; | |||
|
54 | U32 const hlog = cParams->hashLog; | |||
|
55 | /* support stepSize of 0 */ | |||
|
56 | size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; | |||
|
57 | const BYTE* const base = ms->window.base; | |||
|
58 | const BYTE* const istart = (const BYTE*)src; | |||
|
59 | /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ | |||
|
60 | const BYTE* ip0 = istart; | |||
|
61 | const BYTE* ip1; | |||
|
62 | const BYTE* anchor = istart; | |||
|
63 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
|
64 | const U32 maxDistance = 1U << cParams->windowLog; | |||
|
65 | const U32 validStartIndex = ms->window.dictLimit; | |||
|
66 | const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex; | |||
|
67 | const BYTE* const prefixStart = base + prefixStartIndex; | |||
|
68 | const BYTE* const iend = istart + srcSize; | |||
|
69 | const BYTE* const ilimit = iend - HASH_READ_SIZE; | |||
|
70 | U32 offset_1=rep[0], offset_2=rep[1]; | |||
|
71 | U32 offsetSaved = 0; | |||
|
72 | ||||
|
73 | /* init */ | |||
|
74 | DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); | |||
|
75 | ip0 += (ip0 == prefixStart); | |||
|
76 | ip1 = ip0 + 1; | |||
|
77 | { | |||
|
78 | U32 const maxRep = (U32)(ip0 - prefixStart); | |||
|
79 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; | |||
|
80 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |||
|
81 | } | |||
|
82 | ||||
|
83 | /* Main Search Loop */ | |||
|
84 | while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ | |||
|
85 | size_t mLength; | |||
|
86 | BYTE const* ip2 = ip0 + 2; | |||
|
87 | size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); | |||
|
88 | U32 const val0 = MEM_read32(ip0); | |||
|
89 | size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); | |||
|
90 | U32 const val1 = MEM_read32(ip1); | |||
|
91 | U32 const current0 = (U32)(ip0-base); | |||
|
92 | U32 const current1 = (U32)(ip1-base); | |||
|
93 | U32 const matchIndex0 = hashTable[h0]; | |||
|
94 | U32 const matchIndex1 = hashTable[h1]; | |||
|
95 | BYTE const* repMatch = ip2-offset_1; | |||
|
96 | const BYTE* match0 = base + matchIndex0; | |||
|
97 | const BYTE* match1 = base + matchIndex1; | |||
|
98 | U32 offcode; | |||
|
99 | hashTable[h0] = current0; /* update hash table */ | |||
|
100 | hashTable[h1] = current1; /* update hash table */ | |||
|
101 | ||||
|
102 | assert(ip0 + 1 == ip1); | |||
|
103 | ||||
|
104 | if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { | |||
|
105 | mLength = ip2[-1] == repMatch[-1] ? 1 : 0; | |||
|
106 | ip0 = ip2 - mLength; | |||
|
107 | match0 = repMatch - mLength; | |||
|
108 | offcode = 0; | |||
|
109 | goto _match; | |||
|
110 | } | |||
|
111 | if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { | |||
|
112 | /* found a regular match */ | |||
|
113 | goto _offset; | |||
|
114 | } | |||
|
115 | if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { | |||
|
116 | /* found a regular match after one literal */ | |||
|
117 | ip0 = ip1; | |||
|
118 | match0 = match1; | |||
|
119 | goto _offset; | |||
|
120 | } | |||
|
121 | { | |||
|
122 | size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; | |||
|
123 | assert(step >= 2); | |||
|
124 | ip0 += step; | |||
|
125 | ip1 += step; | |||
|
126 | continue; | |||
|
127 | } | |||
|
128 | _offset: /* Requires: ip0, match0 */ | |||
|
129 | /* Compute the offset code */ | |||
|
130 | offset_2 = offset_1; | |||
|
131 | offset_1 = (U32)(ip0-match0); | |||
|
132 | offcode = offset_1 + ZSTD_REP_MOVE; | |||
|
133 | mLength = 0; | |||
|
134 | /* Count the backwards match length */ | |||
|
135 | while (((ip0>anchor) & (match0>prefixStart)) | |||
|
136 | && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ | |||
|
137 | ||||
|
138 | _match: /* Requires: ip0, match0, offcode */ | |||
|
139 | /* Count the forward length */ | |||
|
140 | mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; | |||
|
141 | ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); | |||
|
142 | /* match found */ | |||
|
143 | ip0 += mLength; | |||
|
144 | anchor = ip0; | |||
|
145 | ip1 = ip0 + 1; | |||
|
146 | ||||
|
147 | if (ip0 <= ilimit) { | |||
|
148 | /* Fill Table */ | |||
|
149 | assert(base+current0+2 > istart); /* check base overflow */ | |||
|
150 | hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ | |||
|
151 | hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); | |||
|
152 | ||||
|
153 | while ( (ip0 <= ilimit) | |||
|
154 | && ( (offset_2>0) | |||
|
155 | & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { | |||
|
156 | /* store sequence */ | |||
|
157 | size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; | |||
|
158 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |||
|
159 | hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); | |||
|
160 | ip0 += rLength; | |||
|
161 | ip1 = ip0 + 1; | |||
|
162 | ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); | |||
|
163 | anchor = ip0; | |||
|
164 | continue; /* faster when present (confirmed on gcc-8) ... (?) */ | |||
|
165 | } | |||
|
166 | } | |||
|
167 | } | |||
|
168 | ||||
|
169 | /* save reps for next block */ | |||
|
170 | rep[0] = offset_1 ? offset_1 : offsetSaved; | |||
|
171 | rep[1] = offset_2 ? offset_2 : offsetSaved; | |||
|
172 | ||||
|
173 | /* Return the last literals size */ | |||
|
174 | return (size_t)(iend - anchor); | |||
|
175 | } | |||
|
176 | ||||
|
177 | ||||
|
178 | size_t ZSTD_compressBlock_fast( | |||
|
179 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
|
180 | void const* src, size_t srcSize) | |||
|
181 | { | |||
|
182 | ZSTD_compressionParameters const* cParams = &ms->cParams; | |||
|
183 | U32 const mls = cParams->minMatch; | |||
|
184 | assert(ms->dictMatchState == NULL); | |||
|
185 | switch(mls) | |||
|
186 | { | |||
|
187 | default: /* includes case 3 */ | |||
|
188 | case 4 : | |||
|
189 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); | |||
|
190 | case 5 : | |||
|
191 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); | |||
|
192 | case 6 : | |||
|
193 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); | |||
|
194 | case 7 : | |||
|
195 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); | |||
|
196 | } | |||
|
197 | } | |||
|
198 | ||||
|
199 | FORCE_INLINE_TEMPLATE | |||
|
200 | size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |||
|
201 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
|
202 | void const* src, size_t srcSize, U32 const mls) | |||
49 | { |
|
203 | { | |
50 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
|
204 | const ZSTD_compressionParameters* const cParams = &ms->cParams; | |
51 | U32* const hashTable = ms->hashTable; |
|
205 | U32* const hashTable = ms->hashTable; | |
@@ -64,46 +218,34 b' size_t ZSTD_compressBlock_fast_generic(' | |||||
64 | U32 offsetSaved = 0; |
|
218 | U32 offsetSaved = 0; | |
65 |
|
219 | |||
66 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
|
220 | const ZSTD_matchState_t* const dms = ms->dictMatchState; | |
67 | const ZSTD_compressionParameters* const dictCParams = |
|
221 | const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; | |
68 | dictMode == ZSTD_dictMatchState ? |
|
222 | const U32* const dictHashTable = dms->hashTable; | |
69 | &dms->cParams : NULL; |
|
223 | const U32 dictStartIndex = dms->window.dictLimit; | |
70 | const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? |
|
224 | const BYTE* const dictBase = dms->window.base; | |
71 | dms->hashTable : NULL; |
|
225 | const BYTE* const dictStart = dictBase + dictStartIndex; | |
72 | const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? |
|
226 | const BYTE* const dictEnd = dms->window.nextSrc; | |
73 | dms->window.dictLimit : 0; |
|
227 | const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); | |
74 | const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? |
|
|||
75 | dms->window.base : NULL; |
|
|||
76 | const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? |
|
|||
77 | dictBase + dictStartIndex : NULL; |
|
|||
78 | const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? |
|
|||
79 | dms->window.nextSrc : NULL; |
|
|||
80 | const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? |
|
|||
81 | prefixStartIndex - (U32)(dictEnd - dictBase) : |
|
|||
82 | 0; |
|
|||
83 | const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); |
|
228 | const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); | |
84 |
const U32 dictHLog = dict |
|
229 | const U32 dictHLog = dictCParams->hashLog; | |
85 | dictCParams->hashLog : hlog; |
|
|||
86 |
|
||||
87 | assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); |
|
|||
88 |
|
230 | |||
89 | /* otherwise, we would get index underflow when translating a dict index |
|
231 | /* if a dictionary is still attached, it necessarily means that | |
90 | * into a local index */ |
|
232 | * it is within window size. So we just check it. */ | |
91 | assert(dictMode != ZSTD_dictMatchState |
|
233 | const U32 maxDistance = 1U << cParams->windowLog; | |
92 | || prefixStartIndex >= (U32)(dictEnd - dictBase)); |
|
234 | const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); | |
|
235 | assert(endIndex - prefixStartIndex <= maxDistance); | |||
|
236 | (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ | |||
|
237 | ||||
|
238 | /* ensure there will be no underflow | |||
|
239 | * when translating a dict index into a local index */ | |||
|
240 | assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); | |||
93 |
|
241 | |||
94 | /* init */ |
|
242 | /* init */ | |
|
243 | DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); | |||
95 | ip += (dictAndPrefixLength == 0); |
|
244 | ip += (dictAndPrefixLength == 0); | |
96 | if (dictMode == ZSTD_noDict) { |
|
245 | /* dictMatchState repCode checks don't currently handle repCode == 0 | |
97 | U32 const maxRep = (U32)(ip - prefixStart); |
|
246 | * disabling. */ | |
98 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; |
|
247 | assert(offset_1 <= dictAndPrefixLength); | |
99 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; |
|
248 | assert(offset_2 <= dictAndPrefixLength); | |
100 | } |
|
|||
101 | if (dictMode == ZSTD_dictMatchState) { |
|
|||
102 | /* dictMatchState repCode checks don't currently handle repCode == 0 |
|
|||
103 | * disabling. */ |
|
|||
104 | assert(offset_1 <= dictAndPrefixLength); |
|
|||
105 | assert(offset_2 <= dictAndPrefixLength); |
|
|||
106 | } |
|
|||
107 |
|
249 | |||
108 | /* Main Search Loop */ |
|
250 | /* Main Search Loop */ | |
109 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ |
|
251 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ | |
@@ -113,50 +255,37 b' size_t ZSTD_compressBlock_fast_generic(' | |||||
113 | U32 const matchIndex = hashTable[h]; |
|
255 | U32 const matchIndex = hashTable[h]; | |
114 | const BYTE* match = base + matchIndex; |
|
256 | const BYTE* match = base + matchIndex; | |
115 | const U32 repIndex = current + 1 - offset_1; |
|
257 | const U32 repIndex = current + 1 - offset_1; | |
116 |
const BYTE* repMatch = ( |
|
258 | const BYTE* repMatch = (repIndex < prefixStartIndex) ? | |
117 | && repIndex < prefixStartIndex) ? |
|
|||
118 | dictBase + (repIndex - dictIndexDelta) : |
|
259 | dictBase + (repIndex - dictIndexDelta) : | |
119 | base + repIndex; |
|
260 | base + repIndex; | |
120 | hashTable[h] = current; /* update hash table */ |
|
261 | hashTable[h] = current; /* update hash table */ | |
121 |
|
262 | |||
122 | if ( (dictMode == ZSTD_dictMatchState) |
|
263 | if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ | |
123 | && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ |
|
|||
124 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
|
264 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |
125 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
265 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |
126 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
266 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; | |
127 | ip++; |
|
267 | ip++; | |
128 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); |
|
268 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
129 | } else if ( dictMode == ZSTD_noDict |
|
|||
130 | && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { |
|
|||
131 | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; |
|
|||
132 | ip++; |
|
|||
133 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); |
|
|||
134 | } else if ( (matchIndex <= prefixStartIndex) ) { |
|
269 | } else if ( (matchIndex <= prefixStartIndex) ) { | |
135 | if (dictMode == ZSTD_dictMatchState) { |
|
270 | size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); | |
136 | size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); |
|
271 | U32 const dictMatchIndex = dictHashTable[dictHash]; | |
137 | U32 const dictMatchIndex = dictHashTable[dictHash]; |
|
272 | const BYTE* dictMatch = dictBase + dictMatchIndex; | |
138 | const BYTE* dictMatch = dictBase + dictMatchIndex; |
|
273 | if (dictMatchIndex <= dictStartIndex || | |
139 | if (dictMatchIndex <= dictStartIndex || |
|
274 | MEM_read32(dictMatch) != MEM_read32(ip)) { | |
140 | MEM_read32(dictMatch) != MEM_read32(ip)) { |
|
|||
141 | assert(stepSize >= 1); |
|
|||
142 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; |
|
|||
143 | continue; |
|
|||
144 | } else { |
|
|||
145 | /* found a dict match */ |
|
|||
146 | U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); |
|
|||
147 | mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; |
|
|||
148 | while (((ip>anchor) & (dictMatch>dictStart)) |
|
|||
149 | && (ip[-1] == dictMatch[-1])) { |
|
|||
150 | ip--; dictMatch--; mLength++; |
|
|||
151 | } /* catch up */ |
|
|||
152 | offset_2 = offset_1; |
|
|||
153 | offset_1 = offset; |
|
|||
154 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
|
|||
155 | } |
|
|||
156 | } else { |
|
|||
157 | assert(stepSize >= 1); |
|
275 | assert(stepSize >= 1); | |
158 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; |
|
276 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |
159 | continue; |
|
277 | continue; | |
|
278 | } else { | |||
|
279 | /* found a dict match */ | |||
|
280 | U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); | |||
|
281 | mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; | |||
|
282 | while (((ip>anchor) & (dictMatch>dictStart)) | |||
|
283 | && (ip[-1] == dictMatch[-1])) { | |||
|
284 | ip--; dictMatch--; mLength++; | |||
|
285 | } /* catch up */ | |||
|
286 | offset_2 = offset_1; | |||
|
287 | offset_1 = offset; | |||
|
288 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
160 | } |
|
289 | } | |
161 | } else if (MEM_read32(match) != MEM_read32(ip)) { |
|
290 | } else if (MEM_read32(match) != MEM_read32(ip)) { | |
162 | /* it's not a match, and we're not going to check the dictionary */ |
|
291 | /* it's not a match, and we're not going to check the dictionary */ | |
@@ -171,7 +300,7 b' size_t ZSTD_compressBlock_fast_generic(' | |||||
171 | && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
|
300 | && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |
172 | offset_2 = offset_1; |
|
301 | offset_2 = offset_1; | |
173 | offset_1 = offset; |
|
302 | offset_1 = offset; | |
174 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
|
303 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
175 | } |
|
304 | } | |
176 |
|
305 | |||
177 | /* match found */ |
|
306 | /* match found */ | |
@@ -185,70 +314,34 b' size_t ZSTD_compressBlock_fast_generic(' | |||||
185 | hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); |
|
314 | hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); | |
186 |
|
315 | |||
187 | /* check immediate repcode */ |
|
316 | /* check immediate repcode */ | |
188 | if (dictMode == ZSTD_dictMatchState) { |
|
317 | while (ip <= ilimit) { | |
189 | while (ip <= ilimit) { |
|
318 | U32 const current2 = (U32)(ip-base); | |
190 |
|
|
319 | U32 const repIndex2 = current2 - offset_2; | |
191 | U32 const repIndex2 = current2 - offset_2; |
|
320 | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? | |
192 | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? |
|
321 | dictBase - dictIndexDelta + repIndex2 : | |
193 |
|
|
322 | base + repIndex2; | |
194 | base + repIndex2; |
|
323 | if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) | |
195 | if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) |
|
324 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |
196 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
|
325 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |
197 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
|
326 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |
198 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
|
327 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |
199 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
|
328 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); | |
200 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); |
|
329 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; | |
201 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
|
330 | ip += repLength2; | |
202 |
|
|
331 | anchor = ip; | |
203 |
|
|
332 | continue; | |
204 | continue; |
|
|||
205 | } |
|
|||
206 | break; |
|
|||
207 | } |
|
333 | } | |
|
334 | break; | |||
208 | } |
|
335 | } | |
209 |
|
336 | } | ||
210 | if (dictMode == ZSTD_noDict) { |
|
337 | } | |
211 | while ( (ip <= ilimit) |
|
|||
212 | && ( (offset_2>0) |
|
|||
213 | & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { |
|
|||
214 | /* store sequence */ |
|
|||
215 | size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; |
|
|||
216 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ |
|
|||
217 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); |
|
|||
218 | ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); |
|
|||
219 | ip += rLength; |
|
|||
220 | anchor = ip; |
|
|||
221 | continue; /* faster when present ... (?) */ |
|
|||
222 | } } } } |
|
|||
223 |
|
338 | |||
224 | /* save reps for next block */ |
|
339 | /* save reps for next block */ | |
225 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
|
340 | rep[0] = offset_1 ? offset_1 : offsetSaved; | |
226 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
|
341 | rep[1] = offset_2 ? offset_2 : offsetSaved; | |
227 |
|
342 | |||
228 | /* Return the last literals size */ |
|
343 | /* Return the last literals size */ | |
229 | return iend - anchor; |
|
344 | return (size_t)(iend - anchor); | |
230 | } |
|
|||
231 |
|
||||
232 |
|
||||
233 | size_t ZSTD_compressBlock_fast( |
|
|||
234 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
|||
235 | void const* src, size_t srcSize) |
|
|||
236 | { |
|
|||
237 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
|
|||
238 | U32 const mls = cParams->minMatch; |
|
|||
239 | assert(ms->dictMatchState == NULL); |
|
|||
240 | switch(mls) |
|
|||
241 | { |
|
|||
242 | default: /* includes case 3 */ |
|
|||
243 | case 4 : |
|
|||
244 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); |
|
|||
245 | case 5 : |
|
|||
246 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); |
|
|||
247 | case 6 : |
|
|||
248 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); |
|
|||
249 | case 7 : |
|
|||
250 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); |
|
|||
251 | } |
|
|||
252 | } |
|
345 | } | |
253 |
|
346 | |||
254 | size_t ZSTD_compressBlock_fast_dictMatchState( |
|
347 | size_t ZSTD_compressBlock_fast_dictMatchState( | |
@@ -262,13 +355,13 b' size_t ZSTD_compressBlock_fast_dictMatch' | |||||
262 | { |
|
355 | { | |
263 | default: /* includes case 3 */ |
|
356 | default: /* includes case 3 */ | |
264 | case 4 : |
|
357 | case 4 : | |
265 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4 |
|
358 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); | |
266 | case 5 : |
|
359 | case 5 : | |
267 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5 |
|
360 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); | |
268 | case 6 : |
|
361 | case 6 : | |
269 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6 |
|
362 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); | |
270 | case 7 : |
|
363 | case 7 : | |
271 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7 |
|
364 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); | |
272 | } |
|
365 | } | |
273 | } |
|
366 | } | |
274 |
|
367 | |||
@@ -287,15 +380,24 b' static size_t ZSTD_compressBlock_fast_ex' | |||||
287 | const BYTE* const istart = (const BYTE*)src; |
|
380 | const BYTE* const istart = (const BYTE*)src; | |
288 | const BYTE* ip = istart; |
|
381 | const BYTE* ip = istart; | |
289 | const BYTE* anchor = istart; |
|
382 | const BYTE* anchor = istart; | |
290 | const U32 dictStartIndex = ms->window.lowLimit; |
|
383 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |
|
384 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |||
|
385 | const U32 dictStartIndex = lowLimit; | |||
291 | const BYTE* const dictStart = dictBase + dictStartIndex; |
|
386 | const BYTE* const dictStart = dictBase + dictStartIndex; | |
292 |
const U32 |
|
387 | const U32 dictLimit = ms->window.dictLimit; | |
|
388 | const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; | |||
293 | const BYTE* const prefixStart = base + prefixStartIndex; |
|
389 | const BYTE* const prefixStart = base + prefixStartIndex; | |
294 | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
|
390 | const BYTE* const dictEnd = dictBase + prefixStartIndex; | |
295 | const BYTE* const iend = istart + srcSize; |
|
391 | const BYTE* const iend = istart + srcSize; | |
296 | const BYTE* const ilimit = iend - 8; |
|
392 | const BYTE* const ilimit = iend - 8; | |
297 | U32 offset_1=rep[0], offset_2=rep[1]; |
|
393 | U32 offset_1=rep[0], offset_2=rep[1]; | |
298 |
|
394 | |||
|
395 | DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic"); | |||
|
396 | ||||
|
397 | /* switch to "regular" variant if extDict is invalidated due to maxDistance */ | |||
|
398 | if (prefixStartIndex == dictStartIndex) | |||
|
399 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); | |||
|
400 | ||||
299 | /* Search Loop */ |
|
401 | /* Search Loop */ | |
300 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
|
402 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |
301 | const size_t h = ZSTD_hashPtr(ip, hlog, mls); |
|
403 | const size_t h = ZSTD_hashPtr(ip, hlog, mls); | |
@@ -312,10 +414,10 b' static size_t ZSTD_compressBlock_fast_ex' | |||||
312 |
|
414 | |||
313 | if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) |
|
415 | if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) | |
314 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
|
416 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |
315 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
417 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |
316 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
418 | mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; | |
317 | ip++; |
|
419 | ip++; | |
318 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); |
|
420 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
319 | } else { |
|
421 | } else { | |
320 | if ( (matchIndex < dictStartIndex) || |
|
422 | if ( (matchIndex < dictStartIndex) || | |
321 | (MEM_read32(match) != MEM_read32(ip)) ) { |
|
423 | (MEM_read32(match) != MEM_read32(ip)) ) { | |
@@ -323,15 +425,15 b' static size_t ZSTD_compressBlock_fast_ex' | |||||
323 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; |
|
425 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |
324 | continue; |
|
426 | continue; | |
325 | } |
|
427 | } | |
326 | { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; |
|
428 | { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; | |
327 | const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; |
|
429 | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |
328 | U32 offset; |
|
430 | U32 offset; | |
329 | mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; |
|
431 | mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |
330 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
|
432 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |
331 | offset = current - matchIndex; |
|
433 | offset = current - matchIndex; | |
332 | offset_2 = offset_1; |
|
434 | offset_2 = offset_1; | |
333 | offset_1 = offset; |
|
435 | offset_1 = offset; | |
334 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
|
436 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
335 | } } |
|
437 | } } | |
336 |
|
438 | |||
337 | /* found a match : store it */ |
|
439 | /* found a match : store it */ | |
@@ -351,7 +453,7 b' static size_t ZSTD_compressBlock_fast_ex' | |||||
351 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
|
453 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |
352 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
|
454 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |
353 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
|
455 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |
354 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
|
456 | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |
355 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); |
|
457 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); | |
356 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
|
458 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; | |
357 | ip += repLength2; |
|
459 | ip += repLength2; | |
@@ -366,7 +468,7 b' static size_t ZSTD_compressBlock_fast_ex' | |||||
366 | rep[1] = offset_2; |
|
468 | rep[1] = offset_2; | |
367 |
|
469 | |||
368 | /* Return the last literals size */ |
|
470 | /* Return the last literals size */ | |
369 | return iend - anchor; |
|
471 | return (size_t)(iend - anchor); | |
370 | } |
|
472 | } | |
371 |
|
473 | |||
372 |
|
474 |
@@ -83,7 +83,10 b' ZSTD_insertDUBT1(ZSTD_matchState_t* ms,' | |||||
83 | U32* largerPtr = smallerPtr + 1; |
|
83 | U32* largerPtr = smallerPtr + 1; | |
84 | U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ |
|
84 | U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ | |
85 | U32 dummy32; /* to be nullified at the end */ |
|
85 | U32 dummy32; /* to be nullified at the end */ | |
86 |
U32 const window |
|
86 | U32 const windowValid = ms->window.lowLimit; | |
|
87 | U32 const maxDistance = 1U << cParams->windowLog; | |||
|
88 | U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; | |||
|
89 | ||||
87 |
|
90 | |||
88 | DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", |
|
91 | DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", | |
89 | current, dictLimit, windowLow); |
|
92 | current, dictLimit, windowLow); | |
@@ -239,7 +242,7 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_' | |||||
239 |
|
242 | |||
240 | const BYTE* const base = ms->window.base; |
|
243 | const BYTE* const base = ms->window.base; | |
241 | U32 const current = (U32)(ip-base); |
|
244 | U32 const current = (U32)(ip-base); | |
242 |
U32 const windowLow = ms->window |
|
245 | U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); | |
243 |
|
246 | |||
244 | U32* const bt = ms->chainTable; |
|
247 | U32* const bt = ms->chainTable; | |
245 | U32 const btLog = cParams->chainLog - 1; |
|
248 | U32 const btLog = cParams->chainLog - 1; | |
@@ -490,8 +493,12 b' size_t ZSTD_HcFindBestMatch_generic (' | |||||
490 | const U32 dictLimit = ms->window.dictLimit; |
|
493 | const U32 dictLimit = ms->window.dictLimit; | |
491 | const BYTE* const prefixStart = base + dictLimit; |
|
494 | const BYTE* const prefixStart = base + dictLimit; | |
492 | const BYTE* const dictEnd = dictBase + dictLimit; |
|
495 | const BYTE* const dictEnd = dictBase + dictLimit; | |
493 | const U32 lowLimit = ms->window.lowLimit; |
|
|||
494 | const U32 current = (U32)(ip-base); |
|
496 | const U32 current = (U32)(ip-base); | |
|
497 | const U32 maxDistance = 1U << cParams->windowLog; | |||
|
498 | const U32 lowestValid = ms->window.lowLimit; | |||
|
499 | const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; | |||
|
500 | const U32 isDictionary = (ms->loadedDictEnd != 0); | |||
|
501 | const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; | |||
495 | const U32 minChain = current > chainSize ? current - chainSize : 0; |
|
502 | const U32 minChain = current > chainSize ? current - chainSize : 0; | |
496 | U32 nbAttempts = 1U << cParams->searchLog; |
|
503 | U32 nbAttempts = 1U << cParams->searchLog; | |
497 | size_t ml=4-1; |
|
504 | size_t ml=4-1; | |
@@ -612,12 +619,14 b' FORCE_INLINE_TEMPLATE size_t ZSTD_HcFind' | |||||
612 | /* ******************************* |
|
619 | /* ******************************* | |
613 | * Common parser - lazy strategy |
|
620 | * Common parser - lazy strategy | |
614 | *********************************/ |
|
621 | *********************************/ | |
615 | FORCE_INLINE_TEMPLATE |
|
622 | typedef enum { search_hashChain, search_binaryTree } searchMethod_e; | |
616 | size_t ZSTD_compressBlock_lazy_generic( |
|
623 | ||
|
624 | FORCE_INLINE_TEMPLATE size_t | |||
|
625 | ZSTD_compressBlock_lazy_generic( | |||
617 | ZSTD_matchState_t* ms, seqStore_t* seqStore, |
|
626 | ZSTD_matchState_t* ms, seqStore_t* seqStore, | |
618 | U32 rep[ZSTD_REP_NUM], |
|
627 | U32 rep[ZSTD_REP_NUM], | |
619 | const void* src, size_t srcSize, |
|
628 | const void* src, size_t srcSize, | |
620 |
const |
|
629 | const searchMethod_e searchMethod, const U32 depth, | |
621 | ZSTD_dictMode_e const dictMode) |
|
630 | ZSTD_dictMode_e const dictMode) | |
622 | { |
|
631 | { | |
623 | const BYTE* const istart = (const BYTE*)src; |
|
632 | const BYTE* const istart = (const BYTE*)src; | |
@@ -633,8 +642,10 b' size_t ZSTD_compressBlock_lazy_generic(' | |||||
633 | ZSTD_matchState_t* ms, |
|
642 | ZSTD_matchState_t* ms, | |
634 | const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); |
|
643 | const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); | |
635 | searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? |
|
644 | searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? | |
636 |
(searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS |
|
645 | (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS | |
637 | (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); |
|
646 | : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : | |
|
647 | (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS | |||
|
648 | : ZSTD_HcFindBestMatch_selectMLS); | |||
638 | U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; |
|
649 | U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; | |
639 |
|
650 | |||
640 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
|
651 | const ZSTD_matchState_t* const dms = ms->dictMatchState; | |
@@ -653,7 +664,6 b' size_t ZSTD_compressBlock_lazy_generic(' | |||||
653 |
|
664 | |||
654 | /* init */ |
|
665 | /* init */ | |
655 | ip += (dictAndPrefixLength == 0); |
|
666 | ip += (dictAndPrefixLength == 0); | |
656 | ms->nextToUpdate3 = ms->nextToUpdate; |
|
|||
657 | if (dictMode == ZSTD_noDict) { |
|
667 | if (dictMode == ZSTD_noDict) { | |
658 | U32 const maxRep = (U32)(ip - prefixLowest); |
|
668 | U32 const maxRep = (U32)(ip - prefixLowest); | |
659 | if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; |
|
669 | if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; | |
@@ -844,7 +854,7 b' size_t ZSTD_compressBlock_lazy_generic(' | |||||
844 | rep[1] = offset_2 ? offset_2 : savedOffset; |
|
854 | rep[1] = offset_2 ? offset_2 : savedOffset; | |
845 |
|
855 | |||
846 | /* Return the last literals size */ |
|
856 | /* Return the last literals size */ | |
847 | return iend - anchor; |
|
857 | return (size_t)(iend - anchor); | |
848 | } |
|
858 | } | |
849 |
|
859 | |||
850 |
|
860 | |||
@@ -852,56 +862,56 b' size_t ZSTD_compressBlock_btlazy2(' | |||||
852 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
862 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
853 | void const* src, size_t srcSize) |
|
863 | void const* src, size_t srcSize) | |
854 | { |
|
864 | { | |
855 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
865 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); | |
856 | } |
|
866 | } | |
857 |
|
867 | |||
858 | size_t ZSTD_compressBlock_lazy2( |
|
868 | size_t ZSTD_compressBlock_lazy2( | |
859 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
869 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
860 | void const* src, size_t srcSize) |
|
870 | void const* src, size_t srcSize) | |
861 | { |
|
871 | { | |
862 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
872 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); | |
863 | } |
|
873 | } | |
864 |
|
874 | |||
865 | size_t ZSTD_compressBlock_lazy( |
|
875 | size_t ZSTD_compressBlock_lazy( | |
866 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
876 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
867 | void const* src, size_t srcSize) |
|
877 | void const* src, size_t srcSize) | |
868 | { |
|
878 | { | |
869 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
879 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); | |
870 | } |
|
880 | } | |
871 |
|
881 | |||
872 | size_t ZSTD_compressBlock_greedy( |
|
882 | size_t ZSTD_compressBlock_greedy( | |
873 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
883 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
874 | void const* src, size_t srcSize) |
|
884 | void const* src, size_t srcSize) | |
875 | { |
|
885 | { | |
876 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
886 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); | |
877 | } |
|
887 | } | |
878 |
|
888 | |||
879 | size_t ZSTD_compressBlock_btlazy2_dictMatchState( |
|
889 | size_t ZSTD_compressBlock_btlazy2_dictMatchState( | |
880 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
890 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
881 | void const* src, size_t srcSize) |
|
891 | void const* src, size_t srcSize) | |
882 | { |
|
892 | { | |
883 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
893 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); | |
884 | } |
|
894 | } | |
885 |
|
895 | |||
886 | size_t ZSTD_compressBlock_lazy2_dictMatchState( |
|
896 | size_t ZSTD_compressBlock_lazy2_dictMatchState( | |
887 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
897 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
888 | void const* src, size_t srcSize) |
|
898 | void const* src, size_t srcSize) | |
889 | { |
|
899 | { | |
890 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
900 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); | |
891 | } |
|
901 | } | |
892 |
|
902 | |||
893 | size_t ZSTD_compressBlock_lazy_dictMatchState( |
|
903 | size_t ZSTD_compressBlock_lazy_dictMatchState( | |
894 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
904 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
895 | void const* src, size_t srcSize) |
|
905 | void const* src, size_t srcSize) | |
896 | { |
|
906 | { | |
897 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
907 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); | |
898 | } |
|
908 | } | |
899 |
|
909 | |||
900 | size_t ZSTD_compressBlock_greedy_dictMatchState( |
|
910 | size_t ZSTD_compressBlock_greedy_dictMatchState( | |
901 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
911 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
902 | void const* src, size_t srcSize) |
|
912 | void const* src, size_t srcSize) | |
903 | { |
|
913 | { | |
904 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
914 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); | |
905 | } |
|
915 | } | |
906 |
|
916 | |||
907 |
|
917 | |||
@@ -910,7 +920,7 b' size_t ZSTD_compressBlock_lazy_extDict_g' | |||||
910 | ZSTD_matchState_t* ms, seqStore_t* seqStore, |
|
920 | ZSTD_matchState_t* ms, seqStore_t* seqStore, | |
911 | U32 rep[ZSTD_REP_NUM], |
|
921 | U32 rep[ZSTD_REP_NUM], | |
912 | const void* src, size_t srcSize, |
|
922 | const void* src, size_t srcSize, | |
913 |
const |
|
923 | const searchMethod_e searchMethod, const U32 depth) | |
914 | { |
|
924 | { | |
915 | const BYTE* const istart = (const BYTE*)src; |
|
925 | const BYTE* const istart = (const BYTE*)src; | |
916 | const BYTE* ip = istart; |
|
926 | const BYTE* ip = istart; | |
@@ -928,12 +938,11 b' size_t ZSTD_compressBlock_lazy_extDict_g' | |||||
928 | typedef size_t (*searchMax_f)( |
|
938 | typedef size_t (*searchMax_f)( | |
929 | ZSTD_matchState_t* ms, |
|
939 | ZSTD_matchState_t* ms, | |
930 | const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); |
|
940 | const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); | |
931 | searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; |
|
941 | searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; | |
932 |
|
942 | |||
933 | U32 offset_1 = rep[0], offset_2 = rep[1]; |
|
943 | U32 offset_1 = rep[0], offset_2 = rep[1]; | |
934 |
|
944 | |||
935 | /* init */ |
|
945 | /* init */ | |
936 | ms->nextToUpdate3 = ms->nextToUpdate; |
|
|||
937 | ip += (ip == prefixStart); |
|
946 | ip += (ip == prefixStart); | |
938 |
|
947 | |||
939 | /* Match Loop */ |
|
948 | /* Match Loop */ | |
@@ -1070,7 +1079,7 b' size_t ZSTD_compressBlock_lazy_extDict_g' | |||||
1070 | rep[1] = offset_2; |
|
1079 | rep[1] = offset_2; | |
1071 |
|
1080 | |||
1072 | /* Return the last literals size */ |
|
1081 | /* Return the last literals size */ | |
1073 | return iend - anchor; |
|
1082 | return (size_t)(iend - anchor); | |
1074 | } |
|
1083 | } | |
1075 |
|
1084 | |||
1076 |
|
1085 | |||
@@ -1078,7 +1087,7 b' size_t ZSTD_compressBlock_greedy_extDict' | |||||
1078 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
1087 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
1079 | void const* src, size_t srcSize) |
|
1088 | void const* src, size_t srcSize) | |
1080 | { |
|
1089 | { | |
1081 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
1090 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); | |
1082 | } |
|
1091 | } | |
1083 |
|
1092 | |||
1084 | size_t ZSTD_compressBlock_lazy_extDict( |
|
1093 | size_t ZSTD_compressBlock_lazy_extDict( | |
@@ -1086,7 +1095,7 b' size_t ZSTD_compressBlock_lazy_extDict(' | |||||
1086 | void const* src, size_t srcSize) |
|
1095 | void const* src, size_t srcSize) | |
1087 |
|
1096 | |||
1088 | { |
|
1097 | { | |
1089 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
1098 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); | |
1090 | } |
|
1099 | } | |
1091 |
|
1100 | |||
1092 | size_t ZSTD_compressBlock_lazy2_extDict( |
|
1101 | size_t ZSTD_compressBlock_lazy2_extDict( | |
@@ -1094,7 +1103,7 b' size_t ZSTD_compressBlock_lazy2_extDict(' | |||||
1094 | void const* src, size_t srcSize) |
|
1103 | void const* src, size_t srcSize) | |
1095 |
|
1104 | |||
1096 | { |
|
1105 | { | |
1097 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
1106 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); | |
1098 | } |
|
1107 | } | |
1099 |
|
1108 | |||
1100 | size_t ZSTD_compressBlock_btlazy2_extDict( |
|
1109 | size_t ZSTD_compressBlock_btlazy2_extDict( | |
@@ -1102,5 +1111,5 b' size_t ZSTD_compressBlock_btlazy2_extDic' | |||||
1102 | void const* src, size_t srcSize) |
|
1111 | void const* src, size_t srcSize) | |
1103 |
|
1112 | |||
1104 | { |
|
1113 | { | |
1105 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
1114 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); | |
1106 | } |
|
1115 | } |
@@ -19,7 +19,7 b' extern "C" {' | |||||
19 |
|
19 | |||
20 | U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); |
|
20 | U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); | |
21 |
|
21 | |||
22 |
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre |
|
22 | void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ | |
23 |
|
23 | |||
24 | size_t ZSTD_compressBlock_btlazy2( |
|
24 | size_t ZSTD_compressBlock_btlazy2( | |
25 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
25 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
@@ -429,7 +429,7 b' size_t ZSTD_ldm_generateSequences(' | |||||
429 | */ |
|
429 | */ | |
430 | assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); |
|
430 | assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); | |
431 | /* The input could be very large (in zstdmt), so it must be broken up into |
|
431 | /* The input could be very large (in zstdmt), so it must be broken up into | |
432 |
* chunks to enforce the maxim |
|
432 | * chunks to enforce the maximum distance and handle overflow correction. | |
433 | */ |
|
433 | */ | |
434 | assert(sequences->pos <= sequences->size); |
|
434 | assert(sequences->pos <= sequences->size); | |
435 | assert(sequences->size <= sequences->capacity); |
|
435 | assert(sequences->size <= sequences->capacity); | |
@@ -447,7 +447,7 b' size_t ZSTD_ldm_generateSequences(' | |||||
447 | if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { |
|
447 | if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { | |
448 | U32 const ldmHSize = 1U << params->hashLog; |
|
448 | U32 const ldmHSize = 1U << params->hashLog; | |
449 | U32 const correction = ZSTD_window_correctOverflow( |
|
449 | U32 const correction = ZSTD_window_correctOverflow( | |
450 |
&ldmState->window, /* cycleLog */ 0, maxDist, |
|
450 | &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); | |
451 | ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); |
|
451 | ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); | |
452 | } |
|
452 | } | |
453 | /* 2. We enforce the maximum offset allowed. |
|
453 | /* 2. We enforce the maximum offset allowed. |
@@ -64,9 +64,15 b' MEM_STATIC double ZSTD_fCost(U32 price)' | |||||
64 | } |
|
64 | } | |
65 | #endif |
|
65 | #endif | |
66 |
|
66 | |||
|
67 | static int ZSTD_compressedLiterals(optState_t const* const optPtr) | |||
|
68 | { | |||
|
69 | return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; | |||
|
70 | } | |||
|
71 | ||||
67 | static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) |
|
72 | static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) | |
68 | { |
|
73 | { | |
69 | optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); |
|
74 | if (ZSTD_compressedLiterals(optPtr)) | |
|
75 | optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); | |||
70 | optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); |
|
76 | optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); | |
71 | optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); |
|
77 | optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); | |
72 | optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); |
|
78 | optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); | |
@@ -99,6 +105,7 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||||
99 | const BYTE* const src, size_t const srcSize, |
|
105 | const BYTE* const src, size_t const srcSize, | |
100 | int const optLevel) |
|
106 | int const optLevel) | |
101 | { |
|
107 | { | |
|
108 | int const compressedLiterals = ZSTD_compressedLiterals(optPtr); | |||
102 | DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); |
|
109 | DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); | |
103 | optPtr->priceType = zop_dynamic; |
|
110 | optPtr->priceType = zop_dynamic; | |
104 |
|
111 | |||
@@ -113,9 +120,10 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||||
113 | /* huffman table presumed generated by dictionary */ |
|
120 | /* huffman table presumed generated by dictionary */ | |
114 | optPtr->priceType = zop_dynamic; |
|
121 | optPtr->priceType = zop_dynamic; | |
115 |
|
122 | |||
116 | assert(optPtr->litFreq != NULL); |
|
123 | if (compressedLiterals) { | |
117 | optPtr->litSum = 0; |
|
124 | unsigned lit; | |
118 | { unsigned lit; |
|
125 | assert(optPtr->litFreq != NULL); | |
|
126 | optPtr->litSum = 0; | |||
119 | for (lit=0; lit<=MaxLit; lit++) { |
|
127 | for (lit=0; lit<=MaxLit; lit++) { | |
120 | U32 const scaleLog = 11; /* scale to 2K */ |
|
128 | U32 const scaleLog = 11; /* scale to 2K */ | |
121 | U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); |
|
129 | U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); | |
@@ -163,10 +171,11 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||||
163 | } else { /* not a dictionary */ |
|
171 | } else { /* not a dictionary */ | |
164 |
|
172 | |||
165 | assert(optPtr->litFreq != NULL); |
|
173 | assert(optPtr->litFreq != NULL); | |
166 | { unsigned lit = MaxLit; |
|
174 | if (compressedLiterals) { | |
|
175 | unsigned lit = MaxLit; | |||
167 | HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ |
|
176 | HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ | |
|
177 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); | |||
168 | } |
|
178 | } | |
169 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); |
|
|||
170 |
|
179 | |||
171 | { unsigned ll; |
|
180 | { unsigned ll; | |
172 | for (ll=0; ll<=MaxLL; ll++) |
|
181 | for (ll=0; ll<=MaxLL; ll++) | |
@@ -190,7 +199,8 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||||
190 |
|
199 | |||
191 | } else { /* new block : re-use previous statistics, scaled down */ |
|
200 | } else { /* new block : re-use previous statistics, scaled down */ | |
192 |
|
201 | |||
193 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); |
|
202 | if (compressedLiterals) | |
|
203 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); | |||
194 | optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); |
|
204 | optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); | |
195 | optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); |
|
205 | optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); | |
196 | optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); |
|
206 | optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); | |
@@ -207,6 +217,10 b' static U32 ZSTD_rawLiteralsCost(const BY' | |||||
207 | int optLevel) |
|
217 | int optLevel) | |
208 | { |
|
218 | { | |
209 | if (litLength == 0) return 0; |
|
219 | if (litLength == 0) return 0; | |
|
220 | ||||
|
221 | if (!ZSTD_compressedLiterals(optPtr)) | |||
|
222 | return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */ | |||
|
223 | ||||
210 | if (optPtr->priceType == zop_predef) |
|
224 | if (optPtr->priceType == zop_predef) | |
211 | return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bits per literal - no statistics used */ |
|
225 | return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bits per literal - no statistics used */ | |
212 |
|
226 | |||
@@ -241,13 +255,13 b' static U32 ZSTD_litLengthPrice(U32 const' | |||||
241 | * to provide a cost which is directly comparable to a match ending at same position */ |
|
255 | * to provide a cost which is directly comparable to a match ending at same position */ | |
242 | static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) |
|
256 | static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) | |
243 | { |
|
257 | { | |
244 | if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); |
|
258 | if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel); | |
245 |
|
259 | |||
246 | /* dynamic statistics */ |
|
260 | /* dynamic statistics */ | |
247 | { U32 const llCode = ZSTD_LLcode(litLength); |
|
261 | { U32 const llCode = ZSTD_LLcode(litLength); | |
248 | int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) |
|
262 | int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) | |
249 | + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancels out */ |
|
263 | + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancels out */ | |
250 | - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); |
|
264 | - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel); | |
251 | #if 1 |
|
265 | #if 1 | |
252 | return contribution; |
|
266 | return contribution; | |
253 | #else |
|
267 | #else | |
@@ -264,7 +278,7 b' static int ZSTD_literalsContribution(con' | |||||
264 | const optState_t* const optPtr, |
|
278 | const optState_t* const optPtr, | |
265 | int optLevel) |
|
279 | int optLevel) | |
266 | { |
|
280 | { | |
267 | int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) |
|
281 | int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) | |
268 | + ZSTD_litLengthContribution(litLength, optPtr, optLevel); |
|
282 | + ZSTD_litLengthContribution(litLength, optPtr, optLevel); | |
269 | return contribution; |
|
283 | return contribution; | |
270 | } |
|
284 | } | |
@@ -310,7 +324,8 b' static void ZSTD_updateStats(optState_t*' | |||||
310 | U32 offsetCode, U32 matchLength) |
|
324 | U32 offsetCode, U32 matchLength) | |
311 | { |
|
325 | { | |
312 | /* literals */ |
|
326 | /* literals */ | |
313 | { U32 u; |
|
327 | if (ZSTD_compressedLiterals(optPtr)) { | |
|
328 | U32 u; | |||
314 | for (u=0; u < litLength; u++) |
|
329 | for (u=0; u < litLength; u++) | |
315 | optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; |
|
330 | optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; | |
316 | optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; |
|
331 | optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; | |
@@ -357,13 +372,15 b' MEM_STATIC U32 ZSTD_readMINMATCH(const v' | |||||
357 |
|
372 | |||
358 | /* Update hashTable3 up to ip (excluded) |
|
373 | /* Update hashTable3 up to ip (excluded) | |
359 | Assumption : always within prefix (i.e. not within extDict) */ |
|
374 | Assumption : always within prefix (i.e. not within extDict) */ | |
360 |
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, |
|
375 | static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, | |
|
376 | U32* nextToUpdate3, | |||
|
377 | const BYTE* const ip) | |||
361 | { |
|
378 | { | |
362 | U32* const hashTable3 = ms->hashTable3; |
|
379 | U32* const hashTable3 = ms->hashTable3; | |
363 | U32 const hashLog3 = ms->hashLog3; |
|
380 | U32 const hashLog3 = ms->hashLog3; | |
364 | const BYTE* const base = ms->window.base; |
|
381 | const BYTE* const base = ms->window.base; | |
365 |
U32 idx = |
|
382 | U32 idx = *nextToUpdate3; | |
366 |
U32 const target |
|
383 | U32 const target = (U32)(ip - base); | |
367 | size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); |
|
384 | size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); | |
368 | assert(hashLog3 > 0); |
|
385 | assert(hashLog3 > 0); | |
369 |
|
386 | |||
@@ -372,6 +389,7 b' static U32 ZSTD_insertAndFindFirstIndexH' | |||||
372 | idx++; |
|
389 | idx++; | |
373 | } |
|
390 | } | |
374 |
|
391 | |||
|
392 | *nextToUpdate3 = target; | |||
375 | return hashTable3[hash3]; |
|
393 | return hashTable3[hash3]; | |
376 | } |
|
394 | } | |
377 |
|
395 | |||
@@ -488,9 +506,11 b' static U32 ZSTD_insertBt1(' | |||||
488 | } } |
|
506 | } } | |
489 |
|
507 | |||
490 | *smallerPtr = *largerPtr = 0; |
|
508 | *smallerPtr = *largerPtr = 0; | |
491 | if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ |
|
509 | { U32 positions = 0; | |
492 | assert(matchEndIdx > current + 8); |
|
510 | if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ | |
493 |
|
|
511 | assert(matchEndIdx > current + 8); | |
|
512 | return MAX(positions, matchEndIdx - (current + 8)); | |||
|
513 | } | |||
494 | } |
|
514 | } | |
495 |
|
515 | |||
496 | FORCE_INLINE_TEMPLATE |
|
516 | FORCE_INLINE_TEMPLATE | |
@@ -505,8 +525,13 b' void ZSTD_updateTree_internal(' | |||||
505 | DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", |
|
525 | DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", | |
506 | idx, target, dictMode); |
|
526 | idx, target, dictMode); | |
507 |
|
527 | |||
508 | while(idx < target) |
|
528 | while(idx < target) { | |
509 |
|
|
529 | U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); | |
|
530 | assert(idx < (U32)(idx + forward)); | |||
|
531 | idx += forward; | |||
|
532 | } | |||
|
533 | assert((size_t)(ip - base) <= (size_t)(U32)(-1)); | |||
|
534 | assert((size_t)(iend - base) <= (size_t)(U32)(-1)); | |||
510 | ms->nextToUpdate = target; |
|
535 | ms->nextToUpdate = target; | |
511 | } |
|
536 | } | |
512 |
|
537 | |||
@@ -516,11 +541,12 b' void ZSTD_updateTree(ZSTD_matchState_t* ' | |||||
516 |
|
541 | |||
517 | FORCE_INLINE_TEMPLATE |
|
542 | FORCE_INLINE_TEMPLATE | |
518 | U32 ZSTD_insertBtAndGetAllMatches ( |
|
543 | U32 ZSTD_insertBtAndGetAllMatches ( | |
|
544 | ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ | |||
519 | ZSTD_matchState_t* ms, |
|
545 | ZSTD_matchState_t* ms, | |
|
546 | U32* nextToUpdate3, | |||
520 | const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, |
|
547 | const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, | |
521 | U32 rep[ZSTD_REP_NUM], |
|
548 | const U32 rep[ZSTD_REP_NUM], | |
522 | U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ |
|
549 | U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ | |
523 | ZSTD_match_t* matches, |
|
|||
524 | const U32 lengthToBeat, |
|
550 | const U32 lengthToBeat, | |
525 | U32 const mls /* template */) |
|
551 | U32 const mls /* template */) | |
526 | { |
|
552 | { | |
@@ -541,8 +567,8 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||||
541 | U32 const dictLimit = ms->window.dictLimit; |
|
567 | U32 const dictLimit = ms->window.dictLimit; | |
542 | const BYTE* const dictEnd = dictBase + dictLimit; |
|
568 | const BYTE* const dictEnd = dictBase + dictLimit; | |
543 | const BYTE* const prefixStart = base + dictLimit; |
|
569 | const BYTE* const prefixStart = base + dictLimit; | |
544 | U32 const btLow = btMask >= current ? 0 : current - btMask; |
|
570 | U32 const btLow = (btMask >= current) ? 0 : current - btMask; | |
545 | U32 const windowLow = ms->window.lowLimit; |
|
571 | U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); | |
546 | U32 const matchLow = windowLow ? windowLow : 1; |
|
572 | U32 const matchLow = windowLow ? windowLow : 1; | |
547 | U32* smallerPtr = bt + 2*(current&btMask); |
|
573 | U32* smallerPtr = bt + 2*(current&btMask); | |
548 | U32* largerPtr = bt + 2*(current&btMask) + 1; |
|
574 | U32* largerPtr = bt + 2*(current&btMask) + 1; | |
@@ -612,7 +638,7 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||||
612 |
|
638 | |||
613 | /* HC3 match finder */ |
|
639 | /* HC3 match finder */ | |
614 | if ((mls == 3) /*static*/ && (bestLength < mls)) { |
|
640 | if ((mls == 3) /*static*/ && (bestLength < mls)) { | |
615 | U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); |
|
641 | U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); | |
616 | if ((matchIndex3 >= matchLow) |
|
642 | if ((matchIndex3 >= matchLow) | |
617 | & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { |
|
643 | & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { | |
618 | size_t mlen; |
|
644 | size_t mlen; | |
@@ -638,9 +664,7 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||||
638 | (ip+mlen == iLimit) ) { /* best possible length */ |
|
664 | (ip+mlen == iLimit) ) { /* best possible length */ | |
639 | ms->nextToUpdate = current+1; /* skip insertion */ |
|
665 | ms->nextToUpdate = current+1; /* skip insertion */ | |
640 | return 1; |
|
666 | return 1; | |
641 | } |
|
667 | } } } | |
642 | } |
|
|||
643 | } |
|
|||
644 | /* no dictMatchState lookup: dicts don't have a populated HC3 table */ |
|
668 | /* no dictMatchState lookup: dicts don't have a populated HC3 table */ | |
645 | } |
|
669 | } | |
646 |
|
670 | |||
@@ -648,19 +672,21 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||||
648 |
|
672 | |||
649 | while (nbCompares-- && (matchIndex >= matchLow)) { |
|
673 | while (nbCompares-- && (matchIndex >= matchLow)) { | |
650 | U32* const nextPtr = bt + 2*(matchIndex & btMask); |
|
674 | U32* const nextPtr = bt + 2*(matchIndex & btMask); | |
|
675 | const BYTE* match; | |||
651 | size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ |
|
676 | size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ | |
652 | const BYTE* match; |
|
|||
653 | assert(current > matchIndex); |
|
677 | assert(current > matchIndex); | |
654 |
|
678 | |||
655 | if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { |
|
679 | if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { | |
656 | assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ |
|
680 | assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ | |
657 | match = base + matchIndex; |
|
681 | match = base + matchIndex; | |
|
682 | if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ | |||
658 | matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); |
|
683 | matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); | |
659 | } else { |
|
684 | } else { | |
660 | match = dictBase + matchIndex; |
|
685 | match = dictBase + matchIndex; | |
|
686 | assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ | |||
661 | matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); |
|
687 | matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); | |
662 | if (matchIndex+matchLength >= dictLimit) |
|
688 | if (matchIndex+matchLength >= dictLimit) | |
663 | match = base + matchIndex; /* prepare for match[matchLength] */ |
|
689 | match = base + matchIndex; /* prepare for match[matchLength] read */ | |
664 | } |
|
690 | } | |
665 |
|
691 | |||
666 | if (matchLength > bestLength) { |
|
692 | if (matchLength > bestLength) { | |
@@ -745,10 +771,13 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||||
745 |
|
771 | |||
746 |
|
772 | |||
747 | FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( |
|
773 | FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( | |
|
774 | ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ | |||
748 | ZSTD_matchState_t* ms, |
|
775 | ZSTD_matchState_t* ms, | |
|
776 | U32* nextToUpdate3, | |||
749 | const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, |
|
777 | const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, | |
750 |
U32 rep[ZSTD_REP_NUM], |
|
778 | const U32 rep[ZSTD_REP_NUM], | |
751 |
|
|
779 | U32 const ll0, | |
|
780 | U32 const lengthToBeat) | |||
752 | { |
|
781 | { | |
753 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
|
782 | const ZSTD_compressionParameters* const cParams = &ms->cParams; | |
754 | U32 const matchLengthSearch = cParams->minMatch; |
|
783 | U32 const matchLengthSearch = cParams->minMatch; | |
@@ -757,12 +786,12 b' FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllM' | |||||
757 | ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); |
|
786 | ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); | |
758 | switch(matchLengthSearch) |
|
787 | switch(matchLengthSearch) | |
759 | { |
|
788 | { | |
760 |
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
789 | case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); | |
761 | default : |
|
790 | default : | |
762 |
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
791 | case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); | |
763 |
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
792 | case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); | |
764 | case 7 : |
|
793 | case 7 : | |
765 |
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
794 | case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); | |
766 | } |
|
795 | } | |
767 | } |
|
796 | } | |
768 |
|
797 | |||
@@ -838,6 +867,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||||
838 |
|
867 | |||
839 | U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); |
|
868 | U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); | |
840 | U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; |
|
869 | U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; | |
|
870 | U32 nextToUpdate3 = ms->nextToUpdate; | |||
841 |
|
871 | |||
842 | ZSTD_optimal_t* const opt = optStatePtr->priceTable; |
|
872 | ZSTD_optimal_t* const opt = optStatePtr->priceTable; | |
843 | ZSTD_match_t* const matches = optStatePtr->matchTable; |
|
873 | ZSTD_match_t* const matches = optStatePtr->matchTable; | |
@@ -847,7 +877,6 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||||
847 | DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", |
|
877 | DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", | |
848 | (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); |
|
878 | (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); | |
849 | assert(optLevel <= 2); |
|
879 | assert(optLevel <= 2); | |
850 | ms->nextToUpdate3 = ms->nextToUpdate; |
|
|||
851 | ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); |
|
880 | ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); | |
852 | ip += (ip==prefixStart); |
|
881 | ip += (ip==prefixStart); | |
853 |
|
882 | |||
@@ -858,7 +887,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||||
858 | /* find first match */ |
|
887 | /* find first match */ | |
859 | { U32 const litlen = (U32)(ip - anchor); |
|
888 | { U32 const litlen = (U32)(ip - anchor); | |
860 | U32 const ll0 = !litlen; |
|
889 | U32 const ll0 = !litlen; | |
861 |
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, |
|
890 | U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); | |
862 | if (!nbMatches) { ip++; continue; } |
|
891 | if (!nbMatches) { ip++; continue; } | |
863 |
|
892 | |||
864 | /* initialize opt[0] */ |
|
893 | /* initialize opt[0] */ | |
@@ -870,7 +899,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||||
870 | /* large match -> immediate encoding */ |
|
899 | /* large match -> immediate encoding */ | |
871 | { U32 const maxML = matches[nbMatches-1].len; |
|
900 | { U32 const maxML = matches[nbMatches-1].len; | |
872 | U32 const maxOffset = matches[nbMatches-1].off; |
|
901 | U32 const maxOffset = matches[nbMatches-1].off; | |
873 | DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie", |
|
902 | DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", | |
874 | nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); |
|
903 | nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); | |
875 |
|
904 | |||
876 | if (maxML > sufficient_len) { |
|
905 | if (maxML > sufficient_len) { | |
@@ -955,7 +984,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||||
955 | U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; |
|
984 | U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; | |
956 | U32 const previousPrice = opt[cur].price; |
|
985 | U32 const previousPrice = opt[cur].price; | |
957 | U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); |
|
986 | U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); | |
958 |
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, |
|
987 | U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); | |
959 | U32 matchNb; |
|
988 | U32 matchNb; | |
960 | if (!nbMatches) { |
|
989 | if (!nbMatches) { | |
961 | DEBUGLOG(7, "rPos:%u : no match found", cur); |
|
990 | DEBUGLOG(7, "rPos:%u : no match found", cur); | |
@@ -1079,7 +1108,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||||
1079 | } /* while (ip < ilimit) */ |
|
1108 | } /* while (ip < ilimit) */ | |
1080 |
|
1109 | |||
1081 | /* Return the last literals size */ |
|
1110 | /* Return the last literals size */ | |
1082 | return iend - anchor; |
|
1111 | return (size_t)(iend - anchor); | |
1083 | } |
|
1112 | } | |
1084 |
|
1113 | |||
1085 |
|
1114 | |||
@@ -1108,7 +1137,8 b' static U32 ZSTD_upscaleStat(unsigned* ta' | |||||
1108 | /* used in 2-pass strategy */ |
|
1137 | /* used in 2-pass strategy */ | |
1109 | MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) |
|
1138 | MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) | |
1110 | { |
|
1139 | { | |
1111 | optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); |
|
1140 | if (ZSTD_compressedLiterals(optPtr)) | |
|
1141 | optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); | |||
1112 | optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); |
|
1142 | optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); | |
1113 | optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); |
|
1143 | optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); | |
1114 | optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); |
|
1144 | optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); | |
@@ -1117,7 +1147,7 b' MEM_STATIC void ZSTD_upscaleStats(optSta' | |||||
1117 | /* ZSTD_initStats_ultra(): |
|
1147 | /* ZSTD_initStats_ultra(): | |
1118 | * make a first compression pass, just to seed stats with more accurate starting values. |
|
1148 | * make a first compression pass, just to seed stats with more accurate starting values. | |
1119 | * only works on first block, with no dictionary and no ldm. |
|
1149 | * only works on first block, with no dictionary and no ldm. | |
1120 |
* this function cannot error, hence its con |
|
1150 | * this function cannot error, hence its contract must be respected. | |
1121 | */ |
|
1151 | */ | |
1122 | static void |
|
1152 | static void | |
1123 | ZSTD_initStats_ultra(ZSTD_matchState_t* ms, |
|
1153 | ZSTD_initStats_ultra(ZSTD_matchState_t* ms, | |
@@ -1142,7 +1172,6 b' ZSTD_initStats_ultra(ZSTD_matchState_t* ' | |||||
1142 | ms->window.dictLimit += (U32)srcSize; |
|
1172 | ms->window.dictLimit += (U32)srcSize; | |
1143 | ms->window.lowLimit = ms->window.dictLimit; |
|
1173 | ms->window.lowLimit = ms->window.dictLimit; | |
1144 | ms->nextToUpdate = ms->window.dictLimit; |
|
1174 | ms->nextToUpdate = ms->window.dictLimit; | |
1145 | ms->nextToUpdate3 = ms->window.dictLimit; |
|
|||
1146 |
|
1175 | |||
1147 | /* re-inforce weight of collected statistics */ |
|
1176 | /* re-inforce weight of collected statistics */ | |
1148 | ZSTD_upscaleStats(&ms->opt); |
|
1177 | ZSTD_upscaleStats(&ms->opt); |
@@ -22,6 +22,7 b'' | |||||
22 | /* ====== Dependencies ====== */ |
|
22 | /* ====== Dependencies ====== */ | |
23 | #include <string.h> /* memcpy, memset */ |
|
23 | #include <string.h> /* memcpy, memset */ | |
24 | #include <limits.h> /* INT_MAX, UINT_MAX */ |
|
24 | #include <limits.h> /* INT_MAX, UINT_MAX */ | |
|
25 | #include "mem.h" /* MEM_STATIC */ | |||
25 | #include "pool.h" /* threadpool */ |
|
26 | #include "pool.h" /* threadpool */ | |
26 | #include "threading.h" /* mutex */ |
|
27 | #include "threading.h" /* mutex */ | |
27 | #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ |
|
28 | #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ | |
@@ -456,7 +457,7 b' typedef struct {' | |||||
456 | * Must be acquired after the main mutex when acquiring both. |
|
457 | * Must be acquired after the main mutex when acquiring both. | |
457 | */ |
|
458 | */ | |
458 | ZSTD_pthread_mutex_t ldmWindowMutex; |
|
459 | ZSTD_pthread_mutex_t ldmWindowMutex; | |
459 |
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is u |
|
460 | ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */ | |
460 | ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ |
|
461 | ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ | |
461 | } serialState_t; |
|
462 | } serialState_t; | |
462 |
|
463 | |||
@@ -647,7 +648,7 b' static void ZSTDMT_compressionJob(void* ' | |||||
647 | buffer_t dstBuff = job->dstBuff; |
|
648 | buffer_t dstBuff = job->dstBuff; | |
648 | size_t lastCBlockSize = 0; |
|
649 | size_t lastCBlockSize = 0; | |
649 |
|
650 | |||
650 |
/* res |
|
651 | /* resources */ | |
651 | if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); |
|
652 | if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); | |
652 | if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ |
|
653 | if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ | |
653 | dstBuff = ZSTDMT_getBuffer(job->bufPool); |
|
654 | dstBuff = ZSTDMT_getBuffer(job->bufPool); | |
@@ -672,7 +673,7 b' static void ZSTDMT_compressionJob(void* ' | |||||
672 | if (ZSTD_isError(initError)) JOB_ERROR(initError); |
|
673 | if (ZSTD_isError(initError)) JOB_ERROR(initError); | |
673 | } else { /* srcStart points at reloaded section */ |
|
674 | } else { /* srcStart points at reloaded section */ | |
674 | U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; |
|
675 | U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; | |
675 | { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); |
|
676 | { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); | |
676 | if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); |
|
677 | if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); | |
677 | } |
|
678 | } | |
678 | { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, |
|
679 | { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, | |
@@ -864,14 +865,10 b' static size_t ZSTDMT_expandJobsTable (ZS' | |||||
864 | * Internal use only */ |
|
865 | * Internal use only */ | |
865 | size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) |
|
866 | size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) | |
866 | { |
|
867 | { | |
867 | if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX; |
|
868 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); | |
868 | params->nbWorkers = nbWorkers; |
|
|||
869 | params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT; |
|
|||
870 | params->jobSize = 0; |
|
|||
871 | return nbWorkers; |
|
|||
872 | } |
|
869 | } | |
873 |
|
870 | |||
874 | ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) |
|
871 | MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem) | |
875 | { |
|
872 | { | |
876 | ZSTDMT_CCtx* mtctx; |
|
873 | ZSTDMT_CCtx* mtctx; | |
877 | U32 nbJobs = nbWorkers + 2; |
|
874 | U32 nbJobs = nbWorkers + 2; | |
@@ -906,6 +903,17 b' ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(' | |||||
906 | return mtctx; |
|
903 | return mtctx; | |
907 | } |
|
904 | } | |
908 |
|
905 | |||
|
906 | ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) | |||
|
907 | { | |||
|
908 | #ifdef ZSTD_MULTITHREAD | |||
|
909 | return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem); | |||
|
910 | #else | |||
|
911 | (void)nbWorkers; | |||
|
912 | (void)cMem; | |||
|
913 | return NULL; | |||
|
914 | #endif | |||
|
915 | } | |||
|
916 | ||||
909 | ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) |
|
917 | ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) | |
910 | { |
|
918 | { | |
911 | return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); |
|
919 | return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); | |
@@ -986,26 +994,13 b' ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_' | |||||
986 | { |
|
994 | { | |
987 | case ZSTDMT_p_jobSize : |
|
995 | case ZSTDMT_p_jobSize : | |
988 | DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value); |
|
996 | DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value); | |
989 | if ( value != 0 /* default */ |
|
997 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value); | |
990 | && value < ZSTDMT_JOBSIZE_MIN) |
|
|||
991 | value = ZSTDMT_JOBSIZE_MIN; |
|
|||
992 | assert(value >= 0); |
|
|||
993 | if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX; |
|
|||
994 | params->jobSize = value; |
|
|||
995 | return value; |
|
|||
996 |
|
||||
997 | case ZSTDMT_p_overlapLog : |
|
998 | case ZSTDMT_p_overlapLog : | |
998 | DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value); |
|
999 | DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value); | |
999 | if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN; |
|
1000 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value); | |
1000 | if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX; |
|
|||
1001 | params->overlapLog = value; |
|
|||
1002 | return value; |
|
|||
1003 |
|
||||
1004 | case ZSTDMT_p_rsyncable : |
|
1001 | case ZSTDMT_p_rsyncable : | |
1005 | value = (value != 0); |
|
1002 | DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value); | |
1006 | params->rsyncable = value; |
|
1003 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value); | |
1007 | return value; |
|
|||
1008 |
|
||||
1009 | default : |
|
1004 | default : | |
1010 | return ERROR(parameter_unsupported); |
|
1005 | return ERROR(parameter_unsupported); | |
1011 | } |
|
1006 | } | |
@@ -1021,32 +1016,29 b' size_t ZSTDMT_getMTCtxParameter(ZSTDMT_C' | |||||
1021 | { |
|
1016 | { | |
1022 | switch (parameter) { |
|
1017 | switch (parameter) { | |
1023 | case ZSTDMT_p_jobSize: |
|
1018 | case ZSTDMT_p_jobSize: | |
1024 | assert(mtctx->params.jobSize <= INT_MAX); |
|
1019 | return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value); | |
1025 | *value = (int)(mtctx->params.jobSize); |
|
|||
1026 | break; |
|
|||
1027 | case ZSTDMT_p_overlapLog: |
|
1020 | case ZSTDMT_p_overlapLog: | |
1028 | *value = mtctx->params.overlapLog; |
|
1021 | return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value); | |
1029 | break; |
|
|||
1030 | case ZSTDMT_p_rsyncable: |
|
1022 | case ZSTDMT_p_rsyncable: | |
1031 | *value = mtctx->params.rsyncable; |
|
1023 | return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value); | |
1032 | break; |
|
|||
1033 | default: |
|
1024 | default: | |
1034 | return ERROR(parameter_unsupported); |
|
1025 | return ERROR(parameter_unsupported); | |
1035 | } |
|
1026 | } | |
1036 | return 0; |
|
|||
1037 | } |
|
1027 | } | |
1038 |
|
1028 | |||
1039 | /* Sets parameters relevant to the compression job, |
|
1029 | /* Sets parameters relevant to the compression job, | |
1040 | * initializing others to default values. */ |
|
1030 | * initializing others to default values. */ | |
1041 | static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) |
|
1031 | static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) | |
1042 | { |
|
1032 | { | |
1043 | ZSTD_CCtx_params jobParams; |
|
1033 | ZSTD_CCtx_params jobParams = params; | |
1044 | memset(&jobParams, 0, sizeof(jobParams)); |
|
1034 | /* Clear parameters related to multithreading */ | |
1045 |
|
1035 | jobParams.forceWindow = 0; | ||
1046 | jobParams.cParams = params.cParams; |
|
1036 | jobParams.nbWorkers = 0; | |
1047 | jobParams.fParams = params.fParams; |
|
1037 | jobParams.jobSize = 0; | |
1048 | jobParams.compressionLevel = params.compressionLevel; |
|
1038 | jobParams.overlapLog = 0; | |
1049 |
|
1039 | jobParams.rsyncable = 0; | ||
|
1040 | memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t)); | |||
|
1041 | memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem)); | |||
1050 | return jobParams; |
|
1042 | return jobParams; | |
1051 | } |
|
1043 | } | |
1052 |
|
1044 | |||
@@ -1056,7 +1048,7 b' static ZSTD_CCtx_params ZSTDMT_initJobCC' | |||||
1056 | static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) |
|
1048 | static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) | |
1057 | { |
|
1049 | { | |
1058 | if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); |
|
1050 | if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); | |
1059 |
|
|
1051 | FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); | |
1060 | mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); |
|
1052 | mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); | |
1061 | if (mtctx->bufPool == NULL) return ERROR(memory_allocation); |
|
1053 | if (mtctx->bufPool == NULL) return ERROR(memory_allocation); | |
1062 | mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); |
|
1054 | mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); | |
@@ -1137,9 +1129,14 b' size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt' | |||||
1137 | size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; |
|
1129 | size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; | |
1138 | size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; |
|
1130 | size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; | |
1139 | assert(flushed <= produced); |
|
1131 | assert(flushed <= produced); | |
|
1132 | assert(jobPtr->consumed <= jobPtr->src.size); | |||
1140 | toFlush = produced - flushed; |
|
1133 | toFlush = produced - flushed; | |
1141 | if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { |
|
1134 | /* if toFlush==0, nothing is available to flush. | |
1142 | /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ |
|
1135 | * However, jobID is expected to still be active: | |
|
1136 | * if jobID was already completed and fully flushed, | |||
|
1137 | * ZSTDMT_flushProduced() should have already moved onto next job. | |||
|
1138 | * Therefore, some input has not yet been consumed. */ | |||
|
1139 | if (toFlush==0) { | |||
1143 | assert(jobPtr->consumed < jobPtr->src.size); |
|
1140 | assert(jobPtr->consumed < jobPtr->src.size); | |
1144 | } |
|
1141 | } | |
1145 | } |
|
1142 | } | |
@@ -1156,12 +1153,16 b' size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt' | |||||
1156 |
|
1153 | |||
1157 | static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) |
|
1154 | static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) | |
1158 | { |
|
1155 | { | |
1159 | if (params.ldmParams.enableLdm) |
|
1156 | unsigned jobLog; | |
|
1157 | if (params.ldmParams.enableLdm) { | |||
1160 | /* In Long Range Mode, the windowLog is typically oversized. |
|
1158 | /* In Long Range Mode, the windowLog is typically oversized. | |
1161 | * In which case, it's preferable to determine the jobSize |
|
1159 | * In which case, it's preferable to determine the jobSize | |
1162 | * based on chainLog instead. */ |
|
1160 | * based on chainLog instead. */ | |
1163 |
|
|
1161 | jobLog = MAX(21, params.cParams.chainLog + 4); | |
1164 | return MAX(20, params.cParams.windowLog + 2); |
|
1162 | } else { | |
|
1163 | jobLog = MAX(20, params.cParams.windowLog + 2); | |||
|
1164 | } | |||
|
1165 | return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); | |||
1165 | } |
|
1166 | } | |
1166 |
|
1167 | |||
1167 | static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) |
|
1168 | static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) | |
@@ -1205,7 +1206,7 b' static size_t ZSTDMT_computeOverlapSize(' | |||||
1205 | ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) |
|
1206 | ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) | |
1206 | - overlapRLog; |
|
1207 | - overlapRLog; | |
1207 | } |
|
1208 | } | |
1208 |
assert(0 <= ovLog && ovLog <= |
|
1209 | assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); | |
1209 | DEBUGLOG(4, "overlapLog : %i", params.overlapLog); |
|
1210 | DEBUGLOG(4, "overlapLog : %i", params.overlapLog); | |
1210 | DEBUGLOG(4, "overlap size : %i", 1 << ovLog); |
|
1211 | DEBUGLOG(4, "overlap size : %i", 1 << ovLog); | |
1211 | return (ovLog==0) ? 0 : (size_t)1 << ovLog; |
|
1212 | return (ovLog==0) ? 0 : (size_t)1 << ovLog; | |
@@ -1263,7 +1264,7 b' static size_t ZSTDMT_compress_advanced_i' | |||||
1263 | if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) |
|
1264 | if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) | |
1264 | return ERROR(memory_allocation); |
|
1265 | return ERROR(memory_allocation); | |
1265 |
|
1266 | |||
1266 |
|
|
1267 | FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ | |
1267 |
|
1268 | |||
1268 | { unsigned u; |
|
1269 | { unsigned u; | |
1269 | for (u=0; u<nbJobs; u++) { |
|
1270 | for (u=0; u<nbJobs; u++) { | |
@@ -1396,10 +1397,10 b' size_t ZSTDMT_initCStream_internal(' | |||||
1396 |
|
1397 | |||
1397 | /* init */ |
|
1398 | /* init */ | |
1398 | if (params.nbWorkers != mtctx->params.nbWorkers) |
|
1399 | if (params.nbWorkers != mtctx->params.nbWorkers) | |
1399 |
|
|
1400 | FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); | |
1400 |
|
1401 | |||
1401 | if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; |
|
1402 | if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; | |
1402 | if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; |
|
1403 | if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; | |
1403 |
|
1404 | |||
1404 | mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ |
|
1405 | mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ | |
1405 | if (mtctx->singleBlockingThread) { |
|
1406 | if (mtctx->singleBlockingThread) { | |
@@ -1440,6 +1441,8 b' size_t ZSTDMT_initCStream_internal(' | |||||
1440 | if (mtctx->targetSectionSize == 0) { |
|
1441 | if (mtctx->targetSectionSize == 0) { | |
1441 | mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); |
|
1442 | mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); | |
1442 | } |
|
1443 | } | |
|
1444 | assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); | |||
|
1445 | ||||
1443 | if (params.rsyncable) { |
|
1446 | if (params.rsyncable) { | |
1444 | /* Aim for the targetsectionSize as the average job size. */ |
|
1447 | /* Aim for the targetsectionSize as the average job size. */ | |
1445 | U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); |
|
1448 | U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); | |
@@ -1547,7 +1550,7 b' size_t ZSTDMT_initCStream(ZSTDMT_CCtx* m' | |||||
1547 | /* ZSTDMT_writeLastEmptyBlock() |
|
1550 | /* ZSTDMT_writeLastEmptyBlock() | |
1548 | * Write a single empty block with an end-of-frame to finish a frame. |
|
1551 | * Write a single empty block with an end-of-frame to finish a frame. | |
1549 | * Job must be created from streaming variant. |
|
1552 | * Job must be created from streaming variant. | |
1550 |
* This function is always successful |
|
1553 | * This function is always successful if expected conditions are fulfilled. | |
1551 | */ |
|
1554 | */ | |
1552 | static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) |
|
1555 | static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) | |
1553 | { |
|
1556 | { | |
@@ -1987,7 +1990,7 b' size_t ZSTDMT_compressStream_generic(ZST' | |||||
1987 | assert(input->pos <= input->size); |
|
1990 | assert(input->pos <= input->size); | |
1988 |
|
1991 | |||
1989 | if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ |
|
1992 | if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ | |
1990 |
return ZSTD_compressStream |
|
1993 | return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp); | |
1991 | } |
|
1994 | } | |
1992 |
|
1995 | |||
1993 | if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { |
|
1996 | if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { | |
@@ -2051,7 +2054,7 b' size_t ZSTDMT_compressStream_generic(ZST' | |||||
2051 | || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ |
|
2054 | || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ | |
2052 | size_t const jobSize = mtctx->inBuff.filled; |
|
2055 | size_t const jobSize = mtctx->inBuff.filled; | |
2053 | assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); |
|
2056 | assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); | |
2054 |
|
|
2057 | FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); | |
2055 | } |
|
2058 | } | |
2056 |
|
2059 | |||
2057 | /* check for potential compressed data ready to be flushed */ |
|
2060 | /* check for potential compressed data ready to be flushed */ | |
@@ -2065,7 +2068,7 b' size_t ZSTDMT_compressStream_generic(ZST' | |||||
2065 |
|
2068 | |||
2066 | size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) |
|
2069 | size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) | |
2067 | { |
|
2070 | { | |
2068 |
|
|
2071 | FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); | |
2069 |
|
2072 | |||
2070 | /* recommended next input size : fill current input buffer */ |
|
2073 | /* recommended next input size : fill current input buffer */ | |
2071 | return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ |
|
2074 | return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ | |
@@ -2082,7 +2085,7 b' static size_t ZSTDMT_flushStream_interna' | |||||
2082 | || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ |
|
2085 | || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ | |
2083 | DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", |
|
2086 | DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", | |
2084 | (U32)srcSize, (U32)endFrame); |
|
2087 | (U32)srcSize, (U32)endFrame); | |
2085 |
|
|
2088 | FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); | |
2086 | } |
|
2089 | } | |
2087 |
|
2090 | |||
2088 | /* check if there is any data available to flush */ |
|
2091 | /* check if there is any data available to flush */ |
@@ -17,10 +17,25 b'' | |||||
17 |
|
17 | |||
18 |
|
18 | |||
19 | /* Note : This is an internal API. |
|
19 | /* Note : This is an internal API. | |
20 |
* |
|
20 | * These APIs used to be exposed with ZSTDLIB_API, | |
21 | * because it used to be the only way to invoke MT compression. |
|
21 | * because it used to be the only way to invoke MT compression. | |
22 |
* Now, it's recommended to use ZSTD_compress |
|
22 | * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2() | |
23 | * These methods will stop being exposed in a future version */ |
|
23 | * instead. | |
|
24 | * | |||
|
25 | * If you depend on these APIs and can't switch, then define | |||
|
26 | * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library. | |||
|
27 | * However, we may completely remove these functions in a future | |||
|
28 | * release, so please switch soon. | |||
|
29 | * | |||
|
30 | * This API requires ZSTD_MULTITHREAD to be defined during compilation, | |||
|
31 | * otherwise ZSTDMT_createCCtx*() will fail. | |||
|
32 | */ | |||
|
33 | ||||
|
34 | #ifdef ZSTD_LEGACY_MULTITHREADED_API | |||
|
35 | # define ZSTDMT_API ZSTDLIB_API | |||
|
36 | #else | |||
|
37 | # define ZSTDMT_API | |||
|
38 | #endif | |||
24 |
|
39 | |||
25 | /* === Dependencies === */ |
|
40 | /* === Dependencies === */ | |
26 | #include <stddef.h> /* size_t */ |
|
41 | #include <stddef.h> /* size_t */ | |
@@ -35,22 +50,25 b'' | |||||
35 | #ifndef ZSTDMT_JOBSIZE_MIN |
|
50 | #ifndef ZSTDMT_JOBSIZE_MIN | |
36 | # define ZSTDMT_JOBSIZE_MIN (1 MB) |
|
51 | # define ZSTDMT_JOBSIZE_MIN (1 MB) | |
37 | #endif |
|
52 | #endif | |
|
53 | #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) | |||
38 | #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) |
|
54 | #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) | |
39 |
|
55 | |||
40 |
|
56 | |||
41 | /* === Memory management === */ |
|
57 | /* === Memory management === */ | |
42 | typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; |
|
58 | typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; | |
43 | ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); |
|
59 | /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ | |
44 |
ZSTD |
|
60 | ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); | |
|
61 | /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ | |||
|
62 | ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, | |||
45 | ZSTD_customMem cMem); |
|
63 | ZSTD_customMem cMem); | |
46 |
ZSTD |
|
64 | ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); | |
47 |
|
65 | |||
48 |
ZSTD |
|
66 | ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); | |
49 |
|
67 | |||
50 |
|
68 | |||
51 | /* === Simple one-pass compression function === */ |
|
69 | /* === Simple one-pass compression function === */ | |
52 |
|
70 | |||
53 |
ZSTD |
|
71 | ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, | |
54 | void* dst, size_t dstCapacity, |
|
72 | void* dst, size_t dstCapacity, | |
55 | const void* src, size_t srcSize, |
|
73 | const void* src, size_t srcSize, | |
56 | int compressionLevel); |
|
74 | int compressionLevel); | |
@@ -59,31 +77,31 b' ZSTDLIB_API size_t ZSTDMT_compressCCtx(Z' | |||||
59 |
|
77 | |||
60 | /* === Streaming functions === */ |
|
78 | /* === Streaming functions === */ | |
61 |
|
79 | |||
62 |
ZSTD |
|
80 | ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); | |
63 |
ZSTD |
|
81 | ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ | |
64 |
|
82 | |||
65 |
ZSTD |
|
83 | ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); | |
66 |
ZSTD |
|
84 | ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |
67 |
|
85 | |||
68 |
ZSTD |
|
86 | ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |
69 |
ZSTD |
|
87 | ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |
70 |
|
88 | |||
71 |
|
89 | |||
72 | /* === Advanced functions and parameters === */ |
|
90 | /* === Advanced functions and parameters === */ | |
73 |
|
91 | |||
74 |
ZSTD |
|
92 | ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, | |
75 |
|
|
93 | void* dst, size_t dstCapacity, | |
76 |
|
|
94 | const void* src, size_t srcSize, | |
77 |
|
|
95 | const ZSTD_CDict* cdict, | |
78 |
|
|
96 | ZSTD_parameters params, | |
79 |
|
|
97 | int overlapLog); | |
80 |
|
98 | |||
81 |
ZSTD |
|
99 | ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, | |
82 | const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ |
|
100 | const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ | |
83 | ZSTD_parameters params, |
|
101 | ZSTD_parameters params, | |
84 | unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ |
|
102 | unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ | |
85 |
|
103 | |||
86 |
ZSTD |
|
104 | ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, | |
87 | const ZSTD_CDict* cdict, |
|
105 | const ZSTD_CDict* cdict, | |
88 | ZSTD_frameParameters fparams, |
|
106 | ZSTD_frameParameters fparams, | |
89 | unsigned long long pledgedSrcSize); /* note : zero means empty */ |
|
107 | unsigned long long pledgedSrcSize); /* note : zero means empty */ | |
@@ -92,7 +110,7 b' ZSTDLIB_API size_t ZSTDMT_initCStream_us' | |||||
92 | * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ |
|
110 | * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ | |
93 | typedef enum { |
|
111 | typedef enum { | |
94 | ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ |
|
112 | ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ | |
95 |
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression |
|
113 | ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */ | |
96 | ZSTDMT_p_rsyncable /* Enables rsyncable mode. */ |
|
114 | ZSTDMT_p_rsyncable /* Enables rsyncable mode. */ | |
97 | } ZSTDMT_parameter; |
|
115 | } ZSTDMT_parameter; | |
98 |
|
116 | |||
@@ -101,12 +119,12 b' typedef enum {' | |||||
101 | * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ |
|
119 | * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ | |
102 | * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. |
|
120 | * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. | |
103 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ |
|
121 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ | |
104 |
ZSTD |
|
122 | ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value); | |
105 |
|
123 | |||
106 | /* ZSTDMT_getMTCtxParameter() : |
|
124 | /* ZSTDMT_getMTCtxParameter() : | |
107 | * Query the ZSTDMT_CCtx for a parameter value. |
|
125 | * Query the ZSTDMT_CCtx for a parameter value. | |
108 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ |
|
126 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ | |
109 |
ZSTD |
|
127 | ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value); | |
110 |
|
128 | |||
111 |
|
129 | |||
112 | /*! ZSTDMT_compressStream_generic() : |
|
130 | /*! ZSTDMT_compressStream_generic() : | |
@@ -116,7 +134,7 b' ZSTDLIB_API size_t ZSTDMT_getMTCtxParame' | |||||
116 | * 0 if fully flushed |
|
134 | * 0 if fully flushed | |
117 | * or an error code |
|
135 | * or an error code | |
118 | * note : needs to be init using any ZSTD_initCStream*() variant */ |
|
136 | * note : needs to be init using any ZSTD_initCStream*() variant */ | |
119 |
ZSTD |
|
137 | ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, | |
120 | ZSTD_outBuffer* output, |
|
138 | ZSTD_outBuffer* output, | |
121 | ZSTD_inBuffer* input, |
|
139 | ZSTD_inBuffer* input, | |
122 | ZSTD_EndDirective endOp); |
|
140 | ZSTD_EndDirective endOp); |
@@ -105,9 +105,9 b' ZSTD_loadEntropy_intoDDict(ZSTD_DDict* d' | |||||
105 | ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); |
|
105 | ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); | |
106 |
|
106 | |||
107 | /* load entropy tables */ |
|
107 | /* load entropy tables */ | |
108 | CHECK_E( ZSTD_loadDEntropy(&ddict->entropy, |
|
108 | RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( | |
109 |
|
|
109 | &ddict->entropy, ddict->dictContent, ddict->dictSize)), | |
110 |
|
|
110 | dictionary_corrupted); | |
111 | ddict->entropyPresent = 1; |
|
111 | ddict->entropyPresent = 1; | |
112 | return 0; |
|
112 | return 0; | |
113 | } |
|
113 | } | |
@@ -133,7 +133,7 b' static size_t ZSTD_initDDict_internal(ZS' | |||||
133 | ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ |
|
133 | ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ | |
134 |
|
134 | |||
135 | /* parse dictionary content */ |
|
135 | /* parse dictionary content */ | |
136 |
|
|
136 | FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); | |
137 |
|
137 | |||
138 | return 0; |
|
138 | return 0; | |
139 | } |
|
139 | } |
@@ -106,6 +106,7 b' static void ZSTD_initDCtx_internal(ZSTD_' | |||||
106 | dctx->ddictLocal = NULL; |
|
106 | dctx->ddictLocal = NULL; | |
107 | dctx->dictEnd = NULL; |
|
107 | dctx->dictEnd = NULL; | |
108 | dctx->ddictIsCold = 0; |
|
108 | dctx->ddictIsCold = 0; | |
|
109 | dctx->dictUses = ZSTD_dont_use; | |||
109 | dctx->inBuff = NULL; |
|
110 | dctx->inBuff = NULL; | |
110 | dctx->inBuffSize = 0; |
|
111 | dctx->inBuffSize = 0; | |
111 | dctx->outBuffSize = 0; |
|
112 | dctx->outBuffSize = 0; | |
@@ -147,13 +148,20 b' ZSTD_DCtx* ZSTD_createDCtx(void)' | |||||
147 | return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); |
|
148 | return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); | |
148 | } |
|
149 | } | |
149 |
|
150 | |||
|
151 | static void ZSTD_clearDict(ZSTD_DCtx* dctx) | |||
|
152 | { | |||
|
153 | ZSTD_freeDDict(dctx->ddictLocal); | |||
|
154 | dctx->ddictLocal = NULL; | |||
|
155 | dctx->ddict = NULL; | |||
|
156 | dctx->dictUses = ZSTD_dont_use; | |||
|
157 | } | |||
|
158 | ||||
150 | size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) |
|
159 | size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) | |
151 | { |
|
160 | { | |
152 | if (dctx==NULL) return 0; /* support free on NULL */ |
|
161 | if (dctx==NULL) return 0; /* support free on NULL */ | |
153 |
|
|
162 | RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); | |
154 | { ZSTD_customMem const cMem = dctx->customMem; |
|
163 | { ZSTD_customMem const cMem = dctx->customMem; | |
155 |
ZSTD_ |
|
164 | ZSTD_clearDict(dctx); | |
156 | dctx->ddictLocal = NULL; |
|
|||
157 | ZSTD_free(dctx->inBuff, cMem); |
|
165 | ZSTD_free(dctx->inBuff, cMem); | |
158 | dctx->inBuff = NULL; |
|
166 | dctx->inBuff = NULL; | |
159 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) |
|
167 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) | |
@@ -203,7 +211,7 b' unsigned ZSTD_isFrame(const void* buffer' | |||||
203 | static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) |
|
211 | static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) | |
204 | { |
|
212 | { | |
205 | size_t const minInputSize = ZSTD_startingInputLength(format); |
|
213 | size_t const minInputSize = ZSTD_startingInputLength(format); | |
206 |
|
|
214 | RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); | |
207 |
|
215 | |||
208 | { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; |
|
216 | { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; | |
209 | U32 const dictID= fhd & 3; |
|
217 | U32 const dictID= fhd & 3; | |
@@ -238,7 +246,7 b' size_t ZSTD_getFrameHeader_advanced(ZSTD' | |||||
238 |
|
246 | |||
239 | memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ |
|
247 | memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ | |
240 | if (srcSize < minInputSize) return minInputSize; |
|
248 | if (srcSize < minInputSize) return minInputSize; | |
241 |
|
|
249 | RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); | |
242 |
|
250 | |||
243 | if ( (format != ZSTD_f_zstd1_magicless) |
|
251 | if ( (format != ZSTD_f_zstd1_magicless) | |
244 | && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { |
|
252 | && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { | |
@@ -251,7 +259,7 b' size_t ZSTD_getFrameHeader_advanced(ZSTD' | |||||
251 | zfhPtr->frameType = ZSTD_skippableFrame; |
|
259 | zfhPtr->frameType = ZSTD_skippableFrame; | |
252 | return 0; |
|
260 | return 0; | |
253 | } |
|
261 | } | |
254 |
|
|
262 | RETURN_ERROR(prefix_unknown); | |
255 | } |
|
263 | } | |
256 |
|
264 | |||
257 | /* ensure there is enough `srcSize` to fully read/decode frame header */ |
|
265 | /* ensure there is enough `srcSize` to fully read/decode frame header */ | |
@@ -269,14 +277,13 b' size_t ZSTD_getFrameHeader_advanced(ZSTD' | |||||
269 | U64 windowSize = 0; |
|
277 | U64 windowSize = 0; | |
270 | U32 dictID = 0; |
|
278 | U32 dictID = 0; | |
271 | U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
279 | U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
272 | if ((fhdByte & 0x08) != 0) |
|
280 | RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, | |
273 | return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */ |
|
281 | "reserved bits, must be zero"); | |
274 |
|
282 | |||
275 | if (!singleSegment) { |
|
283 | if (!singleSegment) { | |
276 | BYTE const wlByte = ip[pos++]; |
|
284 | BYTE const wlByte = ip[pos++]; | |
277 | U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; |
|
285 | U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; | |
278 |
|
|
286 | RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); | |
279 | return ERROR(frameParameter_windowTooLarge); |
|
|||
280 | windowSize = (1ULL << windowLog); |
|
287 | windowSize = (1ULL << windowLog); | |
281 | windowSize += (windowSize >> 3) * (wlByte&7); |
|
288 | windowSize += (windowSize >> 3) * (wlByte&7); | |
282 | } |
|
289 | } | |
@@ -348,14 +355,16 b' static size_t readSkippableFrameSize(voi' | |||||
348 | size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; |
|
355 | size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; | |
349 | U32 sizeU32; |
|
356 | U32 sizeU32; | |
350 |
|
357 | |||
351 |
|
|
358 | RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); | |
352 | return ERROR(srcSize_wrong); |
|
|||
353 |
|
359 | |||
354 | sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); |
|
360 | sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); | |
355 |
|
|
361 | RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, | |
356 |
|
|
362 | frameParameter_unsupported); | |
357 |
|
363 | { | ||
358 |
|
|
364 | size_t const skippableSize = skippableHeaderSize + sizeU32; | |
|
365 | RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); | |||
|
366 | return skippableSize; | |||
|
367 | } | |||
359 | } |
|
368 | } | |
360 |
|
369 | |||
361 | /** ZSTD_findDecompressedSize() : |
|
370 | /** ZSTD_findDecompressedSize() : | |
@@ -372,11 +381,10 b' unsigned long long ZSTD_findDecompressed' | |||||
372 |
|
381 | |||
373 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { |
|
382 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { | |
374 | size_t const skippableSize = readSkippableFrameSize(src, srcSize); |
|
383 | size_t const skippableSize = readSkippableFrameSize(src, srcSize); | |
375 | if (ZSTD_isError(skippableSize)) |
|
384 | if (ZSTD_isError(skippableSize)) { | |
376 | return skippableSize; |
|
|||
377 | if (srcSize < skippableSize) { |
|
|||
378 | return ZSTD_CONTENTSIZE_ERROR; |
|
385 | return ZSTD_CONTENTSIZE_ERROR; | |
379 | } |
|
386 | } | |
|
387 | assert(skippableSize <= srcSize); | |||
380 |
|
388 | |||
381 | src = (const BYTE *)src + skippableSize; |
|
389 | src = (const BYTE *)src + skippableSize; | |
382 | srcSize -= skippableSize; |
|
390 | srcSize -= skippableSize; | |
@@ -428,13 +436,91 b' static size_t ZSTD_decodeFrameHeader(ZST' | |||||
428 | { |
|
436 | { | |
429 | size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); |
|
437 | size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); | |
430 | if (ZSTD_isError(result)) return result; /* invalid header */ |
|
438 | if (ZSTD_isError(result)) return result; /* invalid header */ | |
431 |
|
|
439 | RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); | |
432 | if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) |
|
440 | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION | |
433 | return ERROR(dictionary_wrong); |
|
441 | /* Skip the dictID check in fuzzing mode, because it makes the search | |
|
442 | * harder. | |||
|
443 | */ | |||
|
444 | RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), | |||
|
445 | dictionary_wrong); | |||
|
446 | #endif | |||
434 | if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); |
|
447 | if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); | |
435 | return 0; |
|
448 | return 0; | |
436 | } |
|
449 | } | |
437 |
|
450 | |||
|
451 | static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) | |||
|
452 | { | |||
|
453 | ZSTD_frameSizeInfo frameSizeInfo; | |||
|
454 | frameSizeInfo.compressedSize = ret; | |||
|
455 | frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; | |||
|
456 | return frameSizeInfo; | |||
|
457 | } | |||
|
458 | ||||
|
459 | static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) | |||
|
460 | { | |||
|
461 | ZSTD_frameSizeInfo frameSizeInfo; | |||
|
462 | memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); | |||
|
463 | ||||
|
464 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) | |||
|
465 | if (ZSTD_isLegacy(src, srcSize)) | |||
|
466 | return ZSTD_findFrameSizeInfoLegacy(src, srcSize); | |||
|
467 | #endif | |||
|
468 | ||||
|
469 | if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) | |||
|
470 | && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { | |||
|
471 | frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); | |||
|
472 | assert(ZSTD_isError(frameSizeInfo.compressedSize) || | |||
|
473 | frameSizeInfo.compressedSize <= srcSize); | |||
|
474 | return frameSizeInfo; | |||
|
475 | } else { | |||
|
476 | const BYTE* ip = (const BYTE*)src; | |||
|
477 | const BYTE* const ipstart = ip; | |||
|
478 | size_t remainingSize = srcSize; | |||
|
479 | size_t nbBlocks = 0; | |||
|
480 | ZSTD_frameHeader zfh; | |||
|
481 | ||||
|
482 | /* Extract Frame Header */ | |||
|
483 | { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); | |||
|
484 | if (ZSTD_isError(ret)) | |||
|
485 | return ZSTD_errorFrameSizeInfo(ret); | |||
|
486 | if (ret > 0) | |||
|
487 | return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); | |||
|
488 | } | |||
|
489 | ||||
|
490 | ip += zfh.headerSize; | |||
|
491 | remainingSize -= zfh.headerSize; | |||
|
492 | ||||
|
493 | /* Iterate over each block */ | |||
|
494 | while (1) { | |||
|
495 | blockProperties_t blockProperties; | |||
|
496 | size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); | |||
|
497 | if (ZSTD_isError(cBlockSize)) | |||
|
498 | return ZSTD_errorFrameSizeInfo(cBlockSize); | |||
|
499 | ||||
|
500 | if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) | |||
|
501 | return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); | |||
|
502 | ||||
|
503 | ip += ZSTD_blockHeaderSize + cBlockSize; | |||
|
504 | remainingSize -= ZSTD_blockHeaderSize + cBlockSize; | |||
|
505 | nbBlocks++; | |||
|
506 | ||||
|
507 | if (blockProperties.lastBlock) break; | |||
|
508 | } | |||
|
509 | ||||
|
510 | /* Final frame content checksum */ | |||
|
511 | if (zfh.checksumFlag) { | |||
|
512 | if (remainingSize < 4) | |||
|
513 | return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); | |||
|
514 | ip += 4; | |||
|
515 | } | |||
|
516 | ||||
|
517 | frameSizeInfo.compressedSize = ip - ipstart; | |||
|
518 | frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) | |||
|
519 | ? zfh.frameContentSize | |||
|
520 | : nbBlocks * zfh.blockSizeMax; | |||
|
521 | return frameSizeInfo; | |||
|
522 | } | |||
|
523 | } | |||
438 |
|
524 | |||
439 | /** ZSTD_findFrameCompressedSize() : |
|
525 | /** ZSTD_findFrameCompressedSize() : | |
440 | * compatible with legacy mode |
|
526 | * compatible with legacy mode | |
@@ -443,52 +529,33 b' static size_t ZSTD_decodeFrameHeader(ZST' | |||||
443 | * @return : the compressed size of the frame starting at `src` */ |
|
529 | * @return : the compressed size of the frame starting at `src` */ | |
444 | size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) |
|
530 | size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) | |
445 | { |
|
531 | { | |
446 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) |
|
532 | ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); | |
447 | if (ZSTD_isLegacy(src, srcSize)) |
|
533 | return frameSizeInfo.compressedSize; | |
448 | return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); |
|
|||
449 | #endif |
|
|||
450 | if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) |
|
|||
451 | && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) { |
|
|||
452 | return readSkippableFrameSize(src, srcSize); |
|
|||
453 | } else { |
|
|||
454 | const BYTE* ip = (const BYTE*)src; |
|
|||
455 | const BYTE* const ipstart = ip; |
|
|||
456 | size_t remainingSize = srcSize; |
|
|||
457 | ZSTD_frameHeader zfh; |
|
|||
458 |
|
||||
459 | /* Extract Frame Header */ |
|
|||
460 | { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); |
|
|||
461 | if (ZSTD_isError(ret)) return ret; |
|
|||
462 | if (ret > 0) return ERROR(srcSize_wrong); |
|
|||
463 | } |
|
|||
464 |
|
||||
465 | ip += zfh.headerSize; |
|
|||
466 | remainingSize -= zfh.headerSize; |
|
|||
467 |
|
||||
468 | /* Loop on each block */ |
|
|||
469 | while (1) { |
|
|||
470 | blockProperties_t blockProperties; |
|
|||
471 | size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); |
|
|||
472 | if (ZSTD_isError(cBlockSize)) return cBlockSize; |
|
|||
473 |
|
||||
474 | if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) |
|
|||
475 | return ERROR(srcSize_wrong); |
|
|||
476 |
|
||||
477 | ip += ZSTD_blockHeaderSize + cBlockSize; |
|
|||
478 | remainingSize -= ZSTD_blockHeaderSize + cBlockSize; |
|
|||
479 |
|
||||
480 | if (blockProperties.lastBlock) break; |
|
|||
481 | } |
|
|||
482 |
|
||||
483 | if (zfh.checksumFlag) { /* Final frame content checksum */ |
|
|||
484 | if (remainingSize < 4) return ERROR(srcSize_wrong); |
|
|||
485 | ip += 4; |
|
|||
486 | } |
|
|||
487 |
|
||||
488 | return ip - ipstart; |
|
|||
489 | } |
|
|||
490 | } |
|
534 | } | |
491 |
|
535 | |||
|
536 | /** ZSTD_decompressBound() : | |||
|
537 | * compatible with legacy mode | |||
|
538 | * `src` must point to the start of a ZSTD frame or a skippeable frame | |||
|
539 | * `srcSize` must be at least as large as the frame contained | |||
|
540 | * @return : the maximum decompressed size of the compressed source | |||
|
541 | */ | |||
|
542 | unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) | |||
|
543 | { | |||
|
544 | unsigned long long bound = 0; | |||
|
545 | /* Iterate over each frame */ | |||
|
546 | while (srcSize > 0) { | |||
|
547 | ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); | |||
|
548 | size_t const compressedSize = frameSizeInfo.compressedSize; | |||
|
549 | unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; | |||
|
550 | if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) | |||
|
551 | return ZSTD_CONTENTSIZE_ERROR; | |||
|
552 | assert(srcSize >= compressedSize); | |||
|
553 | src = (const BYTE*)src + compressedSize; | |||
|
554 | srcSize -= compressedSize; | |||
|
555 | bound += decompressedBound; | |||
|
556 | } | |||
|
557 | return bound; | |||
|
558 | } | |||
492 |
|
559 | |||
493 |
|
560 | |||
494 | /*-************************************************************* |
|
561 | /*-************************************************************* | |
@@ -507,9 +574,10 b' void ZSTD_checkContinuity(ZSTD_DCtx* dct' | |||||
507 | } |
|
574 | } | |
508 |
|
575 | |||
509 | /** ZSTD_insertBlock() : |
|
576 | /** ZSTD_insertBlock() : | |
510 |
|
|
577 | * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ | |
511 | size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) |
|
578 | size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) | |
512 | { |
|
579 | { | |
|
580 | DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); | |||
513 | ZSTD_checkContinuity(dctx, blockStart); |
|
581 | ZSTD_checkContinuity(dctx, blockStart); | |
514 | dctx->previousDstEnd = (const char*)blockStart + blockSize; |
|
582 | dctx->previousDstEnd = (const char*)blockStart + blockSize; | |
515 | return blockSize; |
|
583 | return blockSize; | |
@@ -522,9 +590,9 b' static size_t ZSTD_copyRawBlock(void* ds' | |||||
522 | DEBUGLOG(5, "ZSTD_copyRawBlock"); |
|
590 | DEBUGLOG(5, "ZSTD_copyRawBlock"); | |
523 | if (dst == NULL) { |
|
591 | if (dst == NULL) { | |
524 | if (srcSize == 0) return 0; |
|
592 | if (srcSize == 0) return 0; | |
525 |
|
|
593 | RETURN_ERROR(dstBuffer_null); | |
526 | } |
|
594 | } | |
527 |
|
|
595 | RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); | |
528 | memcpy(dst, src, srcSize); |
|
596 | memcpy(dst, src, srcSize); | |
529 | return srcSize; |
|
597 | return srcSize; | |
530 | } |
|
598 | } | |
@@ -535,9 +603,9 b' static size_t ZSTD_setRleBlock(void* dst' | |||||
535 | { |
|
603 | { | |
536 | if (dst == NULL) { |
|
604 | if (dst == NULL) { | |
537 | if (regenSize == 0) return 0; |
|
605 | if (regenSize == 0) return 0; | |
538 |
|
|
606 | RETURN_ERROR(dstBuffer_null); | |
539 | } |
|
607 | } | |
540 |
|
|
608 | RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); | |
541 | memset(dst, b, regenSize); |
|
609 | memset(dst, b, regenSize); | |
542 | return regenSize; |
|
610 | return regenSize; | |
543 | } |
|
611 | } | |
@@ -560,15 +628,16 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||||
560 | DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); |
|
628 | DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); | |
561 |
|
629 | |||
562 | /* check */ |
|
630 | /* check */ | |
563 | if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize) |
|
631 | RETURN_ERROR_IF( | |
564 | return ERROR(srcSize_wrong); |
|
632 | remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, | |
|
633 | srcSize_wrong); | |||
565 |
|
634 | |||
566 | /* Frame Header */ |
|
635 | /* Frame Header */ | |
567 | { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); |
|
636 | { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); | |
568 | if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; |
|
637 | if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; | |
569 |
|
|
638 | RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, | |
570 |
|
|
639 | srcSize_wrong); | |
571 |
|
|
640 | FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); | |
572 | ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; |
|
641 | ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; | |
573 | } |
|
642 | } | |
574 |
|
643 | |||
@@ -581,7 +650,7 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||||
581 |
|
650 | |||
582 | ip += ZSTD_blockHeaderSize; |
|
651 | ip += ZSTD_blockHeaderSize; | |
583 | remainingSrcSize -= ZSTD_blockHeaderSize; |
|
652 | remainingSrcSize -= ZSTD_blockHeaderSize; | |
584 |
|
|
653 | RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); | |
585 |
|
654 | |||
586 | switch(blockProperties.blockType) |
|
655 | switch(blockProperties.blockType) | |
587 | { |
|
656 | { | |
@@ -596,7 +665,7 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||||
596 | break; |
|
665 | break; | |
597 | case bt_reserved : |
|
666 | case bt_reserved : | |
598 | default: |
|
667 | default: | |
599 |
|
|
668 | RETURN_ERROR(corruption_detected); | |
600 | } |
|
669 | } | |
601 |
|
670 | |||
602 | if (ZSTD_isError(decodedSize)) return decodedSize; |
|
671 | if (ZSTD_isError(decodedSize)) return decodedSize; | |
@@ -609,15 +678,15 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||||
609 | } |
|
678 | } | |
610 |
|
679 | |||
611 | if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { |
|
680 | if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { | |
612 |
|
|
681 | RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, | |
613 |
|
|
682 | corruption_detected); | |
614 |
} |
|
683 | } | |
615 | if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ |
|
684 | if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ | |
616 | U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); |
|
685 | U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); | |
617 | U32 checkRead; |
|
686 | U32 checkRead; | |
618 |
|
|
687 | RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); | |
619 | checkRead = MEM_readLE32(ip); |
|
688 | checkRead = MEM_readLE32(ip); | |
620 |
|
|
689 | RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); | |
621 | ip += 4; |
|
690 | ip += 4; | |
622 | remainingSrcSize -= 4; |
|
691 | remainingSrcSize -= 4; | |
623 | } |
|
692 | } | |
@@ -652,8 +721,8 b' static size_t ZSTD_decompressMultiFrame(' | |||||
652 | size_t decodedSize; |
|
721 | size_t decodedSize; | |
653 | size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); |
|
722 | size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); | |
654 | if (ZSTD_isError(frameSize)) return frameSize; |
|
723 | if (ZSTD_isError(frameSize)) return frameSize; | |
655 | /* legacy support is not compatible with static dctx */ |
|
724 | RETURN_ERROR_IF(dctx->staticSize, memory_allocation, | |
656 | if (dctx->staticSize) return ERROR(memory_allocation); |
|
725 | "legacy support is not compatible with static dctx"); | |
657 |
|
726 | |||
658 | decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); |
|
727 | decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); | |
659 | if (ZSTD_isError(decodedSize)) return decodedSize; |
|
728 | if (ZSTD_isError(decodedSize)) return decodedSize; | |
@@ -674,9 +743,8 b' static size_t ZSTD_decompressMultiFrame(' | |||||
674 | (unsigned)magicNumber, ZSTD_MAGICNUMBER); |
|
743 | (unsigned)magicNumber, ZSTD_MAGICNUMBER); | |
675 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { |
|
744 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { | |
676 | size_t const skippableSize = readSkippableFrameSize(src, srcSize); |
|
745 | size_t const skippableSize = readSkippableFrameSize(src, srcSize); | |
677 |
|
|
746 | FORWARD_IF_ERROR(skippableSize); | |
678 |
|
|
747 | assert(skippableSize <= srcSize); | |
679 | if (srcSize < skippableSize) return ERROR(srcSize_wrong); |
|
|||
680 |
|
748 | |||
681 | src = (const BYTE *)src + skippableSize; |
|
749 | src = (const BYTE *)src + skippableSize; | |
682 | srcSize -= skippableSize; |
|
750 | srcSize -= skippableSize; | |
@@ -685,29 +753,29 b' static size_t ZSTD_decompressMultiFrame(' | |||||
685 |
|
753 | |||
686 | if (ddict) { |
|
754 | if (ddict) { | |
687 | /* we were called from ZSTD_decompress_usingDDict */ |
|
755 | /* we were called from ZSTD_decompress_usingDDict */ | |
688 |
|
|
756 | FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); | |
689 | } else { |
|
757 | } else { | |
690 | /* this will initialize correctly with no dict if dict == NULL, so |
|
758 | /* this will initialize correctly with no dict if dict == NULL, so | |
691 | * use this in all cases but ddict */ |
|
759 | * use this in all cases but ddict */ | |
692 |
|
|
760 | FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); | |
693 | } |
|
761 | } | |
694 | ZSTD_checkContinuity(dctx, dst); |
|
762 | ZSTD_checkContinuity(dctx, dst); | |
695 |
|
763 | |||
696 | { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, |
|
764 | { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, | |
697 | &src, &srcSize); |
|
765 | &src, &srcSize); | |
698 | if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) |
|
766 | RETURN_ERROR_IF( | |
699 | && (moreThan1Frame==1) ) { |
|
767 | (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) | |
700 | /* at least one frame successfully completed, |
|
768 | && (moreThan1Frame==1), | |
701 | * but following bytes are garbage : |
|
769 | srcSize_wrong, | |
702 | * it's more likely to be a srcSize error, |
|
770 | "at least one frame successfully completed, but following " | |
703 | * specifying more bytes than compressed size of frame(s). |
|
771 | "bytes are garbage: it's more likely to be a srcSize error, " | |
704 | * This error message replaces ERROR(prefix_unknown), |
|
772 | "specifying more bytes than compressed size of frame(s). This " | |
705 | * which would be confusing, as the first header is actually correct. |
|
773 | "error message replaces ERROR(prefix_unknown), which would be " | |
706 | * Note that one could be unlucky, it might be a corruption error instead, |
|
774 | "confusing, as the first header is actually correct. Note that " | |
707 | * happening right at the place where we expect zstd magic bytes. |
|
775 | "one could be unlucky, it might be a corruption error instead, " | |
708 | * But this is _much_ less likely than a srcSize field error. */ |
|
776 | "happening right at the place where we expect zstd magic " | |
709 | return ERROR(srcSize_wrong); |
|
777 | "bytes. But this is _much_ less likely than a srcSize field " | |
710 |
|
|
778 | "error."); | |
711 | if (ZSTD_isError(res)) return res; |
|
779 | if (ZSTD_isError(res)) return res; | |
712 | assert(res <= dstCapacity); |
|
780 | assert(res <= dstCapacity); | |
713 | dst = (BYTE*)dst + res; |
|
781 | dst = (BYTE*)dst + res; | |
@@ -716,7 +784,7 b' static size_t ZSTD_decompressMultiFrame(' | |||||
716 | moreThan1Frame = 1; |
|
784 | moreThan1Frame = 1; | |
717 | } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ |
|
785 | } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ | |
718 |
|
786 | |||
719 |
|
|
787 | RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); | |
720 |
|
788 | |||
721 | return (BYTE*)dst - (BYTE*)dststart; |
|
789 | return (BYTE*)dst - (BYTE*)dststart; | |
722 | } |
|
790 | } | |
@@ -730,9 +798,26 b' size_t ZSTD_decompress_usingDict(ZSTD_DC' | |||||
730 | } |
|
798 | } | |
731 |
|
799 | |||
732 |
|
800 | |||
|
801 | static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) | |||
|
802 | { | |||
|
803 | switch (dctx->dictUses) { | |||
|
804 | default: | |||
|
805 | assert(0 /* Impossible */); | |||
|
806 | /* fall-through */ | |||
|
807 | case ZSTD_dont_use: | |||
|
808 | ZSTD_clearDict(dctx); | |||
|
809 | return NULL; | |||
|
810 | case ZSTD_use_indefinitely: | |||
|
811 | return dctx->ddict; | |||
|
812 | case ZSTD_use_once: | |||
|
813 | dctx->dictUses = ZSTD_dont_use; | |||
|
814 | return dctx->ddict; | |||
|
815 | } | |||
|
816 | } | |||
|
817 | ||||
733 | size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
818 | size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
734 | { |
|
819 | { | |
735 |
return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, |
|
820 | return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); | |
736 | } |
|
821 | } | |
737 |
|
822 | |||
738 |
|
823 | |||
@@ -741,7 +826,7 b' size_t ZSTD_decompress(void* dst, size_t' | |||||
741 | #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) |
|
826 | #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) | |
742 | size_t regenSize; |
|
827 | size_t regenSize; | |
743 | ZSTD_DCtx* const dctx = ZSTD_createDCtx(); |
|
828 | ZSTD_DCtx* const dctx = ZSTD_createDCtx(); | |
744 |
|
|
829 | RETURN_ERROR_IF(dctx==NULL, memory_allocation); | |
745 | regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); |
|
830 | regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); | |
746 | ZSTD_freeDCtx(dctx); |
|
831 | ZSTD_freeDCtx(dctx); | |
747 | return regenSize; |
|
832 | return regenSize; | |
@@ -791,8 +876,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||||
791 | { |
|
876 | { | |
792 | DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); |
|
877 | DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); | |
793 | /* Sanity check */ |
|
878 | /* Sanity check */ | |
794 | if (srcSize != dctx->expected) |
|
879 | RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed"); | |
795 | return ERROR(srcSize_wrong); /* not allowed */ |
|
|||
796 | if (dstCapacity) ZSTD_checkContinuity(dctx, dst); |
|
880 | if (dstCapacity) ZSTD_checkContinuity(dctx, dst); | |
797 |
|
881 | |||
798 | switch (dctx->stage) |
|
882 | switch (dctx->stage) | |
@@ -817,7 +901,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||||
817 | case ZSTDds_decodeFrameHeader: |
|
901 | case ZSTDds_decodeFrameHeader: | |
818 | assert(src != NULL); |
|
902 | assert(src != NULL); | |
819 | memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); |
|
903 | memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); | |
820 |
|
|
904 | FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); | |
821 | dctx->expected = ZSTD_blockHeaderSize; |
|
905 | dctx->expected = ZSTD_blockHeaderSize; | |
822 | dctx->stage = ZSTDds_decodeBlockHeader; |
|
906 | dctx->stage = ZSTDds_decodeBlockHeader; | |
823 | return 0; |
|
907 | return 0; | |
@@ -826,6 +910,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||||
826 | { blockProperties_t bp; |
|
910 | { blockProperties_t bp; | |
827 | size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); |
|
911 | size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); | |
828 | if (ZSTD_isError(cBlockSize)) return cBlockSize; |
|
912 | if (ZSTD_isError(cBlockSize)) return cBlockSize; | |
|
913 | RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); | |||
829 | dctx->expected = cBlockSize; |
|
914 | dctx->expected = cBlockSize; | |
830 | dctx->bType = bp.blockType; |
|
915 | dctx->bType = bp.blockType; | |
831 | dctx->rleSize = bp.origSize; |
|
916 | dctx->rleSize = bp.origSize; | |
@@ -867,19 +952,20 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||||
867 | break; |
|
952 | break; | |
868 | case bt_reserved : /* should never happen */ |
|
953 | case bt_reserved : /* should never happen */ | |
869 | default: |
|
954 | default: | |
870 |
|
|
955 | RETURN_ERROR(corruption_detected); | |
871 | } |
|
956 | } | |
872 | if (ZSTD_isError(rSize)) return rSize; |
|
957 | if (ZSTD_isError(rSize)) return rSize; | |
|
958 | RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); | |||
873 | DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); |
|
959 | DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); | |
874 | dctx->decodedSize += rSize; |
|
960 | dctx->decodedSize += rSize; | |
875 | if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); |
|
961 | if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); | |
876 |
|
962 | |||
877 | if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ |
|
963 | if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ | |
878 | DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); |
|
964 | DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); | |
879 | if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { |
|
965 | RETURN_ERROR_IF( | |
880 |
|
|
966 | dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN | |
881 | return ERROR(corruption_detected); |
|
967 | && dctx->decodedSize != dctx->fParams.frameContentSize, | |
882 |
|
|
968 | corruption_detected); | |
883 | if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ |
|
969 | if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ | |
884 | dctx->expected = 4; |
|
970 | dctx->expected = 4; | |
885 | dctx->stage = ZSTDds_checkChecksum; |
|
971 | dctx->stage = ZSTDds_checkChecksum; | |
@@ -900,7 +986,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||||
900 | { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); |
|
986 | { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); | |
901 | U32 const check32 = MEM_readLE32(src); |
|
987 | U32 const check32 = MEM_readLE32(src); | |
902 | DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); |
|
988 | DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); | |
903 |
|
|
989 | RETURN_ERROR_IF(check32 != h32, checksum_wrong); | |
904 | dctx->expected = 0; |
|
990 | dctx->expected = 0; | |
905 | dctx->stage = ZSTDds_getFrameHeaderSize; |
|
991 | dctx->stage = ZSTDds_getFrameHeaderSize; | |
906 | return 0; |
|
992 | return 0; | |
@@ -921,7 +1007,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||||
921 |
|
1007 | |||
922 | default: |
|
1008 | default: | |
923 | assert(0); /* impossible */ |
|
1009 | assert(0); /* impossible */ | |
924 |
|
|
1010 | RETURN_ERROR(GENERIC); /* some compiler require default to do something */ | |
925 | } |
|
1011 | } | |
926 | } |
|
1012 | } | |
927 |
|
1013 | |||
@@ -945,7 +1031,7 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||||
945 | const BYTE* dictPtr = (const BYTE*)dict; |
|
1031 | const BYTE* dictPtr = (const BYTE*)dict; | |
946 | const BYTE* const dictEnd = dictPtr + dictSize; |
|
1032 | const BYTE* const dictEnd = dictPtr + dictSize; | |
947 |
|
1033 | |||
948 |
|
|
1034 | RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); | |
949 | assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ |
|
1035 | assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ | |
950 | dictPtr += 8; /* skip header = magic + dictID */ |
|
1036 | dictPtr += 8; /* skip header = magic + dictID */ | |
951 |
|
1037 | |||
@@ -964,16 +1050,16 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||||
964 | dictPtr, dictEnd - dictPtr, |
|
1050 | dictPtr, dictEnd - dictPtr, | |
965 | workspace, workspaceSize); |
|
1051 | workspace, workspaceSize); | |
966 | #endif |
|
1052 | #endif | |
967 |
|
|
1053 | RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); | |
968 | dictPtr += hSize; |
|
1054 | dictPtr += hSize; | |
969 | } |
|
1055 | } | |
970 |
|
1056 | |||
971 | { short offcodeNCount[MaxOff+1]; |
|
1057 | { short offcodeNCount[MaxOff+1]; | |
972 | unsigned offcodeMaxValue = MaxOff, offcodeLog; |
|
1058 | unsigned offcodeMaxValue = MaxOff, offcodeLog; | |
973 | size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); |
|
1059 | size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); | |
974 |
|
|
1060 | RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); | |
975 |
|
|
1061 | RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); | |
976 |
|
|
1062 | RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); | |
977 | ZSTD_buildFSETable( entropy->OFTable, |
|
1063 | ZSTD_buildFSETable( entropy->OFTable, | |
978 | offcodeNCount, offcodeMaxValue, |
|
1064 | offcodeNCount, offcodeMaxValue, | |
979 | OF_base, OF_bits, |
|
1065 | OF_base, OF_bits, | |
@@ -984,9 +1070,9 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||||
984 | { short matchlengthNCount[MaxML+1]; |
|
1070 | { short matchlengthNCount[MaxML+1]; | |
985 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; |
|
1071 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; | |
986 | size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); |
|
1072 | size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); | |
987 |
|
|
1073 | RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); | |
988 |
|
|
1074 | RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); | |
989 |
|
|
1075 | RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); | |
990 | ZSTD_buildFSETable( entropy->MLTable, |
|
1076 | ZSTD_buildFSETable( entropy->MLTable, | |
991 | matchlengthNCount, matchlengthMaxValue, |
|
1077 | matchlengthNCount, matchlengthMaxValue, | |
992 | ML_base, ML_bits, |
|
1078 | ML_base, ML_bits, | |
@@ -997,9 +1083,9 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||||
997 | { short litlengthNCount[MaxLL+1]; |
|
1083 | { short litlengthNCount[MaxLL+1]; | |
998 | unsigned litlengthMaxValue = MaxLL, litlengthLog; |
|
1084 | unsigned litlengthMaxValue = MaxLL, litlengthLog; | |
999 | size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); |
|
1085 | size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); | |
1000 |
|
|
1086 | RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); | |
1001 |
|
|
1087 | RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); | |
1002 |
|
|
1088 | RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); | |
1003 | ZSTD_buildFSETable( entropy->LLTable, |
|
1089 | ZSTD_buildFSETable( entropy->LLTable, | |
1004 | litlengthNCount, litlengthMaxValue, |
|
1090 | litlengthNCount, litlengthMaxValue, | |
1005 | LL_base, LL_bits, |
|
1091 | LL_base, LL_bits, | |
@@ -1007,12 +1093,13 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||||
1007 | dictPtr += litlengthHeaderSize; |
|
1093 | dictPtr += litlengthHeaderSize; | |
1008 | } |
|
1094 | } | |
1009 |
|
1095 | |||
1010 |
|
|
1096 | RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); | |
1011 | { int i; |
|
1097 | { int i; | |
1012 | size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); |
|
1098 | size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); | |
1013 | for (i=0; i<3; i++) { |
|
1099 | for (i=0; i<3; i++) { | |
1014 | U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; |
|
1100 | U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; | |
1015 |
|
|
1101 | RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, | |
|
1102 | dictionary_corrupted); | |||
1016 | entropy->rep[i] = rep; |
|
1103 | entropy->rep[i] = rep; | |
1017 | } } |
|
1104 | } } | |
1018 |
|
1105 | |||
@@ -1030,7 +1117,7 b' static size_t ZSTD_decompress_insertDict' | |||||
1030 |
|
1117 | |||
1031 | /* load entropy tables */ |
|
1118 | /* load entropy tables */ | |
1032 | { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); |
|
1119 | { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); | |
1033 |
|
|
1120 | RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); | |
1034 | dict = (const char*)dict + eSize; |
|
1121 | dict = (const char*)dict + eSize; | |
1035 | dictSize -= eSize; |
|
1122 | dictSize -= eSize; | |
1036 | } |
|
1123 | } | |
@@ -1064,9 +1151,11 b' size_t ZSTD_decompressBegin(ZSTD_DCtx* d' | |||||
1064 |
|
1151 | |||
1065 | size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) |
|
1152 | size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) | |
1066 | { |
|
1153 | { | |
1067 |
|
|
1154 | FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); | |
1068 | if (dict && dictSize) |
|
1155 | if (dict && dictSize) | |
1069 | CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); |
|
1156 | RETURN_ERROR_IF( | |
|
1157 | ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), | |||
|
1158 | dictionary_corrupted); | |||
1070 | return 0; |
|
1159 | return 0; | |
1071 | } |
|
1160 | } | |
1072 |
|
1161 | |||
@@ -1085,7 +1174,7 b' size_t ZSTD_decompressBegin_usingDDict(Z' | |||||
1085 | DEBUGLOG(4, "DDict is %s", |
|
1174 | DEBUGLOG(4, "DDict is %s", | |
1086 | dctx->ddictIsCold ? "~cold~" : "hot!"); |
|
1175 | dctx->ddictIsCold ? "~cold~" : "hot!"); | |
1087 | } |
|
1176 | } | |
1088 |
|
|
1177 | FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); | |
1089 | if (ddict) { /* NULL ddict is equivalent to no dictionary */ |
|
1178 | if (ddict) { /* NULL ddict is equivalent to no dictionary */ | |
1090 | ZSTD_copyDDictParameters(dctx, ddict); |
|
1179 | ZSTD_copyDDictParameters(dctx, ddict); | |
1091 | } |
|
1180 | } | |
@@ -1104,7 +1193,7 b' unsigned ZSTD_getDictID_fromDict(const v' | |||||
1104 | } |
|
1193 | } | |
1105 |
|
1194 | |||
1106 | /*! ZSTD_getDictID_fromFrame() : |
|
1195 | /*! ZSTD_getDictID_fromFrame() : | |
1107 |
* Provides the dictID required to decompress |
|
1196 | * Provides the dictID required to decompress frame stored within `src`. | |
1108 | * If @return == 0, the dictID could not be decoded. |
|
1197 | * If @return == 0, the dictID could not be decoded. | |
1109 | * This could for one of the following reasons : |
|
1198 | * This could for one of the following reasons : | |
1110 | * - The frame does not require a dictionary (most common case). |
|
1199 | * - The frame does not require a dictionary (most common case). | |
@@ -1176,15 +1265,14 b' size_t ZSTD_DCtx_loadDictionary_advanced' | |||||
1176 | ZSTD_dictLoadMethod_e dictLoadMethod, |
|
1265 | ZSTD_dictLoadMethod_e dictLoadMethod, | |
1177 | ZSTD_dictContentType_e dictContentType) |
|
1266 | ZSTD_dictContentType_e dictContentType) | |
1178 | { |
|
1267 | { | |
1179 |
|
|
1268 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
1180 |
ZSTD_ |
|
1269 | ZSTD_clearDict(dctx); | |
1181 | if (dict && dictSize >= 8) { |
|
1270 | if (dict && dictSize >= 8) { | |
1182 | dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); |
|
1271 | dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); | |
1183 |
|
|
1272 | RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); | |
1184 | } else { |
|
1273 | dctx->ddict = dctx->ddictLocal; | |
1185 | dctx->ddictLocal = NULL; |
|
1274 | dctx->dictUses = ZSTD_use_indefinitely; | |
1186 | } |
|
1275 | } | |
1187 | dctx->ddict = dctx->ddictLocal; |
|
|||
1188 | return 0; |
|
1276 | return 0; | |
1189 | } |
|
1277 | } | |
1190 |
|
1278 | |||
@@ -1200,7 +1288,9 b' size_t ZSTD_DCtx_loadDictionary(ZSTD_DCt' | |||||
1200 |
|
1288 | |||
1201 | size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) |
|
1289 | size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) | |
1202 | { |
|
1290 | { | |
1203 |
|
|
1291 | FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); | |
|
1292 | dctx->dictUses = ZSTD_use_once; | |||
|
1293 | return 0; | |||
1204 | } |
|
1294 | } | |
1205 |
|
1295 | |||
1206 | size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) |
|
1296 | size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) | |
@@ -1215,9 +1305,8 b' size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dc' | |||||
1215 | size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) |
|
1305 | size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) | |
1216 | { |
|
1306 | { | |
1217 | DEBUGLOG(4, "ZSTD_initDStream_usingDict"); |
|
1307 | DEBUGLOG(4, "ZSTD_initDStream_usingDict"); | |
1218 | zds->streamStage = zdss_init; |
|
1308 | FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); | |
1219 | zds->noForwardProgress = 0; |
|
1309 | FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); | |
1220 | CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); |
|
|||
1221 | return ZSTD_FRAMEHEADERSIZE_PREFIX; |
|
1310 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
1222 | } |
|
1311 | } | |
1223 |
|
1312 | |||
@@ -1225,7 +1314,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D' | |||||
1225 | size_t ZSTD_initDStream(ZSTD_DStream* zds) |
|
1314 | size_t ZSTD_initDStream(ZSTD_DStream* zds) | |
1226 | { |
|
1315 | { | |
1227 | DEBUGLOG(4, "ZSTD_initDStream"); |
|
1316 | DEBUGLOG(4, "ZSTD_initDStream"); | |
1228 |
return ZSTD_initDStream_usingDict(zds, NULL |
|
1317 | return ZSTD_initDStream_usingDDict(zds, NULL); | |
1229 | } |
|
1318 | } | |
1230 |
|
1319 | |||
1231 | /* ZSTD_initDStream_usingDDict() : |
|
1320 | /* ZSTD_initDStream_usingDDict() : | |
@@ -1233,9 +1322,9 b' size_t ZSTD_initDStream(ZSTD_DStream* zd' | |||||
1233 | * this function cannot fail */ |
|
1322 | * this function cannot fail */ | |
1234 | size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) |
|
1323 | size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) | |
1235 | { |
|
1324 | { | |
1236 | size_t const initResult = ZSTD_initDStream(dctx); |
|
1325 | FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); | |
1237 | dctx->ddict = ddict; |
|
1326 | FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); | |
1238 | return initResult; |
|
1327 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
1239 | } |
|
1328 | } | |
1240 |
|
1329 | |||
1241 | /* ZSTD_resetDStream() : |
|
1330 | /* ZSTD_resetDStream() : | |
@@ -1243,19 +1332,19 b' size_t ZSTD_initDStream_usingDDict(ZSTD_' | |||||
1243 | * this function cannot fail */ |
|
1332 | * this function cannot fail */ | |
1244 | size_t ZSTD_resetDStream(ZSTD_DStream* dctx) |
|
1333 | size_t ZSTD_resetDStream(ZSTD_DStream* dctx) | |
1245 | { |
|
1334 | { | |
1246 | DEBUGLOG(4, "ZSTD_resetDStream"); |
|
1335 | FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); | |
1247 | dctx->streamStage = zdss_loadHeader; |
|
|||
1248 | dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0; |
|
|||
1249 | dctx->legacyVersion = 0; |
|
|||
1250 | dctx->hostageByte = 0; |
|
|||
1251 | return ZSTD_FRAMEHEADERSIZE_PREFIX; |
|
1336 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
1252 | } |
|
1337 | } | |
1253 |
|
1338 | |||
1254 |
|
1339 | |||
1255 | size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) |
|
1340 | size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) | |
1256 | { |
|
1341 | { | |
1257 |
|
|
1342 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
1258 | dctx->ddict = ddict; |
|
1343 | ZSTD_clearDict(dctx); | |
|
1344 | if (ddict) { | |||
|
1345 | dctx->ddict = ddict; | |||
|
1346 | dctx->dictUses = ZSTD_use_indefinitely; | |||
|
1347 | } | |||
1259 | return 0; |
|
1348 | return 0; | |
1260 | } |
|
1349 | } | |
1261 |
|
1350 | |||
@@ -1267,9 +1356,9 b' size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_D' | |||||
1267 | ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); |
|
1356 | ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); | |
1268 | size_t const min = (size_t)1 << bounds.lowerBound; |
|
1357 | size_t const min = (size_t)1 << bounds.lowerBound; | |
1269 | size_t const max = (size_t)1 << bounds.upperBound; |
|
1358 | size_t const max = (size_t)1 << bounds.upperBound; | |
1270 |
|
|
1359 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
1271 |
|
|
1360 | RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); | |
1272 |
|
|
1361 | RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); | |
1273 | dctx->maxWindowSize = maxWindowSize; |
|
1362 | dctx->maxWindowSize = maxWindowSize; | |
1274 | return 0; |
|
1363 | return 0; | |
1275 | } |
|
1364 | } | |
@@ -1311,15 +1400,15 b' static int ZSTD_dParam_withinBounds(ZSTD' | |||||
1311 | } |
|
1400 | } | |
1312 |
|
1401 | |||
1313 | #define CHECK_DBOUNDS(p,v) { \ |
|
1402 | #define CHECK_DBOUNDS(p,v) { \ | |
1314 |
|
|
1403 | RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ | |
1315 | return ERROR(parameter_outOfBound); \ |
|
|||
1316 | } |
|
1404 | } | |
1317 |
|
1405 | |||
1318 | size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) |
|
1406 | size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) | |
1319 | { |
|
1407 | { | |
1320 |
|
|
1408 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
1321 | switch(dParam) { |
|
1409 | switch(dParam) { | |
1322 | case ZSTD_d_windowLogMax: |
|
1410 | case ZSTD_d_windowLogMax: | |
|
1411 | if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; | |||
1323 | CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); |
|
1412 | CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); | |
1324 | dctx->maxWindowSize = ((size_t)1) << value; |
|
1413 | dctx->maxWindowSize = ((size_t)1) << value; | |
1325 | return 0; |
|
1414 | return 0; | |
@@ -1329,19 +1418,20 b' size_t ZSTD_DCtx_setParameter(ZSTD_DCtx*' | |||||
1329 | return 0; |
|
1418 | return 0; | |
1330 | default:; |
|
1419 | default:; | |
1331 | } |
|
1420 | } | |
1332 |
|
|
1421 | RETURN_ERROR(parameter_unsupported); | |
1333 | } |
|
1422 | } | |
1334 |
|
1423 | |||
1335 | size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) |
|
1424 | size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) | |
1336 | { |
|
1425 | { | |
1337 | if ( (reset == ZSTD_reset_session_only) |
|
1426 | if ( (reset == ZSTD_reset_session_only) | |
1338 | || (reset == ZSTD_reset_session_and_parameters) ) { |
|
1427 | || (reset == ZSTD_reset_session_and_parameters) ) { | |
1339 | (void)ZSTD_initDStream(dctx); |
|
1428 | dctx->streamStage = zdss_init; | |
|
1429 | dctx->noForwardProgress = 0; | |||
1340 | } |
|
1430 | } | |
1341 | if ( (reset == ZSTD_reset_parameters) |
|
1431 | if ( (reset == ZSTD_reset_parameters) | |
1342 | || (reset == ZSTD_reset_session_and_parameters) ) { |
|
1432 | || (reset == ZSTD_reset_session_and_parameters) ) { | |
1343 |
|
|
1433 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
1344 | return ERROR(stage_wrong); |
|
1434 | ZSTD_clearDict(dctx); | |
1345 | dctx->format = ZSTD_f_zstd1; |
|
1435 | dctx->format = ZSTD_f_zstd1; | |
1346 | dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; |
|
1436 | dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; | |
1347 | } |
|
1437 | } | |
@@ -1360,7 +1450,8 b' size_t ZSTD_decodingBufferSize_min(unsig' | |||||
1360 | unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); |
|
1450 | unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); | |
1361 | unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); |
|
1451 | unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); | |
1362 | size_t const minRBSize = (size_t) neededSize; |
|
1452 | size_t const minRBSize = (size_t) neededSize; | |
1363 | if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge); |
|
1453 | RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, | |
|
1454 | frameParameter_windowTooLarge); | |||
1364 | return minRBSize; |
|
1455 | return minRBSize; | |
1365 | } |
|
1456 | } | |
1366 |
|
1457 | |||
@@ -1378,9 +1469,9 b' size_t ZSTD_estimateDStreamSize_fromFram' | |||||
1378 | ZSTD_frameHeader zfh; |
|
1469 | ZSTD_frameHeader zfh; | |
1379 | size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); |
|
1470 | size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); | |
1380 | if (ZSTD_isError(err)) return err; |
|
1471 | if (ZSTD_isError(err)) return err; | |
1381 |
|
|
1472 | RETURN_ERROR_IF(err>0, srcSize_wrong); | |
1382 |
|
|
1473 | RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, | |
1383 |
|
|
1474 | frameParameter_windowTooLarge); | |
1384 | return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); |
|
1475 | return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); | |
1385 | } |
|
1476 | } | |
1386 |
|
1477 | |||
@@ -1406,16 +1497,16 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1406 | U32 someMoreWork = 1; |
|
1497 | U32 someMoreWork = 1; | |
1407 |
|
1498 | |||
1408 | DEBUGLOG(5, "ZSTD_decompressStream"); |
|
1499 | DEBUGLOG(5, "ZSTD_decompressStream"); | |
1409 | if (input->pos > input->size) { /* forbidden */ |
|
1500 | RETURN_ERROR_IF( | |
1410 | DEBUGLOG(5, "in: pos: %u vs size: %u", |
|
1501 | input->pos > input->size, | |
1411 | (U32)input->pos, (U32)input->size); |
|
1502 | srcSize_wrong, | |
1412 | return ERROR(srcSize_wrong); |
|
1503 | "forbidden. in: pos: %u vs size: %u", | |
1413 | } |
|
1504 | (U32)input->pos, (U32)input->size); | |
1414 | if (output->pos > output->size) { /* forbidden */ |
|
1505 | RETURN_ERROR_IF( | |
1415 | DEBUGLOG(5, "out: pos: %u vs size: %u", |
|
1506 | output->pos > output->size, | |
1416 | (U32)output->pos, (U32)output->size); |
|
1507 | dstSize_tooSmall, | |
1417 | return ERROR(dstSize_tooSmall); |
|
1508 | "forbidden. out: pos: %u vs size: %u", | |
1418 | } |
|
1509 | (U32)output->pos, (U32)output->size); | |
1419 | DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); |
|
1510 | DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); | |
1420 |
|
1511 | |||
1421 | while (someMoreWork) { |
|
1512 | while (someMoreWork) { | |
@@ -1423,15 +1514,18 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1423 | { |
|
1514 | { | |
1424 | case zdss_init : |
|
1515 | case zdss_init : | |
1425 | DEBUGLOG(5, "stage zdss_init => transparent reset "); |
|
1516 | DEBUGLOG(5, "stage zdss_init => transparent reset "); | |
1426 | ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ |
|
1517 | zds->streamStage = zdss_loadHeader; | |
|
1518 | zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; | |||
|
1519 | zds->legacyVersion = 0; | |||
|
1520 | zds->hostageByte = 0; | |||
1427 | /* fall-through */ |
|
1521 | /* fall-through */ | |
1428 |
|
1522 | |||
1429 | case zdss_loadHeader : |
|
1523 | case zdss_loadHeader : | |
1430 | DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); |
|
1524 | DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); | |
1431 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) |
|
1525 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) | |
1432 | if (zds->legacyVersion) { |
|
1526 | if (zds->legacyVersion) { | |
1433 | /* legacy support is incompatible with static dctx */ |
|
1527 | RETURN_ERROR_IF(zds->staticSize, memory_allocation, | |
1434 | if (zds->staticSize) return ERROR(memory_allocation); |
|
1528 | "legacy support is incompatible with static dctx"); | |
1435 | { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); |
|
1529 | { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); | |
1436 | if (hint==0) zds->streamStage = zdss_init; |
|
1530 | if (hint==0) zds->streamStage = zdss_init; | |
1437 | return hint; |
|
1531 | return hint; | |
@@ -1443,12 +1537,13 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1443 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) |
|
1537 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) | |
1444 | U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); |
|
1538 | U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); | |
1445 | if (legacyVersion) { |
|
1539 | if (legacyVersion) { | |
1446 | const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL; |
|
1540 | ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); | |
1447 |
|
|
1541 | const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; | |
|
1542 | size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; | |||
1448 | DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); |
|
1543 | DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); | |
1449 | /* legacy support is incompatible with static dctx */ |
|
1544 | RETURN_ERROR_IF(zds->staticSize, memory_allocation, | |
1450 | if (zds->staticSize) return ERROR(memory_allocation); |
|
1545 | "legacy support is incompatible with static dctx"); | |
1451 |
|
|
1546 | FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, | |
1452 | zds->previousLegacyVersion, legacyVersion, |
|
1547 | zds->previousLegacyVersion, legacyVersion, | |
1453 | dict, dictSize)); |
|
1548 | dict, dictSize)); | |
1454 | zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; |
|
1549 | zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; | |
@@ -1482,7 +1577,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1482 | size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); |
|
1577 | size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); | |
1483 | if (cSize <= (size_t)(iend-istart)) { |
|
1578 | if (cSize <= (size_t)(iend-istart)) { | |
1484 | /* shortcut : using single-pass mode */ |
|
1579 | /* shortcut : using single-pass mode */ | |
1485 |
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds |
|
1580 | size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); | |
1486 | if (ZSTD_isError(decompressedSize)) return decompressedSize; |
|
1581 | if (ZSTD_isError(decompressedSize)) return decompressedSize; | |
1487 | DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") |
|
1582 | DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") | |
1488 | ip = istart + cSize; |
|
1583 | ip = istart + cSize; | |
@@ -1495,13 +1590,13 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1495 |
|
1590 | |||
1496 | /* Consume header (see ZSTDds_decodeFrameHeader) */ |
|
1591 | /* Consume header (see ZSTDds_decodeFrameHeader) */ | |
1497 | DEBUGLOG(4, "Consume header"); |
|
1592 | DEBUGLOG(4, "Consume header"); | |
1498 |
|
|
1593 | FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); | |
1499 |
|
1594 | |||
1500 | if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ |
|
1595 | if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ | |
1501 | zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); |
|
1596 | zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); | |
1502 | zds->stage = ZSTDds_skipFrame; |
|
1597 | zds->stage = ZSTDds_skipFrame; | |
1503 | } else { |
|
1598 | } else { | |
1504 |
|
|
1599 | FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); | |
1505 | zds->expected = ZSTD_blockHeaderSize; |
|
1600 | zds->expected = ZSTD_blockHeaderSize; | |
1506 | zds->stage = ZSTDds_decodeBlockHeader; |
|
1601 | zds->stage = ZSTDds_decodeBlockHeader; | |
1507 | } |
|
1602 | } | |
@@ -1511,7 +1606,8 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1511 | (U32)(zds->fParams.windowSize >>10), |
|
1606 | (U32)(zds->fParams.windowSize >>10), | |
1512 | (U32)(zds->maxWindowSize >> 10) ); |
|
1607 | (U32)(zds->maxWindowSize >> 10) ); | |
1513 | zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); |
|
1608 | zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); | |
1514 |
|
|
1609 | RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, | |
|
1610 | frameParameter_windowTooLarge); | |||
1515 |
|
1611 | |||
1516 | /* Adapt buffer sizes to frame header instructions */ |
|
1612 | /* Adapt buffer sizes to frame header instructions */ | |
1517 | { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); |
|
1613 | { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); | |
@@ -1525,14 +1621,15 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1525 | if (zds->staticSize) { /* static DCtx */ |
|
1621 | if (zds->staticSize) { /* static DCtx */ | |
1526 | DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); |
|
1622 | DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); | |
1527 | assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ |
|
1623 | assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ | |
1528 | if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx)) |
|
1624 | RETURN_ERROR_IF( | |
1529 | return ERROR(memory_allocation); |
|
1625 | bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), | |
|
1626 | memory_allocation); | |||
1530 | } else { |
|
1627 | } else { | |
1531 | ZSTD_free(zds->inBuff, zds->customMem); |
|
1628 | ZSTD_free(zds->inBuff, zds->customMem); | |
1532 | zds->inBuffSize = 0; |
|
1629 | zds->inBuffSize = 0; | |
1533 | zds->outBuffSize = 0; |
|
1630 | zds->outBuffSize = 0; | |
1534 | zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); |
|
1631 | zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); | |
1535 |
|
|
1632 | RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); | |
1536 | } |
|
1633 | } | |
1537 | zds->inBuffSize = neededInBuffSize; |
|
1634 | zds->inBuffSize = neededInBuffSize; | |
1538 | zds->outBuff = zds->inBuff + zds->inBuffSize; |
|
1635 | zds->outBuff = zds->inBuff + zds->inBuffSize; | |
@@ -1574,7 +1671,9 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1574 | if (isSkipFrame) { |
|
1671 | if (isSkipFrame) { | |
1575 | loadedSize = MIN(toLoad, (size_t)(iend-ip)); |
|
1672 | loadedSize = MIN(toLoad, (size_t)(iend-ip)); | |
1576 | } else { |
|
1673 | } else { | |
1577 | if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ |
|
1674 | RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, | |
|
1675 | corruption_detected, | |||
|
1676 | "should never happen"); | |||
1578 | loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); |
|
1677 | loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); | |
1579 | } |
|
1678 | } | |
1580 | ip += loadedSize; |
|
1679 | ip += loadedSize; | |
@@ -1615,7 +1714,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1615 |
|
1714 | |||
1616 | default: |
|
1715 | default: | |
1617 | assert(0); /* impossible */ |
|
1716 | assert(0); /* impossible */ | |
1618 |
|
|
1717 | RETURN_ERROR(GENERIC); /* some compiler require default to do something */ | |
1619 | } } |
|
1718 | } } | |
1620 |
|
1719 | |||
1621 | /* result */ |
|
1720 | /* result */ | |
@@ -1624,8 +1723,8 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
1624 | if ((ip==istart) && (op==ostart)) { /* no forward progress */ |
|
1723 | if ((ip==istart) && (op==ostart)) { /* no forward progress */ | |
1625 | zds->noForwardProgress ++; |
|
1724 | zds->noForwardProgress ++; | |
1626 | if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { |
|
1725 | if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { | |
1627 |
|
|
1726 | RETURN_ERROR_IF(op==oend, dstSize_tooSmall); | |
1628 |
|
|
1727 | RETURN_ERROR_IF(ip==iend, srcSize_wrong); | |
1629 | assert(0); |
|
1728 | assert(0); | |
1630 | } |
|
1729 | } | |
1631 | } else { |
|
1730 | } else { |
@@ -56,14 +56,15 b' static void ZSTD_copy4(void* dst, const ' | |||||
56 | size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, |
|
56 | size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, | |
57 | blockProperties_t* bpPtr) |
|
57 | blockProperties_t* bpPtr) | |
58 | { |
|
58 | { | |
59 |
|
|
59 | RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); | |
|
60 | ||||
60 | { U32 const cBlockHeader = MEM_readLE24(src); |
|
61 | { U32 const cBlockHeader = MEM_readLE24(src); | |
61 | U32 const cSize = cBlockHeader >> 3; |
|
62 | U32 const cSize = cBlockHeader >> 3; | |
62 | bpPtr->lastBlock = cBlockHeader & 1; |
|
63 | bpPtr->lastBlock = cBlockHeader & 1; | |
63 | bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); |
|
64 | bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); | |
64 | bpPtr->origSize = cSize; /* only useful for RLE */ |
|
65 | bpPtr->origSize = cSize; /* only useful for RLE */ | |
65 | if (bpPtr->blockType == bt_rle) return 1; |
|
66 | if (bpPtr->blockType == bt_rle) return 1; | |
66 |
|
|
67 | RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); | |
67 | return cSize; |
|
68 | return cSize; | |
68 | } |
|
69 | } | |
69 | } |
|
70 | } | |
@@ -78,7 +79,8 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||||
78 | size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, |
|
79 | size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, | |
79 | const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ |
|
80 | const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ | |
80 | { |
|
81 | { | |
81 | if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); |
|
82 | DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); | |
|
83 | RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); | |||
82 |
|
84 | |||
83 | { const BYTE* const istart = (const BYTE*) src; |
|
85 | { const BYTE* const istart = (const BYTE*) src; | |
84 | symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); |
|
86 | symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); | |
@@ -86,11 +88,12 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||||
86 | switch(litEncType) |
|
88 | switch(litEncType) | |
87 | { |
|
89 | { | |
88 | case set_repeat: |
|
90 | case set_repeat: | |
89 | if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); |
|
91 | DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); | |
|
92 | RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); | |||
90 | /* fall-through */ |
|
93 | /* fall-through */ | |
91 |
|
94 | |||
92 | case set_compressed: |
|
95 | case set_compressed: | |
93 |
|
|
96 | RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); | |
94 | { size_t lhSize, litSize, litCSize; |
|
97 | { size_t lhSize, litSize, litCSize; | |
95 | U32 singleStream=0; |
|
98 | U32 singleStream=0; | |
96 | U32 const lhlCode = (istart[0] >> 2) & 3; |
|
99 | U32 const lhlCode = (istart[0] >> 2) & 3; | |
@@ -115,11 +118,11 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||||
115 | /* 2 - 2 - 18 - 18 */ |
|
118 | /* 2 - 2 - 18 - 18 */ | |
116 | lhSize = 5; |
|
119 | lhSize = 5; | |
117 | litSize = (lhc >> 4) & 0x3FFFF; |
|
120 | litSize = (lhc >> 4) & 0x3FFFF; | |
118 | litCSize = (lhc >> 22) + (istart[4] << 10); |
|
121 | litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); | |
119 | break; |
|
122 | break; | |
120 | } |
|
123 | } | |
121 |
|
|
124 | RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); | |
122 |
|
|
125 | RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); | |
123 |
|
126 | |||
124 | /* prefetch huffman table if cold */ |
|
127 | /* prefetch huffman table if cold */ | |
125 | if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { |
|
128 | if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { | |
@@ -157,7 +160,7 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||||
157 | } |
|
160 | } | |
158 | } |
|
161 | } | |
159 |
|
162 | |||
160 |
|
|
163 | RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); | |
161 |
|
164 | |||
162 | dctx->litPtr = dctx->litBuffer; |
|
165 | dctx->litPtr = dctx->litBuffer; | |
163 | dctx->litSize = litSize; |
|
166 | dctx->litSize = litSize; | |
@@ -187,7 +190,7 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||||
187 | } |
|
190 | } | |
188 |
|
191 | |||
189 | if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ |
|
192 | if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ | |
190 |
|
|
193 | RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); | |
191 | memcpy(dctx->litBuffer, istart+lhSize, litSize); |
|
194 | memcpy(dctx->litBuffer, istart+lhSize, litSize); | |
192 | dctx->litPtr = dctx->litBuffer; |
|
195 | dctx->litPtr = dctx->litBuffer; | |
193 | dctx->litSize = litSize; |
|
196 | dctx->litSize = litSize; | |
@@ -216,17 +219,17 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||||
216 | case 3: |
|
219 | case 3: | |
217 | lhSize = 3; |
|
220 | lhSize = 3; | |
218 | litSize = MEM_readLE24(istart) >> 4; |
|
221 | litSize = MEM_readLE24(istart) >> 4; | |
219 |
|
|
222 | RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); | |
220 | break; |
|
223 | break; | |
221 | } |
|
224 | } | |
222 |
|
|
225 | RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); | |
223 | memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); |
|
226 | memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); | |
224 | dctx->litPtr = dctx->litBuffer; |
|
227 | dctx->litPtr = dctx->litBuffer; | |
225 | dctx->litSize = litSize; |
|
228 | dctx->litSize = litSize; | |
226 | return lhSize+1; |
|
229 | return lhSize+1; | |
227 | } |
|
230 | } | |
228 | default: |
|
231 | default: | |
229 |
|
|
232 | RETURN_ERROR(corruption_detected, "impossible"); | |
230 | } |
|
233 | } | |
231 | } |
|
234 | } | |
232 | } |
|
235 | } | |
@@ -390,7 +393,8 b' ZSTD_buildFSETable(ZSTD_seqSymbol* dt,' | |||||
390 | symbolNext[s] = 1; |
|
393 | symbolNext[s] = 1; | |
391 | } else { |
|
394 | } else { | |
392 | if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; |
|
395 | if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; | |
393 |
|
|
396 | assert(normalizedCounter[s]>=0); | |
|
397 | symbolNext[s] = (U16)normalizedCounter[s]; | |||
394 | } } } |
|
398 | } } } | |
395 | memcpy(dt, &DTableH, sizeof(DTableH)); |
|
399 | memcpy(dt, &DTableH, sizeof(DTableH)); | |
396 | } |
|
400 | } | |
@@ -436,8 +440,8 b' static size_t ZSTD_buildSeqTable(ZSTD_se' | |||||
436 | switch(type) |
|
440 | switch(type) | |
437 | { |
|
441 | { | |
438 | case set_rle : |
|
442 | case set_rle : | |
439 |
|
|
443 | RETURN_ERROR_IF(!srcSize, srcSize_wrong); | |
440 |
|
|
444 | RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected); | |
441 | { U32 const symbol = *(const BYTE*)src; |
|
445 | { U32 const symbol = *(const BYTE*)src; | |
442 | U32 const baseline = baseValue[symbol]; |
|
446 | U32 const baseline = baseValue[symbol]; | |
443 | U32 const nbBits = nbAdditionalBits[symbol]; |
|
447 | U32 const nbBits = nbAdditionalBits[symbol]; | |
@@ -449,7 +453,7 b' static size_t ZSTD_buildSeqTable(ZSTD_se' | |||||
449 | *DTablePtr = defaultTable; |
|
453 | *DTablePtr = defaultTable; | |
450 | return 0; |
|
454 | return 0; | |
451 | case set_repeat: |
|
455 | case set_repeat: | |
452 |
|
|
456 | RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); | |
453 | /* prefetch FSE table if used */ |
|
457 | /* prefetch FSE table if used */ | |
454 | if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { |
|
458 | if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { | |
455 | const void* const pStart = *DTablePtr; |
|
459 | const void* const pStart = *DTablePtr; | |
@@ -461,15 +465,15 b' static size_t ZSTD_buildSeqTable(ZSTD_se' | |||||
461 | { unsigned tableLog; |
|
465 | { unsigned tableLog; | |
462 | S16 norm[MaxSeq+1]; |
|
466 | S16 norm[MaxSeq+1]; | |
463 | size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); |
|
467 | size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); | |
464 |
|
|
468 | RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); | |
465 |
|
|
469 | RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); | |
466 | ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); |
|
470 | ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); | |
467 | *DTablePtr = DTableSpace; |
|
471 | *DTablePtr = DTableSpace; | |
468 | return headerSize; |
|
472 | return headerSize; | |
469 | } |
|
473 | } | |
470 | default : /* impossible */ |
|
474 | default : | |
471 | assert(0); |
|
475 | assert(0); | |
472 |
|
|
476 | RETURN_ERROR(GENERIC, "impossible"); | |
473 | } |
|
477 | } | |
474 | } |
|
478 | } | |
475 |
|
479 | |||
@@ -483,28 +487,28 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||||
483 | DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); |
|
487 | DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); | |
484 |
|
488 | |||
485 | /* check */ |
|
489 | /* check */ | |
486 |
|
|
490 | RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); | |
487 |
|
491 | |||
488 | /* SeqHead */ |
|
492 | /* SeqHead */ | |
489 | nbSeq = *ip++; |
|
493 | nbSeq = *ip++; | |
490 | if (!nbSeq) { |
|
494 | if (!nbSeq) { | |
491 | *nbSeqPtr=0; |
|
495 | *nbSeqPtr=0; | |
492 |
|
|
496 | RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); | |
493 | return 1; |
|
497 | return 1; | |
494 | } |
|
498 | } | |
495 | if (nbSeq > 0x7F) { |
|
499 | if (nbSeq > 0x7F) { | |
496 | if (nbSeq == 0xFF) { |
|
500 | if (nbSeq == 0xFF) { | |
497 |
|
|
501 | RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); | |
498 | nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; |
|
502 | nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; | |
499 | } else { |
|
503 | } else { | |
500 |
|
|
504 | RETURN_ERROR_IF(ip >= iend, srcSize_wrong); | |
501 | nbSeq = ((nbSeq-0x80)<<8) + *ip++; |
|
505 | nbSeq = ((nbSeq-0x80)<<8) + *ip++; | |
502 | } |
|
506 | } | |
503 | } |
|
507 | } | |
504 | *nbSeqPtr = nbSeq; |
|
508 | *nbSeqPtr = nbSeq; | |
505 |
|
509 | |||
506 | /* FSE table descriptors */ |
|
510 | /* FSE table descriptors */ | |
507 |
|
|
511 | RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ | |
508 | { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); |
|
512 | { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); | |
509 | symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); |
|
513 | symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); | |
510 | symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); |
|
514 | symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); | |
@@ -517,7 +521,7 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||||
517 | LL_base, LL_bits, |
|
521 | LL_base, LL_bits, | |
518 | LL_defaultDTable, dctx->fseEntropy, |
|
522 | LL_defaultDTable, dctx->fseEntropy, | |
519 | dctx->ddictIsCold, nbSeq); |
|
523 | dctx->ddictIsCold, nbSeq); | |
520 |
|
|
524 | RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); | |
521 | ip += llhSize; |
|
525 | ip += llhSize; | |
522 | } |
|
526 | } | |
523 |
|
527 | |||
@@ -527,7 +531,7 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||||
527 | OF_base, OF_bits, |
|
531 | OF_base, OF_bits, | |
528 | OF_defaultDTable, dctx->fseEntropy, |
|
532 | OF_defaultDTable, dctx->fseEntropy, | |
529 | dctx->ddictIsCold, nbSeq); |
|
533 | dctx->ddictIsCold, nbSeq); | |
530 |
|
|
534 | RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); | |
531 | ip += ofhSize; |
|
535 | ip += ofhSize; | |
532 | } |
|
536 | } | |
533 |
|
537 | |||
@@ -537,7 +541,7 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||||
537 | ML_base, ML_bits, |
|
541 | ML_base, ML_bits, | |
538 | ML_defaultDTable, dctx->fseEntropy, |
|
542 | ML_defaultDTable, dctx->fseEntropy, | |
539 | dctx->ddictIsCold, nbSeq); |
|
543 | dctx->ddictIsCold, nbSeq); | |
540 |
|
|
544 | RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); | |
541 | ip += mlhSize; |
|
545 | ip += mlhSize; | |
542 | } |
|
546 | } | |
543 | } |
|
547 | } | |
@@ -590,8 +594,8 b' size_t ZSTD_execSequenceLast7(BYTE* op,' | |||||
590 | const BYTE* match = oLitEnd - sequence.offset; |
|
594 | const BYTE* match = oLitEnd - sequence.offset; | |
591 |
|
595 | |||
592 | /* check */ |
|
596 | /* check */ | |
593 |
|
|
597 | RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); | |
594 |
|
|
598 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); | |
595 |
|
599 | |||
596 | /* copy literals */ |
|
600 | /* copy literals */ | |
597 | while (op < oLitEnd) *op++ = *(*litPtr)++; |
|
601 | while (op < oLitEnd) *op++ = *(*litPtr)++; | |
@@ -599,7 +603,7 b' size_t ZSTD_execSequenceLast7(BYTE* op,' | |||||
599 | /* copy Match */ |
|
603 | /* copy Match */ | |
600 | if (sequence.offset > (size_t)(oLitEnd - base)) { |
|
604 | if (sequence.offset > (size_t)(oLitEnd - base)) { | |
601 | /* offset beyond prefix */ |
|
605 | /* offset beyond prefix */ | |
602 |
|
|
606 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); | |
603 | match = dictEnd - (base-match); |
|
607 | match = dictEnd - (base-match); | |
604 | if (match + sequence.matchLength <= dictEnd) { |
|
608 | if (match + sequence.matchLength <= dictEnd) { | |
605 | memmove(oLitEnd, match, sequence.matchLength); |
|
609 | memmove(oLitEnd, match, sequence.matchLength); | |
@@ -631,22 +635,22 b' size_t ZSTD_execSequence(BYTE* op,' | |||||
631 | const BYTE* match = oLitEnd - sequence.offset; |
|
635 | const BYTE* match = oLitEnd - sequence.offset; | |
632 |
|
636 | |||
633 | /* check */ |
|
637 | /* check */ | |
634 |
|
|
638 | RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); | |
635 |
|
|
639 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); | |
636 | if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); |
|
640 | if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); | |
637 |
|
641 | |||
638 | /* copy Literals */ |
|
642 | /* copy Literals */ | |
639 | ZSTD_copy8(op, *litPtr); |
|
|||
640 | if (sequence.litLength > 8) |
|
643 | if (sequence.litLength > 8) | |
641 |
ZSTD_wildcopy(op |
|
644 | ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
645 | else | |||
|
646 | ZSTD_copy8(op, *litPtr); | |||
642 | op = oLitEnd; |
|
647 | op = oLitEnd; | |
643 | *litPtr = iLitEnd; /* update for next sequence */ |
|
648 | *litPtr = iLitEnd; /* update for next sequence */ | |
644 |
|
649 | |||
645 | /* copy Match */ |
|
650 | /* copy Match */ | |
646 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { |
|
651 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { | |
647 | /* offset beyond prefix -> go into extDict */ |
|
652 | /* offset beyond prefix -> go into extDict */ | |
648 |
|
|
653 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); | |
649 | return ERROR(corruption_detected); |
|
|||
650 | match = dictEnd + (match - prefixStart); |
|
654 | match = dictEnd + (match - prefixStart); | |
651 | if (match + sequence.matchLength <= dictEnd) { |
|
655 | if (match + sequence.matchLength <= dictEnd) { | |
652 | memmove(oLitEnd, match, sequence.matchLength); |
|
656 | memmove(oLitEnd, match, sequence.matchLength); | |
@@ -686,13 +690,13 b' size_t ZSTD_execSequence(BYTE* op,' | |||||
686 |
|
690 | |||
687 | if (oMatchEnd > oend-(16-MINMATCH)) { |
|
691 | if (oMatchEnd > oend-(16-MINMATCH)) { | |
688 | if (op < oend_w) { |
|
692 | if (op < oend_w) { | |
689 | ZSTD_wildcopy(op, match, oend_w - op); |
|
693 | ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); | |
690 | match += oend_w - op; |
|
694 | match += oend_w - op; | |
691 | op = oend_w; |
|
695 | op = oend_w; | |
692 | } |
|
696 | } | |
693 | while (op < oMatchEnd) *op++ = *match++; |
|
697 | while (op < oMatchEnd) *op++ = *match++; | |
694 | } else { |
|
698 | } else { | |
695 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ |
|
699 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ | |
696 | } |
|
700 | } | |
697 | return sequenceLength; |
|
701 | return sequenceLength; | |
698 | } |
|
702 | } | |
@@ -712,21 +716,23 b' size_t ZSTD_execSequenceLong(BYTE* op,' | |||||
712 | const BYTE* match = sequence.match; |
|
716 | const BYTE* match = sequence.match; | |
713 |
|
717 | |||
714 | /* check */ |
|
718 | /* check */ | |
715 |
|
|
719 | RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); | |
716 |
|
|
720 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); | |
717 | if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); |
|
721 | if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); | |
718 |
|
722 | |||
719 | /* copy Literals */ |
|
723 | /* copy Literals */ | |
720 | ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ |
|
|||
721 | if (sequence.litLength > 8) |
|
724 | if (sequence.litLength > 8) | |
722 |
ZSTD_wildcopy(op |
|
725 | ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
726 | else | |||
|
727 | ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ | |||
|
728 | ||||
723 | op = oLitEnd; |
|
729 | op = oLitEnd; | |
724 | *litPtr = iLitEnd; /* update for next sequence */ |
|
730 | *litPtr = iLitEnd; /* update for next sequence */ | |
725 |
|
731 | |||
726 | /* copy Match */ |
|
732 | /* copy Match */ | |
727 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { |
|
733 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { | |
728 | /* offset beyond prefix */ |
|
734 | /* offset beyond prefix */ | |
729 |
|
|
735 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); | |
730 | if (match + sequence.matchLength <= dictEnd) { |
|
736 | if (match + sequence.matchLength <= dictEnd) { | |
731 | memmove(oLitEnd, match, sequence.matchLength); |
|
737 | memmove(oLitEnd, match, sequence.matchLength); | |
732 | return sequenceLength; |
|
738 | return sequenceLength; | |
@@ -766,13 +772,13 b' size_t ZSTD_execSequenceLong(BYTE* op,' | |||||
766 |
|
772 | |||
767 | if (oMatchEnd > oend-(16-MINMATCH)) { |
|
773 | if (oMatchEnd > oend-(16-MINMATCH)) { | |
768 | if (op < oend_w) { |
|
774 | if (op < oend_w) { | |
769 | ZSTD_wildcopy(op, match, oend_w - op); |
|
775 | ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); | |
770 | match += oend_w - op; |
|
776 | match += oend_w - op; | |
771 | op = oend_w; |
|
777 | op = oend_w; | |
772 | } |
|
778 | } | |
773 | while (op < oMatchEnd) *op++ = *match++; |
|
779 | while (op < oMatchEnd) *op++ = *match++; | |
774 | } else { |
|
780 | } else { | |
775 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ |
|
781 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ | |
776 | } |
|
782 | } | |
777 | return sequenceLength; |
|
783 | return sequenceLength; | |
778 | } |
|
784 | } | |
@@ -801,7 +807,7 b' ZSTD_updateFseState(ZSTD_fseState* DStat' | |||||
801 | /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum |
|
807 | /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum | |
802 | * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) |
|
808 | * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) | |
803 | * bits before reloading. This value is the maximum number of bytes we read |
|
809 | * bits before reloading. This value is the maximum number of bytes we read | |
804 | * after reloading when we are decoding long offets. |
|
810 | * after reloading when we are decoding long offsets. | |
805 | */ |
|
811 | */ | |
806 | #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ |
|
812 | #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ | |
807 | (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ |
|
813 | (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ | |
@@ -889,6 +895,7 b' ZSTD_decodeSequence(seqState_t* seqState' | |||||
889 | } |
|
895 | } | |
890 |
|
896 | |||
891 | FORCE_INLINE_TEMPLATE size_t |
|
897 | FORCE_INLINE_TEMPLATE size_t | |
|
898 | DONT_VECTORIZE | |||
892 | ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, |
|
899 | ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, | |
893 | void* dst, size_t maxDstSize, |
|
900 | void* dst, size_t maxDstSize, | |
894 | const void* seqStart, size_t seqSize, int nbSeq, |
|
901 | const void* seqStart, size_t seqSize, int nbSeq, | |
@@ -911,11 +918,18 b' ZSTD_decompressSequences_body( ZSTD_DCtx' | |||||
911 | seqState_t seqState; |
|
918 | seqState_t seqState; | |
912 | dctx->fseEntropy = 1; |
|
919 | dctx->fseEntropy = 1; | |
913 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } |
|
920 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } | |
914 | CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); |
|
921 | RETURN_ERROR_IF( | |
|
922 | ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), | |||
|
923 | corruption_detected); | |||
915 | ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); |
|
924 | ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); | |
916 | ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); |
|
925 | ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); | |
917 | ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); |
|
926 | ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); | |
918 |
|
927 | |||
|
928 | ZSTD_STATIC_ASSERT( | |||
|
929 | BIT_DStream_unfinished < BIT_DStream_completed && | |||
|
930 | BIT_DStream_endOfBuffer < BIT_DStream_completed && | |||
|
931 | BIT_DStream_completed < BIT_DStream_overflow); | |||
|
932 | ||||
919 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { |
|
933 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { | |
920 | nbSeq--; |
|
934 | nbSeq--; | |
921 | { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); |
|
935 | { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); | |
@@ -927,14 +941,15 b' ZSTD_decompressSequences_body( ZSTD_DCtx' | |||||
927 |
|
941 | |||
928 | /* check if reached exact end */ |
|
942 | /* check if reached exact end */ | |
929 | DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); |
|
943 | DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); | |
930 |
|
|
944 | RETURN_ERROR_IF(nbSeq, corruption_detected); | |
|
945 | RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); | |||
931 | /* save reps for next block */ |
|
946 | /* save reps for next block */ | |
932 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } |
|
947 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } | |
933 | } |
|
948 | } | |
934 |
|
949 | |||
935 | /* last literal segment */ |
|
950 | /* last literal segment */ | |
936 | { size_t const lastLLSize = litEnd - litPtr; |
|
951 | { size_t const lastLLSize = litEnd - litPtr; | |
937 |
|
|
952 | RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); | |
938 | memcpy(op, litPtr, lastLLSize); |
|
953 | memcpy(op, litPtr, lastLLSize); | |
939 | op += lastLLSize; |
|
954 | op += lastLLSize; | |
940 | } |
|
955 | } | |
@@ -1066,7 +1081,9 b' ZSTD_decompressSequencesLong_body(' | |||||
1066 | seqState.pos = (size_t)(op-prefixStart); |
|
1081 | seqState.pos = (size_t)(op-prefixStart); | |
1067 | seqState.dictEnd = dictEnd; |
|
1082 | seqState.dictEnd = dictEnd; | |
1068 | assert(iend >= ip); |
|
1083 | assert(iend >= ip); | |
1069 | CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); |
|
1084 | RETURN_ERROR_IF( | |
|
1085 | ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), | |||
|
1086 | corruption_detected); | |||
1070 | ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); |
|
1087 | ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); | |
1071 | ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); |
|
1088 | ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); | |
1072 | ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); |
|
1089 | ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); | |
@@ -1076,7 +1093,7 b' ZSTD_decompressSequencesLong_body(' | |||||
1076 | sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset); |
|
1093 | sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset); | |
1077 | PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ |
|
1094 | PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ | |
1078 | } |
|
1095 | } | |
1079 |
|
|
1096 | RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected); | |
1080 |
|
1097 | |||
1081 | /* decode and decompress */ |
|
1098 | /* decode and decompress */ | |
1082 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { |
|
1099 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { | |
@@ -1087,7 +1104,7 b' ZSTD_decompressSequencesLong_body(' | |||||
1087 | sequences[seqNb & STORED_SEQS_MASK] = sequence; |
|
1104 | sequences[seqNb & STORED_SEQS_MASK] = sequence; | |
1088 | op += oneSeqSize; |
|
1105 | op += oneSeqSize; | |
1089 | } |
|
1106 | } | |
1090 |
|
|
1107 | RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected); | |
1091 |
|
1108 | |||
1092 | /* finish queue */ |
|
1109 | /* finish queue */ | |
1093 | seqNb -= seqAdvance; |
|
1110 | seqNb -= seqAdvance; | |
@@ -1103,7 +1120,7 b' ZSTD_decompressSequencesLong_body(' | |||||
1103 |
|
1120 | |||
1104 | /* last literal segment */ |
|
1121 | /* last literal segment */ | |
1105 | { size_t const lastLLSize = litEnd - litPtr; |
|
1122 | { size_t const lastLLSize = litEnd - litPtr; | |
1106 |
|
|
1123 | RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); | |
1107 | memcpy(op, litPtr, lastLLSize); |
|
1124 | memcpy(op, litPtr, lastLLSize); | |
1108 | op += lastLLSize; |
|
1125 | op += lastLLSize; | |
1109 | } |
|
1126 | } | |
@@ -1127,6 +1144,7 b' ZSTD_decompressSequencesLong_default(ZST' | |||||
1127 |
|
1144 | |||
1128 | #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG |
|
1145 | #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG | |
1129 | static TARGET_ATTRIBUTE("bmi2") size_t |
|
1146 | static TARGET_ATTRIBUTE("bmi2") size_t | |
|
1147 | DONT_VECTORIZE | |||
1130 | ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, |
|
1148 | ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, | |
1131 | void* dst, size_t maxDstSize, |
|
1149 | void* dst, size_t maxDstSize, | |
1132 | const void* seqStart, size_t seqSize, int nbSeq, |
|
1150 | const void* seqStart, size_t seqSize, int nbSeq, | |
@@ -1176,7 +1194,7 b' ZSTD_decompressSequences(ZSTD_DCtx* dctx' | |||||
1176 | /* ZSTD_decompressSequencesLong() : |
|
1194 | /* ZSTD_decompressSequencesLong() : | |
1177 | * decompression function triggered when a minimum share of offsets is considered "long", |
|
1195 | * decompression function triggered when a minimum share of offsets is considered "long", | |
1178 | * aka out of cache. |
|
1196 | * aka out of cache. | |
1179 |
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mea |
|
1197 | * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". | |
1180 | * This function will try to mitigate main memory latency through the use of prefetching */ |
|
1198 | * This function will try to mitigate main memory latency through the use of prefetching */ | |
1181 | static size_t |
|
1199 | static size_t | |
1182 | ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, |
|
1200 | ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, | |
@@ -1240,7 +1258,7 b' ZSTD_decompressBlock_internal(ZSTD_DCtx*' | |||||
1240 | ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); |
|
1258 | ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); | |
1241 | DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); |
|
1259 | DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); | |
1242 |
|
1260 | |||
1243 |
|
|
1261 | RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); | |
1244 |
|
1262 | |||
1245 | /* Decode literals section */ |
|
1263 | /* Decode literals section */ | |
1246 | { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); |
|
1264 | { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); |
@@ -89,6 +89,12 b' typedef enum { ZSTDds_getFrameHeaderSize' | |||||
89 | typedef enum { zdss_init=0, zdss_loadHeader, |
|
89 | typedef enum { zdss_init=0, zdss_loadHeader, | |
90 | zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; |
|
90 | zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; | |
91 |
|
91 | |||
|
92 | typedef enum { | |||
|
93 | ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ | |||
|
94 | ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ | |||
|
95 | ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ | |||
|
96 | } ZSTD_dictUses_e; | |||
|
97 | ||||
92 | struct ZSTD_DCtx_s |
|
98 | struct ZSTD_DCtx_s | |
93 | { |
|
99 | { | |
94 | const ZSTD_seqSymbol* LLTptr; |
|
100 | const ZSTD_seqSymbol* LLTptr; | |
@@ -123,6 +129,7 b' struct ZSTD_DCtx_s' | |||||
123 | const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ |
|
129 | const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ | |
124 | U32 dictID; |
|
130 | U32 dictID; | |
125 | int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ |
|
131 | int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ | |
|
132 | ZSTD_dictUses_e dictUses; | |||
126 |
|
133 | |||
127 | /* streaming */ |
|
134 | /* streaming */ | |
128 | ZSTD_dStreamStage streamStage; |
|
135 | ZSTD_dStreamStage streamStage; |
@@ -391,7 +391,7 b' static void COVER_group(COVER_ctx_t *ctx' | |||||
391 | * |
|
391 | * | |
392 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) |
|
392 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) | |
393 | * |
|
393 | * | |
394 | * Once the dmer d is in the dictionay we set F(d) = 0. |
|
394 | * Once the dmer d is in the dictionary we set F(d) = 0. | |
395 | */ |
|
395 | */ | |
396 | static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, |
|
396 | static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, | |
397 | COVER_map_t *activeDmers, U32 begin, |
|
397 | COVER_map_t *activeDmers, U32 begin, | |
@@ -435,7 +435,7 b' static COVER_segment_t COVER_selectSegme' | |||||
435 | U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); |
|
435 | U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); | |
436 | activeSegment.begin += 1; |
|
436 | activeSegment.begin += 1; | |
437 | *delDmerOcc -= 1; |
|
437 | *delDmerOcc -= 1; | |
438 | /* If this is the last occurence of the dmer, subtract its score */ |
|
438 | /* If this is the last occurrence of the dmer, subtract its score */ | |
439 | if (*delDmerOcc == 0) { |
|
439 | if (*delDmerOcc == 0) { | |
440 | COVER_map_remove(activeDmers, delDmer); |
|
440 | COVER_map_remove(activeDmers, delDmer); | |
441 | activeSegment.score -= freqs[delDmer]; |
|
441 | activeSegment.score -= freqs[delDmer]; | |
@@ -526,10 +526,10 b' static void COVER_ctx_destroy(COVER_ctx_' | |||||
526 | * Prepare a context for dictionary building. |
|
526 | * Prepare a context for dictionary building. | |
527 | * The context is only dependent on the parameter `d` and can used multiple |
|
527 | * The context is only dependent on the parameter `d` and can used multiple | |
528 | * times. |
|
528 | * times. | |
529 |
* Returns |
|
529 | * Returns 0 on success or error code on error. | |
530 | * The context must be destroyed with `COVER_ctx_destroy()`. |
|
530 | * The context must be destroyed with `COVER_ctx_destroy()`. | |
531 | */ |
|
531 | */ | |
532 |
static |
|
532 | static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, | |
533 | const size_t *samplesSizes, unsigned nbSamples, |
|
533 | const size_t *samplesSizes, unsigned nbSamples, | |
534 | unsigned d, double splitPoint) { |
|
534 | unsigned d, double splitPoint) { | |
535 | const BYTE *const samples = (const BYTE *)samplesBuffer; |
|
535 | const BYTE *const samples = (const BYTE *)samplesBuffer; | |
@@ -544,17 +544,17 b' static int COVER_ctx_init(COVER_ctx_t *c' | |||||
544 | totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { |
|
544 | totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { | |
545 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", |
|
545 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", | |
546 | (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); |
|
546 | (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); | |
547 | return 0; |
|
547 | return ERROR(srcSize_wrong); | |
548 | } |
|
548 | } | |
549 | /* Check if there are at least 5 training samples */ |
|
549 | /* Check if there are at least 5 training samples */ | |
550 | if (nbTrainSamples < 5) { |
|
550 | if (nbTrainSamples < 5) { | |
551 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); |
|
551 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); | |
552 | return 0; |
|
552 | return ERROR(srcSize_wrong); | |
553 | } |
|
553 | } | |
554 | /* Check if there's testing sample */ |
|
554 | /* Check if there's testing sample */ | |
555 | if (nbTestSamples < 1) { |
|
555 | if (nbTestSamples < 1) { | |
556 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); |
|
556 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); | |
557 | return 0; |
|
557 | return ERROR(srcSize_wrong); | |
558 | } |
|
558 | } | |
559 | /* Zero the context */ |
|
559 | /* Zero the context */ | |
560 | memset(ctx, 0, sizeof(*ctx)); |
|
560 | memset(ctx, 0, sizeof(*ctx)); | |
@@ -577,7 +577,7 b' static int COVER_ctx_init(COVER_ctx_t *c' | |||||
577 | if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { |
|
577 | if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { | |
578 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); |
|
578 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); | |
579 | COVER_ctx_destroy(ctx); |
|
579 | COVER_ctx_destroy(ctx); | |
580 | return 0; |
|
580 | return ERROR(memory_allocation); | |
581 | } |
|
581 | } | |
582 | ctx->freqs = NULL; |
|
582 | ctx->freqs = NULL; | |
583 | ctx->d = d; |
|
583 | ctx->d = d; | |
@@ -624,7 +624,40 b' static int COVER_ctx_init(COVER_ctx_t *c' | |||||
624 | (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); |
|
624 | (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); | |
625 | ctx->freqs = ctx->suffix; |
|
625 | ctx->freqs = ctx->suffix; | |
626 | ctx->suffix = NULL; |
|
626 | ctx->suffix = NULL; | |
627 |
return |
|
627 | return 0; | |
|
628 | } | |||
|
629 | ||||
|
630 | void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) | |||
|
631 | { | |||
|
632 | const double ratio = (double)nbDmers / maxDictSize; | |||
|
633 | if (ratio >= 10) { | |||
|
634 | return; | |||
|
635 | } | |||
|
636 | LOCALDISPLAYLEVEL(displayLevel, 1, | |||
|
637 | "WARNING: The maximum dictionary size %u is too large " | |||
|
638 | "compared to the source size %u! " | |||
|
639 | "size(source)/size(dictionary) = %f, but it should be >= " | |||
|
640 | "10! This may lead to a subpar dictionary! We recommend " | |||
|
641 | "training on sources at least 10x, and up to 100x the " | |||
|
642 | "size of the dictionary!\n", (U32)maxDictSize, | |||
|
643 | (U32)nbDmers, ratio); | |||
|
644 | } | |||
|
645 | ||||
|
646 | COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, | |||
|
647 | U32 nbDmers, U32 k, U32 passes) | |||
|
648 | { | |||
|
649 | const U32 minEpochSize = k * 10; | |||
|
650 | COVER_epoch_info_t epochs; | |||
|
651 | epochs.num = MAX(1, maxDictSize / k / passes); | |||
|
652 | epochs.size = nbDmers / epochs.num; | |||
|
653 | if (epochs.size >= minEpochSize) { | |||
|
654 | assert(epochs.size * epochs.num <= nbDmers); | |||
|
655 | return epochs; | |||
|
656 | } | |||
|
657 | epochs.size = MIN(minEpochSize, nbDmers); | |||
|
658 | epochs.num = nbDmers / epochs.size; | |||
|
659 | assert(epochs.size * epochs.num <= nbDmers); | |||
|
660 | return epochs; | |||
628 | } |
|
661 | } | |
629 |
|
662 | |||
630 | /** |
|
663 | /** | |
@@ -636,28 +669,34 b' static size_t COVER_buildDictionary(cons' | |||||
636 | ZDICT_cover_params_t parameters) { |
|
669 | ZDICT_cover_params_t parameters) { | |
637 | BYTE *const dict = (BYTE *)dictBuffer; |
|
670 | BYTE *const dict = (BYTE *)dictBuffer; | |
638 | size_t tail = dictBufferCapacity; |
|
671 | size_t tail = dictBufferCapacity; | |
639 |
/* Divide the data |
|
672 | /* Divide the data into epochs. We will select one segment from each epoch. */ | |
640 | * We will select at least one segment from each epoch. |
|
673 | const COVER_epoch_info_t epochs = COVER_computeEpochs( | |
641 | */ |
|
674 | (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); | |
642 | const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4)); |
|
675 | const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); | |
643 | const unsigned epochSize = (U32)(ctx->suffixSize / epochs); |
|
676 | size_t zeroScoreRun = 0; | |
644 | size_t epoch; |
|
677 | size_t epoch; | |
645 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", |
|
678 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", | |
646 |
epochs, |
|
679 | (U32)epochs.num, (U32)epochs.size); | |
647 | /* Loop through the epochs until there are no more segments or the dictionary |
|
680 | /* Loop through the epochs until there are no more segments or the dictionary | |
648 | * is full. |
|
681 | * is full. | |
649 | */ |
|
682 | */ | |
650 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { |
|
683 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { | |
651 |
const U32 epochBegin = (U32)(epoch * epoch |
|
684 | const U32 epochBegin = (U32)(epoch * epochs.size); | |
652 |
const U32 epochEnd = epochBegin + epoch |
|
685 | const U32 epochEnd = epochBegin + epochs.size; | |
653 | size_t segmentSize; |
|
686 | size_t segmentSize; | |
654 | /* Select a segment */ |
|
687 | /* Select a segment */ | |
655 | COVER_segment_t segment = COVER_selectSegment( |
|
688 | COVER_segment_t segment = COVER_selectSegment( | |
656 | ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); |
|
689 | ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); | |
657 |
/* If the segment covers no dmers, then we are out of content |
|
690 | /* If the segment covers no dmers, then we are out of content. | |
|
691 | * There may be new content in other epochs, for continue for some time. | |||
|
692 | */ | |||
658 | if (segment.score == 0) { |
|
693 | if (segment.score == 0) { | |
659 | break; |
|
694 | if (++zeroScoreRun >= maxZeroScoreRun) { | |
|
695 | break; | |||
|
696 | } | |||
|
697 | continue; | |||
660 | } |
|
698 | } | |
|
699 | zeroScoreRun = 0; | |||
661 | /* Trim the segment if necessary and if it is too small then we are done */ |
|
700 | /* Trim the segment if necessary and if it is too small then we are done */ | |
662 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); |
|
701 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); | |
663 | if (segmentSize < parameters.d) { |
|
702 | if (segmentSize < parameters.d) { | |
@@ -690,11 +729,11 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||||
690 | /* Checks */ |
|
729 | /* Checks */ | |
691 | if (!COVER_checkParameters(parameters, dictBufferCapacity)) { |
|
730 | if (!COVER_checkParameters(parameters, dictBufferCapacity)) { | |
692 | DISPLAYLEVEL(1, "Cover parameters incorrect\n"); |
|
731 | DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | |
693 | return ERROR(GENERIC); |
|
732 | return ERROR(parameter_outOfBound); | |
694 | } |
|
733 | } | |
695 | if (nbSamples == 0) { |
|
734 | if (nbSamples == 0) { | |
696 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); |
|
735 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | |
697 |
return ERROR( |
|
736 | return ERROR(srcSize_wrong); | |
698 | } |
|
737 | } | |
699 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
738 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | |
700 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
|
739 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | |
@@ -702,14 +741,18 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||||
702 | return ERROR(dstSize_tooSmall); |
|
741 | return ERROR(dstSize_tooSmall); | |
703 | } |
|
742 | } | |
704 | /* Initialize context and activeDmers */ |
|
743 | /* Initialize context and activeDmers */ | |
705 | if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, |
|
744 | { | |
706 | parameters.d, parameters.splitPoint)) { |
|
745 | size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |
707 | return ERROR(GENERIC); |
|
746 | parameters.d, parameters.splitPoint); | |
|
747 | if (ZSTD_isError(initVal)) { | |||
|
748 | return initVal; | |||
|
749 | } | |||
708 | } |
|
750 | } | |
|
751 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); | |||
709 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { |
|
752 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | |
710 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); |
|
753 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | |
711 | COVER_ctx_destroy(&ctx); |
|
754 | COVER_ctx_destroy(&ctx); | |
712 |
return ERROR( |
|
755 | return ERROR(memory_allocation); | |
713 | } |
|
756 | } | |
714 |
|
757 | |||
715 | DISPLAYLEVEL(2, "Building dictionary\n"); |
|
758 | DISPLAYLEVEL(2, "Building dictionary\n"); | |
@@ -770,7 +813,7 b' size_t COVER_checkTotalCompressedSize(co' | |||||
770 | cctx, dst, dstCapacity, samples + offsets[i], |
|
813 | cctx, dst, dstCapacity, samples + offsets[i], | |
771 | samplesSizes[i], cdict); |
|
814 | samplesSizes[i], cdict); | |
772 | if (ZSTD_isError(size)) { |
|
815 | if (ZSTD_isError(size)) { | |
773 |
totalCompressedSize = |
|
816 | totalCompressedSize = size; | |
774 | goto _compressCleanup; |
|
817 | goto _compressCleanup; | |
775 | } |
|
818 | } | |
776 | totalCompressedSize += size; |
|
819 | totalCompressedSize += size; | |
@@ -846,9 +889,11 b' void COVER_best_start(COVER_best_t *best' | |||||
846 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. |
|
889 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | |
847 | * If this dictionary is the best so far save it and its parameters. |
|
890 | * If this dictionary is the best so far save it and its parameters. | |
848 | */ |
|
891 | */ | |
849 |
void COVER_best_finish(COVER_best_t *best, |
|
892 | void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, | |
850 | ZDICT_cover_params_t parameters, void *dict, |
|
893 | COVER_dictSelection_t selection) { | |
851 | size_t dictSize) { |
|
894 | void* dict = selection.dictContent; | |
|
895 | size_t compressedSize = selection.totalCompressedSize; | |||
|
896 | size_t dictSize = selection.dictSize; | |||
852 | if (!best) { |
|
897 | if (!best) { | |
853 | return; |
|
898 | return; | |
854 | } |
|
899 | } | |
@@ -874,10 +919,12 b' void COVER_best_finish(COVER_best_t *bes' | |||||
874 | } |
|
919 | } | |
875 | } |
|
920 | } | |
876 | /* Save the dictionary, parameters, and size */ |
|
921 | /* Save the dictionary, parameters, and size */ | |
877 | memcpy(best->dict, dict, dictSize); |
|
922 | if (dict) { | |
878 |
best->dict |
|
923 | memcpy(best->dict, dict, dictSize); | |
879 | best->parameters = parameters; |
|
924 | best->dictSize = dictSize; | |
880 | best->compressedSize = compressedSize; |
|
925 | best->parameters = parameters; | |
|
926 | best->compressedSize = compressedSize; | |||
|
927 | } | |||
881 | } |
|
928 | } | |
882 | if (liveJobs == 0) { |
|
929 | if (liveJobs == 0) { | |
883 | ZSTD_pthread_cond_broadcast(&best->cond); |
|
930 | ZSTD_pthread_cond_broadcast(&best->cond); | |
@@ -886,6 +933,111 b' void COVER_best_finish(COVER_best_t *bes' | |||||
886 | } |
|
933 | } | |
887 | } |
|
934 | } | |
888 |
|
935 | |||
|
936 | COVER_dictSelection_t COVER_dictSelectionError(size_t error) { | |||
|
937 | COVER_dictSelection_t selection = { NULL, 0, error }; | |||
|
938 | return selection; | |||
|
939 | } | |||
|
940 | ||||
|
941 | unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { | |||
|
942 | return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); | |||
|
943 | } | |||
|
944 | ||||
|
945 | void COVER_dictSelectionFree(COVER_dictSelection_t selection){ | |||
|
946 | free(selection.dictContent); | |||
|
947 | } | |||
|
948 | ||||
|
949 | COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, | |||
|
950 | size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, | |||
|
951 | size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { | |||
|
952 | ||||
|
953 | size_t largestDict = 0; | |||
|
954 | size_t largestCompressed = 0; | |||
|
955 | BYTE* customDictContentEnd = customDictContent + dictContentSize; | |||
|
956 | ||||
|
957 | BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize); | |||
|
958 | BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize); | |||
|
959 | double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; | |||
|
960 | ||||
|
961 | if (!largestDictbuffer || !candidateDictBuffer) { | |||
|
962 | free(largestDictbuffer); | |||
|
963 | free(candidateDictBuffer); | |||
|
964 | return COVER_dictSelectionError(dictContentSize); | |||
|
965 | } | |||
|
966 | ||||
|
967 | /* Initial dictionary size and compressed size */ | |||
|
968 | memcpy(largestDictbuffer, customDictContent, dictContentSize); | |||
|
969 | dictContentSize = ZDICT_finalizeDictionary( | |||
|
970 | largestDictbuffer, dictContentSize, customDictContent, dictContentSize, | |||
|
971 | samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); | |||
|
972 | ||||
|
973 | if (ZDICT_isError(dictContentSize)) { | |||
|
974 | free(largestDictbuffer); | |||
|
975 | free(candidateDictBuffer); | |||
|
976 | return COVER_dictSelectionError(dictContentSize); | |||
|
977 | } | |||
|
978 | ||||
|
979 | totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, | |||
|
980 | samplesBuffer, offsets, | |||
|
981 | nbCheckSamples, nbSamples, | |||
|
982 | largestDictbuffer, dictContentSize); | |||
|
983 | ||||
|
984 | if (ZSTD_isError(totalCompressedSize)) { | |||
|
985 | free(largestDictbuffer); | |||
|
986 | free(candidateDictBuffer); | |||
|
987 | return COVER_dictSelectionError(totalCompressedSize); | |||
|
988 | } | |||
|
989 | ||||
|
990 | if (params.shrinkDict == 0) { | |||
|
991 | COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; | |||
|
992 | free(candidateDictBuffer); | |||
|
993 | return selection; | |||
|
994 | } | |||
|
995 | ||||
|
996 | largestDict = dictContentSize; | |||
|
997 | largestCompressed = totalCompressedSize; | |||
|
998 | dictContentSize = ZDICT_DICTSIZE_MIN; | |||
|
999 | ||||
|
1000 | /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ | |||
|
1001 | while (dictContentSize < largestDict) { | |||
|
1002 | memcpy(candidateDictBuffer, largestDictbuffer, largestDict); | |||
|
1003 | dictContentSize = ZDICT_finalizeDictionary( | |||
|
1004 | candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize, | |||
|
1005 | samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); | |||
|
1006 | ||||
|
1007 | if (ZDICT_isError(dictContentSize)) { | |||
|
1008 | free(largestDictbuffer); | |||
|
1009 | free(candidateDictBuffer); | |||
|
1010 | return COVER_dictSelectionError(dictContentSize); | |||
|
1011 | ||||
|
1012 | } | |||
|
1013 | ||||
|
1014 | totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, | |||
|
1015 | samplesBuffer, offsets, | |||
|
1016 | nbCheckSamples, nbSamples, | |||
|
1017 | candidateDictBuffer, dictContentSize); | |||
|
1018 | ||||
|
1019 | if (ZSTD_isError(totalCompressedSize)) { | |||
|
1020 | free(largestDictbuffer); | |||
|
1021 | free(candidateDictBuffer); | |||
|
1022 | return COVER_dictSelectionError(totalCompressedSize); | |||
|
1023 | } | |||
|
1024 | ||||
|
1025 | if (totalCompressedSize <= largestCompressed * regressionTolerance) { | |||
|
1026 | COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; | |||
|
1027 | free(largestDictbuffer); | |||
|
1028 | return selection; | |||
|
1029 | } | |||
|
1030 | dictContentSize *= 2; | |||
|
1031 | } | |||
|
1032 | dictContentSize = largestDict; | |||
|
1033 | totalCompressedSize = largestCompressed; | |||
|
1034 | { | |||
|
1035 | COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; | |||
|
1036 | free(candidateDictBuffer); | |||
|
1037 | return selection; | |||
|
1038 | } | |||
|
1039 | } | |||
|
1040 | ||||
889 | /** |
|
1041 | /** | |
890 | * Parameters for COVER_tryParameters(). |
|
1042 | * Parameters for COVER_tryParameters(). | |
891 | */ |
|
1043 | */ | |
@@ -911,6 +1063,7 b' static void COVER_tryParameters(void *op' | |||||
911 | /* Allocate space for hash table, dict, and freqs */ |
|
1063 | /* Allocate space for hash table, dict, and freqs */ | |
912 | COVER_map_t activeDmers; |
|
1064 | COVER_map_t activeDmers; | |
913 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); |
|
1065 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); | |
|
1066 | COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); | |||
914 | U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); |
|
1067 | U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | |
915 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { |
|
1068 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | |
916 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); |
|
1069 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | |
@@ -926,29 +1079,21 b' static void COVER_tryParameters(void *op' | |||||
926 | { |
|
1079 | { | |
927 | const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, |
|
1080 | const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, | |
928 | dictBufferCapacity, parameters); |
|
1081 | dictBufferCapacity, parameters); | |
929 | dictBufferCapacity = ZDICT_finalizeDictionary( |
|
1082 | selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, | |
930 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, |
|
1083 | ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, | |
931 | ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, |
|
1084 | totalCompressedSize); | |
932 | parameters.zParams); |
|
1085 | ||
933 | if (ZDICT_isError(dictBufferCapacity)) { |
|
1086 | if (COVER_dictSelectionIsError(selection)) { | |
934 |
DISPLAYLEVEL(1, "Failed to |
|
1087 | DISPLAYLEVEL(1, "Failed to select dictionary\n"); | |
935 | goto _cleanup; |
|
1088 | goto _cleanup; | |
936 | } |
|
1089 | } | |
937 | } |
|
1090 | } | |
938 | /* Check total compressed size */ |
|
|||
939 | totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, |
|
|||
940 | ctx->samples, ctx->offsets, |
|
|||
941 | ctx->nbTrainSamples, ctx->nbSamples, |
|
|||
942 | dict, dictBufferCapacity); |
|
|||
943 |
|
||||
944 | _cleanup: |
|
1091 | _cleanup: | |
945 | COVER_best_finish(data->best, totalCompressedSize, parameters, dict, |
|
1092 | free(dict); | |
946 | dictBufferCapacity); |
|
1093 | COVER_best_finish(data->best, parameters, selection); | |
947 | free(data); |
|
1094 | free(data); | |
948 | COVER_map_destroy(&activeDmers); |
|
1095 | COVER_map_destroy(&activeDmers); | |
949 | if (dict) { |
|
1096 | COVER_dictSelectionFree(selection); | |
950 | free(dict); |
|
|||
951 | } |
|
|||
952 | if (freqs) { |
|
1097 | if (freqs) { | |
953 | free(freqs); |
|
1098 | free(freqs); | |
954 | } |
|
1099 | } | |
@@ -970,6 +1115,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
970 | const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); |
|
1115 | const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); | |
971 | const unsigned kIterations = |
|
1116 | const unsigned kIterations = | |
972 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); |
|
1117 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); | |
|
1118 | const unsigned shrinkDict = 0; | |||
973 | /* Local variables */ |
|
1119 | /* Local variables */ | |
974 | const int displayLevel = parameters->zParams.notificationLevel; |
|
1120 | const int displayLevel = parameters->zParams.notificationLevel; | |
975 | unsigned iteration = 1; |
|
1121 | unsigned iteration = 1; | |
@@ -977,19 +1123,20 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
977 | unsigned k; |
|
1123 | unsigned k; | |
978 | COVER_best_t best; |
|
1124 | COVER_best_t best; | |
979 | POOL_ctx *pool = NULL; |
|
1125 | POOL_ctx *pool = NULL; | |
|
1126 | int warned = 0; | |||
980 |
|
1127 | |||
981 | /* Checks */ |
|
1128 | /* Checks */ | |
982 | if (splitPoint <= 0 || splitPoint > 1) { |
|
1129 | if (splitPoint <= 0 || splitPoint > 1) { | |
983 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); |
|
1130 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | |
984 | return ERROR(GENERIC); |
|
1131 | return ERROR(parameter_outOfBound); | |
985 | } |
|
1132 | } | |
986 | if (kMinK < kMaxD || kMaxK < kMinK) { |
|
1133 | if (kMinK < kMaxD || kMaxK < kMinK) { | |
987 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); |
|
1134 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | |
988 | return ERROR(GENERIC); |
|
1135 | return ERROR(parameter_outOfBound); | |
989 | } |
|
1136 | } | |
990 | if (nbSamples == 0) { |
|
1137 | if (nbSamples == 0) { | |
991 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); |
|
1138 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | |
992 |
return ERROR( |
|
1139 | return ERROR(srcSize_wrong); | |
993 | } |
|
1140 | } | |
994 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
1141 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | |
995 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
|
1142 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | |
@@ -1013,11 +1160,18 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
1013 | /* Initialize the context for this value of d */ |
|
1160 | /* Initialize the context for this value of d */ | |
1014 | COVER_ctx_t ctx; |
|
1161 | COVER_ctx_t ctx; | |
1015 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); |
|
1162 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); | |
1016 | if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) { |
|
1163 | { | |
1017 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); |
|
1164 | const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); | |
1018 | COVER_best_destroy(&best); |
|
1165 | if (ZSTD_isError(initVal)) { | |
1019 | POOL_free(pool); |
|
1166 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); | |
1020 | return ERROR(GENERIC); |
|
1167 | COVER_best_destroy(&best); | |
|
1168 | POOL_free(pool); | |||
|
1169 | return initVal; | |||
|
1170 | } | |||
|
1171 | } | |||
|
1172 | if (!warned) { | |||
|
1173 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); | |||
|
1174 | warned = 1; | |||
1021 | } |
|
1175 | } | |
1022 | /* Loop through k reusing the same context */ |
|
1176 | /* Loop through k reusing the same context */ | |
1023 | for (k = kMinK; k <= kMaxK; k += kStepSize) { |
|
1177 | for (k = kMinK; k <= kMaxK; k += kStepSize) { | |
@@ -1030,7 +1184,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
1030 | COVER_best_destroy(&best); |
|
1184 | COVER_best_destroy(&best); | |
1031 | COVER_ctx_destroy(&ctx); |
|
1185 | COVER_ctx_destroy(&ctx); | |
1032 | POOL_free(pool); |
|
1186 | POOL_free(pool); | |
1033 |
return ERROR( |
|
1187 | return ERROR(memory_allocation); | |
1034 | } |
|
1188 | } | |
1035 | data->ctx = &ctx; |
|
1189 | data->ctx = &ctx; | |
1036 | data->best = &best; |
|
1190 | data->best = &best; | |
@@ -1040,6 +1194,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
1040 | data->parameters.d = d; |
|
1194 | data->parameters.d = d; | |
1041 | data->parameters.splitPoint = splitPoint; |
|
1195 | data->parameters.splitPoint = splitPoint; | |
1042 | data->parameters.steps = kSteps; |
|
1196 | data->parameters.steps = kSteps; | |
|
1197 | data->parameters.shrinkDict = shrinkDict; | |||
1043 | data->parameters.zParams.notificationLevel = g_displayLevel; |
|
1198 | data->parameters.zParams.notificationLevel = g_displayLevel; | |
1044 | /* Check the parameters */ |
|
1199 | /* Check the parameters */ | |
1045 | if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { |
|
1200 | if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { |
@@ -39,6 +39,44 b' typedef struct {' | |||||
39 | } COVER_segment_t; |
|
39 | } COVER_segment_t; | |
40 |
|
40 | |||
41 | /** |
|
41 | /** | |
|
42 | *Number of epochs and size of each epoch. | |||
|
43 | */ | |||
|
44 | typedef struct { | |||
|
45 | U32 num; | |||
|
46 | U32 size; | |||
|
47 | } COVER_epoch_info_t; | |||
|
48 | ||||
|
49 | /** | |||
|
50 | * Struct used for the dictionary selection function. | |||
|
51 | */ | |||
|
52 | typedef struct COVER_dictSelection { | |||
|
53 | BYTE* dictContent; | |||
|
54 | size_t dictSize; | |||
|
55 | size_t totalCompressedSize; | |||
|
56 | } COVER_dictSelection_t; | |||
|
57 | ||||
|
58 | /** | |||
|
59 | * Computes the number of epochs and the size of each epoch. | |||
|
60 | * We will make sure that each epoch gets at least 10 * k bytes. | |||
|
61 | * | |||
|
62 | * The COVER algorithms divide the data up into epochs of equal size and | |||
|
63 | * select one segment from each epoch. | |||
|
64 | * | |||
|
65 | * @param maxDictSize The maximum allowed dictionary size. | |||
|
66 | * @param nbDmers The number of dmers we are training on. | |||
|
67 | * @param k The parameter k (segment size). | |||
|
68 | * @param passes The target number of passes over the dmer corpus. | |||
|
69 | * More passes means a better dictionary. | |||
|
70 | */ | |||
|
71 | COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, | |||
|
72 | U32 k, U32 passes); | |||
|
73 | ||||
|
74 | /** | |||
|
75 | * Warns the user when their corpus is too small. | |||
|
76 | */ | |||
|
77 | void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); | |||
|
78 | ||||
|
79 | /** | |||
42 | * Checks total compressed size of a dictionary |
|
80 | * Checks total compressed size of a dictionary | |
43 | */ |
|
81 | */ | |
44 | size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, |
|
82 | size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, | |
@@ -78,6 +116,32 b' void COVER_best_start(COVER_best_t *best' | |||||
78 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. |
|
116 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | |
79 | * If this dictionary is the best so far save it and its parameters. |
|
117 | * If this dictionary is the best so far save it and its parameters. | |
80 | */ |
|
118 | */ | |
81 |
void COVER_best_finish(COVER_best_t *best, |
|
119 | void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, | |
82 | ZDICT_cover_params_t parameters, void *dict, |
|
120 | COVER_dictSelection_t selection); | |
83 | size_t dictSize); |
|
121 | /** | |
|
122 | * Error function for COVER_selectDict function. Checks if the return | |||
|
123 | * value is an error. | |||
|
124 | */ | |||
|
125 | unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); | |||
|
126 | ||||
|
127 | /** | |||
|
128 | * Error function for COVER_selectDict function. Returns a struct where | |||
|
129 | * return.totalCompressedSize is a ZSTD error. | |||
|
130 | */ | |||
|
131 | COVER_dictSelection_t COVER_dictSelectionError(size_t error); | |||
|
132 | ||||
|
133 | /** | |||
|
134 | * Always call after selectDict is called to free up used memory from | |||
|
135 | * newly created dictionary. | |||
|
136 | */ | |||
|
137 | void COVER_dictSelectionFree(COVER_dictSelection_t selection); | |||
|
138 | ||||
|
139 | /** | |||
|
140 | * Called to finalize the dictionary and select one based on whether or not | |||
|
141 | * the shrink-dict flag was enabled. If enabled the dictionary used is the | |||
|
142 | * smallest dictionary within a specified regression of the compressed size | |||
|
143 | * from the largest dictionary. | |||
|
144 | */ | |||
|
145 | COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, | |||
|
146 | size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, | |||
|
147 | size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); |
@@ -132,7 +132,7 b' typedef struct {' | |||||
132 | * |
|
132 | * | |
133 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) |
|
133 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) | |
134 | * |
|
134 | * | |
135 | * Once the dmer with hash value d is in the dictionay we set F(d) = 0. |
|
135 | * Once the dmer with hash value d is in the dictionary we set F(d) = 0. | |
136 | */ |
|
136 | */ | |
137 | static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, |
|
137 | static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, | |
138 | U32 *freqs, U32 begin, U32 end, |
|
138 | U32 *freqs, U32 begin, U32 end, | |
@@ -161,7 +161,7 b' static COVER_segment_t FASTCOVER_selectS' | |||||
161 | /* Get hash value of current dmer */ |
|
161 | /* Get hash value of current dmer */ | |
162 | const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); |
|
162 | const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); | |
163 |
|
163 | |||
164 | /* Add frequency of this index to score if this is the first occurence of index in active segment */ |
|
164 | /* Add frequency of this index to score if this is the first occurrence of index in active segment */ | |
165 | if (segmentFreqs[idx] == 0) { |
|
165 | if (segmentFreqs[idx] == 0) { | |
166 | activeSegment.score += freqs[idx]; |
|
166 | activeSegment.score += freqs[idx]; | |
167 | } |
|
167 | } | |
@@ -287,10 +287,10 b' FASTCOVER_computeFrequency(U32* freqs, c' | |||||
287 | * Prepare a context for dictionary building. |
|
287 | * Prepare a context for dictionary building. | |
288 | * The context is only dependent on the parameter `d` and can used multiple |
|
288 | * The context is only dependent on the parameter `d` and can used multiple | |
289 | * times. |
|
289 | * times. | |
290 |
* Returns |
|
290 | * Returns 0 on success or error code on error. | |
291 | * The context must be destroyed with `FASTCOVER_ctx_destroy()`. |
|
291 | * The context must be destroyed with `FASTCOVER_ctx_destroy()`. | |
292 | */ |
|
292 | */ | |
293 |
static |
|
293 | static size_t | |
294 | FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, |
|
294 | FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, | |
295 | const void* samplesBuffer, |
|
295 | const void* samplesBuffer, | |
296 | const size_t* samplesSizes, unsigned nbSamples, |
|
296 | const size_t* samplesSizes, unsigned nbSamples, | |
@@ -310,19 +310,19 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,' | |||||
310 | totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { |
|
310 | totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { | |
311 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", |
|
311 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", | |
312 | (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); |
|
312 | (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); | |
313 |
return |
|
313 | return ERROR(srcSize_wrong); | |
314 | } |
|
314 | } | |
315 |
|
315 | |||
316 | /* Check if there are at least 5 training samples */ |
|
316 | /* Check if there are at least 5 training samples */ | |
317 | if (nbTrainSamples < 5) { |
|
317 | if (nbTrainSamples < 5) { | |
318 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); |
|
318 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); | |
319 |
return |
|
319 | return ERROR(srcSize_wrong); | |
320 | } |
|
320 | } | |
321 |
|
321 | |||
322 | /* Check if there's testing sample */ |
|
322 | /* Check if there's testing sample */ | |
323 | if (nbTestSamples < 1) { |
|
323 | if (nbTestSamples < 1) { | |
324 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); |
|
324 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); | |
325 |
return |
|
325 | return ERROR(srcSize_wrong); | |
326 | } |
|
326 | } | |
327 |
|
327 | |||
328 | /* Zero the context */ |
|
328 | /* Zero the context */ | |
@@ -347,7 +347,7 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,' | |||||
347 | if (ctx->offsets == NULL) { |
|
347 | if (ctx->offsets == NULL) { | |
348 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); |
|
348 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); | |
349 | FASTCOVER_ctx_destroy(ctx); |
|
349 | FASTCOVER_ctx_destroy(ctx); | |
350 | return 0; |
|
350 | return ERROR(memory_allocation); | |
351 | } |
|
351 | } | |
352 |
|
352 | |||
353 | /* Fill offsets from the samplesSizes */ |
|
353 | /* Fill offsets from the samplesSizes */ | |
@@ -364,13 +364,13 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,' | |||||
364 | if (ctx->freqs == NULL) { |
|
364 | if (ctx->freqs == NULL) { | |
365 | DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); |
|
365 | DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); | |
366 | FASTCOVER_ctx_destroy(ctx); |
|
366 | FASTCOVER_ctx_destroy(ctx); | |
367 | return 0; |
|
367 | return ERROR(memory_allocation); | |
368 | } |
|
368 | } | |
369 |
|
369 | |||
370 | DISPLAYLEVEL(2, "Computing frequencies\n"); |
|
370 | DISPLAYLEVEL(2, "Computing frequencies\n"); | |
371 | FASTCOVER_computeFrequency(ctx->freqs, ctx); |
|
371 | FASTCOVER_computeFrequency(ctx->freqs, ctx); | |
372 |
|
372 | |||
373 |
return |
|
373 | return 0; | |
374 | } |
|
374 | } | |
375 |
|
375 | |||
376 |
|
376 | |||
@@ -386,29 +386,35 b' FASTCOVER_buildDictionary(const FASTCOVE' | |||||
386 | { |
|
386 | { | |
387 | BYTE *const dict = (BYTE *)dictBuffer; |
|
387 | BYTE *const dict = (BYTE *)dictBuffer; | |
388 | size_t tail = dictBufferCapacity; |
|
388 | size_t tail = dictBufferCapacity; | |
389 |
/* Divide the data |
|
389 | /* Divide the data into epochs. We will select one segment from each epoch. */ | |
390 | * We will select at least one segment from each epoch. |
|
390 | const COVER_epoch_info_t epochs = COVER_computeEpochs( | |
391 | */ |
|
391 | (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); | |
392 | const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k)); |
|
392 | const size_t maxZeroScoreRun = 10; | |
393 | const unsigned epochSize = (U32)(ctx->nbDmers / epochs); |
|
393 | size_t zeroScoreRun = 0; | |
394 | size_t epoch; |
|
394 | size_t epoch; | |
395 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", |
|
395 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", | |
396 |
epochs, |
|
396 | (U32)epochs.num, (U32)epochs.size); | |
397 | /* Loop through the epochs until there are no more segments or the dictionary |
|
397 | /* Loop through the epochs until there are no more segments or the dictionary | |
398 | * is full. |
|
398 | * is full. | |
399 | */ |
|
399 | */ | |
400 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { |
|
400 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { | |
401 |
const U32 epochBegin = (U32)(epoch * epoch |
|
401 | const U32 epochBegin = (U32)(epoch * epochs.size); | |
402 |
const U32 epochEnd = epochBegin + epoch |
|
402 | const U32 epochEnd = epochBegin + epochs.size; | |
403 | size_t segmentSize; |
|
403 | size_t segmentSize; | |
404 | /* Select a segment */ |
|
404 | /* Select a segment */ | |
405 | COVER_segment_t segment = FASTCOVER_selectSegment( |
|
405 | COVER_segment_t segment = FASTCOVER_selectSegment( | |
406 | ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); |
|
406 | ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); | |
407 |
|
407 | |||
408 |
/* If the segment covers no dmers, then we are out of content |
|
408 | /* If the segment covers no dmers, then we are out of content. | |
|
409 | * There may be new content in other epochs, for continue for some time. | |||
|
410 | */ | |||
409 | if (segment.score == 0) { |
|
411 | if (segment.score == 0) { | |
410 | break; |
|
412 | if (++zeroScoreRun >= maxZeroScoreRun) { | |
|
413 | break; | |||
|
414 | } | |||
|
415 | continue; | |||
411 | } |
|
416 | } | |
|
417 | zeroScoreRun = 0; | |||
412 |
|
418 | |||
413 | /* Trim the segment if necessary and if it is too small then we are done */ |
|
419 | /* Trim the segment if necessary and if it is too small then we are done */ | |
414 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); |
|
420 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); | |
@@ -429,7 +435,6 b' FASTCOVER_buildDictionary(const FASTCOVE' | |||||
429 | return tail; |
|
435 | return tail; | |
430 | } |
|
436 | } | |
431 |
|
437 | |||
432 |
|
||||
433 | /** |
|
438 | /** | |
434 | * Parameters for FASTCOVER_tryParameters(). |
|
439 | * Parameters for FASTCOVER_tryParameters(). | |
435 | */ |
|
440 | */ | |
@@ -458,6 +463,7 b' static void FASTCOVER_tryParameters(void' | |||||
458 | U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); |
|
463 | U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); | |
459 | /* Allocate space for hash table, dict, and freqs */ |
|
464 | /* Allocate space for hash table, dict, and freqs */ | |
460 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); |
|
465 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); | |
|
466 | COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); | |||
461 | U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); |
|
467 | U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); | |
462 | if (!segmentFreqs || !dict || !freqs) { |
|
468 | if (!segmentFreqs || !dict || !freqs) { | |
463 | DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); |
|
469 | DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); | |
@@ -467,27 +473,24 b' static void FASTCOVER_tryParameters(void' | |||||
467 | memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); |
|
473 | memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); | |
468 | /* Build the dictionary */ |
|
474 | /* Build the dictionary */ | |
469 | { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, |
|
475 | { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, | |
470 | parameters, segmentFreqs); |
|
476 | parameters, segmentFreqs); | |
|
477 | ||||
471 | const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); |
|
478 | const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); | |
472 | dictBufferCapacity = ZDICT_finalizeDictionary( |
|
479 | selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, | |
473 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, |
|
480 | ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, | |
474 | ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams); |
|
481 | totalCompressedSize); | |
475 | if (ZDICT_isError(dictBufferCapacity)) { |
|
482 | ||
476 | DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); |
|
483 | if (COVER_dictSelectionIsError(selection)) { | |
|
484 | DISPLAYLEVEL(1, "Failed to select dictionary\n"); | |||
477 | goto _cleanup; |
|
485 | goto _cleanup; | |
478 | } |
|
486 | } | |
479 | } |
|
487 | } | |
480 | /* Check total compressed size */ |
|
|||
481 | totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, |
|
|||
482 | ctx->samples, ctx->offsets, |
|
|||
483 | ctx->nbTrainSamples, ctx->nbSamples, |
|
|||
484 | dict, dictBufferCapacity); |
|
|||
485 | _cleanup: |
|
488 | _cleanup: | |
486 | COVER_best_finish(data->best, totalCompressedSize, parameters, dict, |
|
489 | free(dict); | |
487 | dictBufferCapacity); |
|
490 | COVER_best_finish(data->best, parameters, selection); | |
488 | free(data); |
|
491 | free(data); | |
489 | free(segmentFreqs); |
|
492 | free(segmentFreqs); | |
490 | free(dict); |
|
493 | COVER_dictSelectionFree(selection); | |
491 | free(freqs); |
|
494 | free(freqs); | |
492 | } |
|
495 | } | |
493 |
|
496 | |||
@@ -502,6 +505,7 b' FASTCOVER_convertToCoverParams(ZDICT_fas' | |||||
502 | coverParams->nbThreads = fastCoverParams.nbThreads; |
|
505 | coverParams->nbThreads = fastCoverParams.nbThreads; | |
503 | coverParams->splitPoint = fastCoverParams.splitPoint; |
|
506 | coverParams->splitPoint = fastCoverParams.splitPoint; | |
504 | coverParams->zParams = fastCoverParams.zParams; |
|
507 | coverParams->zParams = fastCoverParams.zParams; | |
|
508 | coverParams->shrinkDict = fastCoverParams.shrinkDict; | |||
505 | } |
|
509 | } | |
506 |
|
510 | |||
507 |
|
511 | |||
@@ -518,6 +522,7 b' FASTCOVER_convertToFastCoverParams(ZDICT' | |||||
518 | fastCoverParams->f = f; |
|
522 | fastCoverParams->f = f; | |
519 | fastCoverParams->accel = accel; |
|
523 | fastCoverParams->accel = accel; | |
520 | fastCoverParams->zParams = coverParams.zParams; |
|
524 | fastCoverParams->zParams = coverParams.zParams; | |
|
525 | fastCoverParams->shrinkDict = coverParams.shrinkDict; | |||
521 | } |
|
526 | } | |
522 |
|
527 | |||
523 |
|
528 | |||
@@ -544,11 +549,11 b' ZDICT_trainFromBuffer_fastCover(void* di' | |||||
544 | if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, |
|
549 | if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, | |
545 | parameters.accel)) { |
|
550 | parameters.accel)) { | |
546 | DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); |
|
551 | DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); | |
547 |
return ERROR( |
|
552 | return ERROR(parameter_outOfBound); | |
548 | } |
|
553 | } | |
549 | if (nbSamples == 0) { |
|
554 | if (nbSamples == 0) { | |
550 | DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); |
|
555 | DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); | |
551 |
return ERROR( |
|
556 | return ERROR(srcSize_wrong); | |
552 | } |
|
557 | } | |
553 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
558 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | |
554 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
|
559 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | |
@@ -558,12 +563,16 b' ZDICT_trainFromBuffer_fastCover(void* di' | |||||
558 | /* Assign corresponding FASTCOVER_accel_t to accelParams*/ |
|
563 | /* Assign corresponding FASTCOVER_accel_t to accelParams*/ | |
559 | accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; |
|
564 | accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; | |
560 | /* Initialize context */ |
|
565 | /* Initialize context */ | |
561 | if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, |
|
566 | { | |
|
567 | size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |||
562 | coverParams.d, parameters.splitPoint, parameters.f, |
|
568 | coverParams.d, parameters.splitPoint, parameters.f, | |
563 |
accelParams) |
|
569 | accelParams); | |
564 | DISPLAYLEVEL(1, "Failed to initialize context\n"); |
|
570 | if (ZSTD_isError(initVal)) { | |
565 | return ERROR(GENERIC); |
|
571 | DISPLAYLEVEL(1, "Failed to initialize context\n"); | |
|
572 | return initVal; | |||
|
573 | } | |||
566 | } |
|
574 | } | |
|
575 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); | |||
567 | /* Build the dictionary */ |
|
576 | /* Build the dictionary */ | |
568 | DISPLAYLEVEL(2, "Building dictionary\n"); |
|
577 | DISPLAYLEVEL(2, "Building dictionary\n"); | |
569 | { |
|
578 | { | |
@@ -609,6 +618,7 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||||
609 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); |
|
618 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); | |
610 | const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; |
|
619 | const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; | |
611 | const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel; |
|
620 | const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel; | |
|
621 | const unsigned shrinkDict = 0; | |||
612 | /* Local variables */ |
|
622 | /* Local variables */ | |
613 | const int displayLevel = parameters->zParams.notificationLevel; |
|
623 | const int displayLevel = parameters->zParams.notificationLevel; | |
614 | unsigned iteration = 1; |
|
624 | unsigned iteration = 1; | |
@@ -616,22 +626,23 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||||
616 | unsigned k; |
|
626 | unsigned k; | |
617 | COVER_best_t best; |
|
627 | COVER_best_t best; | |
618 | POOL_ctx *pool = NULL; |
|
628 | POOL_ctx *pool = NULL; | |
|
629 | int warned = 0; | |||
619 | /* Checks */ |
|
630 | /* Checks */ | |
620 | if (splitPoint <= 0 || splitPoint > 1) { |
|
631 | if (splitPoint <= 0 || splitPoint > 1) { | |
621 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); |
|
632 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); | |
622 |
return ERROR( |
|
633 | return ERROR(parameter_outOfBound); | |
623 | } |
|
634 | } | |
624 | if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { |
|
635 | if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { | |
625 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); |
|
636 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); | |
626 |
return ERROR( |
|
637 | return ERROR(parameter_outOfBound); | |
627 | } |
|
638 | } | |
628 | if (kMinK < kMaxD || kMaxK < kMinK) { |
|
639 | if (kMinK < kMaxD || kMaxK < kMinK) { | |
629 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); |
|
640 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); | |
630 |
return ERROR( |
|
641 | return ERROR(parameter_outOfBound); | |
631 | } |
|
642 | } | |
632 | if (nbSamples == 0) { |
|
643 | if (nbSamples == 0) { | |
633 | LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); |
|
644 | LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); | |
634 |
return ERROR( |
|
645 | return ERROR(srcSize_wrong); | |
635 | } |
|
646 | } | |
636 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
647 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | |
637 | LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", |
|
648 | LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", | |
@@ -658,11 +669,18 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||||
658 | /* Initialize the context for this value of d */ |
|
669 | /* Initialize the context for this value of d */ | |
659 | FASTCOVER_ctx_t ctx; |
|
670 | FASTCOVER_ctx_t ctx; | |
660 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); |
|
671 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); | |
661 | if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) { |
|
672 | { | |
662 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); |
|
673 | size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); | |
663 | COVER_best_destroy(&best); |
|
674 | if (ZSTD_isError(initVal)) { | |
664 | POOL_free(pool); |
|
675 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); | |
665 | return ERROR(GENERIC); |
|
676 | COVER_best_destroy(&best); | |
|
677 | POOL_free(pool); | |||
|
678 | return initVal; | |||
|
679 | } | |||
|
680 | } | |||
|
681 | if (!warned) { | |||
|
682 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); | |||
|
683 | warned = 1; | |||
666 | } |
|
684 | } | |
667 | /* Loop through k reusing the same context */ |
|
685 | /* Loop through k reusing the same context */ | |
668 | for (k = kMinK; k <= kMaxK; k += kStepSize) { |
|
686 | for (k = kMinK; k <= kMaxK; k += kStepSize) { | |
@@ -675,7 +693,7 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||||
675 | COVER_best_destroy(&best); |
|
693 | COVER_best_destroy(&best); | |
676 | FASTCOVER_ctx_destroy(&ctx); |
|
694 | FASTCOVER_ctx_destroy(&ctx); | |
677 | POOL_free(pool); |
|
695 | POOL_free(pool); | |
678 |
return ERROR( |
|
696 | return ERROR(memory_allocation); | |
679 | } |
|
697 | } | |
680 | data->ctx = &ctx; |
|
698 | data->ctx = &ctx; | |
681 | data->best = &best; |
|
699 | data->best = &best; | |
@@ -685,6 +703,7 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||||
685 | data->parameters.d = d; |
|
703 | data->parameters.d = d; | |
686 | data->parameters.splitPoint = splitPoint; |
|
704 | data->parameters.splitPoint = splitPoint; | |
687 | data->parameters.steps = kSteps; |
|
705 | data->parameters.steps = kSteps; | |
|
706 | data->parameters.shrinkDict = shrinkDict; | |||
688 | data->parameters.zParams.notificationLevel = g_displayLevel; |
|
707 | data->parameters.zParams.notificationLevel = g_displayLevel; | |
689 | /* Check the parameters */ |
|
708 | /* Check the parameters */ | |
690 | if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, |
|
709 | if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, |
@@ -741,7 +741,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
741 | /* analyze, build stats, starting with literals */ |
|
741 | /* analyze, build stats, starting with literals */ | |
742 | { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); |
|
742 | { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); | |
743 | if (HUF_isError(maxNbBits)) { |
|
743 | if (HUF_isError(maxNbBits)) { | |
744 |
eSize = |
|
744 | eSize = maxNbBits; | |
745 | DISPLAYLEVEL(1, " HUF_buildCTable error \n"); |
|
745 | DISPLAYLEVEL(1, " HUF_buildCTable error \n"); | |
746 | goto _cleanup; |
|
746 | goto _cleanup; | |
747 | } |
|
747 | } | |
@@ -764,7 +764,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
764 | total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; |
|
764 | total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; | |
765 | errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); |
|
765 | errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); | |
766 | if (FSE_isError(errorCode)) { |
|
766 | if (FSE_isError(errorCode)) { | |
767 |
eSize = |
|
767 | eSize = errorCode; | |
768 | DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); |
|
768 | DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); | |
769 | goto _cleanup; |
|
769 | goto _cleanup; | |
770 | } |
|
770 | } | |
@@ -773,7 +773,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
773 | total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; |
|
773 | total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; | |
774 | errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); |
|
774 | errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); | |
775 | if (FSE_isError(errorCode)) { |
|
775 | if (FSE_isError(errorCode)) { | |
776 |
eSize = |
|
776 | eSize = errorCode; | |
777 | DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); |
|
777 | DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); | |
778 | goto _cleanup; |
|
778 | goto _cleanup; | |
779 | } |
|
779 | } | |
@@ -782,7 +782,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
782 | total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; |
|
782 | total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; | |
783 | errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); |
|
783 | errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); | |
784 | if (FSE_isError(errorCode)) { |
|
784 | if (FSE_isError(errorCode)) { | |
785 |
eSize = |
|
785 | eSize = errorCode; | |
786 | DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); |
|
786 | DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); | |
787 | goto _cleanup; |
|
787 | goto _cleanup; | |
788 | } |
|
788 | } | |
@@ -791,7 +791,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
791 | /* write result to buffer */ |
|
791 | /* write result to buffer */ | |
792 | { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog); |
|
792 | { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog); | |
793 | if (HUF_isError(hhSize)) { |
|
793 | if (HUF_isError(hhSize)) { | |
794 |
eSize = |
|
794 | eSize = hhSize; | |
795 | DISPLAYLEVEL(1, "HUF_writeCTable error \n"); |
|
795 | DISPLAYLEVEL(1, "HUF_writeCTable error \n"); | |
796 | goto _cleanup; |
|
796 | goto _cleanup; | |
797 | } |
|
797 | } | |
@@ -802,7 +802,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
802 |
|
802 | |||
803 | { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); |
|
803 | { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); | |
804 | if (FSE_isError(ohSize)) { |
|
804 | if (FSE_isError(ohSize)) { | |
805 |
eSize = |
|
805 | eSize = ohSize; | |
806 | DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); |
|
806 | DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); | |
807 | goto _cleanup; |
|
807 | goto _cleanup; | |
808 | } |
|
808 | } | |
@@ -813,7 +813,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
813 |
|
813 | |||
814 | { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); |
|
814 | { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); | |
815 | if (FSE_isError(mhSize)) { |
|
815 | if (FSE_isError(mhSize)) { | |
816 |
eSize = |
|
816 | eSize = mhSize; | |
817 | DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); |
|
817 | DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); | |
818 | goto _cleanup; |
|
818 | goto _cleanup; | |
819 | } |
|
819 | } | |
@@ -824,7 +824,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
824 |
|
824 | |||
825 | { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); |
|
825 | { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); | |
826 | if (FSE_isError(lhSize)) { |
|
826 | if (FSE_isError(lhSize)) { | |
827 |
eSize = |
|
827 | eSize = lhSize; | |
828 | DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); |
|
828 | DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); | |
829 | goto _cleanup; |
|
829 | goto _cleanup; | |
830 | } |
|
830 | } | |
@@ -834,7 +834,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
834 | } |
|
834 | } | |
835 |
|
835 | |||
836 | if (maxDstSize<12) { |
|
836 | if (maxDstSize<12) { | |
837 |
eSize = ERROR( |
|
837 | eSize = ERROR(dstSize_tooSmall); | |
838 | DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); |
|
838 | DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); | |
839 | goto _cleanup; |
|
839 | goto _cleanup; | |
840 | } |
|
840 | } |
@@ -46,7 +46,12 b' extern "C" {' | |||||
46 | * The resulting dictionary will be saved into `dictBuffer`. |
|
46 | * The resulting dictionary will be saved into `dictBuffer`. | |
47 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
47 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
48 | * or an error code, which can be tested with ZDICT_isError(). |
|
48 | * or an error code, which can be tested with ZDICT_isError(). | |
49 | * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. |
|
49 | * Note: Dictionary training will fail if there are not enough samples to construct a | |
|
50 | * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). | |||
|
51 | * If dictionary training fails, you should use zstd without a dictionary, as the dictionary | |||
|
52 | * would've been ineffective anyways. If you believe your samples would benefit from a dictionary | |||
|
53 | * please open an issue with details, and we can look into it. | |||
|
54 | * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. | |||
50 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
55 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |
51 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
56 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. | |
52 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
|
57 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. | |
@@ -89,6 +94,8 b' typedef struct {' | |||||
89 | unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ |
|
94 | unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ | |
90 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ |
|
95 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |
91 | double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ |
|
96 | double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ | |
|
97 | unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ | |||
|
98 | unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ | |||
92 | ZDICT_params_t zParams; |
|
99 | ZDICT_params_t zParams; | |
93 | } ZDICT_cover_params_t; |
|
100 | } ZDICT_cover_params_t; | |
94 |
|
101 | |||
@@ -100,6 +107,9 b' typedef struct {' | |||||
100 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ |
|
107 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |
101 | double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ |
|
108 | double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ | |
102 | unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ |
|
109 | unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ | |
|
110 | unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ | |||
|
111 | unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ | |||
|
112 | ||||
103 | ZDICT_params_t zParams; |
|
113 | ZDICT_params_t zParams; | |
104 | } ZDICT_fastCover_params_t; |
|
114 | } ZDICT_fastCover_params_t; | |
105 |
|
115 | |||
@@ -110,6 +120,7 b' typedef struct {' | |||||
110 | * The resulting dictionary will be saved into `dictBuffer`. |
|
120 | * The resulting dictionary will be saved into `dictBuffer`. | |
111 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
121 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
112 | * or an error code, which can be tested with ZDICT_isError(). |
|
122 | * or an error code, which can be tested with ZDICT_isError(). | |
|
123 | * See ZDICT_trainFromBuffer() for details on failure modes. | |||
113 | * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. |
|
124 | * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. | |
114 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
125 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |
115 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
126 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. | |
@@ -133,8 +144,9 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||||
133 | * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. |
|
144 | * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. | |
134 | * |
|
145 | * | |
135 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
146 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
136 |
* |
|
147 | * or an error code, which can be tested with ZDICT_isError(). | |
137 |
* |
|
148 | * On success `*parameters` contains the parameters selected. | |
|
149 | * See ZDICT_trainFromBuffer() for details on failure modes. | |||
138 | * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. |
|
150 | * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. | |
139 | */ |
|
151 | */ | |
140 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( |
|
152 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( | |
@@ -151,7 +163,8 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
151 | * The resulting dictionary will be saved into `dictBuffer`. |
|
163 | * The resulting dictionary will be saved into `dictBuffer`. | |
152 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
164 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
153 | * or an error code, which can be tested with ZDICT_isError(). |
|
165 | * or an error code, which can be tested with ZDICT_isError(). | |
154 | * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory . |
|
166 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
167 | * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. | |||
155 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
168 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |
156 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
169 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. | |
157 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
|
170 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. | |
@@ -175,9 +188,10 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||||
175 | * If accel is zero, default value of 1 is used. |
|
188 | * If accel is zero, default value of 1 is used. | |
176 | * |
|
189 | * | |
177 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
190 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
178 |
* |
|
191 | * or an error code, which can be tested with ZDICT_isError(). | |
179 |
* |
|
192 | * On success `*parameters` contains the parameters selected. | |
180 | * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread. |
|
193 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
194 | * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. | |||
181 | */ |
|
195 | */ | |
182 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, |
|
196 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, | |
183 | size_t dictBufferCapacity, const void* samplesBuffer, |
|
197 | size_t dictBufferCapacity, const void* samplesBuffer, | |
@@ -195,7 +209,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||||
195 | * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. |
|
209 | * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. | |
196 | * |
|
210 | * | |
197 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), |
|
211 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), | |
198 |
* |
|
212 | * or an error code, which can be tested by ZDICT_isError(). | |
199 | * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. |
|
213 | * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. | |
200 | * Note 2: dictBuffer and dictContent can overlap |
|
214 | * Note 2: dictBuffer and dictContent can overlap | |
201 | */ |
|
215 | */ | |
@@ -219,6 +233,7 b' typedef struct {' | |||||
219 | * `parameters` is optional and can be provided with values set to 0 to mean "default". |
|
233 | * `parameters` is optional and can be provided with values set to 0 to mean "default". | |
220 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
234 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |
221 | * or an error code, which can be tested with ZDICT_isError(). |
|
235 | * or an error code, which can be tested with ZDICT_isError(). | |
|
236 | * See ZDICT_trainFromBuffer() for details on failure modes. | |||
222 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
237 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |
223 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
238 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. | |
224 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
|
239 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
This diff has been collapsed as it changes many lines, (1145 lines changed) Show them Hide them | |||||
@@ -70,8 +70,8 b' extern "C" {' | |||||
70 |
|
70 | |||
71 | /*------ Version ------*/ |
|
71 | /*------ Version ------*/ | |
72 | #define ZSTD_VERSION_MAJOR 1 |
|
72 | #define ZSTD_VERSION_MAJOR 1 | |
73 |
#define ZSTD_VERSION_MINOR |
|
73 | #define ZSTD_VERSION_MINOR 4 | |
74 |
#define ZSTD_VERSION_RELEASE |
|
74 | #define ZSTD_VERSION_RELEASE 3 | |
75 |
|
75 | |||
76 | #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) |
|
76 | #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) | |
77 | ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ |
|
77 | ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ | |
@@ -82,13 +82,28 b' ZSTDLIB_API unsigned ZSTD_versionNumber(' | |||||
82 | #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) |
|
82 | #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) | |
83 | ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ |
|
83 | ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ | |
84 |
|
84 | |||
85 |
/* |
|
85 | /* ************************************* | |
86 | * Default constant |
|
86 | * Default constant | |
87 | ***************************************/ |
|
87 | ***************************************/ | |
88 | #ifndef ZSTD_CLEVEL_DEFAULT |
|
88 | #ifndef ZSTD_CLEVEL_DEFAULT | |
89 | # define ZSTD_CLEVEL_DEFAULT 3 |
|
89 | # define ZSTD_CLEVEL_DEFAULT 3 | |
90 | #endif |
|
90 | #endif | |
91 |
|
91 | |||
|
92 | /* ************************************* | |||
|
93 | * Constants | |||
|
94 | ***************************************/ | |||
|
95 | ||||
|
96 | /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ | |||
|
97 | #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ | |||
|
98 | #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ | |||
|
99 | #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ | |||
|
100 | #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 | |||
|
101 | ||||
|
102 | #define ZSTD_BLOCKSIZELOG_MAX 17 | |||
|
103 | #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) | |||
|
104 | ||||
|
105 | ||||
|
106 | ||||
92 | /*************************************** |
|
107 | /*************************************** | |
93 | * Simple API |
|
108 | * Simple API | |
94 | ***************************************/ |
|
109 | ***************************************/ | |
@@ -145,12 +160,21 b' ZSTDLIB_API unsigned long long ZSTD_getF' | |||||
145 | * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ |
|
160 | * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ | |
146 | ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); |
|
161 | ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); | |
147 |
|
162 | |||
|
163 | /*! ZSTD_findFrameCompressedSize() : | |||
|
164 | * `src` should point to the start of a ZSTD frame or skippable frame. | |||
|
165 | * `srcSize` must be >= first frame size | |||
|
166 | * @return : the compressed size of the first frame starting at `src`, | |||
|
167 | * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, | |||
|
168 | * or an error code if input is invalid */ | |||
|
169 | ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); | |||
|
170 | ||||
148 |
|
171 | |||
149 | /*====== Helper functions ======*/ |
|
172 | /*====== Helper functions ======*/ | |
150 | #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ |
|
173 | #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ | |
151 | ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ |
|
174 | ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ | |
152 | ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ |
|
175 | ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ | |
153 | ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ |
|
176 | ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ | |
|
177 | ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ | |||
154 | ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ |
|
178 | ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ | |
155 |
|
179 | |||
156 |
|
180 | |||
@@ -159,9 +183,14 b' ZSTDLIB_API int ZSTD_maxCLevel(v' | |||||
159 | ***************************************/ |
|
183 | ***************************************/ | |
160 | /*= Compression context |
|
184 | /*= Compression context | |
161 | * When compressing many times, |
|
185 | * When compressing many times, | |
162 |
* it is recommended to allocate a context just once, |
|
186 | * it is recommended to allocate a context just once, | |
|
187 | * and re-use it for each successive compression operation. | |||
163 | * This will make workload friendlier for system's memory. |
|
188 | * This will make workload friendlier for system's memory. | |
164 | * Use one context per thread for parallel execution in multi-threaded environments. */ |
|
189 | * Note : re-using context is just a speed / resource optimization. | |
|
190 | * It doesn't change the compression ratio, which remains identical. | |||
|
191 | * Note 2 : In multi-threaded environments, | |||
|
192 | * use one different context per thread for parallel execution. | |||
|
193 | */ | |||
165 | typedef struct ZSTD_CCtx_s ZSTD_CCtx; |
|
194 | typedef struct ZSTD_CCtx_s ZSTD_CCtx; | |
166 | ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); |
|
195 | ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); | |
167 | ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); |
|
196 | ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); | |
@@ -195,279 +224,6 b' ZSTDLIB_API size_t ZSTD_decompressDCtx(Z' | |||||
195 | const void* src, size_t srcSize); |
|
224 | const void* src, size_t srcSize); | |
196 |
|
225 | |||
197 |
|
226 | |||
198 | /************************** |
|
|||
199 | * Simple dictionary API |
|
|||
200 | ***************************/ |
|
|||
201 | /*! ZSTD_compress_usingDict() : |
|
|||
202 | * Compression at an explicit compression level using a Dictionary. |
|
|||
203 | * A dictionary can be any arbitrary data segment (also called a prefix), |
|
|||
204 | * or a buffer with specified information (see dictBuilder/zdict.h). |
|
|||
205 | * Note : This function loads the dictionary, resulting in significant startup delay. |
|
|||
206 | * It's intended for a dictionary used only once. |
|
|||
207 | * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ |
|
|||
208 | ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, |
|
|||
209 | void* dst, size_t dstCapacity, |
|
|||
210 | const void* src, size_t srcSize, |
|
|||
211 | const void* dict,size_t dictSize, |
|
|||
212 | int compressionLevel); |
|
|||
213 |
|
||||
214 | /*! ZSTD_decompress_usingDict() : |
|
|||
215 | * Decompression using a known Dictionary. |
|
|||
216 | * Dictionary must be identical to the one used during compression. |
|
|||
217 | * Note : This function loads the dictionary, resulting in significant startup delay. |
|
|||
218 | * It's intended for a dictionary used only once. |
|
|||
219 | * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ |
|
|||
220 | ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, |
|
|||
221 | void* dst, size_t dstCapacity, |
|
|||
222 | const void* src, size_t srcSize, |
|
|||
223 | const void* dict,size_t dictSize); |
|
|||
224 |
|
||||
225 |
|
||||
226 | /*********************************** |
|
|||
227 | * Bulk processing dictionary API |
|
|||
228 | **********************************/ |
|
|||
229 | typedef struct ZSTD_CDict_s ZSTD_CDict; |
|
|||
230 |
|
||||
231 | /*! ZSTD_createCDict() : |
|
|||
232 | * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. |
|
|||
233 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. |
|
|||
234 | * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. |
|
|||
235 | * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. |
|
|||
236 | * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. |
|
|||
237 | * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ |
|
|||
238 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, |
|
|||
239 | int compressionLevel); |
|
|||
240 |
|
||||
241 | /*! ZSTD_freeCDict() : |
|
|||
242 | * Function frees memory allocated by ZSTD_createCDict(). */ |
|
|||
243 | ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); |
|
|||
244 |
|
||||
245 | /*! ZSTD_compress_usingCDict() : |
|
|||
246 | * Compression using a digested Dictionary. |
|
|||
247 | * Recommended when same dictionary is used multiple times. |
|
|||
248 | * Note : compression level is _decided at dictionary creation time_, |
|
|||
249 | * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ |
|
|||
250 | ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, |
|
|||
251 | void* dst, size_t dstCapacity, |
|
|||
252 | const void* src, size_t srcSize, |
|
|||
253 | const ZSTD_CDict* cdict); |
|
|||
254 |
|
||||
255 |
|
||||
256 | typedef struct ZSTD_DDict_s ZSTD_DDict; |
|
|||
257 |
|
||||
258 | /*! ZSTD_createDDict() : |
|
|||
259 | * Create a digested dictionary, ready to start decompression operation without startup delay. |
|
|||
260 | * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ |
|
|||
261 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); |
|
|||
262 |
|
||||
263 | /*! ZSTD_freeDDict() : |
|
|||
264 | * Function frees memory allocated with ZSTD_createDDict() */ |
|
|||
265 | ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); |
|
|||
266 |
|
||||
267 | /*! ZSTD_decompress_usingDDict() : |
|
|||
268 | * Decompression using a digested Dictionary. |
|
|||
269 | * Recommended when same dictionary is used multiple times. */ |
|
|||
270 | ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, |
|
|||
271 | void* dst, size_t dstCapacity, |
|
|||
272 | const void* src, size_t srcSize, |
|
|||
273 | const ZSTD_DDict* ddict); |
|
|||
274 |
|
||||
275 |
|
||||
276 | /**************************** |
|
|||
277 | * Streaming |
|
|||
278 | ****************************/ |
|
|||
279 |
|
||||
280 | typedef struct ZSTD_inBuffer_s { |
|
|||
281 | const void* src; /**< start of input buffer */ |
|
|||
282 | size_t size; /**< size of input buffer */ |
|
|||
283 | size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ |
|
|||
284 | } ZSTD_inBuffer; |
|
|||
285 |
|
||||
286 | typedef struct ZSTD_outBuffer_s { |
|
|||
287 | void* dst; /**< start of output buffer */ |
|
|||
288 | size_t size; /**< size of output buffer */ |
|
|||
289 | size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ |
|
|||
290 | } ZSTD_outBuffer; |
|
|||
291 |
|
||||
292 |
|
||||
293 |
|
||||
294 | /*-*********************************************************************** |
|
|||
295 | * Streaming compression - HowTo |
|
|||
296 | * |
|
|||
297 | * A ZSTD_CStream object is required to track streaming operation. |
|
|||
298 | * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. |
|
|||
299 | * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. |
|
|||
300 | * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. |
|
|||
301 | * |
|
|||
302 | * For parallel execution, use one separate ZSTD_CStream per thread. |
|
|||
303 | * |
|
|||
304 | * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. |
|
|||
305 | * |
|
|||
306 | * Parameters are sticky : when starting a new compression on the same context, |
|
|||
307 | * it will re-use the same sticky parameters as previous compression session. |
|
|||
308 | * When in doubt, it's recommended to fully initialize the context before usage. |
|
|||
309 | * Use ZSTD_initCStream() to set the parameter to a selected compression level. |
|
|||
310 | * Use advanced API (ZSTD_CCtx_setParameter(), etc.) to set more specific parameters. |
|
|||
311 | * |
|
|||
312 | * Use ZSTD_compressStream() as many times as necessary to consume input stream. |
|
|||
313 | * The function will automatically update both `pos` fields within `input` and `output`. |
|
|||
314 | * Note that the function may not consume the entire input, |
|
|||
315 | * for example, because the output buffer is already full, |
|
|||
316 | * in which case `input.pos < input.size`. |
|
|||
317 | * The caller must check if input has been entirely consumed. |
|
|||
318 | * If not, the caller must make some room to receive more compressed data, |
|
|||
319 | * and then present again remaining input data. |
|
|||
320 | * @return : a size hint, preferred nb of bytes to use as input for next function call |
|
|||
321 | * or an error code, which can be tested using ZSTD_isError(). |
|
|||
322 | * Note 1 : it's just a hint, to help latency a little, any value will work fine. |
|
|||
323 | * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() |
|
|||
324 | * |
|
|||
325 | * At any moment, it's possible to flush whatever data might remain stuck within internal buffer, |
|
|||
326 | * using ZSTD_flushStream(). `output->pos` will be updated. |
|
|||
327 | * Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0). |
|
|||
328 | * In which case, make some room to receive more compressed data, and call again ZSTD_flushStream(). |
|
|||
329 | * @return : 0 if internal buffers are entirely flushed, |
|
|||
330 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), |
|
|||
331 | * or an error code, which can be tested using ZSTD_isError(). |
|
|||
332 | * |
|
|||
333 | * ZSTD_endStream() instructs to finish a frame. |
|
|||
334 | * It will perform a flush and write frame epilogue. |
|
|||
335 | * The epilogue is required for decoders to consider a frame completed. |
|
|||
336 | * flush() operation is the same, and follows same rules as ZSTD_flushStream(). |
|
|||
337 | * @return : 0 if frame fully completed and fully flushed, |
|
|||
338 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), |
|
|||
339 | * or an error code, which can be tested using ZSTD_isError(). |
|
|||
340 | * |
|
|||
341 | * *******************************************************************/ |
|
|||
342 |
|
||||
343 | typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ |
|
|||
344 | /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ |
|
|||
345 | /*===== ZSTD_CStream management functions =====*/ |
|
|||
346 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); |
|
|||
347 | ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); |
|
|||
348 |
|
||||
349 | /*===== Streaming compression functions =====*/ |
|
|||
350 | ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); |
|
|||
351 | ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); |
|
|||
352 | ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); |
|
|||
353 | ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); |
|
|||
354 |
|
||||
355 | ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ |
|
|||
356 | ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ |
|
|||
357 |
|
||||
358 |
|
||||
359 |
|
||||
360 | /*-*************************************************************************** |
|
|||
361 | * Streaming decompression - HowTo |
|
|||
362 | * |
|
|||
363 | * A ZSTD_DStream object is required to track streaming operations. |
|
|||
364 | * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. |
|
|||
365 | * ZSTD_DStream objects can be re-used multiple times. |
|
|||
366 | * |
|
|||
367 | * Use ZSTD_initDStream() to start a new decompression operation. |
|
|||
368 | * @return : recommended first input size |
|
|||
369 | * Alternatively, use advanced API to set specific properties. |
|
|||
370 | * |
|
|||
371 | * Use ZSTD_decompressStream() repetitively to consume your input. |
|
|||
372 | * The function will update both `pos` fields. |
|
|||
373 | * If `input.pos < input.size`, some input has not been consumed. |
|
|||
374 | * It's up to the caller to present again remaining data. |
|
|||
375 | * The function tries to flush all data decoded immediately, respecting output buffer size. |
|
|||
376 | * If `output.pos < output.size`, decoder has flushed everything it could. |
|
|||
377 | * But if `output.pos == output.size`, there might be some data left within internal buffers. |
|
|||
378 | * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. |
|
|||
379 | * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. |
|
|||
380 | * @return : 0 when a frame is completely decoded and fully flushed, |
|
|||
381 | * or an error code, which can be tested using ZSTD_isError(), |
|
|||
382 | * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : |
|
|||
383 | * the return value is a suggested next input size (just a hint for better latency) |
|
|||
384 | * that will never request more than the remaining frame size. |
|
|||
385 | * *******************************************************************************/ |
|
|||
386 |
|
||||
387 | typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ |
|
|||
388 | /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ |
|
|||
389 | /*===== ZSTD_DStream management functions =====*/ |
|
|||
390 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); |
|
|||
391 | ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); |
|
|||
392 |
|
||||
393 | /*===== Streaming decompression functions =====*/ |
|
|||
394 | ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); |
|
|||
395 | ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); |
|
|||
396 |
|
||||
397 | ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ |
|
|||
398 | ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ |
|
|||
399 |
|
||||
400 | #endif /* ZSTD_H_235446 */ |
|
|||
401 |
|
||||
402 |
|
||||
403 |
|
||||
404 |
|
||||
405 | /**************************************************************************************** |
|
|||
406 | * ADVANCED AND EXPERIMENTAL FUNCTIONS |
|
|||
407 | **************************************************************************************** |
|
|||
408 | * The definitions in the following section are considered experimental. |
|
|||
409 | * They are provided for advanced scenarios. |
|
|||
410 | * They should never be used with a dynamic library, as prototypes may change in the future. |
|
|||
411 | * Use them only in association with static linking. |
|
|||
412 | * ***************************************************************************************/ |
|
|||
413 |
|
||||
414 | #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) |
|
|||
415 | #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY |
|
|||
416 |
|
||||
417 |
|
||||
418 | /**************************************************************************************** |
|
|||
419 | * Candidate API for promotion to stable status |
|
|||
420 | **************************************************************************************** |
|
|||
421 | * The following symbols and constants form the "staging area" : |
|
|||
422 | * they are considered to join "stable API" by v1.4.0. |
|
|||
423 | * The proposal is written so that it can be made stable "as is", |
|
|||
424 | * though it's still possible to suggest improvements. |
|
|||
425 | * Staging is in fact last chance for changes, |
|
|||
426 | * the API is locked once reaching "stable" status. |
|
|||
427 | * ***************************************************************************************/ |
|
|||
428 |
|
||||
429 |
|
||||
430 | /* === Constants === */ |
|
|||
431 |
|
||||
432 | /* all magic numbers are supposed to be read/written to/from files/memory using little-endian convention */ |
|
|||
433 | #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ |
|
|||
434 | #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ |
|
|||
435 | #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ |
|
|||
436 | #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 |
|
|||
437 |
|
||||
438 | #define ZSTD_BLOCKSIZELOG_MAX 17 |
|
|||
439 | #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) |
|
|||
440 |
|
||||
441 |
|
||||
442 | /* === query limits === */ |
|
|||
443 |
|
||||
444 | ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ |
|
|||
445 |
|
||||
446 |
|
||||
447 | /* === frame size === */ |
|
|||
448 |
|
||||
449 | /*! ZSTD_findFrameCompressedSize() : |
|
|||
450 | * `src` should point to the start of a ZSTD frame or skippable frame. |
|
|||
451 | * `srcSize` must be >= first frame size |
|
|||
452 | * @return : the compressed size of the first frame starting at `src`, |
|
|||
453 | * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, |
|
|||
454 | * or an error code if input is invalid */ |
|
|||
455 | ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); |
|
|||
456 |
|
||||
457 |
|
||||
458 | /* === Memory management === */ |
|
|||
459 |
|
||||
460 | /*! ZSTD_sizeof_*() : |
|
|||
461 | * These functions give the _current_ memory usage of selected object. |
|
|||
462 | * Note that object memory usage can evolve (increase or decrease) over time. */ |
|
|||
463 | ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); |
|
|||
464 | ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); |
|
|||
465 | ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); |
|
|||
466 | ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); |
|
|||
467 | ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); |
|
|||
468 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); |
|
|||
469 |
|
||||
470 |
|
||||
471 | /*************************************** |
|
227 | /*************************************** | |
472 | * Advanced compression API |
|
228 | * Advanced compression API | |
473 | ***************************************/ |
|
229 | ***************************************/ | |
@@ -503,7 +259,10 b' typedef enum { ZSTD_fast=1,' | |||||
503 |
|
259 | |||
504 | typedef enum { |
|
260 | typedef enum { | |
505 |
|
261 | |||
506 |
/* compression parameters |
|
262 | /* compression parameters | |
|
263 | * Note: When compressing with a ZSTD_CDict these parameters are superseded | |||
|
264 | * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict() | |||
|
265 | * for more info (superseded-by-cdict). */ | |||
507 | ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table |
|
266 | ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table | |
508 | * Default level is ZSTD_CLEVEL_DEFAULT==3. |
|
267 | * Default level is ZSTD_CLEVEL_DEFAULT==3. | |
509 | * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. |
|
268 | * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. | |
@@ -625,6 +384,8 b' typedef enum {' | |||||
625 | * ZSTD_c_format |
|
384 | * ZSTD_c_format | |
626 | * ZSTD_c_forceMaxWindow |
|
385 | * ZSTD_c_forceMaxWindow | |
627 | * ZSTD_c_forceAttachDict |
|
386 | * ZSTD_c_forceAttachDict | |
|
387 | * ZSTD_c_literalCompressionMode | |||
|
388 | * ZSTD_c_targetCBlockSize | |||
628 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. |
|
389 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. | |
629 | * note : never ever use experimentalParam? names directly; |
|
390 | * note : never ever use experimentalParam? names directly; | |
630 | * also, the enums values themselves are unstable and can still change. |
|
391 | * also, the enums values themselves are unstable and can still change. | |
@@ -632,10 +393,11 b' typedef enum {' | |||||
632 | ZSTD_c_experimentalParam1=500, |
|
393 | ZSTD_c_experimentalParam1=500, | |
633 | ZSTD_c_experimentalParam2=10, |
|
394 | ZSTD_c_experimentalParam2=10, | |
634 | ZSTD_c_experimentalParam3=1000, |
|
395 | ZSTD_c_experimentalParam3=1000, | |
635 | ZSTD_c_experimentalParam4=1001 |
|
396 | ZSTD_c_experimentalParam4=1001, | |
|
397 | ZSTD_c_experimentalParam5=1002, | |||
|
398 | ZSTD_c_experimentalParam6=1003, | |||
636 | } ZSTD_cParameter; |
|
399 | } ZSTD_cParameter; | |
637 |
|
400 | |||
638 |
|
||||
639 | typedef struct { |
|
401 | typedef struct { | |
640 | size_t error; |
|
402 | size_t error; | |
641 | int lowerBound; |
|
403 | int lowerBound; | |
@@ -677,10 +439,443 b' ZSTDLIB_API size_t ZSTD_CCtx_setParamete' | |||||
677 | * Note 3 : Whenever all input data is provided and consumed in a single round, |
|
439 | * Note 3 : Whenever all input data is provided and consumed in a single round, | |
678 | * for example with ZSTD_compress2(), |
|
440 | * for example with ZSTD_compress2(), | |
679 | * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), |
|
441 | * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), | |
680 | * this value is automatically overriden by srcSize instead. |
|
442 | * this value is automatically overridden by srcSize instead. | |
681 | */ |
|
443 | */ | |
682 | ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); |
|
444 | ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); | |
683 |
|
445 | |||
|
446 | typedef enum { | |||
|
447 | ZSTD_reset_session_only = 1, | |||
|
448 | ZSTD_reset_parameters = 2, | |||
|
449 | ZSTD_reset_session_and_parameters = 3 | |||
|
450 | } ZSTD_ResetDirective; | |||
|
451 | ||||
|
452 | /*! ZSTD_CCtx_reset() : | |||
|
453 | * There are 2 different things that can be reset, independently or jointly : | |||
|
454 | * - The session : will stop compressing current frame, and make CCtx ready to start a new one. | |||
|
455 | * Useful after an error, or to interrupt any ongoing compression. | |||
|
456 | * Any internal data not yet flushed is cancelled. | |||
|
457 | * Compression parameters and dictionary remain unchanged. | |||
|
458 | * They will be used to compress next frame. | |||
|
459 | * Resetting session never fails. | |||
|
460 | * - The parameters : changes all parameters back to "default". | |||
|
461 | * This removes any reference to any dictionary too. | |||
|
462 | * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) | |||
|
463 | * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) | |||
|
464 | * - Both : similar to resetting the session, followed by resetting parameters. | |||
|
465 | */ | |||
|
466 | ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); | |||
|
467 | ||||
|
468 | /*! ZSTD_compress2() : | |||
|
469 | * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. | |||
|
470 | * ZSTD_compress2() always starts a new frame. | |||
|
471 | * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. | |||
|
472 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() | |||
|
473 | * - The function is always blocking, returns when compression is completed. | |||
|
474 | * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. | |||
|
475 | * @return : compressed size written into `dst` (<= `dstCapacity`), | |||
|
476 | * or an error code if it fails (which can be tested using ZSTD_isError()). | |||
|
477 | */ | |||
|
478 | ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, | |||
|
479 | void* dst, size_t dstCapacity, | |||
|
480 | const void* src, size_t srcSize); | |||
|
481 | ||||
|
482 | ||||
|
483 | /*************************************** | |||
|
484 | * Advanced decompression API | |||
|
485 | ***************************************/ | |||
|
486 | ||||
|
487 | /* The advanced API pushes parameters one by one into an existing DCtx context. | |||
|
488 | * Parameters are sticky, and remain valid for all following frames | |||
|
489 | * using the same DCtx context. | |||
|
490 | * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). | |||
|
491 | * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). | |||
|
492 | * Therefore, no new decompression function is necessary. | |||
|
493 | */ | |||
|
494 | ||||
|
495 | typedef enum { | |||
|
496 | ||||
|
497 | ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which | |||
|
498 | * the streaming API will refuse to allocate memory buffer | |||
|
499 | * in order to protect the host from unreasonable memory requirements. | |||
|
500 | * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. | |||
|
501 | * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). | |||
|
502 | * Special: value 0 means "use default maximum windowLog". */ | |||
|
503 | ||||
|
504 | /* note : additional experimental parameters are also available | |||
|
505 | * within the experimental section of the API. | |||
|
506 | * At the time of this writing, they include : | |||
|
507 | * ZSTD_d_format | |||
|
508 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. | |||
|
509 | * note : never ever use experimentalParam? names directly | |||
|
510 | */ | |||
|
511 | ZSTD_d_experimentalParam1=1000 | |||
|
512 | ||||
|
513 | } ZSTD_dParameter; | |||
|
514 | ||||
|
515 | /*! ZSTD_dParam_getBounds() : | |||
|
516 | * All parameters must belong to an interval with lower and upper bounds, | |||
|
517 | * otherwise they will either trigger an error or be automatically clamped. | |||
|
518 | * @return : a structure, ZSTD_bounds, which contains | |||
|
519 | * - an error status field, which must be tested using ZSTD_isError() | |||
|
520 | * - both lower and upper bounds, inclusive | |||
|
521 | */ | |||
|
522 | ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); | |||
|
523 | ||||
|
524 | /*! ZSTD_DCtx_setParameter() : | |||
|
525 | * Set one compression parameter, selected by enum ZSTD_dParameter. | |||
|
526 | * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). | |||
|
527 | * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). | |||
|
528 | * Setting a parameter is only possible during frame initialization (before starting decompression). | |||
|
529 | * @return : 0, or an error code (which can be tested using ZSTD_isError()). | |||
|
530 | */ | |||
|
531 | ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); | |||
|
532 | ||||
|
533 | /*! ZSTD_DCtx_reset() : | |||
|
534 | * Return a DCtx to clean state. | |||
|
535 | * Session and parameters can be reset jointly or separately. | |||
|
536 | * Parameters can only be reset when no active frame is being decompressed. | |||
|
537 | * @return : 0, or an error code, which can be tested with ZSTD_isError() | |||
|
538 | */ | |||
|
539 | ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); | |||
|
540 | ||||
|
541 | ||||
|
542 | /**************************** | |||
|
543 | * Streaming | |||
|
544 | ****************************/ | |||
|
545 | ||||
|
546 | typedef struct ZSTD_inBuffer_s { | |||
|
547 | const void* src; /**< start of input buffer */ | |||
|
548 | size_t size; /**< size of input buffer */ | |||
|
549 | size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ | |||
|
550 | } ZSTD_inBuffer; | |||
|
551 | ||||
|
552 | typedef struct ZSTD_outBuffer_s { | |||
|
553 | void* dst; /**< start of output buffer */ | |||
|
554 | size_t size; /**< size of output buffer */ | |||
|
555 | size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ | |||
|
556 | } ZSTD_outBuffer; | |||
|
557 | ||||
|
558 | ||||
|
559 | ||||
|
560 | /*-*********************************************************************** | |||
|
561 | * Streaming compression - HowTo | |||
|
562 | * | |||
|
563 | * A ZSTD_CStream object is required to track streaming operation. | |||
|
564 | * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. | |||
|
565 | * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. | |||
|
566 | * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. | |||
|
567 | * | |||
|
568 | * For parallel execution, use one separate ZSTD_CStream per thread. | |||
|
569 | * | |||
|
570 | * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. | |||
|
571 | * | |||
|
572 | * Parameters are sticky : when starting a new compression on the same context, | |||
|
573 | * it will re-use the same sticky parameters as previous compression session. | |||
|
574 | * When in doubt, it's recommended to fully initialize the context before usage. | |||
|
575 | * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), | |||
|
576 | * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to | |||
|
577 | * set more specific parameters, the pledged source size, or load a dictionary. | |||
|
578 | * | |||
|
579 | * Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to | |||
|
580 | * consume input stream. The function will automatically update both `pos` | |||
|
581 | * fields within `input` and `output`. | |||
|
582 | * Note that the function may not consume the entire input, for example, because | |||
|
583 | * the output buffer is already full, in which case `input.pos < input.size`. | |||
|
584 | * The caller must check if input has been entirely consumed. | |||
|
585 | * If not, the caller must make some room to receive more compressed data, | |||
|
586 | * and then present again remaining input data. | |||
|
587 | * note: ZSTD_e_continue is guaranteed to make some forward progress when called, | |||
|
588 | * but doesn't guarantee maximal forward progress. This is especially relevant | |||
|
589 | * when compressing with multiple threads. The call won't block if it can | |||
|
590 | * consume some input, but if it can't it will wait for some, but not all, | |||
|
591 | * output to be flushed. | |||
|
592 | * @return : provides a minimum amount of data remaining to be flushed from internal buffers | |||
|
593 | * or an error code, which can be tested using ZSTD_isError(). | |||
|
594 | * | |||
|
595 | * At any moment, it's possible to flush whatever data might remain stuck within internal buffer, | |||
|
596 | * using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. | |||
|
597 | * Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). | |||
|
598 | * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. | |||
|
599 | * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the | |||
|
600 | * operation. | |||
|
601 | * note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will | |||
|
602 | * block until the flush is complete or the output buffer is full. | |||
|
603 | * @return : 0 if internal buffers are entirely flushed, | |||
|
604 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), | |||
|
605 | * or an error code, which can be tested using ZSTD_isError(). | |||
|
606 | * | |||
|
607 | * Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. | |||
|
608 | * It will perform a flush and write frame epilogue. | |||
|
609 | * The epilogue is required for decoders to consider a frame completed. | |||
|
610 | * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. | |||
|
611 | * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to | |||
|
612 | * start a new frame. | |||
|
613 | * note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will | |||
|
614 | * block until the flush is complete or the output buffer is full. | |||
|
615 | * @return : 0 if frame fully completed and fully flushed, | |||
|
616 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), | |||
|
617 | * or an error code, which can be tested using ZSTD_isError(). | |||
|
618 | * | |||
|
619 | * *******************************************************************/ | |||
|
620 | ||||
|
621 | typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ | |||
|
622 | /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ | |||
|
623 | /*===== ZSTD_CStream management functions =====*/ | |||
|
624 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); | |||
|
625 | ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); | |||
|
626 | ||||
|
627 | /*===== Streaming compression functions =====*/ | |||
|
628 | typedef enum { | |||
|
629 | ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ | |||
|
630 | ZSTD_e_flush=1, /* flush any data provided so far, | |||
|
631 | * it creates (at least) one new block, that can be decoded immediately on reception; | |||
|
632 | * frame will continue: any future data can still reference previously compressed data, improving compression. | |||
|
633 | * note : multithreaded compression will block to flush as much output as possible. */ | |||
|
634 | ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. | |||
|
635 | * note that frame is only closed after compressed data is fully flushed (return value == 0). | |||
|
636 | * After that point, any additional data starts a new frame. | |||
|
637 | * note : each frame is independent (does not reference any content from previous frame). | |||
|
638 | * note : multithreaded compression will block to flush as much output as possible. */ | |||
|
639 | } ZSTD_EndDirective; | |||
|
640 | ||||
|
641 | /*! ZSTD_compressStream2() : | |||
|
642 | * Behaves about the same as ZSTD_compressStream, with additional control on end directive. | |||
|
643 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() | |||
|
644 | * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) | |||
|
645 | * - output->pos must be <= dstCapacity, input->pos must be <= srcSize | |||
|
646 | * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. | |||
|
647 | * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. | |||
|
648 | * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, | |||
|
649 | * and then immediately returns, just indicating that there is some data remaining to be flushed. | |||
|
650 | * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte. | |||
|
651 | * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. | |||
|
652 | * - @return provides a minimum amount of data remaining to be flushed from internal buffers | |||
|
653 | * or an error code, which can be tested using ZSTD_isError(). | |||
|
654 | * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. | |||
|
655 | * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. | |||
|
656 | * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. | |||
|
657 | * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), | |||
|
658 | * only ZSTD_e_end or ZSTD_e_flush operations are allowed. | |||
|
659 | * Before starting a new compression job, or changing compression parameters, | |||
|
660 | * it is required to fully flush internal buffers. | |||
|
661 | */ | |||
|
662 | ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, | |||
|
663 | ZSTD_outBuffer* output, | |||
|
664 | ZSTD_inBuffer* input, | |||
|
665 | ZSTD_EndDirective endOp); | |||
|
666 | ||||
|
667 | ||||
|
668 | /* These buffer sizes are softly recommended. | |||
|
669 | * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. | |||
|
670 | * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), | |||
|
671 | * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. | |||
|
672 | * | |||
|
673 | * However, note that these recommendations are from the perspective of a C caller program. | |||
|
674 | * If the streaming interface is invoked from some other language, | |||
|
675 | * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, | |||
|
676 | * a major performance rule is to reduce crossing such interface to an absolute minimum. | |||
|
677 | * It's not rare that performance ends being spent more into the interface, rather than compression itself. | |||
|
678 | * In which cases, prefer using large buffers, as large as practical, | |||
|
679 | * for both input and output, to reduce the nb of roundtrips. | |||
|
680 | */ | |||
|
681 | ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ | |||
|
682 | ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ | |||
|
683 | ||||
|
684 | ||||
|
685 | /* ***************************************************************************** | |||
|
686 | * The following is a legacy streaming API. | |||
|
687 | * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). | |||
|
688 | * It is redundant, but remains fully supported. | |||
|
689 | * Advanced parameters and dictionary compression can only be used through the | |||
|
690 | * new API. | |||
|
691 | ******************************************************************************/ | |||
|
692 | ||||
|
693 | /*! | |||
|
694 | * Equivalent to: | |||
|
695 | * | |||
|
696 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
697 | * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) | |||
|
698 | * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); | |||
|
699 | */ | |||
|
700 | ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); | |||
|
701 | /*! | |||
|
702 | * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). | |||
|
703 | * NOTE: The return value is different. ZSTD_compressStream() returns a hint for | |||
|
704 | * the next read size (if non-zero and not an error). ZSTD_compressStream2() | |||
|
705 | * returns the minimum nb of bytes left to flush (if non-zero and not an error). | |||
|
706 | */ | |||
|
707 | ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |||
|
708 | /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ | |||
|
709 | ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); | |||
|
710 | /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ | |||
|
711 | ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); | |||
|
712 | ||||
|
713 | ||||
|
714 | /*-*************************************************************************** | |||
|
715 | * Streaming decompression - HowTo | |||
|
716 | * | |||
|
717 | * A ZSTD_DStream object is required to track streaming operations. | |||
|
718 | * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. | |||
|
719 | * ZSTD_DStream objects can be re-used multiple times. | |||
|
720 | * | |||
|
721 | * Use ZSTD_initDStream() to start a new decompression operation. | |||
|
722 | * @return : recommended first input size | |||
|
723 | * Alternatively, use advanced API to set specific properties. | |||
|
724 | * | |||
|
725 | * Use ZSTD_decompressStream() repetitively to consume your input. | |||
|
726 | * The function will update both `pos` fields. | |||
|
727 | * If `input.pos < input.size`, some input has not been consumed. | |||
|
728 | * It's up to the caller to present again remaining data. | |||
|
729 | * The function tries to flush all data decoded immediately, respecting output buffer size. | |||
|
730 | * If `output.pos < output.size`, decoder has flushed everything it could. | |||
|
731 | * But if `output.pos == output.size`, there might be some data left within internal buffers. | |||
|
732 | * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. | |||
|
733 | * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. | |||
|
734 | * @return : 0 when a frame is completely decoded and fully flushed, | |||
|
735 | * or an error code, which can be tested using ZSTD_isError(), | |||
|
736 | * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : | |||
|
737 | * the return value is a suggested next input size (just a hint for better latency) | |||
|
738 | * that will never request more than the remaining frame size. | |||
|
739 | * *******************************************************************************/ | |||
|
740 | ||||
|
741 | typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ | |||
|
742 | /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ | |||
|
743 | /*===== ZSTD_DStream management functions =====*/ | |||
|
744 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); | |||
|
745 | ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); | |||
|
746 | ||||
|
747 | /*===== Streaming decompression functions =====*/ | |||
|
748 | ||||
|
749 | /* This function is redundant with the advanced API and equivalent to: | |||
|
750 | * | |||
|
751 | * ZSTD_DCtx_reset(zds); | |||
|
752 | * ZSTD_DCtx_refDDict(zds, NULL); | |||
|
753 | */ | |||
|
754 | ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); | |||
|
755 | ||||
|
756 | ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |||
|
757 | ||||
|
758 | ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ | |||
|
759 | ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ | |||
|
760 | ||||
|
761 | ||||
|
762 | /************************** | |||
|
763 | * Simple dictionary API | |||
|
764 | ***************************/ | |||
|
765 | /*! ZSTD_compress_usingDict() : | |||
|
766 | * Compression at an explicit compression level using a Dictionary. | |||
|
767 | * A dictionary can be any arbitrary data segment (also called a prefix), | |||
|
768 | * or a buffer with specified information (see dictBuilder/zdict.h). | |||
|
769 | * Note : This function loads the dictionary, resulting in significant startup delay. | |||
|
770 | * It's intended for a dictionary used only once. | |||
|
771 | * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |||
|
772 | ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, | |||
|
773 | void* dst, size_t dstCapacity, | |||
|
774 | const void* src, size_t srcSize, | |||
|
775 | const void* dict,size_t dictSize, | |||
|
776 | int compressionLevel); | |||
|
777 | ||||
|
778 | /*! ZSTD_decompress_usingDict() : | |||
|
779 | * Decompression using a known Dictionary. | |||
|
780 | * Dictionary must be identical to the one used during compression. | |||
|
781 | * Note : This function loads the dictionary, resulting in significant startup delay. | |||
|
782 | * It's intended for a dictionary used only once. | |||
|
783 | * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |||
|
784 | ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, | |||
|
785 | void* dst, size_t dstCapacity, | |||
|
786 | const void* src, size_t srcSize, | |||
|
787 | const void* dict,size_t dictSize); | |||
|
788 | ||||
|
789 | ||||
|
790 | /*********************************** | |||
|
791 | * Bulk processing dictionary API | |||
|
792 | **********************************/ | |||
|
793 | typedef struct ZSTD_CDict_s ZSTD_CDict; | |||
|
794 | ||||
|
795 | /*! ZSTD_createCDict() : | |||
|
796 | * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. | |||
|
797 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. | |||
|
798 | * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. | |||
|
799 | * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. | |||
|
800 | * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. | |||
|
801 | * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ | |||
|
802 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, | |||
|
803 | int compressionLevel); | |||
|
804 | ||||
|
805 | /*! ZSTD_freeCDict() : | |||
|
806 | * Function frees memory allocated by ZSTD_createCDict(). */ | |||
|
807 | ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); | |||
|
808 | ||||
|
809 | /*! ZSTD_compress_usingCDict() : | |||
|
810 | * Compression using a digested Dictionary. | |||
|
811 | * Recommended when same dictionary is used multiple times. | |||
|
812 | * Note : compression level is _decided at dictionary creation time_, | |||
|
813 | * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ | |||
|
814 | ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, | |||
|
815 | void* dst, size_t dstCapacity, | |||
|
816 | const void* src, size_t srcSize, | |||
|
817 | const ZSTD_CDict* cdict); | |||
|
818 | ||||
|
819 | ||||
|
820 | typedef struct ZSTD_DDict_s ZSTD_DDict; | |||
|
821 | ||||
|
822 | /*! ZSTD_createDDict() : | |||
|
823 | * Create a digested dictionary, ready to start decompression operation without startup delay. | |||
|
824 | * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ | |||
|
825 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); | |||
|
826 | ||||
|
827 | /*! ZSTD_freeDDict() : | |||
|
828 | * Function frees memory allocated with ZSTD_createDDict() */ | |||
|
829 | ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); | |||
|
830 | ||||
|
831 | /*! ZSTD_decompress_usingDDict() : | |||
|
832 | * Decompression using a digested Dictionary. | |||
|
833 | * Recommended when same dictionary is used multiple times. */ | |||
|
834 | ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, | |||
|
835 | void* dst, size_t dstCapacity, | |||
|
836 | const void* src, size_t srcSize, | |||
|
837 | const ZSTD_DDict* ddict); | |||
|
838 | ||||
|
839 | ||||
|
840 | /******************************** | |||
|
841 | * Dictionary helper functions | |||
|
842 | *******************************/ | |||
|
843 | ||||
|
844 | /*! ZSTD_getDictID_fromDict() : | |||
|
845 | * Provides the dictID stored within dictionary. | |||
|
846 | * if @return == 0, the dictionary is not conformant with Zstandard specification. | |||
|
847 | * It can still be loaded, but as a content-only dictionary. */ | |||
|
848 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); | |||
|
849 | ||||
|
850 | /*! ZSTD_getDictID_fromDDict() : | |||
|
851 | * Provides the dictID of the dictionary loaded into `ddict`. | |||
|
852 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |||
|
853 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |||
|
854 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); | |||
|
855 | ||||
|
856 | /*! ZSTD_getDictID_fromFrame() : | |||
|
857 | * Provides the dictID required to decompress the frame stored within `src`. | |||
|
858 | * If @return == 0, the dictID could not be decoded. | |||
|
859 | * This could be for one of the following reasons : | |||
|
860 | * - The frame does not require a dictionary to be decoded (most common case). | |||
|
861 | * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. | |||
|
862 | * Note : this use case also happens when using a non-conformant dictionary. | |||
|
863 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). | |||
|
864 | * - This is not a Zstandard frame. | |||
|
865 | * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ | |||
|
866 | ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); | |||
|
867 | ||||
|
868 | ||||
|
869 | /******************************************************************************* | |||
|
870 | * Advanced dictionary and prefix API | |||
|
871 | * | |||
|
872 | * This API allows dictionaries to be used with ZSTD_compress2(), | |||
|
873 | * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and | |||
|
874 | * only reset when the context is reset with ZSTD_reset_parameters or | |||
|
875 | * ZSTD_reset_session_and_parameters. Prefixes are single-use. | |||
|
876 | ******************************************************************************/ | |||
|
877 | ||||
|
878 | ||||
684 | /*! ZSTD_CCtx_loadDictionary() : |
|
879 | /*! ZSTD_CCtx_loadDictionary() : | |
685 | * Create an internal CDict from `dict` buffer. |
|
880 | * Create an internal CDict from `dict` buffer. | |
686 | * Decompression will have to use same dictionary. |
|
881 | * Decompression will have to use same dictionary. | |
@@ -703,7 +898,9 b' ZSTDLIB_API size_t ZSTD_CCtx_loadDiction' | |||||
703 | /*! ZSTD_CCtx_refCDict() : |
|
898 | /*! ZSTD_CCtx_refCDict() : | |
704 | * Reference a prepared dictionary, to be used for all next compressed frames. |
|
899 | * Reference a prepared dictionary, to be used for all next compressed frames. | |
705 | * Note that compression parameters are enforced from within CDict, |
|
900 | * Note that compression parameters are enforced from within CDict, | |
706 |
* and super |
|
901 | * and supersede any compression parameter previously set within CCtx. | |
|
902 | * The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. | |||
|
903 | * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. | |||
707 | * The dictionary will remain valid for future compressed frames using same CCtx. |
|
904 | * The dictionary will remain valid for future compressed frames using same CCtx. | |
708 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). |
|
905 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). | |
709 | * Special : Referencing a NULL CDict means "return to no-dictionary mode". |
|
906 | * Special : Referencing a NULL CDict means "return to no-dictionary mode". | |
@@ -733,136 +930,6 b' ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS' | |||||
733 | ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, |
|
930 | ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, | |
734 | const void* prefix, size_t prefixSize); |
|
931 | const void* prefix, size_t prefixSize); | |
735 |
|
932 | |||
736 |
|
||||
737 | typedef enum { |
|
|||
738 | ZSTD_reset_session_only = 1, |
|
|||
739 | ZSTD_reset_parameters = 2, |
|
|||
740 | ZSTD_reset_session_and_parameters = 3 |
|
|||
741 | } ZSTD_ResetDirective; |
|
|||
742 |
|
||||
743 | /*! ZSTD_CCtx_reset() : |
|
|||
744 | * There are 2 different things that can be reset, independently or jointly : |
|
|||
745 | * - The session : will stop compressing current frame, and make CCtx ready to start a new one. |
|
|||
746 | * Useful after an error, or to interrupt any ongoing compression. |
|
|||
747 | * Any internal data not yet flushed is cancelled. |
|
|||
748 | * Compression parameters and dictionary remain unchanged. |
|
|||
749 | * They will be used to compress next frame. |
|
|||
750 | * Resetting session never fails. |
|
|||
751 | * - The parameters : changes all parameters back to "default". |
|
|||
752 | * This removes any reference to any dictionary too. |
|
|||
753 | * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) |
|
|||
754 | * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) |
|
|||
755 | * - Both : similar to resetting the session, followed by resetting parameters. |
|
|||
756 | */ |
|
|||
757 | ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); |
|
|||
758 |
|
||||
759 |
|
||||
760 |
|
||||
761 | /*! ZSTD_compress2() : |
|
|||
762 | * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. |
|
|||
763 | * ZSTD_compress2() always starts a new frame. |
|
|||
764 | * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. |
|
|||
765 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() |
|
|||
766 | * - The function is always blocking, returns when compression is completed. |
|
|||
767 | * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. |
|
|||
768 | * @return : compressed size written into `dst` (<= `dstCapacity`),
|
|||
769 | * or an error code if it fails (which can be tested using ZSTD_isError()). |
|
|||
770 | */ |
|
|||
771 | ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, |
|
|||
772 | void* dst, size_t dstCapacity, |
|
|||
773 | const void* src, size_t srcSize); |
|
|||
774 |
|
||||
775 | typedef enum { |
|
|||
776 | ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ |
|
|||
777 | ZSTD_e_flush=1, /* flush any data provided so far, |
|
|||
778 | * it creates (at least) one new block, that can be decoded immediately on reception; |
|
|||
779 | * frame will continue: any future data can still reference previously compressed data, improving compression. */ |
|
|||
780 | ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. |
|
|||
781 | * note that frame is only closed after compressed data is fully flushed (return value == 0). |
|
|||
782 | * After that point, any additional data starts a new frame. |
|
|||
783 | * note : each frame is independent (does not reference any content from previous frame). */ |
|
|||
784 | } ZSTD_EndDirective; |
|
|||
785 |
|
||||
786 | /*! ZSTD_compressStream2() : |
|
|||
787 | * Behaves about the same as ZSTD_compressStream, with additional control on end directive. |
|
|||
788 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() |
|
|||
789 | * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) |
|
|||
790 | * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
|
|||
791 | * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
|
|||
792 | * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. |
|
|||
793 | * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, |
|
|||
794 | * and then immediately returns, just indicating that there is some data remaining to be flushed. |
|
|||
795 | * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. |
|
|||
796 | * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. |
|
|||
797 | * - @return provides a minimum amount of data remaining to be flushed from internal buffers |
|
|||
798 | * or an error code, which can be tested using ZSTD_isError(). |
|
|||
799 | * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. |
|
|||
800 | * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. |
|
|||
801 | * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. |
|
|||
802 | * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), |
|
|||
803 | * only ZSTD_e_end or ZSTD_e_flush operations are allowed. |
|
|||
804 | * Before starting a new compression job, or changing compression parameters, |
|
|||
805 | * it is required to fully flush internal buffers. |
|
|||
806 | */ |
|
|||
807 | ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, |
|
|||
808 | ZSTD_outBuffer* output, |
|
|||
809 | ZSTD_inBuffer* input, |
|
|||
810 | ZSTD_EndDirective endOp); |
|
|||
811 |
|
||||
812 |
|
||||
813 |
|
||||
814 | /* ============================== */ |
|
|||
815 | /* Advanced decompression API */ |
|
|||
816 | /* ============================== */ |
|
|||
817 |
|
||||
818 | /* The advanced API pushes parameters one by one into an existing DCtx context. |
|
|||
819 | * Parameters are sticky, and remain valid for all following frames |
|
|||
820 | * using the same DCtx context. |
|
|||
821 | * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). |
|
|||
822 | * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). |
|
|||
823 | * Therefore, no new decompression function is necessary. |
|
|||
824 | */ |
|
|||
825 |
|
||||
826 |
|
||||
827 | typedef enum { |
|
|||
828 |
|
||||
829 | ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which |
|
|||
830 | * the streaming API will refuse to allocate memory buffer |
|
|||
831 | * in order to protect the host from unreasonable memory requirements. |
|
|||
832 | * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. |
|
|||
833 | * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) */ |
|
|||
834 |
|
||||
835 | /* note : additional experimental parameters are also available |
|
|||
836 | * within the experimental section of the API. |
|
|||
837 | * At the time of this writing, they include : |
|
|||
838 | * ZSTD_c_format |
|
|||
839 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. |
|
|||
840 | * note : never ever use experimentalParam? names directly |
|
|||
841 | */ |
|
|||
842 | ZSTD_d_experimentalParam1=1000 |
|
|||
843 |
|
||||
844 | } ZSTD_dParameter; |
|
|||
845 |
|
||||
846 |
|
||||
847 | /*! ZSTD_dParam_getBounds() : |
|
|||
848 | * All parameters must belong to an interval with lower and upper bounds, |
|
|||
849 | * otherwise they will either trigger an error or be automatically clamped. |
|
|||
850 | * @return : a structure, ZSTD_bounds, which contains |
|
|||
851 | * - an error status field, which must be tested using ZSTD_isError() |
|
|||
852 | * - both lower and upper bounds, inclusive |
|
|||
853 | */ |
|
|||
854 | ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); |
|
|||
855 |
|
||||
856 | /*! ZSTD_DCtx_setParameter() : |
|
|||
857 | * Set one decompression parameter, selected by enum ZSTD_dParameter.
|
|||
858 | * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). |
|
|||
859 | * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). |
|
|||
860 | * Setting a parameter is only possible during frame initialization (before starting decompression). |
|
|||
861 | * @return : 0, or an error code (which can be tested using ZSTD_isError()). |
|
|||
862 | */ |
|
|||
863 | ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); |
|
|||
864 |
|
||||
865 |
|
||||
866 | /*! ZSTD_DCtx_loadDictionary() : |
|
933 | /*! ZSTD_DCtx_loadDictionary() : | |
867 | * Create an internal DDict from dict buffer, |
|
934 | * Create an internal DDict from dict buffer, | |
868 | * to be used to decompress next frames. |
|
935 | * to be used to decompress next frames. | |
@@ -910,15 +977,32 b' ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZS' | |||||
910 | ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, |
|
977 | ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, | |
911 | const void* prefix, size_t prefixSize); |
|
978 | const void* prefix, size_t prefixSize); | |
912 |
|
979 | |||
913 | /*! ZSTD_DCtx_reset() : |
|
980 | /* === Memory management === */ | |
914 | * Return a DCtx to clean state. |
|
981 | ||
915 | * Session and parameters can be reset jointly or separately. |
|
982 | /*! ZSTD_sizeof_*() : | |
916 | * Parameters can only be reset when no active frame is being decompressed. |
|
983 | * These functions give the _current_ memory usage of selected object. | |
917 | * @return : 0, or an error code, which can be tested with ZSTD_isError() |
|
984 | * Note that object memory usage can evolve (increase or decrease) over time. */ | |
918 | */ |
|
985 | ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); | |
919 |
ZSTDLIB_API size_t ZSTD_DCtx |
|
986 | ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); | |
|
987 | ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); | |||
|
988 | ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); | |||
|
989 | ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); | |||
|
990 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); | |||
|
991 | ||||
|
992 | #endif /* ZSTD_H_235446 */ | |||
920 |
|
993 | |||
921 |
|
994 | |||
|
995 | /* ************************************************************************************** | |||
|
996 | * ADVANCED AND EXPERIMENTAL FUNCTIONS | |||
|
997 | **************************************************************************************** | |||
|
998 | * The definitions in the following section are considered experimental. | |||
|
999 | * They are provided for advanced scenarios. | |||
|
1000 | * They should never be used with a dynamic library, as prototypes may change in the future. | |||
|
1001 | * Use them only in association with static linking. | |||
|
1002 | * ***************************************************************************************/ | |||
|
1003 | ||||
|
1004 | #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) | |||
|
1005 | #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY | |||
922 |
|
1006 | |||
923 | /**************************************************************************************** |
|
1007 | /**************************************************************************************** | |
924 | * experimental API (static linking only) |
|
1008 | * experimental API (static linking only) | |
@@ -962,7 +1046,7 b' ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_' | |||||
962 | #define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame |
|
1046 | #define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame | |
963 | * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size, |
|
1047 | * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size, | |
964 | * to preserve host's memory from unreasonable requirements. |
|
1048 | * to preserve host's memory from unreasonable requirements. | |
965 | * This limit can be overriden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,). |
|
1049 | * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,). | |
966 | * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */ |
|
1050 | * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */ | |
967 |
|
1051 | |||
968 |
|
1052 | |||
@@ -976,6 +1060,10 b' ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_' | |||||
976 | #define ZSTD_LDM_HASHRATELOG_MIN 0 |
|
1060 | #define ZSTD_LDM_HASHRATELOG_MIN 0 | |
977 | #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) |
|
1061 | #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) | |
978 |
|
1062 | |||
|
1063 | /* Advanced parameter bounds */ | |||
|
1064 | #define ZSTD_TARGETCBLOCKSIZE_MIN 64 | |||
|
1065 | #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX | |||
|
1066 | ||||
979 | /* internal */ |
|
1067 | /* internal */ | |
980 | #define ZSTD_HASHLOG3_MAX 17 |
|
1068 | #define ZSTD_HASHLOG3_MAX 17 | |
981 |
|
1069 | |||
@@ -1064,15 +1152,24 b' typedef enum {' | |||||
1064 | ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ |
|
1152 | ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ | |
1065 | } ZSTD_dictAttachPref_e; |
|
1153 | } ZSTD_dictAttachPref_e; | |
1066 |
|
1154 | |||
|
1155 | typedef enum { | |||
|
1156 | ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. | |||
|
1157 | * Negative compression levels will be uncompressed, and positive compression | |||
|
1158 | * levels will be compressed. */ | |||
|
1159 | ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be | |||
|
1160 | * emitted if Huffman compression is not profitable. */ | |||
|
1161 | ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */ | |||
|
1162 | } ZSTD_literalCompressionMode_e; | |||
|
1163 | ||||
1067 |
|
1164 | |||
1068 | /*************************************** |
|
1165 | /*************************************** | |
1069 | * Frame size functions |
|
1166 | * Frame size functions | |
1070 | ***************************************/ |
|
1167 | ***************************************/ | |
1071 |
|
1168 | |||
1072 | /*! ZSTD_findDecompressedSize() : |
|
1169 | /*! ZSTD_findDecompressedSize() : | |
1073 | * `src` should point the start of a series of ZSTD encoded and/or skippable frames |
|
1170 | * `src` should point to the start of a series of ZSTD encoded and/or skippable frames | |
1074 | * `srcSize` must be the _exact_ size of this series |
|
1171 | * `srcSize` must be the _exact_ size of this series | |
1075 |
* (i.e. there should be a frame boundary |
|
1172 | * (i.e. there should be a frame boundary at `src + srcSize`) | |
1076 | * @return : - decompressed size of all data in all successive frames |
|
1173 | * @return : - decompressed size of all data in all successive frames | |
1077 | * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN |
|
1174 | * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN | |
1078 | * - if an error occurred: ZSTD_CONTENTSIZE_ERROR |
|
1175 | * - if an error occurred: ZSTD_CONTENTSIZE_ERROR | |
@@ -1092,6 +1189,21 b' typedef enum {' | |||||
1092 | * however it does mean that all frame data must be present and valid. */ |
|
1189 | * however it does mean that all frame data must be present and valid. */ | |
1093 | ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); |
|
1190 | ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); | |
1094 |
|
1191 | |||
|
1192 | /*! ZSTD_decompressBound() : | |||
|
1193 | * `src` should point to the start of a series of ZSTD encoded and/or skippable frames | |||
|
1194 | * `srcSize` must be the _exact_ size of this series | |||
|
1195 | * (i.e. there should be a frame boundary at `src + srcSize`) | |||
|
1196 | * @return : - upper-bound for the decompressed size of all data in all successive frames | |||
|
1197 | * - if an error occurred: ZSTD_CONTENTSIZE_ERROR | |||
|
1198 | * | |||
|
1199 | * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. | |||
|
1200 | * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. | |||
|
1201 | * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. | |||
|
1202 | * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: | |||
|
1203 | * upper-bound = # blocks * min(128 KB, Window_Size) | |||
|
1204 | */ | |||
|
1205 | ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); | |||
|
1206 | ||||
1095 | /*! ZSTD_frameHeaderSize() : |
|
1207 | /*! ZSTD_frameHeaderSize() : | |
1096 | * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. |
|
1208 | * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. | |
1097 | * @return : size of the Frame Header, |
|
1209 | * @return : size of the Frame Header, | |
@@ -1110,7 +1222,7 b' ZSTDLIB_API size_t ZSTD_frameHeaderSize(' | |||||
1110 | * It will also consider src size to be arbitrarily "large", which is worst case. |
|
1222 | * It will also consider src size to be arbitrarily "large", which is worst case. | |
1111 | * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. |
|
1223 | * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. | |
1112 | * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. |
|
1224 | * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. | |
1113 | * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. |
|
1225 | * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
1114 | * Note : CCtx size estimation is only correct for single-threaded compression. */ |
|
1226 | * Note : CCtx size estimation is only correct for single-threaded compression. */ | |
1115 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); |
|
1227 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); | |
1116 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); |
|
1228 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); | |
@@ -1122,7 +1234,7 b' ZSTDLIB_API size_t ZSTD_estimateDCtxSize' | |||||
1122 | * It will also consider src size to be arbitrarily "large", which is worst case. |
|
1234 | * It will also consider src size to be arbitrarily "large", which is worst case. | |
1123 | * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. |
|
1235 | * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. | |
1124 | * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. |
|
1236 | * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. | |
1125 | * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. |
|
1237 | * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
1126 | * Note : CStream size estimation is only correct for single-threaded compression. |
|
1238 | * Note : CStream size estimation is only correct for single-threaded compression. | |
1127 | * ZSTD_DStream memory budget depends on window Size. |
|
1239 | * ZSTD_DStream memory budget depends on window Size. | |
1128 | * This information can be passed manually, using ZSTD_estimateDStreamSize, |
|
1240 | * This information can be passed manually, using ZSTD_estimateDStreamSize, | |
@@ -1226,22 +1338,26 b' ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict' | |||||
1226 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); |
|
1338 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); | |
1227 |
|
1339 | |||
1228 | /*! ZSTD_getCParams() : |
|
1340 | /*! ZSTD_getCParams() : | |
1229 |
|
|
1341 | * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. | |
1230 |
|
|
1342 | * `estimatedSrcSize` value is optional, select 0 if not known */ | |
1231 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); |
|
1343 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); | |
1232 |
|
1344 | |||
1233 | /*! ZSTD_getParams() : |
|
1345 | /*! ZSTD_getParams() : | |
1234 |
|
|
1346 | * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. | |
1235 |
|
|
1347 | * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ | |
1236 | ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); |
|
1348 | ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); | |
1237 |
|
1349 | |||
1238 | /*! ZSTD_checkCParams() : |
|
1350 | /*! ZSTD_checkCParams() : | |
1239 |
|
|
1351 | * Ensure param values remain within authorized range. | |
|
1352 | * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ | |||
1240 | ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); |
|
1353 | ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); | |
1241 |
|
1354 | |||
1242 | /*! ZSTD_adjustCParams() : |
|
1355 | /*! ZSTD_adjustCParams() : | |
1243 | * optimize params for a given `srcSize` and `dictSize`. |
|
1356 | * optimize params for a given `srcSize` and `dictSize`. | |
1244 | * both values are optional, select `0` if unknown. */ |
|
1357 | * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. | |
|
1358 | * `dictSize` must be `0` when there is no dictionary. | |||
|
1359 | * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. | |||
|
1360 | * This function never fails (wide contract) */ | |||
1245 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); |
|
1361 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); | |
1246 |
|
1362 | |||
1247 | /*! ZSTD_compress_advanced() : |
|
1363 | /*! ZSTD_compress_advanced() : | |
@@ -1314,6 +1430,17 b' ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_a' | |||||
1314 | * See the comments on that enum for an explanation of the feature. */ |
|
1430 | * See the comments on that enum for an explanation of the feature. */ | |
1315 | #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 |
|
1431 | #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 | |
1316 |
|
1432 | |||
|
1433 | /* Controls how the literals are compressed (default is auto). | |||
|
1434 | * The value must be of type ZSTD_literalCompressionMode_e. | |||
|
1435 | * See ZSTD_literalCompressionMode_t enum definition for details. | |||
|
1436 | */ | |||
|
1437 | #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 | |||
|
1438 | ||||
|
1439 | /* Tries to fit compressed block size to be around targetCBlockSize. | |||
|
1440 | * No target when targetCBlockSize == 0. | |||
|
1441 | * There is no guarantee on compressed block size (default:0) */ | |||
|
1442 | #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 | |||
|
1443 | ||||
1317 | /*! ZSTD_CCtx_getParameter() : |
|
1444 | /*! ZSTD_CCtx_getParameter() : | |
1318 | * Get the requested compression parameter value, selected by enum ZSTD_cParameter, |
|
1445 | * Get the requested compression parameter value, selected by enum ZSTD_cParameter, | |
1319 | * and store it into int* value. |
|
1446 | * and store it into int* value. | |
@@ -1325,10 +1452,10 b' ZSTDLIB_API size_t ZSTD_CCtx_getParamete' | |||||
1325 | /*! ZSTD_CCtx_params : |
|
1452 | /*! ZSTD_CCtx_params : | |
1326 | * Quick howto : |
|
1453 | * Quick howto : | |
1327 | * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure |
|
1454 | * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure | |
1328 | * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into |
|
1455 | * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into | |
1329 | * an existing ZSTD_CCtx_params structure. |
|
1456 | * an existing ZSTD_CCtx_params structure. | |
1330 | * This is similar to |
|
1457 | * This is similar to | |
1331 | * ZSTD_CCtx_setParameter(). |
|
1458 | * ZSTD_CCtx_setParameter(). | |
1332 | * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to |
|
1459 | * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to | |
1333 | * an existing CCtx. |
|
1460 | * an existing CCtx. | |
1334 | * These parameters will be applied to |
|
1461 | * These parameters will be applied to | |
@@ -1359,20 +1486,20 b' ZSTDLIB_API size_t ZSTD_CCtxParams_init(' | |||||
1359 | */ |
|
1486 | */ | |
1360 | ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); |
|
1487 | ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); | |
1361 |
|
1488 | |||
1362 | /*! ZSTD_CCtxParam_setParameter() : |
|
1489 | /*! ZSTD_CCtxParams_setParameter() : | |
1363 | * Similar to ZSTD_CCtx_setParameter. |
|
1490 | * Similar to ZSTD_CCtx_setParameter. | |
1364 | * Set one compression parameter, selected by enum ZSTD_cParameter. |
|
1491 | * Set one compression parameter, selected by enum ZSTD_cParameter. | |
1365 | * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). |
|
1492 | * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). | |
1366 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). |
|
1493 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). | |
1367 | */ |
|
1494 | */ | |
1368 | ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); |
|
1495 | ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); | |
1369 |
|
1496 | |||
1370 | /*! ZSTD_CCtxParam_getParameter() : |
|
1497 | /*! ZSTD_CCtxParams_getParameter() : | |
1371 | * Similar to ZSTD_CCtx_getParameter. |
|
1498 | * Similar to ZSTD_CCtx_getParameter. | |
1372 | * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. |
|
1499 | * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. | |
1373 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). |
|
1500 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). | |
1374 | */ |
|
1501 | */ | |
1375 | ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); |
|
1502 | ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); | |
1376 |
|
1503 | |||
1377 | /*! ZSTD_CCtx_setParametersUsingCCtxParams() : |
|
1504 | /*! ZSTD_CCtx_setParametersUsingCCtxParams() : | |
1378 | * Apply a set of ZSTD_CCtx_params to the compression context. |
|
1505 | * Apply a set of ZSTD_CCtx_params to the compression context. | |
@@ -1415,31 +1542,6 b' ZSTDLIB_API unsigned ZSTD_isFrame(const ' | |||||
1415 | * it must remain read accessible throughout the lifetime of DDict */ |
|
1542 | * it must remain read accessible throughout the lifetime of DDict */ | |
1416 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); |
|
1543 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); | |
1417 |
|
1544 | |||
1418 |
|
||||
1419 | /*! ZSTD_getDictID_fromDict() : |
|
|||
1420 | * Provides the dictID stored within dictionary. |
|
|||
1421 | * if @return == 0, the dictionary is not conformant with Zstandard specification. |
|
|||
1422 | * It can still be loaded, but as a content-only dictionary. */ |
|
|||
1423 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); |
|
|||
1424 |
|
||||
1425 | /*! ZSTD_getDictID_fromDDict() : |
|
|||
1426 | * Provides the dictID of the dictionary loaded into `ddict`. |
|
|||
1427 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. |
|
|||
1428 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ |
|
|||
1429 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); |
|
|||
1430 |
|
||||
1431 | /*! ZSTD_getDictID_fromFrame() : |
|
|||
1432 | * Provides the dictID required to decompress the frame stored within `src`. |
|
|||
1433 | * If @return == 0, the dictID could not be decoded. |
|
|||
1434 | * This could be for one of the following reasons : |
|
|||
1435 | * - The frame does not require a dictionary to be decoded (most common case). |
|
|||
1436 | * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. |
|
|||
1437 | * Note : this use case also happens when using a non-conformant dictionary. |
|
|||
1438 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). |
|
|||
1439 | * - This is not a Zstandard frame. |
|
|||
1440 | * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ |
|
|||
1441 | ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); |
|
|||
1442 |
|
||||
1443 | /*! ZSTD_DCtx_loadDictionary_byReference() : |
|
1545 | /*! ZSTD_DCtx_loadDictionary_byReference() : | |
1444 | * Same as ZSTD_DCtx_loadDictionary(), |
|
1546 | * Same as ZSTD_DCtx_loadDictionary(), | |
1445 | * but references `dict` content instead of copying it into `dctx`. |
|
1547 | * but references `dict` content instead of copying it into `dctx`. | |
@@ -1501,14 +1603,68 b' ZSTDLIB_API size_t ZSTD_decompressStream' | |||||
1501 | ********************************************************************/ |
|
1603 | ********************************************************************/ | |
1502 |
|
1604 | |||
1503 | /*===== Advanced Streaming compression functions =====*/ |
|
1605 | /*===== Advanced Streaming compression functions =====*/ | |
1504 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */ |
|
1606 | /**! ZSTD_initCStream_srcSize() : | |
1505 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/ |
|
1607 | * This function is deprecated, and equivalent to: | |
|
1608 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
1609 | * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) | |||
|
1610 | * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); | |||
|
1611 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |||
|
1612 | * | |||
|
1613 | * pledgedSrcSize must be correct. If it is not known at init time, use | |||
|
1614 | * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, | |||
|
1615 | * "0" also disables frame content size field. It may be enabled in the future. | |||
|
1616 | */ | |||
|
1617 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); | |||
|
1618 | /**! ZSTD_initCStream_usingDict() : | |||
|
1619 | * This function is deprecated, and is equivalent to: | |||
|
1620 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
1621 | * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); | |||
|
1622 | * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); | |||
|
1623 | * | |||
|
1624 | * Creates an internal CDict (incompatible with static CCtx), except if | |||
|
1625 | * dict == NULL or dictSize < 8, in which case no dict is used. | |||
|
1626 | * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if | |||
|
1627 | * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. | |||
|
1628 | */ | |||
|
1629 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); | |||
|
1630 | /**! ZSTD_initCStream_advanced() : | |||
|
1631 | * This function is deprecated, and is approximately equivalent to: | |||
|
1632 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
1633 | * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is | |||
|
1634 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |||
|
1635 | * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); | |||
|
1636 | * | |||
|
1637 | * pledgedSrcSize must be correct. If srcSize is not known at init time, use | |||
|
1638 | * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. | |||
|
1639 | */ | |||
1506 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, |
|
1640 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, | |
1507 | ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */ |
|
1641 | ZSTD_parameters params, unsigned long long pledgedSrcSize); | |
1508 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ |
|
1642 | /**! ZSTD_initCStream_usingCDict() : | |
1509 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */ |
|
1643 | * This function is deprecated, and equivalent to: | |
|
1644 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
1645 | * ZSTD_CCtx_refCDict(zcs, cdict); | |||
|
1646 | * | |||
|
1647 | * note : cdict will just be referenced, and must outlive compression session | |||
|
1648 | */ | |||
|
1649 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); | |||
|
1650 | /**! ZSTD_initCStream_usingCDict_advanced() : | |||
|
1651 | * This function is deprecated, and is approximately equivalent to: | |||
|
1652 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
1653 | * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is | |||
|
1654 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |||
|
1655 | * ZSTD_CCtx_refCDict(zcs, cdict); | |||
|
1656 | * | |||
|
1657 | * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. | |||
|
1658 | * pledgedSrcSize must be correct. If srcSize is not known at init time, use | |||
|
1659 | * value ZSTD_CONTENTSIZE_UNKNOWN. | |||
|
1660 | */ | |||
|
1661 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); | |||
1510 |
|
1662 | |||
1511 | /*! ZSTD_resetCStream() : |
|
1663 | /*! ZSTD_resetCStream() : | |
|
1664 | * This function is deprecated, and is equivalent to: | |||
|
1665 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |||
|
1666 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |||
|
1667 | * | |||
1512 | * start a new frame, using same parameters from previous frame. |
|
1668 | * start a new frame, using same parameters from previous frame. | |
1513 | * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. |
|
1669 | * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. | |
1514 | * Note that zcs must be init at least once before using ZSTD_resetCStream(). |
|
1670 | * Note that zcs must be init at least once before using ZSTD_resetCStream(). | |
@@ -1555,9 +1711,32 b' ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_' | |||||
1555 |
|
1711 | |||
1556 |
|
1712 | |||
1557 | /*===== Advanced Streaming decompression functions =====*/ |
|
1713 | /*===== Advanced Streaming decompression functions =====*/ | |
1558 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ |
|
1714 | /** | |
1559 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ |
|
1715 | * This function is deprecated, and is equivalent to: | |
1560 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ |
|
1716 | * | |
|
1717 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |||
|
1718 | * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); | |||
|
1719 | * | |||
|
1720 | * note: no dictionary will be used if dict == NULL or dictSize < 8 | |||
|
1721 | */ | |||
|
1722 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); | |||
|
1723 | /** | |||
|
1724 | * This function is deprecated, and is equivalent to: | |||
|
1725 | * | |||
|
1726 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |||
|
1727 | * ZSTD_DCtx_refDDict(zds, ddict); | |||
|
1728 | * | |||
|
1729 | * note : ddict is referenced, it must outlive decompression session | |||
|
1730 | */ | |||
|
1731 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); | |||
|
1732 | /** | |||
|
1733 | * This function is deprecated, and is equivalent to: | |||
|
1734 | * | |||
|
1735 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |||
|
1736 | * | |||
|
1737 | * re-use decompression parameters from previous init; saves dictionary loading | |||
|
1738 | */ | |||
|
1739 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); | |||
1561 |
|
1740 | |||
1562 |
|
1741 | |||
1563 | /********************************************************************* |
|
1742 | /********************************************************************* | |
@@ -1696,7 +1875,7 b' typedef struct {' | |||||
1696 | unsigned checksumFlag; |
|
1875 | unsigned checksumFlag; | |
1697 | } ZSTD_frameHeader; |
|
1876 | } ZSTD_frameHeader; | |
1698 |
|
1877 | |||
1699 |
/* |
|
1878 | /*! ZSTD_getFrameHeader() : | |
1700 | * decode Frame Header, or requires larger `srcSize`. |
|
1879 | * decode Frame Header, or requires larger `srcSize`. | |
1701 | * @return : 0, `zfhPtr` is correctly filled, |
|
1880 | * @return : 0, `zfhPtr` is correctly filled, | |
1702 | * >0, `srcSize` is too small, value is wanted `srcSize` amount, |
|
1881 | * >0, `srcSize` is too small, value is wanted `srcSize` amount, | |
@@ -1730,7 +1909,7 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne' | |||||
1730 | /*! |
|
1909 | /*! | |
1731 | Block functions produce and decode raw zstd blocks, without frame metadata. |
|
1910 | Block functions produce and decode raw zstd blocks, without frame metadata. | |
1732 | Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). |
|
1911 | Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). | |
1733 |
|
|
1912 | But users will have to take charge of the metadata needed to regenerate data, such as compressed and content sizes. | |
1734 |
|
1913 | |||
1735 | A few rules to respect : |
|
1914 | A few rules to respect : | |
1736 | - Compressing and decompressing require a context structure |
|
1915 | - Compressing and decompressing require a context structure | |
@@ -1741,12 +1920,14 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne' | |||||
1741 | + copyCCtx() and copyDCtx() can be used too |
|
1920 | + copyCCtx() and copyDCtx() can be used too | |
1742 | - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB |
|
1921 | - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB | |
1743 | + If input is larger than a block size, it's necessary to split input data into multiple blocks |
|
1922 | + If input is larger than a block size, it's necessary to split input data into multiple blocks | |
1744 |
+ For inputs larger than a single block, |
|
1923 | + For inputs larger than a single block, consider using regular ZSTD_compress() instead. | |
1745 | Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. |
|
1924 | Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. | |
1746 |
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be |
|
1925 | - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! | |
1747 | In which case, nothing is produced into `dst` ! |
|
1926 | ===> In which case, nothing is produced into `dst` ! | |
1748 | + User must test for such outcome and deal directly with uncompressed data |
|
1927 | + User __must__ test for such outcome and deal directly with uncompressed data | |
1749 | + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! |
|
1928 | + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. | |
|
1929 | Doing so would mess up the statistics history, leading to potential data corruption. | |||
|
1930 | + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! | |||
1750 | + In case of multiple successive blocks, should some of them be uncompressed, |
|
1931 | + In case of multiple successive blocks, should some of them be uncompressed, | |
1751 | decoder must be informed of their existence in order to follow proper history. |
|
1932 | decoder must be informed of their existence in order to follow proper history. | |
1752 | Use ZSTD_insertBlock() for such a case. |
|
1933 | Use ZSTD_insertBlock() for such a case. |
@@ -169,7 +169,7 b' checking zstd options' | |||||
169 | > done |
|
169 | > done | |
170 |
|
170 | |||
171 | $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/* |
|
171 | $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/* | |
172 |
zstd-level-1/.hg/store/data/a.i: size=4 |
|
172 | zstd-level-1/.hg/store/data/a.i: size=4114 | |
173 | zstd-level-22/.hg/store/data/a.i: size=4091 |
|
173 | zstd-level-22/.hg/store/data/a.i: size=4091 | |
174 | zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re) |
|
174 | zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re) | |
175 |
|
175 |
General Comments 0
You need to be logged in to leave comments.
Login now