zstandard: vendor python-zstandard 0.11...
Gregory Szorc
r42237:675775c3 default
@@ -0,0 +1,240 b''
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* zstd_ddict.c :
12 * concentrates all logic that needs to know the internals of the ZSTD_DDict object */
13
14 /*-*******************************************************
15 * Dependencies
16 *********************************************************/
17 #include <string.h> /* memcpy, memmove, memset */
18 #include "cpu.h" /* bmi2 */
19 #include "mem.h" /* low level memory routines */
20 #define FSE_STATIC_LINKING_ONLY
21 #include "fse.h"
22 #define HUF_STATIC_LINKING_ONLY
23 #include "huf.h"
24 #include "zstd_decompress_internal.h"
25 #include "zstd_ddict.h"
26
27 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28 # include "zstd_legacy.h"
29 #endif
30
31
32
33 /*-*******************************************************
34 * Types
35 *********************************************************/
36 struct ZSTD_DDict_s {
37 void* dictBuffer;
38 const void* dictContent;
39 size_t dictSize;
40 ZSTD_entropyDTables_t entropy;
41 U32 dictID;
42 U32 entropyPresent;
43 ZSTD_customMem cMem;
44 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
45
46 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47 {
48 assert(ddict != NULL);
49 return ddict->dictContent;
50 }
51
52 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53 {
54 assert(ddict != NULL);
55 return ddict->dictSize;
56 }
57
58 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
59 {
60 DEBUGLOG(4, "ZSTD_copyDDictParameters");
61 assert(dctx != NULL);
62 assert(ddict != NULL);
63 dctx->dictID = ddict->dictID;
64 dctx->prefixStart = ddict->dictContent;
65 dctx->virtualStart = ddict->dictContent;
66 dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
67 dctx->previousDstEnd = dctx->dictEnd;
68 if (ddict->entropyPresent) {
69 dctx->litEntropy = 1;
70 dctx->fseEntropy = 1;
71 dctx->LLTptr = ddict->entropy.LLTable;
72 dctx->MLTptr = ddict->entropy.MLTable;
73 dctx->OFTptr = ddict->entropy.OFTable;
74 dctx->HUFptr = ddict->entropy.hufTable;
75 dctx->entropy.rep[0] = ddict->entropy.rep[0];
76 dctx->entropy.rep[1] = ddict->entropy.rep[1];
77 dctx->entropy.rep[2] = ddict->entropy.rep[2];
78 } else {
79 dctx->litEntropy = 0;
80 dctx->fseEntropy = 0;
81 }
82 }
83
84
85 static size_t
86 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
87 ZSTD_dictContentType_e dictContentType)
88 {
89 ddict->dictID = 0;
90 ddict->entropyPresent = 0;
91 if (dictContentType == ZSTD_dct_rawContent) return 0;
92
93 if (ddict->dictSize < 8) {
94 if (dictContentType == ZSTD_dct_fullDict)
95 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
96 return 0; /* pure content mode */
97 }
98 { U32 const magic = MEM_readLE32(ddict->dictContent);
99 if (magic != ZSTD_MAGIC_DICTIONARY) {
100 if (dictContentType == ZSTD_dct_fullDict)
101 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
102 return 0; /* pure content mode */
103 }
104 }
105 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
106
107 /* load entropy tables */
108 CHECK_E( ZSTD_loadDEntropy(&ddict->entropy,
109 ddict->dictContent, ddict->dictSize),
110 dictionary_corrupted );
111 ddict->entropyPresent = 1;
112 return 0;
113 }
114
115
116 static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
117 const void* dict, size_t dictSize,
118 ZSTD_dictLoadMethod_e dictLoadMethod,
119 ZSTD_dictContentType_e dictContentType)
120 {
121 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
122 ddict->dictBuffer = NULL;
123 ddict->dictContent = dict;
124 if (!dict) dictSize = 0;
125 } else {
126 void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
127 ddict->dictBuffer = internalBuffer;
128 ddict->dictContent = internalBuffer;
129 if (!internalBuffer) return ERROR(memory_allocation);
130 memcpy(internalBuffer, dict, dictSize);
131 }
132 ddict->dictSize = dictSize;
133 ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
134
135 /* parse dictionary content */
136 CHECK_F( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
137
138 return 0;
139 }
140
141 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
142 ZSTD_dictLoadMethod_e dictLoadMethod,
143 ZSTD_dictContentType_e dictContentType,
144 ZSTD_customMem customMem)
145 {
146 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
147
148 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
149 if (ddict == NULL) return NULL;
150 ddict->cMem = customMem;
151 { size_t const initResult = ZSTD_initDDict_internal(ddict,
152 dict, dictSize,
153 dictLoadMethod, dictContentType);
154 if (ZSTD_isError(initResult)) {
155 ZSTD_freeDDict(ddict);
156 return NULL;
157 } }
158 return ddict;
159 }
160 }
161
162 /*! ZSTD_createDDict() :
163 * Create a digested dictionary, to start decompression without startup delay.
164 * `dict` content is copied inside DDict.
165 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
166 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
167 {
168 ZSTD_customMem const allocator = { NULL, NULL, NULL };
169 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
170 }
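
/* Usage sketch (illustrative only) : typical DDict lifecycle.
 * Since the dictionary content is copied, `dictBuffer` can be released
 * immediately after creation :
 *
 *   ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);
 *   ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
 *   size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity,
 *                                                   src, srcSize, ddict);
 *   ZSTD_freeDCtx(dctx);
 *   ZSTD_freeDDict(ddict);
 */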
171
172 /*! ZSTD_createDDict_byReference() :
173 * Create a digested dictionary, to start decompression without startup delay.
174 * Dictionary content is simply referenced, and will be accessed during decompression.
175 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
176 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
177 {
178 ZSTD_customMem const allocator = { NULL, NULL, NULL };
179 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
180 }
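
/* Usage sketch (illustrative only) : by-reference variant.
 * dictBuffer is not copied, so it must outlive the DDict :
 *
 *   ZSTD_DDict* const ddict = ZSTD_createDDict_byReference(dictBuffer, dictSize);
 *   ... use ddict for any number of frames ...
 *   ZSTD_freeDDict(ddict);    (free the DDict first)
 *   free(dictBuffer);         (only then release the referenced buffer)
 */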
181
182
183 const ZSTD_DDict* ZSTD_initStaticDDict(
184 void* sBuffer, size_t sBufferSize,
185 const void* dict, size_t dictSize,
186 ZSTD_dictLoadMethod_e dictLoadMethod,
187 ZSTD_dictContentType_e dictContentType)
188 {
189 size_t const neededSpace = sizeof(ZSTD_DDict)
190 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
191 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
192 assert(sBuffer != NULL);
193 assert(dict != NULL);
194 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
195 if (sBufferSize < neededSpace) return NULL;
196 if (dictLoadMethod == ZSTD_dlm_byCopy) {
197 memcpy(ddict+1, dict, dictSize); /* local copy */
198 dict = ddict+1;
199 }
200 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
201 dict, dictSize,
202 ZSTD_dlm_byRef, dictContentType) ))
203 return NULL;
204 return ddict;
205 }
206
207
208 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
209 {
210 if (ddict==NULL) return 0; /* support free on NULL */
211 { ZSTD_customMem const cMem = ddict->cMem;
212 ZSTD_free(ddict->dictBuffer, cMem);
213 ZSTD_free(ddict, cMem);
214 return 0;
215 }
216 }
217
218 /*! ZSTD_estimateDDictSize() :
219 * Estimate amount of memory that will be needed to create a dictionary for decompression.
220 * Note : dictionaries created by reference, using ZSTD_dlm_byRef, are smaller */
221 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
222 {
223 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
224 }
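
/* Usage sketch (illustrative only) : sizing a static DDict with
 * ZSTD_estimateDDictSize() before ZSTD_initStaticDDict().
 * The workspace must be 8-bytes aligned :
 *
 *   size_t const wkspSize = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
 *   void* const wksp = malloc(wkspSize);   (malloc alignment is sufficient)
 *   const ZSTD_DDict* const ddict = ZSTD_initStaticDDict(wksp, wkspSize,
 *                                           dict, dictSize,
 *                                           ZSTD_dlm_byCopy, ZSTD_dct_auto);
 *   (ddict == NULL if wksp is misaligned or too small)
 */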
225
226 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
227 {
228 if (ddict==NULL) return 0; /* support sizeof on NULL */
229 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
230 }
231
232 /*! ZSTD_getDictID_fromDDict() :
233 * Provides the dictID of the dictionary loaded into `ddict`.
234 * If @return == 0, the dictionary is not conformant to the Zstandard specification, or is empty.
235 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
236 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
237 {
238 if (ddict==NULL) return 0;
239 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
240 }
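
/* Usage sketch (illustrative only) : checking that a frame matches this
 * dictionary by comparing dictionary IDs (0 means none / unknown) :
 *
 *   unsigned const frameID = ZSTD_getDictID_fromFrame(src, srcSize);
 *   unsigned const ddictID = ZSTD_getDictID_fromDDict(ddict);
 *   if (frameID && ddictID && (frameID != ddictID))
 *       (this DDict is not the one the frame was compressed with)
 */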
@@ -0,0 +1,44 b''
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12 #ifndef ZSTD_DDICT_H
13 #define ZSTD_DDICT_H
14
15 /*-*******************************************************
16 * Dependencies
17 *********************************************************/
18 #include <stddef.h> /* size_t */
19 #include "zstd.h" /* ZSTD_DDict, and several public functions */
20
21
22 /*-*******************************************************
23 * Interface
24 *********************************************************/
25
26 /* note: several prototypes are already published in `zstd.h` :
27 * ZSTD_createDDict()
28 * ZSTD_createDDict_byReference()
29 * ZSTD_createDDict_advanced()
30 * ZSTD_freeDDict()
31 * ZSTD_initStaticDDict()
32 * ZSTD_sizeof_DDict()
33 * ZSTD_estimateDDictSize()
34 * ZSTD_getDictID_fromDict()
35 */
36
37 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
38 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
39
40 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
41
42
43
44 #endif /* ZSTD_DDICT_H */
@@ -0,0 +1,1307 b''
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* zstd_decompress_block :
12 * this module takes care of decompressing _compressed_ blocks */
13
14 /*-*******************************************************
15 * Dependencies
16 *********************************************************/
17 #include <string.h> /* memcpy, memmove, memset */
18 #include "compiler.h" /* prefetch */
19 #include "cpu.h" /* bmi2 */
20 #include "mem.h" /* low level memory routines */
21 #define FSE_STATIC_LINKING_ONLY
22 #include "fse.h"
23 #define HUF_STATIC_LINKING_ONLY
24 #include "huf.h"
25 #include "zstd_internal.h"
26 #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27 #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28 #include "zstd_decompress_block.h"
29
30 /*_*******************************************************
31 * Macros
32 **********************************************************/
33
34 /* These two optional macros each force the use of one of the two
35 * ZSTD_decompressSequences implementations. They cannot both be defined
36 * at the same time.
37 */
38 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39 defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41 #endif
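
/* Build-time selection sketch (illustrative only) :
 *   cc -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -c zstd_decompress_block.c
 * pins the short (non-prefetching) variant, e.g. for benchmarking or
 * binary-size reduction ; the _LONG macro pins the prefetching variant. */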
42
43
44 /*_*******************************************************
45 * Memory operations
46 **********************************************************/
47 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
48
49
50 /*-*************************************************************
51 * Block decoding
52 ***************************************************************/
53
54 /*! ZSTD_getcBlockSize() :
55 * Provides the size of the compressed block from the block header `src` */
56 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57 blockProperties_t* bpPtr)
58 {
59 if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
60 { U32 const cBlockHeader = MEM_readLE24(src);
61 U32 const cSize = cBlockHeader >> 3;
62 bpPtr->lastBlock = cBlockHeader & 1;
63 bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
64 bpPtr->origSize = cSize; /* only useful for RLE */
65 if (bpPtr->blockType == bt_rle) return 1;
66 if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
67 return cSize;
68 }
69 }
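
/* Worked example (illustrative only) : a block header of value 0x000021
 * gives lastBlock = 0x21 & 1 = 1, blockType = (0x21>>1) & 3 = 0 (bt_raw),
 * and cSize = 0x21 >> 3 = 4 : a final raw block carrying 4 bytes. */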
70
71
72 /* Hidden declaration for fullbench */
73 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
74 const void* src, size_t srcSize);
75 /*! ZSTD_decodeLiteralsBlock() :
76 * @return : nb of bytes read from src (< srcSize )
77 * note : symbol not declared but exposed for fullbench */
78 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
79 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
80 {
81 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
82
83 { const BYTE* const istart = (const BYTE*) src;
84 symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
85
86 switch(litEncType)
87 {
88 case set_repeat:
89 if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
90 /* fall-through */
91
92 case set_compressed:
93 if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
94 { size_t lhSize, litSize, litCSize;
95 U32 singleStream=0;
96 U32 const lhlCode = (istart[0] >> 2) & 3;
97 U32 const lhc = MEM_readLE32(istart);
98 size_t hufSuccess;
99 switch(lhlCode)
100 {
101 case 0: case 1: default: /* note : default is impossible, since lhlCode is in [0..3] */
102 /* 2 - 2 - 10 - 10 */
103 singleStream = !lhlCode;
104 lhSize = 3;
105 litSize = (lhc >> 4) & 0x3FF;
106 litCSize = (lhc >> 14) & 0x3FF;
107 break;
108 case 2:
109 /* 2 - 2 - 14 - 14 */
110 lhSize = 4;
111 litSize = (lhc >> 4) & 0x3FFF;
112 litCSize = lhc >> 18;
113 break;
114 case 3:
115 /* 2 - 2 - 18 - 18 */
116 lhSize = 5;
117 litSize = (lhc >> 4) & 0x3FFFF;
118 litCSize = (lhc >> 22) + (istart[4] << 10);
119 break;
120 }
121 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
122 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
123
124 /* prefetch huffman table if cold */
125 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
126 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
127 }
128
129 if (litEncType==set_repeat) {
130 if (singleStream) {
131 hufSuccess = HUF_decompress1X_usingDTable_bmi2(
132 dctx->litBuffer, litSize, istart+lhSize, litCSize,
133 dctx->HUFptr, dctx->bmi2);
134 } else {
135 hufSuccess = HUF_decompress4X_usingDTable_bmi2(
136 dctx->litBuffer, litSize, istart+lhSize, litCSize,
137 dctx->HUFptr, dctx->bmi2);
138 }
139 } else {
140 if (singleStream) {
141 #if defined(HUF_FORCE_DECOMPRESS_X2)
142 hufSuccess = HUF_decompress1X_DCtx_wksp(
143 dctx->entropy.hufTable, dctx->litBuffer, litSize,
144 istart+lhSize, litCSize, dctx->workspace,
145 sizeof(dctx->workspace));
146 #else
147 hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
148 dctx->entropy.hufTable, dctx->litBuffer, litSize,
149 istart+lhSize, litCSize, dctx->workspace,
150 sizeof(dctx->workspace), dctx->bmi2);
151 #endif
152 } else {
153 hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
154 dctx->entropy.hufTable, dctx->litBuffer, litSize,
155 istart+lhSize, litCSize, dctx->workspace,
156 sizeof(dctx->workspace), dctx->bmi2);
157 }
158 }
159
160 if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
161
162 dctx->litPtr = dctx->litBuffer;
163 dctx->litSize = litSize;
164 dctx->litEntropy = 1;
165 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
166 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
167 return litCSize + lhSize;
168 }
169
170 case set_basic:
171 { size_t litSize, lhSize;
172 U32 const lhlCode = ((istart[0]) >> 2) & 3;
173 switch(lhlCode)
174 {
175 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
176 lhSize = 1;
177 litSize = istart[0] >> 3;
178 break;
179 case 1:
180 lhSize = 2;
181 litSize = MEM_readLE16(istart) >> 4;
182 break;
183 case 3:
184 lhSize = 3;
185 litSize = MEM_readLE24(istart) >> 4;
186 break;
187 }
188
189 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
190 if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
191 memcpy(dctx->litBuffer, istart+lhSize, litSize);
192 dctx->litPtr = dctx->litBuffer;
193 dctx->litSize = litSize;
194 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
195 return lhSize+litSize;
196 }
197 /* direct reference into compressed stream */
198 dctx->litPtr = istart+lhSize;
199 dctx->litSize = litSize;
200 return lhSize+litSize;
201 }
202
203 case set_rle:
204 { U32 const lhlCode = ((istart[0]) >> 2) & 3;
205 size_t litSize, lhSize;
206 switch(lhlCode)
207 {
208 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
209 lhSize = 1;
210 litSize = istart[0] >> 3;
211 break;
212 case 1:
213 lhSize = 2;
214 litSize = MEM_readLE16(istart) >> 4;
215 break;
216 case 3:
217 lhSize = 3;
218 litSize = MEM_readLE24(istart) >> 4;
219 if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
220 break;
221 }
222 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
223 memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
224 dctx->litPtr = dctx->litBuffer;
225 dctx->litSize = litSize;
226 return lhSize+1;
227 }
228 default:
229 return ERROR(corruption_detected); /* impossible */
230 }
231 }
232 }
233
234 /* Default FSE distribution tables.
235 * These are pre-calculated FSE decoding tables using default distributions as defined in the specification :
236 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
237 * They were generated programmatically with the following method :
238 * - start from the default distributions, present in /lib/common/zstd_internal.h
239 * - generate the tables normally, using ZSTD_buildFSETable()
240 * - print out the content of the tables
241 * - prettify the output (reproduced below), and test with fuzzer to ensure it's correct */
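
/* Illustrative sketch of the method above (disabled ; assumes the default
 * norm/base/bits arrays from zstd_internal.h are in scope, plus <stdio.h>) : */
#if 0
static void ZSTD_printLLDefaultDTable(void)
{
    static ZSTD_seqSymbol dt[(1<<LL_DEFAULTNORMLOG)+1];
    U32 u;
    ZSTD_buildFSETable(dt, LL_defaultNorm, MaxLL,
                       LL_base, LL_bits, LL_DEFAULTNORMLOG);
    for (u = 0; u < (1u<<LL_DEFAULTNORMLOG); u++)  /* dt[0] is the header cell */
        printf("{ %3u, %2u, %2u, %6u },\n",
               (U32)dt[u+1].nextState, (U32)dt[u+1].nbAdditionalBits,
               (U32)dt[u+1].nbBits, (U32)dt[u+1].baseValue);
}
#endif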
242
243 /* Default FSE distribution table for Literal Lengths */
244 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
245 { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
246 /* nextState, nbAddBits, nbBits, baseVal */
247 { 0, 0, 4, 0}, { 16, 0, 4, 0},
248 { 32, 0, 5, 1}, { 0, 0, 5, 3},
249 { 0, 0, 5, 4}, { 0, 0, 5, 6},
250 { 0, 0, 5, 7}, { 0, 0, 5, 9},
251 { 0, 0, 5, 10}, { 0, 0, 5, 12},
252 { 0, 0, 6, 14}, { 0, 1, 5, 16},
253 { 0, 1, 5, 20}, { 0, 1, 5, 22},
254 { 0, 2, 5, 28}, { 0, 3, 5, 32},
255 { 0, 4, 5, 48}, { 32, 6, 5, 64},
256 { 0, 7, 5, 128}, { 0, 8, 6, 256},
257 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
258 { 32, 0, 4, 0}, { 0, 0, 4, 1},
259 { 0, 0, 5, 2}, { 32, 0, 5, 4},
260 { 0, 0, 5, 5}, { 32, 0, 5, 7},
261 { 0, 0, 5, 8}, { 32, 0, 5, 10},
262 { 0, 0, 5, 11}, { 0, 0, 6, 13},
263 { 32, 1, 5, 16}, { 0, 1, 5, 18},
264 { 32, 1, 5, 22}, { 0, 2, 5, 24},
265 { 32, 3, 5, 32}, { 0, 3, 5, 40},
266 { 0, 6, 4, 64}, { 16, 6, 4, 64},
267 { 32, 7, 5, 128}, { 0, 9, 6, 512},
268 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
269 { 16, 0, 4, 1}, { 32, 0, 5, 2},
270 { 32, 0, 5, 3}, { 32, 0, 5, 5},
271 { 32, 0, 5, 6}, { 32, 0, 5, 8},
272 { 32, 0, 5, 9}, { 32, 0, 5, 11},
273 { 32, 0, 5, 12}, { 0, 0, 6, 15},
274 { 32, 1, 5, 18}, { 32, 1, 5, 20},
275 { 32, 2, 5, 24}, { 32, 2, 5, 28},
276 { 32, 3, 5, 40}, { 32, 4, 5, 48},
277 { 0, 16, 6,65536}, { 0, 15, 6,32768},
278 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
279 }; /* LL_defaultDTable */
280
281 /* Default FSE distribution table for Offset Codes */
282 static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
283 { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
284 /* nextState, nbAddBits, nbBits, baseVal */
285 { 0, 0, 5, 0}, { 0, 6, 4, 61},
286 { 0, 9, 5, 509}, { 0, 15, 5,32765},
287 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
288 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
289 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
290 { 0, 5, 5, 29}, { 0, 8, 4, 253},
291 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
292 { 0, 2, 5, 1}, { 16, 7, 4, 125},
293 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
294 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
295 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
296 { 0, 19, 5,524285}, { 0, 1, 5, 1},
297 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
298 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
299 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
300 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
301 }; /* OF_defaultDTable */
302
303
304 /* Default FSE distribution table for Match Lengths */
305 static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
306 { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
307 /* nextState, nbAddBits, nbBits, baseVal */
308 { 0, 0, 6, 3}, { 0, 0, 4, 4},
309 { 32, 0, 5, 5}, { 0, 0, 5, 6},
310 { 0, 0, 5, 8}, { 0, 0, 5, 9},
311 { 0, 0, 5, 11}, { 0, 0, 6, 13},
312 { 0, 0, 6, 16}, { 0, 0, 6, 19},
313 { 0, 0, 6, 22}, { 0, 0, 6, 25},
314 { 0, 0, 6, 28}, { 0, 0, 6, 31},
315 { 0, 0, 6, 34}, { 0, 1, 6, 37},
316 { 0, 1, 6, 41}, { 0, 2, 6, 47},
317 { 0, 3, 6, 59}, { 0, 4, 6, 83},
318 { 0, 7, 6, 131}, { 0, 9, 6, 515},
319 { 16, 0, 4, 4}, { 0, 0, 4, 5},
320 { 32, 0, 5, 6}, { 0, 0, 5, 7},
321 { 32, 0, 5, 9}, { 0, 0, 5, 10},
322 { 0, 0, 6, 12}, { 0, 0, 6, 15},
323 { 0, 0, 6, 18}, { 0, 0, 6, 21},
324 { 0, 0, 6, 24}, { 0, 0, 6, 27},
325 { 0, 0, 6, 30}, { 0, 0, 6, 33},
326 { 0, 1, 6, 35}, { 0, 1, 6, 39},
327 { 0, 2, 6, 43}, { 0, 3, 6, 51},
328 { 0, 4, 6, 67}, { 0, 5, 6, 99},
329 { 0, 8, 6, 259}, { 32, 0, 4, 4},
330 { 48, 0, 4, 4}, { 16, 0, 4, 5},
331 { 32, 0, 5, 7}, { 32, 0, 5, 8},
332 { 32, 0, 5, 10}, { 32, 0, 5, 11},
333 { 0, 0, 6, 14}, { 0, 0, 6, 17},
334 { 0, 0, 6, 20}, { 0, 0, 6, 23},
335 { 0, 0, 6, 26}, { 0, 0, 6, 29},
336 { 0, 0, 6, 32}, { 0, 16, 6,65539},
337 { 0, 15, 6,32771}, { 0, 14, 6,16387},
338 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
339 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
340 }; /* ML_defaultDTable */
341
342
343 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
344 {
345 void* ptr = dt;
346 ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
347 ZSTD_seqSymbol* const cell = dt + 1;
348
349 DTableH->tableLog = 0;
350 DTableH->fastMode = 0;
351
352 cell->nbBits = 0;
353 cell->nextState = 0;
354 assert(nbAddBits < 255);
355 cell->nbAdditionalBits = (BYTE)nbAddBits;
356 cell->baseValue = baseValue;
357 }
358
359
360 /* ZSTD_buildFSETable() :
361 * generate FSE decoding table for one symbol (ll, ml or off)
362 * cannot fail if input is valid =>
363 * all inputs are presumed validated at this stage */
364 void
365 ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
366 const short* normalizedCounter, unsigned maxSymbolValue,
367 const U32* baseValue, const U32* nbAdditionalBits,
368 unsigned tableLog)
369 {
370 ZSTD_seqSymbol* const tableDecode = dt+1;
371 U16 symbolNext[MaxSeq+1];
372
373 U32 const maxSV1 = maxSymbolValue + 1;
374 U32 const tableSize = 1 << tableLog;
375 U32 highThreshold = tableSize-1;
376
377 /* Sanity Checks */
378 assert(maxSymbolValue <= MaxSeq);
379 assert(tableLog <= MaxFSELog);
380
381 /* Init, lay down lowprob symbols */
382 { ZSTD_seqSymbol_header DTableH;
383 DTableH.tableLog = tableLog;
384 DTableH.fastMode = 1;
385 { S16 const largeLimit= (S16)(1 << (tableLog-1));
386 U32 s;
387 for (s=0; s<maxSV1; s++) {
388 if (normalizedCounter[s]==-1) {
389 tableDecode[highThreshold--].baseValue = s;
390 symbolNext[s] = 1;
391 } else {
392 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
393 symbolNext[s] = normalizedCounter[s];
394 } } }
395 memcpy(dt, &DTableH, sizeof(DTableH));
396 }
397
398 /* Spread symbols */
399 { U32 const tableMask = tableSize-1;
400 U32 const step = FSE_TABLESTEP(tableSize);
401 U32 s, position = 0;
402 for (s=0; s<maxSV1; s++) {
403 int i;
404 for (i=0; i<normalizedCounter[s]; i++) {
405 tableDecode[position].baseValue = s;
406 position = (position + step) & tableMask;
407 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
408 } }
409 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
410 }
411
412 /* Build Decoding table */
413 { U32 u;
414 for (u=0; u<tableSize; u++) {
415 U32 const symbol = tableDecode[u].baseValue;
416 U32 const nextState = symbolNext[symbol]++;
417 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
418 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
419 assert(nbAdditionalBits[symbol] < 255);
420 tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
421 tableDecode[u].baseValue = baseValue[symbol];
422 } }
423 }
424
425
426 /*! ZSTD_buildSeqTable() :
427 * @return : nb bytes read from src,
428 * or an error code if it fails */
429 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
430 symbolEncodingType_e type, unsigned max, U32 maxLog,
431 const void* src, size_t srcSize,
432 const U32* baseValue, const U32* nbAdditionalBits,
433 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
434 int ddictIsCold, int nbSeq)
435 {
436 switch(type)
437 {
438 case set_rle :
439 if (!srcSize) return ERROR(srcSize_wrong);
440 if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
441 { U32 const symbol = *(const BYTE*)src;
442 U32 const baseline = baseValue[symbol];
443 U32 const nbBits = nbAdditionalBits[symbol];
444 ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
445 }
446 *DTablePtr = DTableSpace;
447 return 1;
448 case set_basic :
449 *DTablePtr = defaultTable;
450 return 0;
451 case set_repeat:
452 if (!flagRepeatTable) return ERROR(corruption_detected);
453 /* prefetch FSE table if used */
454 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
455 const void* const pStart = *DTablePtr;
456 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
457 PREFETCH_AREA(pStart, pSize);
458 }
459 return 0;
460 case set_compressed :
461 { unsigned tableLog;
462 S16 norm[MaxSeq+1];
463 size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
464 if (FSE_isError(headerSize)) return ERROR(corruption_detected);
465 if (tableLog > maxLog) return ERROR(corruption_detected);
466 ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
467 *DTablePtr = DTableSpace;
468 return headerSize;
469 }
470 default : /* impossible */
471 assert(0);
472 return ERROR(GENERIC);
473 }
474 }
475
476 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
477 const void* src, size_t srcSize)
478 {
479 const BYTE* const istart = (const BYTE* const)src;
480 const BYTE* const iend = istart + srcSize;
481 const BYTE* ip = istart;
482 int nbSeq;
483 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
484
485 /* check */
486 if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
487
488 /* SeqHead */
489 nbSeq = *ip++;
490 if (!nbSeq) {
491 *nbSeqPtr=0;
492 if (srcSize != 1) return ERROR(srcSize_wrong);
493 return 1;
494 }
495 if (nbSeq > 0x7F) {
496 if (nbSeq == 0xFF) {
497 if (ip+2 > iend) return ERROR(srcSize_wrong);
498 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
499 } else {
500 if (ip >= iend) return ERROR(srcSize_wrong);
501 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
502 }
503 }
504 *nbSeqPtr = nbSeq;
505
506 /* FSE table descriptors */
507 if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
508 { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
509 symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
510 symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
511 ip++;
512
513 /* Build DTables */
514 { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
515 LLtype, MaxLL, LLFSELog,
516 ip, iend-ip,
517 LL_base, LL_bits,
518 LL_defaultDTable, dctx->fseEntropy,
519 dctx->ddictIsCold, nbSeq);
520 if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
521 ip += llhSize;
522 }
523
524 { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
525 OFtype, MaxOff, OffFSELog,
526 ip, iend-ip,
527 OF_base, OF_bits,
528 OF_defaultDTable, dctx->fseEntropy,
529 dctx->ddictIsCold, nbSeq);
530 if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
531 ip += ofhSize;
532 }
533
534 { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
535 MLtype, MaxML, MLFSELog,
536 ip, iend-ip,
537 ML_base, ML_bits,
538 ML_defaultDTable, dctx->fseEntropy,
539 dctx->ddictIsCold, nbSeq);
540 if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
541 ip += mlhSize;
542 }
543 }
544
545 return ip-istart;
546 }
547
548
549 typedef struct {
550 size_t litLength;
551 size_t matchLength;
552 size_t offset;
553 const BYTE* match;
554 } seq_t;
555
556 typedef struct {
557 size_t state;
558 const ZSTD_seqSymbol* table;
559 } ZSTD_fseState;
560
561 typedef struct {
562 BIT_DStream_t DStream;
563 ZSTD_fseState stateLL;
564 ZSTD_fseState stateOffb;
565 ZSTD_fseState stateML;
566 size_t prevOffset[ZSTD_REP_NUM];
567 const BYTE* prefixStart;
568 const BYTE* dictEnd;
569 size_t pos;
570 } seqState_t;
571
572
573 /* ZSTD_execSequenceLast7():
574 * exceptional case : decompress a match starting within the last 7 bytes of the output buffer.
575 * requires more careful checks, to ensure there is no overflow.
576 * performance does not matter though.
577 * note : this case should never be generated "naturally" by the reference encoder,
578 * since in most cases it needs at least 8 bytes to look for a match,
579 * but it's allowed by the specification. */
580 FORCE_NOINLINE
581 size_t ZSTD_execSequenceLast7(BYTE* op,
582 BYTE* const oend, seq_t sequence,
583 const BYTE** litPtr, const BYTE* const litLimit,
584 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
585 {
586 BYTE* const oLitEnd = op + sequence.litLength;
587 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
588 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
589 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
590 const BYTE* match = oLitEnd - sequence.offset;
591
592 /* check */
593 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must fit within dstBuffer */
594 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* try to read beyond literal buffer */
595
596 /* copy literals */
597 while (op < oLitEnd) *op++ = *(*litPtr)++;
598
599 /* copy Match */
600 if (sequence.offset > (size_t)(oLitEnd - base)) {
601 /* offset beyond prefix */
602 if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
603 match = dictEnd - (base-match);
604 if (match + sequence.matchLength <= dictEnd) {
605 memmove(oLitEnd, match, sequence.matchLength);
606 return sequenceLength;
607 }
608 /* span extDict & currentPrefixSegment */
609 { size_t const length1 = dictEnd - match;
610 memmove(oLitEnd, match, length1);
611 op = oLitEnd + length1;
612 sequence.matchLength -= length1;
613 match = base;
614 } }
615 while (op < oMatchEnd) *op++ = *match++;
616 return sequenceLength;
617 }
618
619
620 HINT_INLINE
621 size_t ZSTD_execSequence(BYTE* op,
622 BYTE* const oend, seq_t sequence,
623 const BYTE** litPtr, const BYTE* const litLimit,
624 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
625 {
626 BYTE* const oLitEnd = op + sequence.litLength;
627 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
628 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
629 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
630 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
631 const BYTE* match = oLitEnd - sequence.offset;
632
633 /* check */
634 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
635 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
636 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
637
638 /* copy Literals */
639 ZSTD_copy8(op, *litPtr);
640 if (sequence.litLength > 8)
641 ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
642 op = oLitEnd;
643 *litPtr = iLitEnd; /* update for next sequence */
644
645 /* copy Match */
646 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
647 /* offset beyond prefix -> go into extDict */
648 if (sequence.offset > (size_t)(oLitEnd - virtualStart))
649 return ERROR(corruption_detected);
650 match = dictEnd + (match - prefixStart);
651 if (match + sequence.matchLength <= dictEnd) {
652 memmove(oLitEnd, match, sequence.matchLength);
653 return sequenceLength;
654 }
655 /* span extDict & currentPrefixSegment */
656 { size_t const length1 = dictEnd - match;
657 memmove(oLitEnd, match, length1);
658 op = oLitEnd + length1;
659 sequence.matchLength -= length1;
660 match = prefixStart;
661 if (op > oend_w || sequence.matchLength < MINMATCH) {
662 U32 i;
663 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
664 return sequenceLength;
665 }
666 } }
667 /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
668
669 /* match within prefix */
670 if (sequence.offset < 8) {
671 /* close range match, overlap */
672 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
673 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
674 int const sub2 = dec64table[sequence.offset];
675 op[0] = match[0];
676 op[1] = match[1];
677 op[2] = match[2];
678 op[3] = match[3];
679 match += dec32table[sequence.offset];
680 ZSTD_copy4(op+4, match);
681 match -= sub2;
682 } else {
683 ZSTD_copy8(op, match);
684 }
685 op += 8; match += 8;
686
687 if (oMatchEnd > oend-(16-MINMATCH)) {
688 if (op < oend_w) {
689 ZSTD_wildcopy(op, match, oend_w - op);
690 match += oend_w - op;
691 op = oend_w;
692 }
693 while (op < oMatchEnd) *op++ = *match++;
694 } else {
695 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
696 }
697 return sequenceLength;
698 }
699
700
701 HINT_INLINE
702 size_t ZSTD_execSequenceLong(BYTE* op,
703 BYTE* const oend, seq_t sequence,
704 const BYTE** litPtr, const BYTE* const litLimit,
705 const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
706 {
707 BYTE* const oLitEnd = op + sequence.litLength;
708 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
709 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
710 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
711 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
712 const BYTE* match = sequence.match;
713
714 /* check */
715 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
716 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
717 if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
718
719 /* copy Literals */
720 ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
721 if (sequence.litLength > 8)
722 ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
723 op = oLitEnd;
724 *litPtr = iLitEnd; /* update for next sequence */
725
726 /* copy Match */
727 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
728 /* offset beyond prefix */
729 if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
730 if (match + sequence.matchLength <= dictEnd) {
731 memmove(oLitEnd, match, sequence.matchLength);
732 return sequenceLength;
733 }
734 /* span extDict & currentPrefixSegment */
735 { size_t const length1 = dictEnd - match;
736 memmove(oLitEnd, match, length1);
737 op = oLitEnd + length1;
738 sequence.matchLength -= length1;
739 match = prefixStart;
740 if (op > oend_w || sequence.matchLength < MINMATCH) {
741 U32 i;
742 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
743 return sequenceLength;
744 }
745 } }
746 assert(op <= oend_w);
747 assert(sequence.matchLength >= MINMATCH);
748
749 /* match within prefix */
750 if (sequence.offset < 8) {
751 /* close range match, overlap */
752 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
753 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
754 int const sub2 = dec64table[sequence.offset];
755 op[0] = match[0];
756 op[1] = match[1];
757 op[2] = match[2];
758 op[3] = match[3];
759 match += dec32table[sequence.offset];
760 ZSTD_copy4(op+4, match);
761 match -= sub2;
762 } else {
763 ZSTD_copy8(op, match);
764 }
765 op += 8; match += 8;
766
767 if (oMatchEnd > oend-(16-MINMATCH)) {
768 if (op < oend_w) {
769 ZSTD_wildcopy(op, match, oend_w - op);
770 match += oend_w - op;
771 op = oend_w;
772 }
773 while (op < oMatchEnd) *op++ = *match++;
774 } else {
775 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
776 }
777 return sequenceLength;
778 }
779
780 static void
781 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
782 {
783 const void* ptr = dt;
784 const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
785 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
786 DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
787 (U32)DStatePtr->state, DTableH->tableLog);
788 BIT_reloadDStream(bitD);
789 DStatePtr->table = dt + 1;
790 }
791
792 FORCE_INLINE_TEMPLATE void
793 ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
794 {
795 ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
796 U32 const nbBits = DInfo.nbBits;
797 size_t const lowBits = BIT_readBits(bitD, nbBits);
798 DStatePtr->state = DInfo.nextState + lowBits;
799 }
800
801 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
802 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
803 * bits before reloading. This value is the maximum number of bits we read
804 * after reloading when we are decoding long offsets.
805 */
806 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
807 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
808 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
809 : 0)
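/* With the current constants (ZSTD_WINDOWLOG_MAX_32 == 30, STREAM_ACCUMULATOR_MIN_32 == 25),
 * this evaluates to 5 ; ZSTD_decodeSequence() static-asserts that value below. */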
810
811 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
812
813 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
814 FORCE_INLINE_TEMPLATE seq_t
815 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
816 {
817 seq_t seq;
818 U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
819 U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
820 U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
821 U32 const totalBits = llBits+mlBits+ofBits;
822 U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
823 U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
824 U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
825
826 /* sequence */
827 { size_t offset;
828 if (!ofBits)
829 offset = 0;
830 else {
831 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
832 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
833 assert(ofBits <= MaxOff);
834 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
835 U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
836 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
837 BIT_reloadDStream(&seqState->DStream);
838 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
839 assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
840 } else {
841 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
842 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
843 }
844 }
845
846 if (ofBits <= 1) {
847 offset += (llBase==0);
848 if (offset) {
849 size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
850 temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
851 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
852 seqState->prevOffset[1] = seqState->prevOffset[0];
853 seqState->prevOffset[0] = offset = temp;
854 } else { /* offset == 0 */
855 offset = seqState->prevOffset[0];
856 }
857 } else {
858 seqState->prevOffset[2] = seqState->prevOffset[1];
859 seqState->prevOffset[1] = seqState->prevOffset[0];
860 seqState->prevOffset[0] = offset;
861 }
862 seq.offset = offset;
863 }
864
865 seq.matchLength = mlBase
866 + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
867 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
868 BIT_reloadDStream(&seqState->DStream);
869 if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
870 BIT_reloadDStream(&seqState->DStream);
871 /* Ensure there are enough bits to read the rest of the data in 64-bit mode. */
872 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
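    /* (worked out, assuming LLFSELog==9, MLFSELog==9, OffFSELog==8 :
     * 16 + 9 + 9 + 8 = 42 < STREAM_ACCUMULATOR_MIN_64 == 57, so a single
     * 64-bit reload covers the three state updates plus a 16-bit read) */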
873
874 seq.litLength = llBase
875 + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
876 if (MEM_32bits())
877 BIT_reloadDStream(&seqState->DStream);
878
879 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
880 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
881
882 /* ANS state update */
883 ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
884 ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
885 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
886 ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
887
888 return seq;
889 }
890
891 FORCE_INLINE_TEMPLATE size_t
892 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
893 void* dst, size_t maxDstSize,
894 const void* seqStart, size_t seqSize, int nbSeq,
895 const ZSTD_longOffset_e isLongOffset)
896 {
897 const BYTE* ip = (const BYTE*)seqStart;
898 const BYTE* const iend = ip + seqSize;
899 BYTE* const ostart = (BYTE* const)dst;
900 BYTE* const oend = ostart + maxDstSize;
901 BYTE* op = ostart;
902 const BYTE* litPtr = dctx->litPtr;
903 const BYTE* const litEnd = litPtr + dctx->litSize;
904 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
905 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
906 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
907 DEBUGLOG(5, "ZSTD_decompressSequences_body");
908
909 /* Regen sequences */
910 if (nbSeq) {
911 seqState_t seqState;
912 dctx->fseEntropy = 1;
913 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
914 CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
915 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
916 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
917 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
918
919 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
920 nbSeq--;
921 { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
922 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
923 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
924 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
925 op += oneSeqSize;
926 } }
927
928 /* check if reached exact end */
929 DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
930 if (nbSeq) return ERROR(corruption_detected);
931 /* save reps for next block */
932 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
933 }
934
935 /* last literal segment */
936 { size_t const lastLLSize = litEnd - litPtr;
937 if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
938 memcpy(op, litPtr, lastLLSize);
939 op += lastLLSize;
940 }
941
942 return op-ostart;
943 }
944
945 static size_t
946 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
947 void* dst, size_t maxDstSize,
948 const void* seqStart, size_t seqSize, int nbSeq,
949 const ZSTD_longOffset_e isLongOffset)
950 {
951 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
952 }
953 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
954
955
956
957 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
958 FORCE_INLINE_TEMPLATE seq_t
959 ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
960 {
961 seq_t seq;
962 U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
963 U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
964 U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
965 U32 const totalBits = llBits+mlBits+ofBits;
966 U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
967 U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
968 U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
969
970 /* sequence */
971 { size_t offset;
972 if (!ofBits)
973 offset = 0;
974 else {
975 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
976 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
977 assert(ofBits <= MaxOff);
978 if (MEM_32bits() && longOffsets) {
979 U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
980 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
981 if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
982 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
983 } else {
984 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
985 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
986 }
987 }
988
989 if (ofBits <= 1) {
990 offset += (llBase==0);
991 if (offset) {
992 size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
993 temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
994 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
995 seqState->prevOffset[1] = seqState->prevOffset[0];
996 seqState->prevOffset[0] = offset = temp;
997 } else {
998 offset = seqState->prevOffset[0];
999 }
1000 } else {
1001 seqState->prevOffset[2] = seqState->prevOffset[1];
1002 seqState->prevOffset[1] = seqState->prevOffset[0];
1003 seqState->prevOffset[0] = offset;
1004 }
1005 seq.offset = offset;
1006 }
1007
1008 seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1009 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1010 BIT_reloadDStream(&seqState->DStream);
1011 if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1012 BIT_reloadDStream(&seqState->DStream);
1013 /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */
1014 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1015
1016 seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1017 if (MEM_32bits())
1018 BIT_reloadDStream(&seqState->DStream);
1019
1020 { size_t const pos = seqState->pos + seq.litLength;
1021 const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1022 seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1023 * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1024 seqState->pos = pos + seq.matchLength;
1025 }
1026
1027 /* ANS state update */
1028 ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1029 ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1030 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1031 ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1032
1033 return seq;
1034 }
1035
1036 FORCE_INLINE_TEMPLATE size_t
1037 ZSTD_decompressSequencesLong_body(
1038 ZSTD_DCtx* dctx,
1039 void* dst, size_t maxDstSize,
1040 const void* seqStart, size_t seqSize, int nbSeq,
1041 const ZSTD_longOffset_e isLongOffset)
1042 {
1043 const BYTE* ip = (const BYTE*)seqStart;
1044 const BYTE* const iend = ip + seqSize;
1045 BYTE* const ostart = (BYTE* const)dst;
1046 BYTE* const oend = ostart + maxDstSize;
1047 BYTE* op = ostart;
1048 const BYTE* litPtr = dctx->litPtr;
1049 const BYTE* const litEnd = litPtr + dctx->litSize;
1050 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1051 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1052 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1053
1054 /* Regen sequences */
1055 if (nbSeq) {
1056 #define STORED_SEQS 4
1057 #define STORED_SEQS_MASK (STORED_SEQS-1)
1058 #define ADVANCED_SEQS 4
1059 seq_t sequences[STORED_SEQS];
1060 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1061 seqState_t seqState;
1062 int seqNb;
1063 dctx->fseEntropy = 1;
1064 { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1065 seqState.prefixStart = prefixStart;
1066 seqState.pos = (size_t)(op-prefixStart);
1067 seqState.dictEnd = dictEnd;
1068 assert(iend >= ip);
1069 CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1070 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1071 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1072 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1073
1074 /* prepare in advance */
1075 for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1076 sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1077 PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH_L1() on any memory address, including invalid ones */
1078 }
1079 if (seqNb<seqAdvance) return ERROR(corruption_detected);
1080
1081 /* decode and decompress */
1082 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1083 seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1084 size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1085 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1086 PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH_L1() on any memory address, including invalid ones */
1087 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1088 op += oneSeqSize;
1089 }
1090 if (seqNb<nbSeq) return ERROR(corruption_detected);
1091
1092 /* finish queue */
1093 seqNb -= seqAdvance;
1094 for ( ; seqNb<nbSeq ; seqNb++) {
1095 size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1096 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1097 op += oneSeqSize;
1098 }
1099
1100 /* save reps for next block */
1101 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1102 }
1103
1104 /* last literal segment */
1105 { size_t const lastLLSize = litEnd - litPtr;
1106 if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
1107 memcpy(op, litPtr, lastLLSize);
1108 op += lastLLSize;
1109 }
1110
1111 return op-ostart;
1112 }
1113
1114 static size_t
1115 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1116 void* dst, size_t maxDstSize,
1117 const void* seqStart, size_t seqSize, int nbSeq,
1118 const ZSTD_longOffset_e isLongOffset)
1119 {
1120 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1121 }
1122 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1123
1124
1125
1126 #if DYNAMIC_BMI2
1127
1128 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1129 static TARGET_ATTRIBUTE("bmi2") size_t
1130 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1131 void* dst, size_t maxDstSize,
1132 const void* seqStart, size_t seqSize, int nbSeq,
1133 const ZSTD_longOffset_e isLongOffset)
1134 {
1135 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1136 }
1137 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1138
1139 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1140 static TARGET_ATTRIBUTE("bmi2") size_t
1141 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1142 void* dst, size_t maxDstSize,
1143 const void* seqStart, size_t seqSize, int nbSeq,
1144 const ZSTD_longOffset_e isLongOffset)
1145 {
1146 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1147 }
1148 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1149
1150 #endif /* DYNAMIC_BMI2 */
1151
1152 typedef size_t (*ZSTD_decompressSequences_t)(
1153 ZSTD_DCtx* dctx,
1154 void* dst, size_t maxDstSize,
1155 const void* seqStart, size_t seqSize, int nbSeq,
1156 const ZSTD_longOffset_e isLongOffset);
1157
1158 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1159 static size_t
1160 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1161 const void* seqStart, size_t seqSize, int nbSeq,
1162 const ZSTD_longOffset_e isLongOffset)
1163 {
1164 DEBUGLOG(5, "ZSTD_decompressSequences");
1165 #if DYNAMIC_BMI2
1166 if (dctx->bmi2) {
1167 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1168 }
1169 #endif
1170 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1171 }
1172 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1173
1174
1175 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1176 /* ZSTD_decompressSequencesLong() :
1177 * decompression function triggered when a minimum share of offsets is considered "long",
1178 * aka out of cache.
1179 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
1180 * This function will try to mitigate main memory latency through the use of prefetching */
1181 static size_t
1182 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1183 void* dst, size_t maxDstSize,
1184 const void* seqStart, size_t seqSize, int nbSeq,
1185 const ZSTD_longOffset_e isLongOffset)
1186 {
1187 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1188 #if DYNAMIC_BMI2
1189 if (dctx->bmi2) {
1190 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1191 }
1192 #endif
1193 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1194 }
1195 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1196
1197
1198
1199 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1200 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1201 /* ZSTD_getLongOffsetsShare() :
1202 * condition : offTable must be valid
1203 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1204 * compared to maximum possible of (1<<OffFSELog) */
1205 static unsigned
1206 ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1207 {
1208 const void* ptr = offTable;
1209 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1210 const ZSTD_seqSymbol* table = offTable + 1;
1211 U32 const max = 1 << tableLog;
1212 U32 u, total = 0;
1213 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1214
1215 assert(max <= (1 << OffFSELog)); /* max not too large */
1216 for (u=0; u<max; u++) {
1217 if (table[u].nbAdditionalBits > 22) total += 1;
1218 }
1219
1220 assert(tableLog <= OffFSELog);
1221 total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1222
1223 return total;
1224 }
1225 #endif
1226
1227
1228 size_t
1229 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1230 void* dst, size_t dstCapacity,
1231 const void* src, size_t srcSize, const int frame)
1232 { /* blockType == blockCompressed */
1233 const BYTE* ip = (const BYTE*)src;
1234 /* isLongOffset must be true if there are long offsets.
1235 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1236 * We don't expect that to be the case in 64-bit mode.
1237 * In block mode, window size is not known, so we have to be conservative.
1238 * (note: but it could be evaluated from current-lowLimit)
1239 */
1240 ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1241 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1242
1243 if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
1244
1245 /* Decode literals section */
1246 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
1247 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1248 if (ZSTD_isError(litCSize)) return litCSize;
1249 ip += litCSize;
1250 srcSize -= litCSize;
1251 }
1252
1253 /* Build Decoding Tables */
1254 {
1255 /* These macros control at build-time which decompressor implementation
1256 * we use. If neither is defined, we do some inspection and dispatch at
1257 * runtime.
1258 */
1259 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1260 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1261 int usePrefetchDecoder = dctx->ddictIsCold;
1262 #endif
1263 int nbSeq;
1264 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
1265 if (ZSTD_isError(seqHSize)) return seqHSize;
1266 ip += seqHSize;
1267 srcSize -= seqHSize;
1268
1269 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1270 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1271 if ( !usePrefetchDecoder
1272 && (!frame || (dctx->fParams.windowSize > (1<<24)))
1273 && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
1274 U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1275 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1276 usePrefetchDecoder = (shareLongOffsets >= minShare);
1277 }
1278 #endif
1279
1280 dctx->ddictIsCold = 0;
1281
1282 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1283 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1284 if (usePrefetchDecoder)
1285 #endif
1286 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1287 return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1288 #endif
1289
1290 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1291 /* else */
1292 return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1293 #endif
1294 }
1295 }
1296
1297
1298 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1299 void* dst, size_t dstCapacity,
1300 const void* src, size_t srcSize)
1301 {
1302 size_t dSize;
1303 ZSTD_checkContinuity(dctx, dst);
1304 dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
1305 dctx->previousDstEnd = (char*)dst + dSize;
1306 return dSize;
1307 }
@@ -0,0 +1,59 b''
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12 #ifndef ZSTD_DEC_BLOCK_H
13 #define ZSTD_DEC_BLOCK_H
14
15 /*-*******************************************************
16 * Dependencies
17 *********************************************************/
18 #include <stddef.h> /* size_t */
19 #include "zstd.h" /* DCtx, and some public functions */
20 #include "zstd_internal.h" /* blockProperties_t, and some public functions */
21 #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
22
23
24 /* === Prototypes === */
25
26 /* note: prototypes already published within `zstd.h` :
27 * ZSTD_decompressBlock()
28 */
29
30 /* note: prototypes already published within `zstd_internal.h` :
31 * ZSTD_getcBlockSize()
32 * ZSTD_decodeSeqHeaders()
33 */
34
35
36 /* ZSTD_decompressBlock_internal() :
37 * decompress block, starting at `src`,
38 * into destination buffer `dst`.
39 * @return : decompressed block size,
40 * or an error code (which can be tested using ZSTD_isError())
41 */
42 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
43 void* dst, size_t dstCapacity,
44 const void* src, size_t srcSize, const int frame);
45
46 /* ZSTD_buildFSETable() :
47 * generate FSE decoding table for one symbol (ll, ml or off)
48 * this function must be called with valid parameters only
49 * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
50 * in which case it cannot fail.
51 * Internal use only.
52 */
53 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
54 const short* normalizedCounter, unsigned maxSymbolValue,
55 const U32* baseValue, const U32* nbAdditionalBits,
56 unsigned tableLog);
57
58
59 #endif /* ZSTD_DEC_BLOCK_H */
@@ -0,0 +1,168 b''
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12 /* zstd_decompress_internal:
13 * objects and definitions shared within lib/decompress modules */
14
15 #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16 #define ZSTD_DECOMPRESS_INTERNAL_H
17
18
19 /*-*******************************************************
20 * Dependencies
21 *********************************************************/
22 #include "mem.h" /* BYTE, U16, U32 */
23 #include "zstd_internal.h" /* ZSTD_seqSymbol */
24
25
26
27 /*-*******************************************************
28 * Constants
29 *********************************************************/
30 static const U32 LL_base[MaxLL+1] = {
31 0, 1, 2, 3, 4, 5, 6, 7,
32 8, 9, 10, 11, 12, 13, 14, 15,
33 16, 18, 20, 22, 24, 28, 32, 40,
34 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35 0x2000, 0x4000, 0x8000, 0x10000 };
36
37 static const U32 OF_base[MaxOff+1] = {
38 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42
43 static const U32 OF_bits[MaxOff+1] = {
44 0, 1, 2, 3, 4, 5, 6, 7,
45 8, 9, 10, 11, 12, 13, 14, 15,
46 16, 17, 18, 19, 20, 21, 22, 23,
47 24, 25, 26, 27, 28, 29, 30, 31 };
48
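/* Decoding note : an offset code k expands to OF_base[k] plus OF_bits[k]
 * extra bits read from the bitstream. For example, code 5 covers offsets
 * 0x1D + [0..31] = 29..60, abutting code 6's base value of 0x3D (61). */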
49 static const U32 ML_base[MaxML+1] = {
50 3, 4, 5, 6, 7, 8, 9, 10,
51 11, 12, 13, 14, 15, 16, 17, 18,
52 19, 20, 21, 22, 23, 24, 25, 26,
53 27, 28, 29, 30, 31, 32, 33, 34,
54 35, 37, 39, 41, 43, 47, 51, 59,
55 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
56 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
57
58
59 /*-*******************************************************
60 * Decompression types
61 *********************************************************/
62 typedef struct {
63 U32 fastMode;
64 U32 tableLog;
65 } ZSTD_seqSymbol_header;
66
67 typedef struct {
68 U16 nextState;
69 BYTE nbAdditionalBits;
70 BYTE nbBits;
71 U32 baseValue;
72 } ZSTD_seqSymbol;
73
74 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
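/* i.e. one ZSTD_seqSymbol_header in slot 0, followed by 2^log decode entries
 * (see ZSTD_getLongOffsetsShare(), which reads the header at offTable[0]
 * and the symbols starting at offTable + 1). */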
75
76 typedef struct {
77 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
78 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
79 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80 HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
81 U32 rep[ZSTD_REP_NUM];
82 } ZSTD_entropyDTables_t;
83
84 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
85 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
86 ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
87 ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
88
89 typedef enum { zdss_init=0, zdss_loadHeader,
90 zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
91
92 struct ZSTD_DCtx_s
93 {
94 const ZSTD_seqSymbol* LLTptr;
95 const ZSTD_seqSymbol* MLTptr;
96 const ZSTD_seqSymbol* OFTptr;
97 const HUF_DTable* HUFptr;
98 ZSTD_entropyDTables_t entropy;
99 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
100 const void* previousDstEnd; /* detect continuity */
101 const void* prefixStart; /* start of current segment */
102 const void* virtualStart; /* virtual start of previous segment if it was just before current one */
103 const void* dictEnd; /* end of previous segment */
104 size_t expected;
105 ZSTD_frameHeader fParams;
106 U64 decodedSize;
107 blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
108 ZSTD_dStage stage;
109 U32 litEntropy;
110 U32 fseEntropy;
111 XXH64_state_t xxhState;
112 size_t headerSize;
113 ZSTD_format_e format;
114 const BYTE* litPtr;
115 ZSTD_customMem customMem;
116 size_t litSize;
117 size_t rleSize;
118 size_t staticSize;
119 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
120
121 /* dictionary */
122 ZSTD_DDict* ddictLocal;
123 const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
124 U32 dictID;
125 int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
126
127 /* streaming */
128 ZSTD_dStreamStage streamStage;
129 char* inBuff;
130 size_t inBuffSize;
131 size_t inPos;
132 size_t maxWindowSize;
133 char* outBuff;
134 size_t outBuffSize;
135 size_t outStart;
136 size_t outEnd;
137 size_t lhSize;
138 void* legacyContext;
139 U32 previousLegacyVersion;
140 U32 legacyVersion;
141 U32 hostageByte;
142 int noForwardProgress;
143
144 /* workspace */
145 BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
146 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
147 }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
148
149
150 /*-*******************************************************
151 * Shared internal functions
152 *********************************************************/
153
154 /*! ZSTD_loadDEntropy() :
155 * dict : must point at beginning of a valid zstd dictionary.
156 * @return : size of entropy tables read */
157 size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
158 const void* const dict, size_t const dictSize);
159
160 /*! ZSTD_checkContinuity() :
161 * check if next `dst` follows previous position, where decompression ended.
162 * If yes, do nothing (continue on current segment).
163 * If not, classify previous segment as "external dictionary", and start a new segment.
164 * This function cannot fail. */
165 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
166
167
168 #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
@@ -62,6 +62,11 b' contrib/python-zstandard/zstd/compress/z'
62 contrib/python-zstandard/zstd/compress/zstd_opt.c
62 contrib/python-zstandard/zstd/compress/zstd_opt.c
63 contrib/python-zstandard/zstd/compress/zstd_opt.h
63 contrib/python-zstandard/zstd/compress/zstd_opt.h
64 contrib/python-zstandard/zstd/decompress/huf_decompress.c
64 contrib/python-zstandard/zstd/decompress/huf_decompress.c
65 contrib/python-zstandard/zstd/decompress/zstd_ddict.c
66 contrib/python-zstandard/zstd/decompress/zstd_ddict.h
67 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c
68 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h
69 contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h
65 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
70 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
66 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
71 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
67 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
72 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
@@ -5,6 +5,5 b' graft tests'
5 include make_cffi.py
5 include make_cffi.py
6 include setup_zstd.py
6 include setup_zstd.py
7 include zstd.c
7 include zstd.c
8 include zstd_cffi.py
9 include LICENSE
8 include LICENSE
10 include NEWS.rst
9 include NEWS.rst
@@ -8,8 +8,18 b' 1.0.0 (not yet released)'
8 Actions Blocking Release
8 Actions Blocking Release
9 ------------------------
9 ------------------------
10
10
11 * compression and decompression APIs that support ``io.rawIOBase`` interface
11 * compression and decompression APIs that support ``io.RawIOBase`` interface
12 (#13).
12 (#13).
13 * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface.
14 * Properly handle non-blocking I/O and partial writes for objects implementing
15 ``io.RawIOBase``.
16 * Make ``write_return_read=True`` the default for objects implementing
17 ``io.RawIOBase``.
18 * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for
19 all objects implementing ``io.RawIOBase``. Is calling ``close()`` on
20 the wrapped stream acceptable, should ``__exit__`` always call ``close()``,
21 should ``close()`` imply ``flush()``, etc.
22 * Consider making reads across frames configurable behavior.
13 * Refactor module names so C and CFFI extensions live under ``zstandard``
23 * Refactor module names so C and CFFI extensions live under ``zstandard``
14 package.
24 package.
15 * Overall API design review.
25 * Overall API design review.
@@ -43,6 +53,11 b' Actions Blocking Release'
43 * Consider a ``chunker()`` API for decompression.
53 * Consider a ``chunker()`` API for decompression.
44 * Consider stats for ``chunker()`` API, including finding the last consumed
54 * Consider stats for ``chunker()`` API, including finding the last consumed
45 offset of input data.
55 offset of input data.
56 * Consider exposing ``ZSTD_cParam_getBounds()`` and
57 ``ZSTD_dParam_getBounds()`` APIs.
58 * Consider controls over resetting compression contexts (session only, parameters,
59 or session and parameters).
60 * Actually use the CFFI backend in fuzzing tests.
46
61
47 Other Actions Not Blocking Release
62 Other Actions Not Blocking Release
48 ---------------------------------------
63 ---------------------------------------
@@ -51,6 +66,207 b' Other Actions Not Blocking Release'
51 * API for ensuring max memory ceiling isn't exceeded.
66 * API for ensuring max memory ceiling isn't exceeded.
52 * Move off nose for testing.
67 * Move off nose for testing.
53
68
69 0.11.0 (released 2019-02-24)
70 ============================
71
72 Backwards Compatibility Notes
73 -----------------------------
74
75 * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0``
76 and defaults to ``-1``, per the documented behavior of
77 ``io.RawIOBase.read()``. Previously, we required an argument that was
78 a positive value.
79 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
80 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
81 instead of ``NotImplementedError``.
82 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
83 argument. The default value will likely be changed in a future release
84 and consumers are advised to pass the argument to avoid unwanted change
85 of behavior in the future.
86 * ``setup.py`` now always disables the CFFI backend if the installed
87 CFFI package does not meet the minimum version requirements. Before, it was
88 possible for the CFFI backend to be generated and a run-time error to
89 occur.
90 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
91 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
92 respectively so naming is identical to the C extension. This should have
93 no meaningful end-user impact, as instances aren't meant to be
94 constructed directly.
95 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
96 argument to control whether ``write()`` returns the number of bytes
97 read from the source / written to the decompressor. It defaults to off,
98 which preserves the existing behavior of returning the number of bytes
99 emitted from the decompressor. The default will change in a future release
100 so behavior aligns with the specified behavior of ``io.RawIOBase``.
101 * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This
102 will result in that stream plus the underlying stream being closed as
103 well. If this behavior is not desirable, do not use instances as
104 context managers.
105 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
106 argument to control whether ``write()`` returns the number of bytes read
107 from the source / written to the compressor. It defaults to off, which
108 preserves the existing behavior of returning the number of bytes emitted
109 from the compressor. The default will change in a future release so
110 behavior aligns with the specified behavior of ``io.RawIOBase``.
111 * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will
112 result in that stream plus any underlying stream being closed as well. If
113 this behavior is not desirable, do not use instances as context managers.
114 * ``ZstdDecompressionWriter`` no longer requires being used as a context
115 manager (#57).
116 * ``ZstdCompressionWriter`` no longer requires being used as a context
117 manager (#57).
118 * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances
119 has been deprecated and will be removed in a future release. The
120 ``overlap_log`` attribute should be used instead.
121 * The ``overlap_size_log`` argument to ``CompressionParameters`` has been
122 deprecated and will be removed in a future release. The ``overlap_log``
123 argument should be used instead.
124 * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances
125 has been deprecated and will be removed in a future release. The
126 ``ldm_hash_rate_log`` attribute should be used instead.
127 * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been
128 deprecated and will be removed in a future release. The ``ldm_hash_rate_log``
129 argument should be used instead.
130 * The ``compression_strategy`` argument to ``CompressionParameters`` has been
131 deprecated and will be removed in a future release. The ``strategy``
132 argument should be used instead.
133 * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated
134 and will be removed in a future release. Use ``MINMATCH_MIN`` and
135 ``MINMATCH_MAX`` instead.
136 * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had
137 been documented in the ``README`` file since the ``0.9.0`` release, the
138 module should not be imported directly at its new location. Instead,
139 ``import zstandard`` to cause an appropriate backend module to be loaded
140 automatically.
141
142 Bug Fixes
143 ---------
144
145 * CFFI backend could encounter a failure when sending an empty chunk into
146 ``ZstdDecompressionObj.decompress()``. The issue has been fixed.
147 * CFFI backend could encounter an error when calling
148 ``ZstdDecompressionReader.read()`` if there was data remaining in an
149 internal buffer. The issue has been fixed. (#71)
150
151 Changes
152 -------
153
154 * ``ZstdDecompressionObj.decompress()`` now properly handles empty inputs in
155 the CFFI backend.
156 * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``.
157 These are part of the ``io.BufferedIOBase`` interface.
158 * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading
159 compressed output into an existing buffer.
160 * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts
161 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
162 behavior of ``io.RawIOBase``.
163 * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this
164 method raised ``NotImplementedError``.
165 * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``.
166 These are part of the ``io.BufferedIOBase`` interface.
167 * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts
168 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
169 behavior of ``io.RawIOBase``.
170 * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this
171 method raised ``NotImplementedError``.
172 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
173 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
174 instead of ``NotImplementedError``. This reflects a decision to never
175 implement text-based I/O on (de)compressors and keep the low-level API
176 operating in the binary domain. (#13)
177 * ``README.rst`` now documents how to achieve linewise iteration using
178 an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``.
179 * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for
180 reading decompressed output into an existing buffer. This allows chaining
181 to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``.
182 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
183 argument to control behavior when the input data has multiple zstd
184 *frames*. When ``False`` (the default for backwards compatibility), a
185 ``read()`` will stop when the end of a zstd *frame* is encountered. When
186 ``True``, ``read()`` can potentially return data spanning multiple zstd
187 *frames*. The default will likely be changed to ``True`` in a future
188 release.
189 * ``setup.py`` now performs CFFI version sniffing and disables the CFFI
190 backend if CFFI is too old. Previously, we only used ``install_requires``
191 to enforce the CFFI version and not all build modes would properly enforce
192 the minimum CFFI version. (#69)
193 * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data
194 remaining in any internal buffer. Before, repeated ``read()`` could
195 result in *random* errors. (#71)
196 * Upgraded various Python packages in CI environment.
197 * Upgraded to hypothesis 4.5.11.
198 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
199 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
200 respectively.
201 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
202 argument to control whether ``write()`` returns the number of bytes read
203 from the source. It defaults to ``False`` to preserve backwards
204 compatibility.
205 * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase``
206 interface and behaves as a proper stream object.
207 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
208 argument to control whether ``write()`` returns the number of bytes read
209 from the source. It defaults to ``False`` to preserve backwards
210 compatibility.
211 * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and
212 behaves as a proper stream object. ``close()`` will now close the stream
213 and the underlying stream (if possible). ``__exit__`` will now call
214 ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented.
215 * ``ZstdDecompressionWriter`` no longer must be used as a context manager.
216 * ``ZstdCompressionWriter`` no longer must be used as a context manager.
217 When not used as a context manager, it is important to call
218 ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
219 terminated and decoders may complain about malformed input.
220 * ``ZstdCompressionWriter.flush()`` (what is returned from
221 ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the
222 flush behavior. Its value can be one of the new constants
223 ``FLUSH_BLOCK`` or ``FLUSH_FRAME``.
224 * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method.
225 This provides parity with standard library equivalent types. (#65)
226 * ``CompressionParameters`` no longer redundantly store individual compression
227 parameters on each instance. Instead, compression parameters are stored inside
228 the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining
229 parameters are now properties rather than instance variables.
230 * Exposed the ``STRATEGY_BTULTRA2`` constant.
231 * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute.
232 This behaves identically to the ``overlap_size_log`` attribute.
233 * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that
234 behaves identically to the ``overlap_size_log`` argument. An error will be
235 raised if both arguments are specified.
236 * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log``
237 attribute. This behaves identically to the ``ldm_hash_every_log`` attribute.
238 * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that
239 behaves identically to the ``ldm_hash_every_log`` argument. An error will be
240 raised if both arguments are specified.
241 * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves
242 identically to the ``compression_strategy`` argument. An error will be raised
243 if both arguments are specified.
244 * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are
245 semantically equivalent to the old ``SEARCHLENGTH_MIN`` and
246 ``SEARCHLENGTH_MAX`` constants.
247 * Bundled zstandard library upgraded from 1.3.7 to 1.3.8.
248 * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and
249 tested in the 0.10 release).
250 * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``.
251 * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid
252 allocating a new buffer for every operation. This should result in faster
253 performance in cases where ``write()`` or ``flush()`` are being called
254 frequently. (#62)
255 * Bundled zstandard library upgraded from 1.3.6 to 1.3.7.
256
257 0.10.2 (released 2018-11-03)
258 ============================
259
260 Bug Fixes
261 ---------
262
263 * ``zstd_cffi.py`` added to ``setup.py`` (#60).
264
265 Changes
266 -------
267
268 * Change some integer casts to avoid ``ssize_t`` (#61).
269
54 0.10.1 (released 2018-10-08)
270 0.10.1 (released 2018-10-08)
55 ============================
271 ============================
56
272
@@ -20,9 +20,9 b' https://github.com/indygreg/python-zstan'
20 Requirements
20 Requirements
21 ============
21 ============
22
22
23 This extension is designed to run with Python 2.7, 3.4, 3.5, and 3.6
23 This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7
24 on common platforms (Linux, Windows, and OS X). x86 and x86_64 are well-tested
24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
25 on Windows. Only x86_64 is well-tested on Linux and macOS.
25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
26
26
27 Installing
27 Installing
28 ==========
28 ==========
@@ -215,7 +215,7 b' Instances can also be used as context ma'
215
215
216 # Do something with compressed chunk.
216 # Do something with compressed chunk.
217
217
218 When the context manager exists or ``close()`` is called, the stream is closed,
218 When the context manager exits or ``close()`` is called, the stream is closed,
219 underlying resources are released, and future operations against the compression
219 underlying resources are released, and future operations against the compression
220 stream will fail.
220 stream will fail.
221
221
@@ -251,8 +251,54 b' emitted so far.'
251 Streaming Input API
251 Streaming Input API
252 ^^^^^^^^^^^^^^^^^^^
252 ^^^^^^^^^^^^^^^^^^^
253
253
254 ``stream_writer(fh)`` (which behaves as a context manager) allows you to *stream*
254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
255 data into a compressor.::
255
256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
257 that involve writing will do useful things.
258
259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
260 compressed data is available, ``write()`` will be called with the compressed
261 data as its argument. Many common Python types implement ``write()``, including
262 open file handles and ``io.BytesIO``.
263
264 The ``write(data)`` method is used to feed data into the compressor.
265
266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
267 data remains within the compressor's internal state into the output object. This
268 may result in 0 or more ``write()`` calls to the output object. This method
269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
270 Its value can be any of the ``FLUSH_*`` constants.
271
272 Both ``write()`` and ``flush()`` return the number of bytes written to the
273 object's ``write()``. In many cases, small inputs do not accumulate enough
274 data to cause a write and ``write()`` will return ``0``.
275
276 Calling ``close()`` will mark the stream as closed and subsequent I/O
277 operations will raise ``ValueError`` (per the documented behavior of
278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
279 stream if such a method exists.
280
281 Typical usage is as follows::
282
283 cctx = zstd.ZstdCompressor(level=10)
284 compressor = cctx.stream_writer(fh)
285
286 compressor.write(b'chunk 0\n')
287 compressor.write(b'chunk 1\n')
288 compressor.flush()
289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
290 # Receiver is also expecting more data in the zstd *frame*.
291
292 compressor.write(b'chunk 2\n')
293 compressor.flush(zstd.FLUSH_FRAME)
294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2\n``.
295 # Receiver is expecting no more data, as the zstd frame is closed.
296 # Any future calls to ``write()`` at this point will construct a new
297 # zstd frame.
298
299 Instances can be used as context managers. Exiting the context manager is
300 the equivalent of calling ``close()``, which is equivalent to calling
301 ``flush(zstd.FLUSH_FRAME)``::
256
302
257 cctx = zstd.ZstdCompressor(level=10)
303 cctx = zstd.ZstdCompressor(level=10)
258 with cctx.stream_writer(fh) as compressor:
304 with cctx.stream_writer(fh) as compressor:
@@ -260,22 +306,12 b' data into a compressor.::'
260 compressor.write(b'chunk 1')
306 compressor.write(b'chunk 1')
261 ...
307 ...
262
308
263 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
309 .. important::
264 compressed data is available, ``write()`` will be called with the compressed
265 data as its argument. Many common Python types implement ``write()``, including
266 open file handles and ``io.BytesIO``.
267
310
268 ``stream_writer()`` returns an object representing a streaming compressor
311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
269 instance. It **must** be used as a context manager. That object's
312 a full zstd *frame* and consumers of this data may complain about malformed
270 ``write(data)`` method is used to feed data into the compressor.
313 input. It is recommended to use instances as a context manager to ensure
271
314 *frames* are properly finished.
272 A ``flush()`` method can be called to evict whatever data remains within the
273 compressor's internal state into the output object. This may result in 0 or
274 more ``write()`` calls to the output object.
275
276 Both ``write()`` and ``flush()`` return the number of bytes written to the
277 object's ``write()``. In many cases, small inputs do not accumulate enough
278 data to cause a write and ``write()`` will return ``0``.
279
315
280 If the size of the data being fed to this streaming compressor is known,
316 If the size of the data being fed to this streaming compressor is known,
281 you can declare it before compression begins::
317 you can declare it before compression begins::
@@ -310,6 +346,14 b' Thte total number of bytes written so fa'
310 ...
346 ...
311 total_written = compressor.tell()
347 total_written = compressor.tell()
312
348
349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
351 the number of bytes that were ``write()``en to the underlying object. When
352 ``True``, ``write()`` returns the number of bytes read from the input that
353 were subsequently written to the compressor. ``True`` is the *proper* behavior
354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
355 the default value in a future release.
356
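As an illustration, a minimal sketch (``fh`` is assumed to be any object
with a ``write()`` method, such as an ``io.BytesIO``)::

    cctx = zstd.ZstdCompressor()
    with cctx.stream_writer(fh, write_return_read=True) as compressor:
        # Returns the number of input bytes consumed, not the number
        # of compressed bytes emitted to ``fh``.
        consumed = compressor.write(b'data to compress')
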
313 Streaming Output API
357 Streaming Output API
314 ^^^^^^^^^^^^^^^^^^^^
358 ^^^^^^^^^^^^^^^^^^^^
315
359
@@ -654,27 +698,63 b' will raise ``ValueError`` if attempted.'
654 ``tell()`` returns the number of decompressed bytes read so far.
698 ``tell()`` returns the number of decompressed bytes read so far.
655
699
656 Not all I/O methods are implemented. Notably missing is support for
700 Not all I/O methods are implemented. Notably missing is support for
657 ``readline()``, ``readlines()``, and linewise iteration support. Support for
701 ``readline()``, ``readlines()``, and linewise iteration support. This is
658 these is planned for a future release.
702 because streams operate on binary data - not text data. If you want to
703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
704 to the stream::
705
706 with open(path, 'rb') as fh:
707 dctx = zstd.ZstdDecompressor()
708 stream_reader = dctx.stream_reader(fh)
709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
710
711 for line in text_stream:
712 ...
713
714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
715 behavior of read operations when the end of a zstd *frame* is encountered.
716 When ``False`` (the default), a read will complete when the end of a
717 zstd *frame* is encountered. When ``True``, a read can potentially
718 return data spanning multiple zstd *frames*.
659
719
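As an illustration, a minimal sketch (``fh`` is assumed to contain one or
more concatenated zstd frames)::

    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(fh, read_across_frames=True)
    # Returns decompressed data from all input frames instead of
    # stopping at the first frame boundary.
    data = reader.read()
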
660 Streaming Input API
720 Streaming Input API
661 ^^^^^^^^^^^^^^^^^^^
721 ^^^^^^^^^^^^^^^^^^^
662
722
663 ``stream_writer(fh)`` can be used to incrementally send compressed data to a
723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
664 decompressor.::
724
725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
726 that involve writing will do useful things.
727
728 The argument to ``stream_writer()`` is typically an object that also implements
729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
730 common Python types conform to this interface, including open file handles
731 and ``io.BytesIO``.
732
733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
734 is sent to the decompressor by calling ``write(data)`` and decompressed
735 output is written to the underlying stream by calling its ``write(data)``
736 method.::
665
737
666 dctx = zstd.ZstdDecompressor()
738 dctx = zstd.ZstdDecompressor()
667 with dctx.stream_writer(fh) as decompressor:
739 decompressor = dctx.stream_writer(fh)
668 decompressor.write(compressed_data)
669
740
670 This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
741 decompressor.write(compressed_data)
671 the decompressor by calling ``write(data)`` and decompressed output is written
742 ...
672 to the output object by calling its ``write(data)`` method.
743
673
744
674 Calls to ``write()`` will return the number of bytes written to the output
745 Calls to ``write()`` will return the number of bytes written to the output
675 object. Not all inputs will result in bytes being written, so return values
746 object. Not all inputs will result in bytes being written, so return values
676 of ``0`` are possible.
747 of ``0`` are possible.
677
748
749 Like the ``stream_writer()`` compressor, instances can be used as context
750 managers. However, context managers add no extra special behavior and offer
751 little to no benefit.
752
753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
755 ``close()`` will also call ``close()`` on the underlying stream if such a
756 method exists.
757
678 The size of chunks being ``write()`` to the destination can be specified::
758 The size of chunks being ``write()`` to the destination can be specified::
679
759
680 dctx = zstd.ZstdDecompressor()
760 dctx = zstd.ZstdDecompressor()
@@ -687,6 +767,13 b' You can see how much memory is being use'
687 with dctx.stream_writer(fh) as decompressor:
767 with dctx.stream_writer(fh) as decompressor:
688 byte_size = decompressor.memory_size()
768 byte_size = decompressor.memory_size()
689
769
770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
771 the return value of ``write()``. When ``False`` (the default), ``write()``
772 returns the number of bytes that were ``write()``en to the underlying stream.
773 When ``True``, ``write()`` returns the number of bytes read from the input.
774 ``True`` is the *proper* behavior for ``write()`` as specified by the
775 ``io.RawIOBase`` interface and will become the default in a future release.
776
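As an illustration, a minimal sketch (``fh`` and ``compressed_data`` are
assumed)::

    dctx = zstd.ZstdDecompressor()
    decompressor = dctx.stream_writer(fh, write_return_read=True)
    # Returns the number of compressed bytes consumed from the input,
    # not the number of decompressed bytes written to ``fh``.
    consumed = decompressor.write(compressed_data)
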
690 Streaming Output API
777 Streaming Output API
691 ^^^^^^^^^^^^^^^^^^^^
778 ^^^^^^^^^^^^^^^^^^^^
692
779
@@ -791,6 +878,10 b' these temporary chunks by passing ``writ'
791 memory (re)allocations, this streaming decompression API isn't as
878 memory (re)allocations, this streaming decompression API isn't as
792 efficient as other APIs.
879 efficient as other APIs.
793
880
881 For compatibility with the standard library APIs, instances expose a
882 ``flush([length=None])`` method. This method no-ops and has no meaningful
883 side-effects, making it safe to call any time.
884
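As an illustration, a minimal sketch (``compressed_chunk`` is a stand-in
for real compressed input)::

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()
    data = dobj.decompress(compressed_chunk)
    dobj.flush()  # safe no-op; kept for standard library parity
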
794 Batch Decompression API
885 Batch Decompression API
795 ^^^^^^^^^^^^^^^^^^^^^^^
886 ^^^^^^^^^^^^^^^^^^^^^^^
796
887
@@ -1147,18 +1238,21 b' follows:'
1147 * search_log
1238 * search_log
1148 * min_match
1239 * min_match
1149 * target_length
1240 * target_length
1150 * compression_strategy
1241 * strategy
1242 * compression_strategy (deprecated: same as ``strategy``)
1151 * write_content_size
1243 * write_content_size
1152 * write_checksum
1244 * write_checksum
1153 * write_dict_id
1245 * write_dict_id
1154 * job_size
1246 * job_size
1155 * overlap_size_log
1247 * overlap_log
1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1156 * force_max_window
1249 * force_max_window
1157 * enable_ldm
1250 * enable_ldm
1158 * ldm_hash_log
1251 * ldm_hash_log
1159 * ldm_min_match
1252 * ldm_min_match
1160 * ldm_bucket_size_log
1253 * ldm_bucket_size_log
1161 * ldm_hash_every_log
1254 * ldm_hash_rate_log
1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1162 * threads
1256 * threads
1163
1257
1164 Some of these are very low-level settings. It may help to consult the official
1258 Some of these are very low-level settings. It may help to consult the official
@@ -1240,6 +1334,13 b' FRAME_HEADER'
1240 MAGIC_NUMBER
1334 MAGIC_NUMBER
1241 Frame header as an integer
1335 Frame header as an integer
1242
1336
1337 FLUSH_BLOCK
1338 Flushing behavior that flushes a zstd block. A decompressor will
1339 be able to decode all data fed into the compressor so far.
1340 FLUSH_FRAME
1341 Flushing behavior that ends a zstd frame. Any new data fed
1342 to the compressor will start a new frame.
1343
1243 CONTENTSIZE_UNKNOWN
1344 CONTENTSIZE_UNKNOWN
1244 Value for content size when the content size is unknown.
1345 Value for content size when the content size is unknown.
1245 CONTENTSIZE_ERROR
1346 CONTENTSIZE_ERROR
@@ -1261,10 +1362,18 b' SEARCHLOG_MIN'
1261 Minimum value for compression parameter
1362 Minimum value for compression parameter
1262 SEARCHLOG_MAX
1363 SEARCHLOG_MAX
1263 Maximum value for compression parameter
1364 Maximum value for compression parameter
1365 MINMATCH_MIN
1366 Minimum value for compression parameter
1367 MINMATCH_MAX
1368 Maximum value for compression parameter
1264 SEARCHLENGTH_MIN
1369 SEARCHLENGTH_MIN
1265 Minimum value for compression parameter
1370 Minimum value for compression parameter
1371
1372 Deprecated: use ``MINMATCH_MIN``
1266 SEARCHLENGTH_MAX
1373 SEARCHLENGTH_MAX
1267 Maximum value for compression parameter
1374 Maximum value for compression parameter
1375
1376 Deprecated: use ``MINMATCH_MAX``
1268 TARGETLENGTH_MIN
1377 TARGETLENGTH_MIN
1269 Minimum value for compression parameter
1378 Minimum value for compression parameter
1270 STRATEGY_FAST
1379 STRATEGY_FAST
@@ -1283,6 +1392,8 b' STRATEGY_BTOPT'
1283 Compression strategy
1392 Compression strategy
1284 STRATEGY_BTULTRA
1393 STRATEGY_BTULTRA
1285 Compression strategy
1394 Compression strategy
1395 STRATEGY_BTULTRA2
1396 Compression strategy
1286
1397
1287 FORMAT_ZSTD1
1398 FORMAT_ZSTD1
1288 Zstandard frame format
1399 Zstandard frame format
@@ -43,7 +43,7 b' static PyObject* ZstdCompressionChunkerI'
43 /* If we have data left in the input, consume it. */
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS
45 Py_BEGIN_ALLOW_THREADS
46 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
46 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
48 Py_END_ALLOW_THREADS
49
49
@@ -104,7 +104,7 b' static PyObject* ZstdCompressionChunkerI'
104 }
104 }
105
105
106 Py_BEGIN_ALLOW_THREADS
106 Py_BEGIN_ALLOW_THREADS
107 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
107 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
108 &chunker->input, zFlushMode);
108 &chunker->input, zFlushMode);
109 Py_END_ALLOW_THREADS
109 Py_END_ALLOW_THREADS
110
110
@@ -298,13 +298,9 b' static PyObject* ZstdCompressionDict_pre'
298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
299 }
299 }
300 else {
300 else {
301 cParams.chainLog = compressionParams->chainLog;
301 if (to_cparams(compressionParams, &cParams)) {
302 cParams.hashLog = compressionParams->hashLog;
302 return NULL;
303 cParams.searchLength = compressionParams->minMatch;
303 }
304 cParams.searchLog = compressionParams->searchLog;
305 cParams.strategy = compressionParams->compressionStrategy;
306 cParams.targetLength = compressionParams->targetLength;
307 cParams.windowLog = compressionParams->windowLog;
308 }
304 }
309
305
310 assert(!self->cdict);
306 assert(!self->cdict);
@@ -10,7 +10,7 b''
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) {
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
14 size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value);
14 size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value);
15 if (ZSTD_isError(zresult)) {
15 if (ZSTD_isError(zresult)) {
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
@@ -23,28 +23,41 b' int set_parameter(ZSTD_CCtx_params* para'
23
23
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
25
25
26 #define TRY_COPY_PARAMETER(source, dest, param) { \
27 int result; \
28 size_t zresult = ZSTD_CCtxParam_getParameter(source, param, &result); \
29 if (ZSTD_isError(zresult)) { \
30 return 1; \
31 } \
32 zresult = ZSTD_CCtxParam_setParameter(dest, param, result); \
33 if (ZSTD_isError(zresult)) { \
34 return 1; \
35 } \
36 }
37
26 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
27 TRY_SET_PARAMETER(params, ZSTD_p_format, obj->format);
39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
28 TRY_SET_PARAMETER(params, ZSTD_p_compressionLevel, (unsigned)obj->compressionLevel);
40
29 TRY_SET_PARAMETER(params, ZSTD_p_windowLog, obj->windowLog);
41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
30 TRY_SET_PARAMETER(params, ZSTD_p_hashLog, obj->hashLog);
42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
31 TRY_SET_PARAMETER(params, ZSTD_p_chainLog, obj->chainLog);
43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
32 TRY_SET_PARAMETER(params, ZSTD_p_searchLog, obj->searchLog);
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
33 TRY_SET_PARAMETER(params, ZSTD_p_minMatch, obj->minMatch);
45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
34 TRY_SET_PARAMETER(params, ZSTD_p_targetLength, obj->targetLength);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
35 TRY_SET_PARAMETER(params, ZSTD_p_compressionStrategy, obj->compressionStrategy);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
36 TRY_SET_PARAMETER(params, ZSTD_p_contentSizeFlag, obj->contentSizeFlag);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
37 TRY_SET_PARAMETER(params, ZSTD_p_checksumFlag, obj->checksumFlag);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
38 TRY_SET_PARAMETER(params, ZSTD_p_dictIDFlag, obj->dictIDFlag);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
39 TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
40 TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
41 TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
42 TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
43 TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
44 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
45 TRY_SET_PARAMETER(params, ZSTD_p_ldmMinMatch, obj->ldmMinMatch);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
46 TRY_SET_PARAMETER(params, ZSTD_p_ldmBucketSizeLog, obj->ldmBucketSizeLog);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
47 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashEveryLog, obj->ldmHashEveryLog);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
48
61
49 return 0;
62 return 0;
50 }
63 }
@@ -64,6 +77,41 b' int reset_params(ZstdCompressionParamete'
64 return set_parameters(params->params, params);
77 return set_parameters(params->params, params);
65 }
78 }
66
79
80 #define TRY_GET_PARAMETER(params, param, value) { \
81 size_t zresult = ZSTD_CCtxParam_getParameter(params, param, value); \
82 if (ZSTD_isError(zresult)) { \
83 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
84 return 1; \
85 } \
86 }
87
88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
89 int value;
90
91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
92 cparams->windowLog = value;
93
94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
95 cparams->chainLog = value;
96
97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
98 cparams->hashLog = value;
99
100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
101 cparams->searchLog = value;
102
103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
104 cparams->minMatch = value;
105
106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
107 cparams->targetLength = value;
108
109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
110 cparams->strategy = value;
111
112 return 0;
113 }
114
67 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
68 static char* kwlist[] = {
116 static char* kwlist[] = {
69 "format",
117 "format",
@@ -75,50 +123,60 b' static int ZstdCompressionParameters_ini'
75 "min_match",
123 "min_match",
76 "target_length",
124 "target_length",
77 "compression_strategy",
125 "compression_strategy",
126 "strategy",
78 "write_content_size",
127 "write_content_size",
79 "write_checksum",
128 "write_checksum",
80 "write_dict_id",
129 "write_dict_id",
81 "job_size",
130 "job_size",
131 "overlap_log",
82 "overlap_size_log",
132 "overlap_size_log",
83 "force_max_window",
133 "force_max_window",
84 "enable_ldm",
134 "enable_ldm",
85 "ldm_hash_log",
135 "ldm_hash_log",
86 "ldm_min_match",
136 "ldm_min_match",
87 "ldm_bucket_size_log",
137 "ldm_bucket_size_log",
138 "ldm_hash_rate_log",
88 "ldm_hash_every_log",
139 "ldm_hash_every_log",
89 "threads",
140 "threads",
90 NULL
141 NULL
91 };
142 };
92
143
93 unsigned format = 0;
144 int format = 0;
94 int compressionLevel = 0;
145 int compressionLevel = 0;
95 unsigned windowLog = 0;
146 int windowLog = 0;
96 unsigned hashLog = 0;
147 int hashLog = 0;
97 unsigned chainLog = 0;
148 int chainLog = 0;
98 unsigned searchLog = 0;
149 int searchLog = 0;
99 unsigned minMatch = 0;
150 int minMatch = 0;
100 unsigned targetLength = 0;
151 int targetLength = 0;
101 unsigned compressionStrategy = 0;
152 int compressionStrategy = -1;
102 unsigned contentSizeFlag = 1;
153 int strategy = -1;
103 unsigned checksumFlag = 0;
154 int contentSizeFlag = 1;
104 unsigned dictIDFlag = 0;
155 int checksumFlag = 0;
105 unsigned jobSize = 0;
156 int dictIDFlag = 0;
106 unsigned overlapSizeLog = 0;
157 int jobSize = 0;
107 unsigned forceMaxWindow = 0;
158 int overlapLog = -1;
108 unsigned enableLDM = 0;
159 int overlapSizeLog = -1;
109 unsigned ldmHashLog = 0;
160 int forceMaxWindow = 0;
110 unsigned ldmMinMatch = 0;
161 int enableLDM = 0;
111 unsigned ldmBucketSizeLog = 0;
162 int ldmHashLog = 0;
112 unsigned ldmHashEveryLog = 0;
163 int ldmMinMatch = 0;
164 int ldmBucketSizeLog = 0;
165 int ldmHashRateLog = -1;
166 int ldmHashEveryLog = -1;
113 int threads = 0;
167 int threads = 0;
114
168
115 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
116 "|IiIIIIIIIIIIIIIIIIIIi:CompressionParameters",
170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
117 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
118 &searchLog, &minMatch, &targetLength, &compressionStrategy,
172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
119 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog,
173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
120 &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
121 &ldmHashEveryLog, &threads)) {
175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
176 return -1;
177 }
178
179 if (reset_params(self)) {
122 return -1;
180 return -1;
123 }
181 }
124
182
@@ -126,32 +184,70 b' static int ZstdCompressionParameters_ini'
126 threads = cpu_count();
184 threads = cpu_count();
127 }
185 }
128
186
129 self->format = format;
187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
130 self->compressionLevel = compressionLevel;
188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
131 self->windowLog = windowLog;
189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
132 self->hashLog = hashLog;
190
133 self->chainLog = chainLog;
191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
134 self->searchLog = searchLog;
192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
135 self->minMatch = minMatch;
193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
136 self->targetLength = targetLength;
194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
137 self->compressionStrategy = compressionStrategy;
195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
138 self->contentSizeFlag = contentSizeFlag;
196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
139 self->checksumFlag = checksumFlag;
197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
140 self->dictIDFlag = dictIDFlag;
198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
141 self->threads = threads;
142 self->jobSize = jobSize;
143 self->overlapSizeLog = overlapSizeLog;
144 self->forceMaxWindow = forceMaxWindow;
145 self->enableLongDistanceMatching = enableLDM;
146 self->ldmHashLog = ldmHashLog;
147 self->ldmMinMatch = ldmMinMatch;
148 self->ldmBucketSizeLog = ldmBucketSizeLog;
149 self->ldmHashEveryLog = ldmHashEveryLog;
150
199
151 if (reset_params(self)) {
200 if (compressionStrategy != -1 && strategy != -1) {
201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
202 return -1;
203 }
204
205 if (compressionStrategy != -1) {
206 strategy = compressionStrategy;
207 }
208 else if (strategy == -1) {
209 strategy = 0;
210 }
211
212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
217
218 if (overlapLog != -1 && overlapSizeLog != -1) {
219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
152 return -1;
220 return -1;
153 }
221 }
154
222
223 if (overlapSizeLog != -1) {
224 overlapLog = overlapSizeLog;
225 }
226 else if (overlapLog == -1) {
227 overlapLog = 0;
228 }
229
230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
236
237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_every_log");
239 return -1;
240 }
241
242 if (ldmHashEveryLog != -1) {
243 ldmHashRateLog = ldmHashEveryLog;
244 }
245 else if (ldmHashRateLog == -1) {
246 ldmHashRateLog = 0;
247 }
248
249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
250
155 return 0;
251 return 0;
156 }
252 }
157
253
@@ -259,7 +355,7 b' ZstdCompressionParametersObject* Compres'
259
355
260 val = PyDict_GetItemString(kwargs, "min_match");
356 val = PyDict_GetItemString(kwargs, "min_match");
261 if (!val) {
357 if (!val) {
262 val = PyLong_FromUnsignedLong(params.searchLength);
358 val = PyLong_FromUnsignedLong(params.minMatch);
263 if (!val) {
359 if (!val) {
264 goto cleanup;
360 goto cleanup;
265 }
361 }
@@ -336,6 +432,41 b' static void ZstdCompressionParameters_de'
336 PyObject_Del(self);
432 PyObject_Del(self);
337 }
433 }
338
434
435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
436 int result; \
437 size_t zresult; \
438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
439 zresult = ZSTD_CCtxParam_getParameter(p->params, param, &result); \
440 if (ZSTD_isError(zresult)) { \
441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
442 ZSTD_getErrorName(zresult)); \
443 return NULL; \
444 } \
445 return PyLong_FromLong(result); \
446 }
447
448 PARAM_GETTER(format, ZSTD_c_format)
449 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
450 PARAM_GETTER(window_log, ZSTD_c_windowLog)
451 PARAM_GETTER(hash_log, ZSTD_c_hashLog)
452 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
453 PARAM_GETTER(search_log, ZSTD_c_searchLog)
454 PARAM_GETTER(min_match, ZSTD_c_minMatch)
455 PARAM_GETTER(target_length, ZSTD_c_targetLength)
456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
457 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
458 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
459 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
460 PARAM_GETTER(job_size, ZSTD_c_jobSize)
461 PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
462 PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
463 PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
464 PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
465 PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
466 PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
467 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
468 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
469
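With PARAM_GETTER in place, attribute reads go through ZSTD_CCtxParam_getParameter() instead of cached struct fields, so Python sees the values the zstd library actually resolved. A short sketch of the resulting API:

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters.from_level(3)

    # Each attribute is a live getter backed by the ZSTD_CCtx_params object.
    print(params.window_log, params.min_match, params.compression_strategy)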
339 static PyMethodDef ZstdCompressionParameters_methods[] = {
470 static PyMethodDef ZstdCompressionParameters_methods[] = {
340 {
471 {
341 "from_level",
472 "from_level",
@@ -352,70 +483,34 b' static PyMethodDef ZstdCompressionParame'
352 { NULL, NULL }
483 { NULL, NULL }
353 };
484 };
354
485
355 static PyMemberDef ZstdCompressionParameters_members[] = {
356 { "format", T_UINT,
357 offsetof(ZstdCompressionParametersObject, format), READONLY,
358 "compression format" },
359 { "compression_level", T_INT,
360 offsetof(ZstdCompressionParametersObject, compressionLevel), READONLY,
361 "compression level" },
362 { "window_log", T_UINT,
363 offsetof(ZstdCompressionParametersObject, windowLog), READONLY,
364 "window log" },
365 { "hash_log", T_UINT,
366 offsetof(ZstdCompressionParametersObject, hashLog), READONLY,
367 "hash log" },
368 { "chain_log", T_UINT,
369 offsetof(ZstdCompressionParametersObject, chainLog), READONLY,
370 "chain log" },
371 { "search_log", T_UINT,
372 offsetof(ZstdCompressionParametersObject, searchLog), READONLY,
373 "search log" },
374 { "min_match", T_UINT,
375 offsetof(ZstdCompressionParametersObject, minMatch), READONLY,
376 "search length" },
377 { "target_length", T_UINT,
378 offsetof(ZstdCompressionParametersObject, targetLength), READONLY,
379 "target length" },
380 { "compression_strategy", T_UINT,
381 offsetof(ZstdCompressionParametersObject, compressionStrategy), READONLY,
382 "compression strategy" },
383 { "write_content_size", T_UINT,
384 offsetof(ZstdCompressionParametersObject, contentSizeFlag), READONLY,
385 "whether to write content size in frames" },
386 { "write_checksum", T_UINT,
387 offsetof(ZstdCompressionParametersObject, checksumFlag), READONLY,
388 "whether to write checksum in frames" },
389 { "write_dict_id", T_UINT,
390 offsetof(ZstdCompressionParametersObject, dictIDFlag), READONLY,
391 "whether to write dictionary ID in frames" },
392 { "threads", T_UINT,
393 offsetof(ZstdCompressionParametersObject, threads), READONLY,
394 "number of threads to use" },
395 { "job_size", T_UINT,
396 offsetof(ZstdCompressionParametersObject, jobSize), READONLY,
397 "size of compression job when using multiple threads" },
398 { "overlap_size_log", T_UINT,
399 offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
400 "Size of previous input reloaded at the beginning of each job" },
401 { "force_max_window", T_UINT,
402 offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
403 "force back references to remain smaller than window size" },
404 { "enable_ldm", T_UINT,
405 offsetof(ZstdCompressionParametersObject, enableLongDistanceMatching), READONLY,
406 "whether to enable long distance matching" },
407 { "ldm_hash_log", T_UINT,
408 offsetof(ZstdCompressionParametersObject, ldmHashLog), READONLY,
409 "Size of the table for long distance matching, as a power of 2" },
410 { "ldm_min_match", T_UINT,
411 offsetof(ZstdCompressionParametersObject, ldmMinMatch), READONLY,
412 "minimum size of searched matches for long distance matcher" },
413 { "ldm_bucket_size_log", T_UINT,
414 offsetof(ZstdCompressionParametersObject, ldmBucketSizeLog), READONLY,
415 "log size of each bucket in the LDM hash table for collision resolution" },
416 { "ldm_hash_every_log", T_UINT,
417 offsetof(ZstdCompressionParametersObject, ldmHashEveryLog), READONLY,
418 "frequency of inserting/looking up entries in the LDM hash table" },
419 { NULL }
420 };
421 
486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
487 
488 static PyGetSetDef ZstdCompressionParameters_getset[] = {
489 GET_SET_ENTRY(format),
490 GET_SET_ENTRY(compression_level),
491 GET_SET_ENTRY(window_log),
492 GET_SET_ENTRY(hash_log),
493 GET_SET_ENTRY(chain_log),
494 GET_SET_ENTRY(search_log),
495 GET_SET_ENTRY(min_match),
496 GET_SET_ENTRY(target_length),
497 GET_SET_ENTRY(compression_strategy),
498 GET_SET_ENTRY(write_content_size),
499 GET_SET_ENTRY(write_checksum),
500 GET_SET_ENTRY(write_dict_id),
501 GET_SET_ENTRY(threads),
502 GET_SET_ENTRY(job_size),
503 GET_SET_ENTRY(overlap_log),
504 /* TODO remove this deprecated attribute */
505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
506 GET_SET_ENTRY(force_max_window),
507 GET_SET_ENTRY(enable_ldm),
508 GET_SET_ENTRY(ldm_hash_log),
509 GET_SET_ENTRY(ldm_min_match),
510 GET_SET_ENTRY(ldm_bucket_size_log),
511 GET_SET_ENTRY(ldm_hash_rate_log),
512 /* TODO remove this deprecated attribute */
513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
514 { NULL }
515 };
516 
@@ -448,8 +543,8 b' PyTypeObject ZstdCompressionParametersTy'
448 0, /* tp_iter */
543 0, /* tp_iter */
449 0, /* tp_iternext */
544 0, /* tp_iternext */
450 ZstdCompressionParameters_methods, /* tp_methods */
545 ZstdCompressionParameters_methods, /* tp_methods */
451 ZstdCompressionParameters_members, /* tp_members */
546 0, /* tp_members */
452 0, /* tp_getset */
547 ZstdCompressionParameters_getset, /* tp_getset */
453 0, /* tp_base */
548 0, /* tp_base */
454 0, /* tp_dict */
549 0, /* tp_dict */
455 0, /* tp_descr_get */
550 0, /* tp_descr_get */
@@ -128,6 +128,96 b' static PyObject* reader_tell(ZstdCompres'
128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
129 }
129 }
130
130
131 int read_compressor_input(ZstdCompressionReader* self) {
132 if (self->finishedInput) {
133 return 0;
134 }
135
136 if (self->input.pos != self->input.size) {
137 return 0;
138 }
139
140 if (self->reader) {
141 Py_buffer buffer;
142
143 assert(self->readResult == NULL);
144
145 self->readResult = PyObject_CallMethod(self->reader, "read",
146 "k", self->readSize);
147
148 if (NULL == self->readResult) {
149 return -1;
150 }
151
152 memset(&buffer, 0, sizeof(buffer));
153
154 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
155 return -1;
156 }
157
158 /* EOF */
159 if (0 == buffer.len) {
160 self->finishedInput = 1;
161 Py_CLEAR(self->readResult);
162 }
163 else {
164 self->input.src = buffer.buf;
165 self->input.size = buffer.len;
166 self->input.pos = 0;
167 }
168
169 PyBuffer_Release(&buffer);
170 }
171 else {
172 assert(self->buffer.buf);
173
174 self->input.src = self->buffer.buf;
175 self->input.size = self->buffer.len;
176 self->input.pos = 0;
177 }
178
179 return 1;
180 }
181
182 int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
183 size_t oldPos;
184 size_t zresult;
185
186 /* If we have data left over, consume it. */
187 if (self->input.pos < self->input.size) {
188 oldPos = output->pos;
189
190 Py_BEGIN_ALLOW_THREADS
191 zresult = ZSTD_compressStream2(self->compressor->cctx,
192 output, &self->input, ZSTD_e_continue);
193 Py_END_ALLOW_THREADS
194
195 self->bytesCompressed += output->pos - oldPos;
196
197 /* Input exhausted. Clear out state tracking. */
198 if (self->input.pos == self->input.size) {
199 memset(&self->input, 0, sizeof(self->input));
200 Py_CLEAR(self->readResult);
201
202 if (self->buffer.buf) {
203 self->finishedInput = 1;
204 }
205 }
206
207 if (ZSTD_isError(zresult)) {
208 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
209 return -1;
210 }
211 }
212
213 if (output->pos && output->pos == output->size) {
214 return 1;
215 }
216 else {
217 return 0;
218 }
219 }
220
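The two helpers factor the reader's state machine out of read(): read_compressor_input() returns -1/0/1 for error / no new input / new input available, and compress_input() returns -1/0/1 for error / output has room / output buffer full. At the Python level they are driven by ordinary reads from a compressing stream_reader; a minimal usage sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    with cctx.stream_reader(io.BytesIO(b"data" * 4096)) as reader:
        # Each read() loops read_compressor_input()/compress_input()
        # until the requested amount of compressed data is available.
        chunk = reader.read(64)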
131 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
221 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
132 static char* kwlist[] = {
222 static char* kwlist[] = {
133 "size",
223 "size",
@@ -140,25 +230,30 b' static PyObject* reader_read(ZstdCompres'
140 Py_ssize_t resultSize;
141 size_t zresult;
142 size_t oldPos;
143 
144 if (self->closed) {
145 PyErr_SetString(PyExc_ValueError, "stream is closed");
146 return NULL;
147 }
148 
149 if (self->finishedOutput) {
150 return PyBytes_FromStringAndSize("", 0);
151 }
152 
153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
154 return NULL;
155 }
156 
157 if (size < 1) {
158 PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
159 return NULL;
160 }
161 
162 result = PyBytes_FromStringAndSize(NULL, size);
163 if (NULL == result) {
164 return NULL;
230 Py_ssize_t resultSize;
231 size_t zresult;
232 size_t oldPos;
233 int readResult, compressResult;
234 
235 if (self->closed) {
236 PyErr_SetString(PyExc_ValueError, "stream is closed");
237 return NULL;
238 }
239 
240 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
241 return NULL;
242 }
243 
244 if (size < -1) {
245 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
246 return NULL;
247 }
248 
249 if (size == -1) {
250 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
251 }
252 
253 if (self->finishedOutput || size == 0) {
254 return PyBytes_FromStringAndSize("", 0);
255 }
256 
257 result = PyBytes_FromStringAndSize(NULL, size);
258 if (NULL == result) {
259 return NULL;
@@ -172,86 +267,34 b' static PyObject* reader_read(ZstdCompres'
172 
173 readinput:
174 
175 /* If we have data left over, consume it. */
176 if (self->input.pos < self->input.size) {
177 oldPos = self->output.pos;
178 
179 Py_BEGIN_ALLOW_THREADS
180 zresult = ZSTD_compress_generic(self->compressor->cctx,
181 &self->output, &self->input, ZSTD_e_continue);
182 
183 Py_END_ALLOW_THREADS
184 
185 self->bytesCompressed += self->output.pos - oldPos;
186 
187 /* Input exhausted. Clear out state tracking. */
188 if (self->input.pos == self->input.size) {
189 memset(&self->input, 0, sizeof(self->input));
190 Py_CLEAR(self->readResult);
191 
192 if (self->buffer.buf) {
193 self->finishedInput = 1;
194 }
195 }
196 
197 if (ZSTD_isError(zresult)) {
198 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
199 return NULL;
200 }
201 
202 if (self->output.pos) {
203 /* If no more room in output, emit it. */
204 if (self->output.pos == self->output.size) {
205 memset(&self->output, 0, sizeof(self->output));
206 return result;
207 }
208 
209 /*
210 * There is room in the output. We fall through to below, which will either
211 * get more input for us or will attempt to end the stream.
212 */
213 }
214 
215 /* Fall through to gather more input. */
216 }
217 
218 if (!self->finishedInput) {
219 if (self->reader) {
220 Py_buffer buffer;
221 
222 assert(self->readResult == NULL);
223 self->readResult = PyObject_CallMethod(self->reader, "read",
224 "k", self->readSize);
225 if (self->readResult == NULL) {
226 return NULL;
227 }
228 
229 memset(&buffer, 0, sizeof(buffer));
230 
231 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
232 return NULL;
233 }
234 
235 /* EOF */
236 if (0 == buffer.len) {
237 self->finishedInput = 1;
238 Py_CLEAR(self->readResult);
239 }
240 else {
241 self->input.src = buffer.buf;
242 self->input.size = buffer.len;
243 self->input.pos = 0;
244 }
245 
246 PyBuffer_Release(&buffer);
247 }
248 else {
249 assert(self->buffer.buf);
250 
251 self->input.src = self->buffer.buf;
252 self->input.size = self->buffer.len;
253 self->input.pos = 0;
254 }
255 }
256 
257 if (self->input.size) {
267 
268 readinput:
269 
270 compressResult = compress_input(self, &self->output);
271 
272 if (-1 == compressResult) {
273 Py_XDECREF(result);
274 return NULL;
275 }
276 else if (0 == compressResult) {
277 /* There is room in the output. We fall through to below, which will
278 * either get more input for us or will attempt to end the stream.
279 */
280 }
281 else if (1 == compressResult) {
282 memset(&self->output, 0, sizeof(self->output));
283 return result;
284 }
285 else {
286 assert(0);
287 }
288 
289 readResult = read_compressor_input(self);
290 
291 if (-1 == readResult) {
292 return NULL;
293 }
294 else if (0 == readResult) { }
295 else if (1 == readResult) { }
296 else {
297 assert(0);
298 }
299 
300 if (self->input.size) {
@@ -261,7 +304,7 b' readinput:'
261 /* Else EOF */
304 /* Else EOF */
262 oldPos = self->output.pos;
305 oldPos = self->output.pos;
263
306
264 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
307 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
265 &self->input, ZSTD_e_end);
308 &self->input, ZSTD_e_end);
266
309
267 self->bytesCompressed += self->output.pos - oldPos;
310 self->bytesCompressed += self->output.pos - oldPos;
@@ -269,6 +312,7 b' readinput:'
269 if (ZSTD_isError(zresult)) {
312 if (ZSTD_isError(zresult)) {
270 PyErr_Format(ZstdError, "error ending compression stream: %s",
313 PyErr_Format(ZstdError, "error ending compression stream: %s",
271 ZSTD_getErrorName(zresult));
314 ZSTD_getErrorName(zresult));
315 Py_XDECREF(result);
272 return NULL;
316 return NULL;
273 }
317 }
274
318
@@ -288,9 +332,394 b' readinput:'
288 return result;
332 return result;
289 }
333 }
290
334
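The reworked read() gives the size argument io.RawIOBase semantics: it is now optional, 0 returns b'' immediately, and -1 delegates to readall(). A sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    reader = cctx.stream_reader(io.BytesIO(b"foo"))

    assert reader.read(0) == b""   # no-op read
    frame = reader.read(-1)        # same as reader.readall()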
335 static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
336 static char* kwlist[] = {
337 "size",
338 NULL
339 };
340
341 Py_ssize_t size = -1;
342 PyObject* result = NULL;
343 char* resultBuffer;
344 Py_ssize_t resultSize;
345 ZSTD_outBuffer output;
346 int compressResult;
347 size_t oldPos;
348 size_t zresult;
349
350 if (self->closed) {
351 PyErr_SetString(PyExc_ValueError, "stream is closed");
352 return NULL;
353 }
354
355 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
356 return NULL;
357 }
358
359 if (size < -1) {
360 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
361 return NULL;
362 }
363
364 if (self->finishedOutput || size == 0) {
365 return PyBytes_FromStringAndSize("", 0);
366 }
367
368 if (size == -1) {
369 size = ZSTD_CStreamOutSize();
370 }
371
372 result = PyBytes_FromStringAndSize(NULL, size);
373 if (NULL == result) {
374 return NULL;
375 }
376
377 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
378
379 output.dst = resultBuffer;
380 output.size = resultSize;
381 output.pos = 0;
382
383 /* read1() is supposed to use at most 1 read() from the underlying stream.
384 However, we can't satisfy this requirement with compression because
385 not every input will generate output. We /could/ flush the compressor,
386 but this may not be desirable. We allow multiple read() from the
387 underlying stream. But unlike read(), we return as soon as output data
388 is available.
389 */
390
391 compressResult = compress_input(self, &output);
392
393 if (-1 == compressResult) {
394 Py_XDECREF(result);
395 return NULL;
396 }
397 else if (0 == compressResult || 1 == compressResult) { }
398 else {
399 assert(0);
400 }
401
402 if (output.pos) {
403 goto finally;
404 }
405
406 while (!self->finishedInput) {
407 int readResult = read_compressor_input(self);
408
409 if (-1 == readResult) {
410 Py_XDECREF(result);
411 return NULL;
412 }
413 else if (0 == readResult || 1 == readResult) { }
414 else {
415 assert(0);
416 }
417
418 compressResult = compress_input(self, &output);
419
420 if (-1 == compressResult) {
421 Py_XDECREF(result);
422 return NULL;
423 }
424 else if (0 == compressResult || 1 == compressResult) { }
425 else {
426 assert(0);
427 }
428
429 if (output.pos) {
430 goto finally;
431 }
432 }
433
434 /* EOF */
435 oldPos = output.pos;
436
437 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
438 ZSTD_e_end);
439
440 self->bytesCompressed += output.pos - oldPos;
441
442 if (ZSTD_isError(zresult)) {
443 PyErr_Format(ZstdError, "error ending compression stream: %s",
444 ZSTD_getErrorName(zresult));
445 Py_XDECREF(result);
446 return NULL;
447 }
448
449 if (zresult == 0) {
450 self->finishedOutput = 1;
451 }
452
453 finally:
454 if (result) {
455 if (safe_pybytes_resize(&result, output.pos)) {
456 Py_XDECREF(result);
457 return NULL;
458 }
459 }
460
461 return result;
462 }
463
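As the comment in read1() notes, it may issue several read() calls on the source, but it returns as soon as any compressed bytes exist, so a call can yield far less than the requested size. A sketch (default size -1 maps to one ZSTD_CStreamOutSize() buffer):

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    with cctx.stream_reader(io.BytesIO(b"x" * 1_000_000)) as reader:
        chunk = reader.read1()
        assert 0 < len(chunk) <= zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE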
291 static PyObject* reader_readall(PyObject* self) {
292 PyErr_SetNone(PyExc_NotImplementedError);
293 return NULL;
294 }
295 
464 static PyObject* reader_readall(PyObject* self) {
465 PyObject* chunks = NULL;
466 PyObject* empty = NULL;
467 PyObject* result = NULL;
468
469 /* Our strategy is to collect chunks into a list then join all the
470 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
471 * this feels simple enough to implement and avoids potentially expensive
472 * reallocations of large buffers.
473 */
474 chunks = PyList_New(0);
475 if (NULL == chunks) {
476 return NULL;
477 }
478
479 while (1) {
480 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
481 if (NULL == chunk) {
482 Py_DECREF(chunks);
483 return NULL;
484 }
485
486 if (!PyBytes_Size(chunk)) {
487 Py_DECREF(chunk);
488 break;
489 }
490
491 if (PyList_Append(chunks, chunk)) {
492 Py_DECREF(chunk);
493 Py_DECREF(chunks);
494 return NULL;
495 }
496
497 Py_DECREF(chunk);
498 }
499
500 empty = PyBytes_FromStringAndSize("", 0);
501 if (NULL == empty) {
502 Py_DECREF(chunks);
503 return NULL;
504 }
505
506 result = PyObject_CallMethod(empty, "join", "O", chunks);
507
508 Py_DECREF(empty);
509 Py_DECREF(chunks);
510
511 return result;
512 }
513
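readall() collects 1 MiB read() chunks into a list and joins them, as the comment above explains. Note a frame produced this way carries no content size header (the source size was unknown), so one-shot decompression needs max_output_size; a hedged sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    with cctx.stream_reader(io.BytesIO(b"hello world")) as reader:
        frame = reader.readall()

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame, max_output_size=64) == b"hello world"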
514 static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
515 Py_buffer dest;
516 ZSTD_outBuffer output;
517 int readResult, compressResult;
518 PyObject* result = NULL;
519 size_t zresult;
520 size_t oldPos;
521
522 if (self->closed) {
523 PyErr_SetString(PyExc_ValueError, "stream is closed");
524 return NULL;
525 }
526
527 if (self->finishedOutput) {
528 return PyLong_FromLong(0);
529 }
530
531 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
532 return NULL;
533 }
534
535 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
536 PyErr_SetString(PyExc_ValueError,
537 "destination buffer should be contiguous and have at most one dimension");
538 goto finally;
539 }
540
541 output.dst = dest.buf;
542 output.size = dest.len;
543 output.pos = 0;
544
545 compressResult = compress_input(self, &output);
546
547 if (-1 == compressResult) {
548 goto finally;
549 }
550 else if (0 == compressResult) { }
551 else if (1 == compressResult) {
552 result = PyLong_FromSize_t(output.pos);
553 goto finally;
554 }
555 else {
556 assert(0);
557 }
558
559 while (!self->finishedInput) {
560 readResult = read_compressor_input(self);
561
562 if (-1 == readResult) {
563 goto finally;
564 }
565 else if (0 == readResult || 1 == readResult) {}
566 else {
567 assert(0);
568 }
569
570 compressResult = compress_input(self, &output);
571
572 if (-1 == compressResult) {
573 goto finally;
574 }
575 else if (0 == compressResult) { }
576 else if (1 == compressResult) {
577 result = PyLong_FromSize_t(output.pos);
578 goto finally;
579 }
580 else {
581 assert(0);
582 }
583 }
584
585 /* EOF */
586 oldPos = output.pos;
587
588 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
589 ZSTD_e_end);
590
591 self->bytesCompressed += output.pos - oldPos;
592
593 if (ZSTD_isError(zresult)) {
594 PyErr_Format(ZstdError, "error ending compression stream: %s",
595 ZSTD_getErrorName(zresult));
596 goto finally;
597 }
598
599 assert(output.pos);
600
601 if (0 == zresult) {
602 self->finishedOutput = 1;
603 }
604
605 result = PyLong_FromSize_t(output.pos);
606
607 finally:
608 PyBuffer_Release(&dest);
609
610 return result;
611 }
612
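readinto() fills a caller-provided writable buffer in place and returns the byte count, avoiding an intermediate bytes object. A minimal sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    reader = cctx.stream_reader(io.BytesIO(b"data"))

    buf = bytearray(zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
    n = reader.readinto(buf)        # bytes written into buf
    compressed = bytes(buf[:n])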
613 static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
614 Py_buffer dest;
615 PyObject* result = NULL;
616 ZSTD_outBuffer output;
617 int compressResult;
618 size_t oldPos;
619 size_t zresult;
620
621 if (self->closed) {
622 PyErr_SetString(PyExc_ValueError, "stream is closed");
623 return NULL;
624 }
625
626 if (self->finishedOutput) {
627 return PyLong_FromLong(0);
628 }
629
630 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
631 return NULL;
632 }
633
634 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
635 PyErr_SetString(PyExc_ValueError,
636 "destination buffer should be contiguous and have at most one dimension");
637 goto finally;
638 }
639
640 output.dst = dest.buf;
641 output.size = dest.len;
642 output.pos = 0;
643
644 compressResult = compress_input(self, &output);
645
646 if (-1 == compressResult) {
647 goto finally;
648 }
649 else if (0 == compressResult || 1 == compressResult) { }
650 else {
651 assert(0);
652 }
653
654 if (output.pos) {
655 result = PyLong_FromSize_t(output.pos);
656 goto finally;
657 }
658
659 while (!self->finishedInput) {
660 int readResult = read_compressor_input(self);
661
662 if (-1 == readResult) {
663 goto finally;
664 }
665 else if (0 == readResult || 1 == readResult) { }
666 else {
667 assert(0);
668 }
669
670 compressResult = compress_input(self, &output);
671
672 if (-1 == compressResult) {
673 goto finally;
674 }
675 else if (0 == compressResult) { }
676 else if (1 == compressResult) {
677 result = PyLong_FromSize_t(output.pos);
678 goto finally;
679 }
680 else {
681 assert(0);
682 }
683
684 /* If we produced output and we're not done with input, emit
685 * that output now, as we've hit restrictions of read1().
686 */
687 if (output.pos && !self->finishedInput) {
688 result = PyLong_FromSize_t(output.pos);
689 goto finally;
690 }
691
692 /* Otherwise we either have no output or we've exhausted the
693 * input. Either we try to get more input or we fall through
694 * to EOF below */
695 }
696
697 /* EOF */
698 oldPos = output.pos;
699
700 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
701 ZSTD_e_end);
702
703 self->bytesCompressed += output.pos - oldPos;
704
705 if (ZSTD_isError(zresult)) {
706 PyErr_Format(ZstdError, "error ending compression stream: %s",
707 ZSTD_getErrorName(zresult));
708 goto finally;
709 }
710
711 assert(output.pos);
712
713 if (0 == zresult) {
714 self->finishedOutput = 1;
715 }
716
717 result = PyLong_FromSize_t(output.pos);
718
719 finally:
720 PyBuffer_Release(&dest);
721
722 return result;
723 }
724 
296 static PyObject* reader_iter(PyObject* self) {
725 static PyObject* reader_iter(PyObject* self) {
@@ -315,7 +744,10 b' static PyMethodDef reader_methods[] = {'
315 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
744 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
316 PyDoc_STR("Returns True") },
745 PyDoc_STR("Returns True") },
317 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
746 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
747 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
318 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
748 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
749 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
750 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
319 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
751 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
320 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
752 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
321 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
753 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
@@ -18,24 +18,23 b' static void ZstdCompressionWriter_deallo'
18 Py_XDECREF(self->compressor);
18 Py_XDECREF(self->compressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 PyMem_Free(self->output.dst);
22 self->output.dst = NULL;
23
21 PyObject_Del(self);
24 PyObject_Del(self);
22 }
25 }
23
26
24 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
25 size_t zresult;
26 
27 if (self->entered) {
28 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
29 return NULL;
30 }
31 
32 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
33 if (ZSTD_isError(zresult)) {
34 PyErr_Format(ZstdError, "error setting source size: %s",
35 ZSTD_getErrorName(zresult));
36 return NULL;
37 }
38 
39 self->entered = 1;
40 
41 Py_INCREF(self);
27 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
28 if (self->closed) {
29 PyErr_SetString(PyExc_ValueError, "stream is closed");
30 return NULL;
31 }
32 
33 if (self->entered) {
34 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
35 return NULL;
36 }
37 
38 self->entered = 1;
39 
40 Py_INCREF(self);
@@ -46,10 +45,6 b' static PyObject* ZstdCompressionWriter_e'
46 PyObject* exc_type;
45 PyObject* exc_type;
47 PyObject* exc_value;
46 PyObject* exc_value;
48 PyObject* exc_tb;
47 PyObject* exc_tb;
49 size_t zresult;
50
51 ZSTD_outBuffer output;
52 PyObject* res;
53
48
54 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
49 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
55 return NULL;
50 return NULL;
@@ -58,46 +53,11 b' static PyObject* ZstdCompressionWriter_e'
58 self->entered = 0;
59 
60 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
61 ZSTD_inBuffer inBuffer;
62 
63 inBuffer.src = NULL;
64 inBuffer.size = 0;
65 inBuffer.pos = 0;
66 
67 output.dst = PyMem_Malloc(self->outSize);
68 if (!output.dst) {
69 return PyErr_NoMemory();
70 }
71 output.size = self->outSize;
72 output.pos = 0;
73 
74 while (1) {
75 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &inBuffer, ZSTD_e_end);
76 if (ZSTD_isError(zresult)) {
77 PyErr_Format(ZstdError, "error ending compression stream: %s",
78 ZSTD_getErrorName(zresult));
79 PyMem_Free(output.dst);
80 return NULL;
81 }
82 
83 if (output.pos) {
84 #if PY_MAJOR_VERSION >= 3
85 res = PyObject_CallMethod(self->writer, "write", "y#",
86 #else
87 res = PyObject_CallMethod(self->writer, "write", "s#",
88 #endif
89 output.dst, output.pos);
90 Py_XDECREF(res);
91 }
92 
93 if (!zresult) {
94 break;
95 }
96 
97 output.pos = 0;
98 }
99 
100 PyMem_Free(output.dst);
101 }
102 
103 Py_RETURN_FALSE;
53 self->entered = 0;
54 
55 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
56 PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
57 
58 if (NULL == result) {
59 return NULL;
60 }
61 }
62 
63 Py_RETURN_FALSE;
@@ -117,7 +77,6 b' static PyObject* ZstdCompressionWriter_w'
117 Py_buffer source;
77 Py_buffer source;
118 size_t zresult;
78 size_t zresult;
119 ZSTD_inBuffer input;
79 ZSTD_inBuffer input;
120 ZSTD_outBuffer output;
121 PyObject* res;
80 PyObject* res;
122 Py_ssize_t totalWrite = 0;
81 Py_ssize_t totalWrite = 0;
123
82
@@ -130,143 +89,240 b' static PyObject* ZstdCompressionWriter_w'
130 return NULL;
89 return NULL;
131 }
90 }
132
91
133 if (!self->entered) {
134 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
135 goto finally;
136 }
137 
138 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
139 PyErr_SetString(PyExc_ValueError,
140 "data buffer should be contiguous and have at most one dimension");
141 goto finally;
142 }
143 
144 output.dst = PyMem_Malloc(self->outSize);
145 if (!output.dst) {
146 PyErr_NoMemory();
147 goto finally;
148 }
149 output.size = self->outSize;
150 output.pos = 0;
151 
152 input.src = source.buf;
153 input.size = source.len;
154 input.pos = 0;
155 
156 while ((ssize_t)input.pos < source.len) {
157 Py_BEGIN_ALLOW_THREADS
158 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_continue);
159 Py_END_ALLOW_THREADS
160 
161 if (ZSTD_isError(zresult)) {
162 PyMem_Free(output.dst);
163 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
164 goto finally;
165 }
166 
167 /* Copy data from output buffer to writer. */
168 if (output.pos) {
169 #if PY_MAJOR_VERSION >= 3
170 res = PyObject_CallMethod(self->writer, "write", "y#",
171 #else
172 res = PyObject_CallMethod(self->writer, "write", "s#",
173 #endif
174 output.dst, output.pos);
175 Py_XDECREF(res);
176 totalWrite += output.pos;
177 self->bytesCompressed += output.pos;
178 }
179 output.pos = 0;
180 }
181 
182 PyMem_Free(output.dst);
183 
184 result = PyLong_FromSsize_t(totalWrite);
185 
186 finally:
187 PyBuffer_Release(&source);
188 return result;
189 }
92 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
93 PyErr_SetString(PyExc_ValueError,
94 "data buffer should be contiguous and have at most one dimension");
95 goto finally;
96 }
97 
98 if (self->closed) {
99 PyErr_SetString(PyExc_ValueError, "stream is closed");
100 return NULL;
101 }
102 
103 self->output.pos = 0;
104 
105 input.src = source.buf;
106 input.size = source.len;
107 input.pos = 0;
108 
109 while (input.pos < (size_t)source.len) {
110 Py_BEGIN_ALLOW_THREADS
111 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
112 Py_END_ALLOW_THREADS
113 
114 if (ZSTD_isError(zresult)) {
115 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
116 goto finally;
117 }
118 
119 /* Copy data from output buffer to writer. */
120 if (self->output.pos) {
121 #if PY_MAJOR_VERSION >= 3
122 res = PyObject_CallMethod(self->writer, "write", "y#",
123 #else
124 res = PyObject_CallMethod(self->writer, "write", "s#",
125 #endif
126 self->output.dst, self->output.pos);
127 Py_XDECREF(res);
128 totalWrite += self->output.pos;
129 self->bytesCompressed += self->output.pos;
130 }
131 self->output.pos = 0;
132 }
133 
134 if (self->writeReturnRead) {
135 result = PyLong_FromSize_t(input.pos);
136 }
137 else {
138 result = PyLong_FromSsize_t(totalWrite);
139 }
140 
141 finally:
142 PyBuffer_Release(&source);
143 return result;
144 }
190
145
191 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
192 size_t zresult;
193 ZSTD_outBuffer output;
194 ZSTD_inBuffer input;
195 PyObject* res;
196 Py_ssize_t totalWrite = 0;
197 
198 if (!self->entered) {
199 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
200 return NULL;
201 }
202 
203 input.src = NULL;
204 input.size = 0;
205 input.pos = 0;
206 
207 output.dst = PyMem_Malloc(self->outSize);
208 if (!output.dst) {
209 return PyErr_NoMemory();
210 }
211 output.size = self->outSize;
212 output.pos = 0;
213 
214 while (1) {
215 Py_BEGIN_ALLOW_THREADS
216 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_flush);
217 Py_END_ALLOW_THREADS
218 
219 if (ZSTD_isError(zresult)) {
220 PyMem_Free(output.dst);
221 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
222 return NULL;
223 }
224 
225 /* Copy data from output buffer to writer. */
226 if (output.pos) {
227 #if PY_MAJOR_VERSION >= 3
228 res = PyObject_CallMethod(self->writer, "write", "y#",
229 #else
230 res = PyObject_CallMethod(self->writer, "write", "s#",
231 #endif
232 output.dst, output.pos);
233 Py_XDECREF(res);
234 totalWrite += output.pos;
235 self->bytesCompressed += output.pos;
236 }
237 
238 output.pos = 0;
239 
240 if (!zresult) {
241 break;
242 }
243 }
244 
245 PyMem_Free(output.dst);
246 
247 return PyLong_FromSsize_t(totalWrite);
248 }
249 
146 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
147 static char* kwlist[] = {
148 "flush_mode",
149 NULL
150 };
151 
152 size_t zresult;
153 ZSTD_inBuffer input;
154 PyObject* res;
155 Py_ssize_t totalWrite = 0;
156 unsigned flush_mode = 0;
157 ZSTD_EndDirective flush;
158 
159 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
160 kwlist, &flush_mode)) {
161 return NULL;
162 }
163 
164 switch (flush_mode) {
165 case 0:
166 flush = ZSTD_e_flush;
167 break;
168 case 1:
169 flush = ZSTD_e_end;
170 break;
171 default:
172 PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
173 return NULL;
174 }
175 
176 if (self->closed) {
177 PyErr_SetString(PyExc_ValueError, "stream is closed");
178 return NULL;
179 }
180 
181 self->output.pos = 0;
182 
183 input.src = NULL;
184 input.size = 0;
185 input.pos = 0;
186 
187 while (1) {
188 Py_BEGIN_ALLOW_THREADS
189 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
190 Py_END_ALLOW_THREADS
191 
192 if (ZSTD_isError(zresult)) {
193 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
194 return NULL;
195 }
196 
197 /* Copy data from output buffer to writer. */
198 if (self->output.pos) {
199 #if PY_MAJOR_VERSION >= 3
200 res = PyObject_CallMethod(self->writer, "write", "y#",
201 #else
202 res = PyObject_CallMethod(self->writer, "write", "s#",
203 #endif
204 self->output.dst, self->output.pos);
205 Py_XDECREF(res);
206 totalWrite += self->output.pos;
207 self->bytesCompressed += self->output.pos;
208 }
209 
210 self->output.pos = 0;
211 
212 if (!zresult) {
213 break;
214 }
215 }
216 
217 return PyLong_FromSsize_t(totalWrite);
218 }
219 
220 static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
221 PyObject* result;
222 
223 if (self->closed) {
224 Py_RETURN_NONE;
225 }
226 
227 result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
228 self->closed = 1;
229 
230 if (NULL == result) {
231 return NULL;
232 }
233 
234 /* Call close on underlying stream as well. */
235 if (PyObject_HasAttrString(self->writer, "close")) {
236 return PyObject_CallMethod(self->writer, "close", NULL);
237 }
238 
239 Py_RETURN_NONE;
240 }
241 
242 static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
243 if (PyObject_HasAttrString(self->writer, "fileno")) {
244 return PyObject_CallMethod(self->writer, "fileno", NULL);
245 }
246 else {
247 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
248 return NULL;
249 }
250 }
251 
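flush() now takes a flush_mode mapped onto ZSTD_e_flush or ZSTD_e_end (exposed as the FLUSH_BLOCK and FLUSH_FRAME module constants added below), and close() is flush(FLUSH_FRAME) plus closing the wrapped stream. A sketch of the resulting Python-level contract:

    import io
    import zstandard as zstd

    dest = io.BytesIO()
    cctx = zstd.ZstdCompressor()

    writer = cctx.stream_writer(dest)
    writer.write(b"block 1")
    writer.flush(zstd.FLUSH_BLOCK)   # ZSTD_e_flush: end the block, keep the frame open
    writer.write(b"block 2")
    writer.close()                   # flush(zstd.FLUSH_FRAME), then dest.close()
    assert dest.closed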
250 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
252 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
251 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
253 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
252 }
254 }
253
255
256 static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) {
257 PyErr_SetNone(PyExc_NotImplementedError);
258 return NULL;
259 }
260
261 static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) {
262 Py_RETURN_FALSE;
263 }
264
265 static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) {
266 Py_RETURN_TRUE;
267 }
268
269 static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
270 PyObject* iomod;
271 PyObject* exc;
272
273 iomod = PyImport_ImportModule("io");
274 if (NULL == iomod) {
275 return NULL;
276 }
277
278 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
279 if (NULL == exc) {
280 Py_DECREF(iomod);
281 return NULL;
282 }
283
284 PyErr_SetNone(exc);
285 Py_DECREF(exc);
286 Py_DECREF(iomod);
287
288 return NULL;
289 }
290
254 static PyMethodDef ZstdCompressionWriter_methods[] = {
291 static PyMethodDef ZstdCompressionWriter_methods[] = {
255 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
292 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
256 PyDoc_STR("Enter a compression context.") },
293 PyDoc_STR("Enter a compression context.") },
257 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
294 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
258 PyDoc_STR("Exit a compression context.") },
295 PyDoc_STR("Exit a compression context.") },
296 { "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
297 { "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
298 { "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
299 { "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
300 { "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
301 { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
302 { "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
303 { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
304 { "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
305 { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
306 { "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL },
307 { "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
308 { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
309 { "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
259 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
310 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
260 PyDoc_STR("Obtain the memory size of the underlying compressor") },
311 PyDoc_STR("Obtain the memory size of the underlying compressor") },
261 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
312 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
262 PyDoc_STR("Compress data") },
313 PyDoc_STR("Compress data") },
263 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
314 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
264 PyDoc_STR("Flush data and finish a zstd frame") },
315 PyDoc_STR("Flush data and finish a zstd frame") },
265 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
316 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
266 PyDoc_STR("Returns current number of bytes compressed") },
317 PyDoc_STR("Returns current number of bytes compressed") },
267 { NULL, NULL }
318 { NULL, NULL }
268 };
319 };
269
320
321 static PyMemberDef ZstdCompressionWriter_members[] = {
322 { "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL },
323 { NULL }
324 };
325
270 PyTypeObject ZstdCompressionWriterType = {
326 PyTypeObject ZstdCompressionWriterType = {
271 PyVarObject_HEAD_INIT(NULL, 0)
327 PyVarObject_HEAD_INIT(NULL, 0)
272 "zstd.ZstdCompressionWriter", /* tp_name */
328 "zstd.ZstdCompressionWriter", /* tp_name */
@@ -296,7 +352,7 b' PyTypeObject ZstdCompressionWriterType ='
296 0, /* tp_iter */
352 0, /* tp_iter */
297 0, /* tp_iternext */
353 0, /* tp_iternext */
298 ZstdCompressionWriter_methods, /* tp_methods */
354 ZstdCompressionWriter_methods, /* tp_methods */
299 0, /* tp_members */
355 ZstdCompressionWriter_members, /* tp_members */
300 0, /* tp_getset */
356 0, /* tp_getset */
301 0, /* tp_base */
357 0, /* tp_base */
302 0, /* tp_dict */
358 0, /* tp_dict */
@@ -59,9 +59,9 b' static PyObject* ZstdCompressionObj_comp'
59 input.size = source.len;
59 input.size = source.len;
60 input.pos = 0;
60 input.pos = 0;
61
61
62 while ((ssize_t)input.pos < source.len) {
62 while (input.pos < (size_t)source.len) {
63 Py_BEGIN_ALLOW_THREADS
63 Py_BEGIN_ALLOW_THREADS
64 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
64 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
65 &input, ZSTD_e_continue);
65 &input, ZSTD_e_continue);
66 Py_END_ALLOW_THREADS
66 Py_END_ALLOW_THREADS
67
67
@@ -154,7 +154,7 b' static PyObject* ZstdCompressionObj_flus'
154
154
155 while (1) {
155 while (1) {
156 Py_BEGIN_ALLOW_THREADS
156 Py_BEGIN_ALLOW_THREADS
157 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
157 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
158 &input, zFlushMode);
158 &input, zFlushMode);
159 Py_END_ALLOW_THREADS
159 Py_END_ALLOW_THREADS
160
160
@@ -204,27 +204,27 b' static int ZstdCompressor_init(ZstdCompr'
204 }
204 }
205 }
205 }
206 else {
206 else {
207 if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) {
207 if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
208 return -1;
208 return -1;
209 }
209 }
210
210
211 if (set_parameter(self->params, ZSTD_p_contentSizeFlag,
211 if (set_parameter(self->params, ZSTD_c_contentSizeFlag,
212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
213 return -1;
213 return -1;
214 }
214 }
215
215
216 if (set_parameter(self->params, ZSTD_p_checksumFlag,
216 if (set_parameter(self->params, ZSTD_c_checksumFlag,
217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
218 return -1;
218 return -1;
219 }
219 }
220
220
221 if (set_parameter(self->params, ZSTD_p_dictIDFlag,
221 if (set_parameter(self->params, ZSTD_c_dictIDFlag,
222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
223 return -1;
223 return -1;
224 }
224 }
225
225
226 if (threads) {
226 if (threads) {
227 if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) {
227 if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
228 return -1;
228 return -1;
229 }
229 }
230 }
230 }
@@ -344,7 +344,7 b' static PyObject* ZstdCompressor_copy_str'
344 return NULL;
344 return NULL;
345 }
345 }
346
346
347 ZSTD_CCtx_reset(self->cctx);
347 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
348
348
349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
350 if (ZSTD_isError(zresult)) {
350 if (ZSTD_isError(zresult)) {
@@ -391,7 +391,7 b' static PyObject* ZstdCompressor_copy_str'
391
391
392 while (input.pos < input.size) {
392 while (input.pos < input.size) {
393 Py_BEGIN_ALLOW_THREADS
393 Py_BEGIN_ALLOW_THREADS
394 zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_continue);
394 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue);
395 Py_END_ALLOW_THREADS
395 Py_END_ALLOW_THREADS
396
396
397 if (ZSTD_isError(zresult)) {
397 if (ZSTD_isError(zresult)) {
@@ -421,7 +421,7 b' static PyObject* ZstdCompressor_copy_str'
421
421
422 while (1) {
422 while (1) {
423 Py_BEGIN_ALLOW_THREADS
423 Py_BEGIN_ALLOW_THREADS
424 zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_end);
424 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
425 Py_END_ALLOW_THREADS
425 Py_END_ALLOW_THREADS
426
426
427 if (ZSTD_isError(zresult)) {
427 if (ZSTD_isError(zresult)) {
@@ -517,7 +517,7 b' static ZstdCompressionReader* ZstdCompre'
517 goto except;
517 goto except;
518 }
518 }
519
519
520 ZSTD_CCtx_reset(self->cctx);
520 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
521
521
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
523 if (ZSTD_isError(zresult)) {
@@ -577,7 +577,7 b' static PyObject* ZstdCompressor_compress'
577 goto finally;
577 goto finally;
578 }
578 }
579
579
580 ZSTD_CCtx_reset(self->cctx);
580 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
581
581
582 destSize = ZSTD_compressBound(source.len);
582 destSize = ZSTD_compressBound(source.len);
583 output = PyBytes_FromStringAndSize(NULL, destSize);
583 output = PyBytes_FromStringAndSize(NULL, destSize);
@@ -605,7 +605,7 b' static PyObject* ZstdCompressor_compress'
605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
606 size. This means the argument to ZstdCompressor to control frame
606 size. This means the argument to ZstdCompressor to control frame
607 parameters is honored. */
607 parameters is honored. */
608 zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
608 zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
609 Py_END_ALLOW_THREADS
609 Py_END_ALLOW_THREADS
610
610
611 if (ZSTD_isError(zresult)) {
611 if (ZSTD_isError(zresult)) {
@@ -651,7 +651,7 b' static ZstdCompressionObj* ZstdCompresso'
651 return NULL;
651 return NULL;
652 }
652 }
653
653
654 ZSTD_CCtx_reset(self->cctx);
654 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
655
655
656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
657 if (ZSTD_isError(zresult)) {
657 if (ZSTD_isError(zresult)) {
@@ -740,7 +740,7 b' static ZstdCompressorIterator* ZstdCompr'
740 goto except;
740 goto except;
741 }
741 }
742
742
743 ZSTD_CCtx_reset(self->cctx);
743 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
744
744
745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
746 if (ZSTD_isError(zresult)) {
746 if (ZSTD_isError(zresult)) {
@@ -794,16 +794,19 b' static ZstdCompressionWriter* ZstdCompre'
794 "writer",
794 "writer",
795 "size",
795 "size",
796 "write_size",
796 "write_size",
797 "write_return_read",
797 NULL
798 NULL
798 };
799 };
799
800
800 PyObject* writer;
801 PyObject* writer;
801 ZstdCompressionWriter* result;
802 ZstdCompressionWriter* result;
803 size_t zresult;
802 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
804 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
803 size_t outSize = ZSTD_CStreamOutSize();
805 size_t outSize = ZSTD_CStreamOutSize();
806 PyObject* writeReturnRead = NULL;
804
807
805 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist,
808 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist,
806 &writer, &sourceSize, &outSize)) {
809 &writer, &sourceSize, &outSize, &writeReturnRead)) {
807 return NULL;
810 return NULL;
808 }
811 }
809
812
@@ -812,22 +815,38 b' static ZstdCompressionWriter* ZstdCompre'
812 return NULL;
815 return NULL;
813 }
816 }
814
817
815 ZSTD_CCtx_reset(self->cctx);
816 
817 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
818 if (!result) {
819 return NULL;
820 }
821 
822 result->compressor = self;
823 Py_INCREF(result->compressor);
824 
825 result->writer = writer;
826 Py_INCREF(result->writer);
827 
828 result->sourceSize = sourceSize;
829 result->outSize = outSize;
830 result->bytesCompressed = 0;
831 
832 return result;
833 }
818 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
819 
820 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
821 if (ZSTD_isError(zresult)) {
822 PyErr_Format(ZstdError, "error setting source size: %s",
823 ZSTD_getErrorName(zresult));
824 return NULL;
825 }
826 
827 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
828 if (!result) {
829 return NULL;
830 }
831 
832 result->output.dst = PyMem_Malloc(outSize);
833 if (!result->output.dst) {
834 Py_DECREF(result);
835 return (ZstdCompressionWriter*)PyErr_NoMemory();
836 }
837 
838 result->output.pos = 0;
839 result->output.size = outSize;
840 
841 result->compressor = self;
842 Py_INCREF(result->compressor);
843 
844 result->writer = writer;
845 Py_INCREF(result->writer);
846 
847 result->outSize = outSize;
848 result->bytesCompressed = 0;
849 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
850 
851 return result;
852 }
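The new write_return_read keyword toggles what write() returns: the number of input bytes consumed (input.pos, matching io.RawIOBase semantics) instead of the number of compressed bytes emitted. A sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    writer = cctx.stream_writer(io.BytesIO(), write_return_read=True)
    assert writer.write(b"hello") == 5   # bytes consumed, not bytes emitted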
@@ -853,7 +872,7 b' static ZstdCompressionChunker* ZstdCompr'
853 return NULL;
872 return NULL;
854 }
873 }
855
874
856 ZSTD_CCtx_reset(self->cctx);
875 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
857
876
858 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
877 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
859 if (ZSTD_isError(zresult)) {
878 if (ZSTD_isError(zresult)) {
@@ -1115,7 +1134,7 b' static void compress_worker(WorkerState*'
1115 break;
1134 break;
1116 }
1135 }
1117
1136
1118 zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1137 zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1119 if (ZSTD_isError(zresult)) {
1138 if (ZSTD_isError(zresult)) {
1120 state->error = WorkerError_zstd;
1139 state->error = WorkerError_zstd;
1121 state->zresult = zresult;
1140 state->zresult = zresult;
@@ -57,7 +57,7 b' feedcompressor:'
57 /* If we have data left in the input, consume it. */
57 /* If we have data left in the input, consume it. */
58 if (self->input.pos < self->input.size) {
58 if (self->input.pos < self->input.size) {
59 Py_BEGIN_ALLOW_THREADS
59 Py_BEGIN_ALLOW_THREADS
60 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
60 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
61 &self->input, ZSTD_e_continue);
61 &self->input, ZSTD_e_continue);
62 Py_END_ALLOW_THREADS
62 Py_END_ALLOW_THREADS
63
63
@@ -127,7 +127,7 b' feedcompressor:'
127 self->input.size = 0;
127 self->input.size = 0;
128 self->input.pos = 0;
128 self->input.pos = 0;
129
129
130 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
130 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
131 &self->input, ZSTD_e_end);
131 &self->input, ZSTD_e_end);
132 if (ZSTD_isError(zresult)) {
132 if (ZSTD_isError(zresult)) {
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
@@ -152,7 +152,7 b' feedcompressor:'
152 self->input.pos = 0;
152 self->input.pos = 0;
153
153
154 Py_BEGIN_ALLOW_THREADS
154 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
155 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
156 &self->input, ZSTD_e_continue);
156 &self->input, ZSTD_e_continue);
157 Py_END_ALLOW_THREADS
157 Py_END_ALLOW_THREADS
158
158
@@ -32,6 +32,9 b' void constants_module_init(PyObject* mod'
32 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
32 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
33 PyModule_AddObject(mod, "ZstdError", ZstdError);
33 PyModule_AddObject(mod, "ZstdError", ZstdError);
34
34
35 PyModule_AddIntConstant(mod, "FLUSH_BLOCK", 0);
36 PyModule_AddIntConstant(mod, "FLUSH_FRAME", 1);
37
35 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
38 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
39 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
37
40
@@ -77,8 +80,11 b' void constants_module_init(PyObject* mod'
77 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
80 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
78 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
81 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
79 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
82 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
80 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
83 PyModule_AddIntConstant(mod, "MINMATCH_MIN", ZSTD_MINMATCH_MIN);
81 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
84 PyModule_AddIntConstant(mod, "MINMATCH_MAX", ZSTD_MINMATCH_MAX);
85 /* TODO SEARCHLENGTH_* is deprecated. */
86 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_MINMATCH_MIN);
87 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_MINMATCH_MAX);
82 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
88 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
83 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
89 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
84 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
90 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
@@ -93,6 +99,7 b' void constants_module_init(PyObject* mod'
93 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
99 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
94 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
100 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
95 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
101 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
102 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA2", ZSTD_btultra2);
96
103
97 PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
104 PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
98 PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
105 PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
@@ -102,6 +102,114 b' static PyObject* reader_isatty(PyObject*'
102 Py_RETURN_FALSE;
102 Py_RETURN_FALSE;
103 }
103 }
104
104
105 /**
106 * Read available input.
107 *
108 * Returns 0 if no data was added to input.
109 * Returns 1 if new input data is available.
110 * Returns -1 on error and sets a Python exception as a side-effect.
111 */
112 int read_decompressor_input(ZstdDecompressionReader* self) {
113 if (self->finishedInput) {
114 return 0;
115 }
116
117 if (self->input.pos != self->input.size) {
118 return 0;
119 }
120
121 if (self->reader) {
122 Py_buffer buffer;
123
124 assert(self->readResult == NULL);
125 self->readResult = PyObject_CallMethod(self->reader, "read",
126 "k", self->readSize);
127 if (NULL == self->readResult) {
128 return -1;
129 }
130
131 memset(&buffer, 0, sizeof(buffer));
132
133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
134 return -1;
135 }
136
137 /* EOF */
138 if (0 == buffer.len) {
139 self->finishedInput = 1;
140 Py_CLEAR(self->readResult);
141 }
142 else {
143 self->input.src = buffer.buf;
144 self->input.size = buffer.len;
145 self->input.pos = 0;
146 }
147
148 PyBuffer_Release(&buffer);
149 }
150 else {
151 assert(self->buffer.buf);
152 /*
153 * We should only get here once since expectation is we always
154 * exhaust input buffer before reading again.
155 */
156 assert(self->input.src == NULL);
157
158 self->input.src = self->buffer.buf;
159 self->input.size = self->buffer.len;
160 self->input.pos = 0;
161 }
162
163 return 1;
164 }
165
166 /**
167 * Decompresses available input into an output buffer.
168 *
169 * Returns 0 if we need more input.
170 * Returns 1 if output buffer should be emitted.
171 * Returns -1 on error and sets a Python exception.
172 */
173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
174 size_t zresult;
175
176 if (self->input.pos >= self->input.size) {
177 return 0;
178 }
179
180 Py_BEGIN_ALLOW_THREADS
181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
182 Py_END_ALLOW_THREADS
183
184 /* Input exhausted. Clear our state tracking. */
185 if (self->input.pos == self->input.size) {
186 memset(&self->input, 0, sizeof(self->input));
187 Py_CLEAR(self->readResult);
188
189 if (self->buffer.buf) {
190 self->finishedInput = 1;
191 }
192 }
193
194 if (ZSTD_isError(zresult)) {
195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
196 return -1;
197 }
198
199 /* We fulfilled the full read request. Signal to emit. */
200 if (output->pos && output->pos == output->size) {
201 return 1;
202 }
203 /* We're at the end of a frame and we aren't allowed to return data
204 spanning frames. */
205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
206 return 1;
207 }
208
209 /* There is more room in the output. Signal to collect more data. */
210 return 0;
211 }
212
105 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
106 static char* kwlist[] = {
214 static char* kwlist[] = {
107 "size",
215 "size",
@@ -113,26 +221,30 b' static PyObject* reader_read(ZstdDecompr'
113 char* resultBuffer;
221 char* resultBuffer;
114 Py_ssize_t resultSize;
222 Py_ssize_t resultSize;
115 ZSTD_outBuffer output;
223 ZSTD_outBuffer output;
116 size_t zresult;
224 int decompressResult, readResult;
117
225
118 if (self->closed) {
226 if (self->closed) {
119 PyErr_SetString(PyExc_ValueError, "stream is closed");
227 PyErr_SetString(PyExc_ValueError, "stream is closed");
120 return NULL;
228 return NULL;
121 }
229 }
122
230
123 if (self->finishedOutput) {
231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
124 return PyBytes_FromStringAndSize("", 0);
125 }
126
127 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
128 return NULL;
232 return NULL;
129 }
233 }
130
234
131 if (size < 1) {
235 if (size < -1) {
132 PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
133 return NULL;
237 return NULL;
134 }
238 }
135
239
240 if (size == -1) {
241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
242 }
243
244 if (self->finishedOutput || size == 0) {
245 return PyBytes_FromStringAndSize("", 0);
246 }
247
136 result = PyBytes_FromStringAndSize(NULL, size);
248 result = PyBytes_FromStringAndSize(NULL, size);
137 if (NULL == result) {
249 if (NULL == result) {
138 return NULL;
250 return NULL;
@@ -146,85 +258,38 b' static PyObject* reader_read(ZstdDecompr'
146
258
147 readinput:
259 readinput:
148
260
149 /* Consume input data left over from last time. */
261 decompressResult = decompress_input(self, &output);
150 if (self->input.pos < self->input.size) {
151 Py_BEGIN_ALLOW_THREADS
152 zresult = ZSTD_decompress_generic(self->decompressor->dctx,
153 &output, &self->input);
154 Py_END_ALLOW_THREADS
155
262
156 /* Input exhausted. Clear our state tracking. */
263 if (-1 == decompressResult) {
157 if (self->input.pos == self->input.size) {
264 Py_XDECREF(result);
158 memset(&self->input, 0, sizeof(self->input));
265 return NULL;
159 Py_CLEAR(self->readResult);
266 }
267 else if (0 == decompressResult) { }
268 else if (1 == decompressResult) {
269 self->bytesDecompressed += output.pos;
160
270
161 if (self->buffer.buf) {
271 if (output.pos != output.size) {
162 self->finishedInput = 1;
272 if (safe_pybytes_resize(&result, output.pos)) {
273 Py_XDECREF(result);
274 return NULL;
163 }
275 }
164 }
276 }
165
277 return result;
166 if (ZSTD_isError(zresult)) {
278 }
167 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
279 else {
168 return NULL;
280 assert(0);
169 }
170 else if (0 == zresult) {
171 self->finishedOutput = 1;
172 }
173
174 /* We fulfilled the full read request. Emit it. */
175 if (output.pos && output.pos == output.size) {
176 self->bytesDecompressed += output.size;
177 return result;
178 }
179
180 /*
181 * There is more room in the output. Fall through to try to collect
182 * more data so we can try to fill the output.
183 */
184 }
281 }
185
282
186 if (!self->finishedInput) {
283 readResult = read_decompressor_input(self);
187 if (self->reader) {
188 Py_buffer buffer;
189
190 assert(self->readResult == NULL);
191 self->readResult = PyObject_CallMethod(self->reader, "read",
192 "k", self->readSize);
193 if (NULL == self->readResult) {
194 return NULL;
195 }
196
197 memset(&buffer, 0, sizeof(buffer));
198
199 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
200 return NULL;
201 }
202
284
203 /* EOF */
285 if (-1 == readResult) {
204 if (0 == buffer.len) {
286 Py_XDECREF(result);
205 self->finishedInput = 1;
287 return NULL;
206 Py_CLEAR(self->readResult);
288 }
207 }
289 else if (0 == readResult) {}
208 else {
290 else if (1 == readResult) {}
209 self->input.src = buffer.buf;
291 else {
210 self->input.size = buffer.len;
292 assert(0);
211 self->input.pos = 0;
212 }
213
214 PyBuffer_Release(&buffer);
215 }
216 else {
217 assert(self->buffer.buf);
218 /*
219 * We should only get here once since above block will exhaust
220 * source buffer until finishedInput is set.
221 */
222 assert(self->input.src == NULL);
223
224 self->input.src = self->buffer.buf;
225 self->input.size = self->buffer.len;
226 self->input.pos = 0;
227 }
228 }
293 }
229
294
230 if (self->input.size) {
295 if (self->input.size) {
@@ -242,18 +307,288 b' readinput:'
242 return result;
307 return result;
243 }
308 }
244
309
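A hedged sketch of the reworked read() semantics above: size may now be 0 (returns b'' immediately) or -1 (the default, which delegates to readall()); only values below -1 are rejected.

import io
import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foo' * 1024)
reader = zstd.ZstdDecompressor().stream_reader(io.BytesIO(frame))

assert reader.read(0) == b''              # size 0 short-circuits to empty bytes
assert reader.read(-1) == b'foo' * 1024   # -1 drains the stream via readall()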
310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
311 static char* kwlist[] = {
312 "size",
313 NULL
314 };
315
316 Py_ssize_t size = -1;
317 PyObject* result = NULL;
318 char* resultBuffer;
319 Py_ssize_t resultSize;
320 ZSTD_outBuffer output;
321
322 if (self->closed) {
323 PyErr_SetString(PyExc_ValueError, "stream is closed");
324 return NULL;
325 }
326
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
328 return NULL;
329 }
330
331 if (size < -1) {
332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
333 return NULL;
334 }
335
336 if (self->finishedOutput || size == 0) {
337 return PyBytes_FromStringAndSize("", 0);
338 }
339
340 if (size == -1) {
341 size = ZSTD_DStreamOutSize();
342 }
343
344 result = PyBytes_FromStringAndSize(NULL, size);
345 if (NULL == result) {
346 return NULL;
347 }
348
349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
350
351 output.dst = resultBuffer;
352 output.size = resultSize;
353 output.pos = 0;
354
355 /* read1() is supposed to use at most 1 read() from the underlying stream.
356 * However, we can't satisfy this requirement with decompression due to the
357 * nature of how decompression works. Our strategy is to read + decompress
358 * until we get any output, at which point we return. This satisfies the
359 * intent of the read1() API to limit read operations.
360 */
361 while (!self->finishedInput) {
362 int readResult, decompressResult;
363
364 readResult = read_decompressor_input(self);
365 if (-1 == readResult) {
366 Py_XDECREF(result);
367 return NULL;
368 }
369 else if (0 == readResult || 1 == readResult) { }
370 else {
371 assert(0);
372 }
373
374 decompressResult = decompress_input(self, &output);
375
376 if (-1 == decompressResult) {
377 Py_XDECREF(result);
378 return NULL;
379 }
380 else if (0 == decompressResult || 1 == decompressResult) { }
381 else {
382 assert(0);
383 }
384
385 if (output.pos) {
386 break;
387 }
388 }
389
390 self->bytesDecompressed += output.pos;
391 if (safe_pybytes_resize(&result, output.pos)) {
392 Py_XDECREF(result);
393 return NULL;
394 }
395
396 return result;
397 }
398
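Per the comment above, read1() reads and decompresses only until some output is produced, approximating the io.BufferedIOBase intent of limiting calls into the underlying stream. A usage sketch, not from the diff:

import io
import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'data' * 4096)
reader = zstd.ZstdDecompressor().stream_reader(io.BytesIO(frame))

chunk = reader.read1()   # returns as soon as any output is available,
                         # so it may be shorter than a full read()
assert 0 < len(chunk) <= zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE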
399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
400 Py_buffer dest;
401 ZSTD_outBuffer output;
402 int decompressResult, readResult;
403 PyObject* result = NULL;
404
405 if (self->closed) {
406 PyErr_SetString(PyExc_ValueError, "stream is closed");
407 return NULL;
408 }
409
410 if (self->finishedOutput) {
411 return PyLong_FromLong(0);
412 }
413
414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
415 return NULL;
416 }
417
418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
419 PyErr_SetString(PyExc_ValueError,
420 "destination buffer should be contiguous and have at most one dimension");
421 goto finally;
422 }
423
424 output.dst = dest.buf;
425 output.size = dest.len;
426 output.pos = 0;
427
428 readinput:
429
430 decompressResult = decompress_input(self, &output);
431
432 if (-1 == decompressResult) {
433 goto finally;
434 }
435 else if (0 == decompressResult) { }
436 else if (1 == decompressResult) {
437 self->bytesDecompressed += output.pos;
438 result = PyLong_FromSize_t(output.pos);
439 goto finally;
440 }
441 else {
442 assert(0);
443 }
444
445 readResult = read_decompressor_input(self);
446
447 if (-1 == readResult) {
448 goto finally;
449 }
450 else if (0 == readResult) {}
451 else if (1 == readResult) {}
452 else {
453 assert(0);
454 }
455
456 if (self->input.size) {
457 goto readinput;
458 }
459
460 /* EOF */
461 self->bytesDecompressed += output.pos;
462 result = PyLong_FromSize_t(output.pos);
463
464 finally:
465 PyBuffer_Release(&dest);
466
467 return result;
468 }
469
470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
471 Py_buffer dest;
472 ZSTD_outBuffer output;
473 PyObject* result = NULL;
474
475 if (self->closed) {
476 PyErr_SetString(PyExc_ValueError, "stream is closed");
477 return NULL;
478 }
479
480 if (self->finishedOutput) {
481 return PyLong_FromLong(0);
482 }
483
484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
485 return NULL;
486 }
487
488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
489 PyErr_SetString(PyExc_ValueError,
490 "destination buffer should be contiguous and have at most one dimension");
491 goto finally;
492 }
493
494 output.dst = dest.buf;
495 output.size = dest.len;
496 output.pos = 0;
497
498 while (!self->finishedInput && !self->finishedOutput) {
499 int decompressResult, readResult;
500
501 readResult = read_decompressor_input(self);
502
503 if (-1 == readResult) {
504 goto finally;
505 }
506 else if (0 == readResult || 1 == readResult) {}
507 else {
508 assert(0);
509 }
510
511 decompressResult = decompress_input(self, &output);
512
513 if (-1 == decompressResult) {
514 goto finally;
515 }
516 else if (0 == decompressResult || 1 == decompressResult) {}
517 else {
518 assert(0);
519 }
520
521 if (output.pos) {
522 break;
523 }
524 }
525
526 self->bytesDecompressed += output.pos;
527 result = PyLong_FromSize_t(output.pos);
528
529 finally:
530 PyBuffer_Release(&dest);
531
532 return result;
533 }
534
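readinto() and readinto1() mirror read()/read1() but decompress directly into a caller-supplied writable buffer and return the number of bytes written; a sketch:

import io
import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foo')
dctx = zstd.ZstdDecompressor()

dest = bytearray(16384)
reader = dctx.stream_reader(io.BytesIO(frame))
n = reader.readinto(dest)               # fills dest in place, no bytes object
assert bytes(dest[:n]) == b'foo'

reader = dctx.stream_reader(io.BytesIO(frame))
n = reader.readinto1(bytearray(16384))  # stops once any output is produced
assert n == 3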
245 static PyObject* reader_readall(PyObject* self) {
535 static PyObject* reader_readall(PyObject* self) {
246 PyErr_SetNone(PyExc_NotImplementedError);
536 PyObject* chunks = NULL;
247 return NULL;
537 PyObject* empty = NULL;
538 PyObject* result = NULL;
539
540 /* Our strategy is to collect chunks into a list then join all the
541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
542 * this feels simple enough to implement and avoids potentially expensive
543 * reallocations of large buffers.
544 */
545 chunks = PyList_New(0);
546 if (NULL == chunks) {
547 return NULL;
548 }
549
550 while (1) {
551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
552 if (NULL == chunk) {
553 Py_DECREF(chunks);
554 return NULL;
555 }
556
557 if (!PyBytes_Size(chunk)) {
558 Py_DECREF(chunk);
559 break;
560 }
561
562 if (PyList_Append(chunks, chunk)) {
563 Py_DECREF(chunk);
564 Py_DECREF(chunks);
565 return NULL;
566 }
567
568 Py_DECREF(chunk);
569 }
570
571 empty = PyBytes_FromStringAndSize("", 0);
572 if (NULL == empty) {
573 Py_DECREF(chunks);
574 return NULL;
575 }
576
577 result = PyObject_CallMethod(empty, "join", "O", chunks);
578
579 Py_DECREF(empty);
580 Py_DECREF(chunks);
581
582 return result;
248 }
583 }
249
584
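The C implementation of readall() above is equivalent to the following Python loop, collecting 1 MiB chunks and joining them at the end:

import io
import zstandard as zstd

reader = zstd.ZstdDecompressor().stream_reader(
    io.BytesIO(zstd.ZstdCompressor().compress(b'foo' * 1024)))

chunks = []
while True:
    chunk = reader.read(1048576)   # same 1 MiB read size as the C code
    if not chunk:
        break
    chunks.append(chunk)
data = b''.join(chunks)
assert data == b'foo' * 1024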
250 static PyObject* reader_readline(PyObject* self) {
585 static PyObject* reader_readline(PyObject* self) {
251 PyErr_SetNone(PyExc_NotImplementedError);
586 set_unsupported_operation();
252 return NULL;
587 return NULL;
253 }
588 }
254
589
255 static PyObject* reader_readlines(PyObject* self) {
590 static PyObject* reader_readlines(PyObject* self) {
256 PyErr_SetNone(PyExc_NotImplementedError);
591 set_unsupported_operation();
257 return NULL;
592 return NULL;
258 }
593 }
259
594
@@ -345,12 +680,12 b' static PyObject* reader_writelines(PyObj'
345 }
680 }
346
681
347 static PyObject* reader_iter(PyObject* self) {
682 static PyObject* reader_iter(PyObject* self) {
348 PyErr_SetNone(PyExc_NotImplementedError);
683 set_unsupported_operation();
349 return NULL;
684 return NULL;
350 }
685 }
351
686
352 static PyObject* reader_iternext(PyObject* self) {
687 static PyObject* reader_iternext(PyObject* self) {
353 PyErr_SetNone(PyExc_NotImplementedError);
688 set_unsupported_operation();
354 return NULL;
689 return NULL;
355 }
690 }
356
691
@@ -367,6 +702,10 b' static PyMethodDef reader_methods[] = {'
367 PyDoc_STR("Returns True") },
702 PyDoc_STR("Returns True") },
368 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
703 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
369 PyDoc_STR("read compressed data") },
704 PyDoc_STR("read compressed data") },
705 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
706 PyDoc_STR("read compressed data") },
707 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
708 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
370 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
709 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
371 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
710 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
372 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
711 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
@@ -22,12 +22,13 b' static void ZstdDecompressionWriter_deal'
22 }
22 }
23
23
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
25 if (self->entered) {
25 if (self->closed) {
26 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
26 PyErr_SetString(PyExc_ValueError, "stream is closed");
27 return NULL;
27 return NULL;
28 }
28 }
29
29
30 if (ensure_dctx(self->decompressor, 1)) {
30 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
31 return NULL;
32 return NULL;
32 }
33 }
33
34
@@ -40,6 +41,10 b' static PyObject* ZstdDecompressionWriter'
40 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
41 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
41 self->entered = 0;
42 self->entered = 0;
42
43
44 if (NULL == PyObject_CallMethod((PyObject*)self, "close", NULL)) {
45 return NULL;
46 }
47
43 Py_RETURN_FALSE;
48 Py_RETURN_FALSE;
44 }
49 }
45
50
@@ -76,9 +81,9 b' static PyObject* ZstdDecompressionWriter'
76 goto finally;
81 goto finally;
77 }
82 }
78
83
79 if (!self->entered) {
84 if (self->closed) {
80 PyErr_SetString(ZstdError, "write must be called from an active context manager");
85 PyErr_SetString(PyExc_ValueError, "stream is closed");
81 goto finally;
86 return NULL;
82 }
87 }
83
88
84 output.dst = PyMem_Malloc(self->outSize);
89 output.dst = PyMem_Malloc(self->outSize);
@@ -93,9 +98,9 b' static PyObject* ZstdDecompressionWriter'
93 input.size = source.len;
98 input.size = source.len;
94 input.pos = 0;
99 input.pos = 0;
95
100
96 while ((ssize_t)input.pos < source.len) {
101 while (input.pos < (size_t)source.len) {
97 Py_BEGIN_ALLOW_THREADS
102 Py_BEGIN_ALLOW_THREADS
98 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
103 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
99 Py_END_ALLOW_THREADS
104 Py_END_ALLOW_THREADS
100
105
101 if (ZSTD_isError(zresult)) {
106 if (ZSTD_isError(zresult)) {
@@ -120,13 +125,94 b' static PyObject* ZstdDecompressionWriter'
120
125
121 PyMem_Free(output.dst);
126 PyMem_Free(output.dst);
122
127
123 result = PyLong_FromSsize_t(totalWrite);
128 if (self->writeReturnRead) {
129 result = PyLong_FromSize_t(input.pos);
130 }
131 else {
132 result = PyLong_FromSsize_t(totalWrite);
133 }
124
134
125 finally:
135 finally:
126 PyBuffer_Release(&source);
136 PyBuffer_Release(&source);
127 return result;
137 return result;
128 }
138 }
129
139
140 static PyObject* ZstdDecompressionWriter_close(ZstdDecompressionWriter* self) {
141 PyObject* result;
142
143 if (self->closed) {
144 Py_RETURN_NONE;
145 }
146
147 result = PyObject_CallMethod((PyObject*)self, "flush", NULL);
148 self->closed = 1;
149
150 if (NULL == result) {
151 return NULL;
152 }
153
154 /* Call close on underlying stream as well. */
155 if (PyObject_HasAttrString(self->writer, "close")) {
156 return PyObject_CallMethod(self->writer, "close", NULL);
157 }
158
159 Py_RETURN_NONE;
160 }
161
162 static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) {
163 if (PyObject_HasAttrString(self->writer, "fileno")) {
164 return PyObject_CallMethod(self->writer, "fileno", NULL);
165 }
166 else {
167 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
168 return NULL;
169 }
170 }
171
172 static PyObject* ZstdDecompressionWriter_flush(ZstdDecompressionWriter* self) {
173 if (self->closed) {
174 PyErr_SetString(PyExc_ValueError, "stream is closed");
175 return NULL;
176 }
177
178 if (PyObject_HasAttrString(self->writer, "flush")) {
179 return PyObject_CallMethod(self->writer, "flush", NULL);
180 }
181 else {
182 Py_RETURN_NONE;
183 }
184 }
185
186 static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) {
187 Py_RETURN_FALSE;
188 }
189
190 static PyObject* ZstdDecompressionWriter_true(PyObject* self, PyObject* args) {
191 Py_RETURN_TRUE;
192 }
193
194 static PyObject* ZstdDecompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
195 PyObject* iomod;
196 PyObject* exc;
197
198 iomod = PyImport_ImportModule("io");
199 if (NULL == iomod) {
200 return NULL;
201 }
202
203 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
204 if (NULL == exc) {
205 Py_DECREF(iomod);
206 return NULL;
207 }
208
209 PyErr_SetNone(exc);
210 Py_DECREF(exc);
211 Py_DECREF(iomod);
212
213 return NULL;
214 }
215
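With close(), fileno(), and flush() added above, ZstdDecompressionWriter now behaves like a regular io stream. A sketch of the new behavior (note that close() also closes the wrapped writer, and write_return_read changes what write() returns):

import io
import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foo')
buf = io.BytesIO()
writer = zstd.ZstdDecompressor().stream_writer(buf, write_return_read=True)

n = writer.write(frame)   # with write_return_read=True this is the number of
                          # compressed bytes consumed, not bytes written to buf
assert n == len(frame)
writer.close()            # flushes, then closes buf as well
assert writer.closed and buf.closed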
130 static PyMethodDef ZstdDecompressionWriter_methods[] = {
216 static PyMethodDef ZstdDecompressionWriter_methods[] = {
131 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
217 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
132 PyDoc_STR("Enter a decompression context.") },
218 PyDoc_STR("Enter a decompression context.") },
@@ -134,11 +220,32 b' static PyMethodDef ZstdDecompressionWrit'
134 PyDoc_STR("Exit a decompression context.") },
220 PyDoc_STR("Exit a decompression context.") },
135 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
221 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
136 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
222 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
223 { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL },
224 { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL },
225 { "flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL },
226 { "isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
227 { "readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
228 { "readline", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
229 { "readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
230 { "seek", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
231 { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
232 { "tell", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
233 { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
234 { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL },
235 { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
236 { "read", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
237 { "readall", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
238 { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
137 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
239 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
138 PyDoc_STR("Compress data") },
240 PyDoc_STR("Compress data") },
139 { NULL, NULL }
241 { NULL, NULL }
140 };
242 };
141
243
244 static PyMemberDef ZstdDecompressionWriter_members[] = {
245 { "closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, NULL },
246 { NULL }
247 };
248
142 PyTypeObject ZstdDecompressionWriterType = {
249 PyTypeObject ZstdDecompressionWriterType = {
143 PyVarObject_HEAD_INIT(NULL, 0)
250 PyVarObject_HEAD_INIT(NULL, 0)
144 "zstd.ZstdDecompressionWriter", /* tp_name */
251 "zstd.ZstdDecompressionWriter", /* tp_name */
@@ -168,7 +275,7 b' PyTypeObject ZstdDecompressionWriterType'
168 0, /* tp_iter */
275 0, /* tp_iter */
169 0, /* tp_iternext */
276 0, /* tp_iternext */
170 ZstdDecompressionWriter_methods,/* tp_methods */
277 ZstdDecompressionWriter_methods,/* tp_methods */
171 0, /* tp_members */
278 ZstdDecompressionWriter_members,/* tp_members */
172 0, /* tp_getset */
279 0, /* tp_getset */
173 0, /* tp_base */
280 0, /* tp_base */
174 0, /* tp_dict */
281 0, /* tp_dict */
@@ -75,7 +75,7 b' static PyObject* DecompressionObj_decomp'
75
75
76 while (1) {
76 while (1) {
77 Py_BEGIN_ALLOW_THREADS
77 Py_BEGIN_ALLOW_THREADS
78 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
78 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
79 Py_END_ALLOW_THREADS
79 Py_END_ALLOW_THREADS
80
80
81 if (ZSTD_isError(zresult)) {
81 if (ZSTD_isError(zresult)) {
@@ -130,9 +130,26 b' finally:'
130 return result;
130 return result;
131 }
131 }
132
132
133 static PyObject* DecompressionObj_flush(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
134 static char* kwlist[] = {
135 "length",
136 NULL
137 };
138
139 PyObject* length = NULL;
140
141 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:flush", kwlist, &length)) {
142 return NULL;
143 }
144
145 Py_RETURN_NONE;
146 }
147
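The new flush() on ZstdDecompressionObj is deliberately a no-op: it exists so the object can stand in for zlib/bz2-style decompress objects. Sketch:

import zstandard as zstd

dobj = zstd.ZstdDecompressor().decompressobj()
data = dobj.decompress(zstd.ZstdCompressor().compress(b'foo'))
dobj.flush()       # accepted for API compatibility; does nothing
dobj.flush(1024)   # an optional length argument is parsed and ignored
assert data == b'foo'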
133 static PyMethodDef DecompressionObj_methods[] = {
148 static PyMethodDef DecompressionObj_methods[] = {
134 { "decompress", (PyCFunction)DecompressionObj_decompress,
149 { "decompress", (PyCFunction)DecompressionObj_decompress,
135 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
150 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
151 { "flush", (PyCFunction)DecompressionObj_flush,
152 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("no-op") },
136 { NULL, NULL }
153 { NULL, NULL }
137 };
154 };
138
155
@@ -17,7 +17,7 b' extern PyObject* ZstdError;'
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
18 size_t zresult;
18 size_t zresult;
19
19
20 ZSTD_DCtx_reset(decompressor->dctx);
20 ZSTD_DCtx_reset(decompressor->dctx, ZSTD_reset_session_only);
21
21
22 if (decompressor->maxWindowSize) {
22 if (decompressor->maxWindowSize) {
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
@@ -229,7 +229,7 b' static PyObject* Decompressor_copy_strea'
229
229
230 while (input.pos < input.size) {
230 while (input.pos < input.size) {
231 Py_BEGIN_ALLOW_THREADS
231 Py_BEGIN_ALLOW_THREADS
232 zresult = ZSTD_decompress_generic(self->dctx, &output, &input);
232 zresult = ZSTD_decompressStream(self->dctx, &output, &input);
233 Py_END_ALLOW_THREADS
233 Py_END_ALLOW_THREADS
234
234
235 if (ZSTD_isError(zresult)) {
235 if (ZSTD_isError(zresult)) {
@@ -379,7 +379,7 b' PyObject* Decompressor_decompress(ZstdDe'
379 inBuffer.pos = 0;
379 inBuffer.pos = 0;
380
380
381 Py_BEGIN_ALLOW_THREADS
381 Py_BEGIN_ALLOW_THREADS
382 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
382 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
383 Py_END_ALLOW_THREADS
383 Py_END_ALLOW_THREADS
384
384
385 if (ZSTD_isError(zresult)) {
385 if (ZSTD_isError(zresult)) {
@@ -550,28 +550,35 b' finally:'
550 }
550 }
551
551
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
553 "stream_reader(source, [read_size=default])\n"
553 "stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
554 "\n"
554 "\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
556 "reading decompressed output from an object.\n"
556 "reading decompressed output from an object.\n"
557 "\n"
557 "\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
559 "conforms to the buffer protocol.\n"
559 "conforms to the buffer protocol.\n"
560 "\n"
561 "``read_across_frames`` controls the behavior of ``read()`` when the end\n"
562 "of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
563 "return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
564 "will return when the end of a frame is reached.\n"
560 );
565 );
561
566
562 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
567 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
563 static char* kwlist[] = {
568 static char* kwlist[] = {
564 "source",
569 "source",
565 "read_size",
570 "read_size",
571 "read_across_frames",
566 NULL
572 NULL
567 };
573 };
568
574
569 PyObject* source;
575 PyObject* source;
570 size_t readSize = ZSTD_DStreamInSize();
576 size_t readSize = ZSTD_DStreamInSize();
577 PyObject* readAcrossFrames = NULL;
571 ZstdDecompressionReader* result;
578 ZstdDecompressionReader* result;
572
579
573 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_reader", kwlist,
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
574 &source, &readSize)) {
581 &source, &readSize, &readAcrossFrames)) {
575 return NULL;
582 return NULL;
576 }
583 }
577
584
@@ -604,6 +611,7 b' static ZstdDecompressionReader* Decompre'
604
611
605 result->decompressor = self;
612 result->decompressor = self;
606 Py_INCREF(self);
613 Py_INCREF(self);
614 result->readAcrossFrames = readAcrossFrames ? PyObject_IsTrue(readAcrossFrames) : 0;
607
615
608 return result;
616 return result;
609 }
617 }
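A sketch of the read_across_frames flag documented above: with the default of False, a read() stops at a zstd frame boundary; with True, a single read() may return data from multiple frames.

import io
import zstandard as zstd

cctx = zstd.ZstdCompressor()
two_frames = cctx.compress(b'foo') + cctx.compress(b'bar')
dctx = zstd.ZstdDecompressor()

reader = dctx.stream_reader(io.BytesIO(two_frames))  # read_across_frames=False
assert reader.read(6) == b'foo'      # stops at the first frame boundary

reader = dctx.stream_reader(io.BytesIO(two_frames), read_across_frames=True)
assert reader.read(6) == b'foobar'   # one read may now span frames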
@@ -625,15 +633,17 b' static ZstdDecompressionWriter* Decompre'
625 static char* kwlist[] = {
633 static char* kwlist[] = {
626 "writer",
634 "writer",
627 "write_size",
635 "write_size",
636 "write_return_read",
628 NULL
637 NULL
629 };
638 };
630
639
631 PyObject* writer;
640 PyObject* writer;
632 size_t outSize = ZSTD_DStreamOutSize();
641 size_t outSize = ZSTD_DStreamOutSize();
642 PyObject* writeReturnRead = NULL;
633 ZstdDecompressionWriter* result;
643 ZstdDecompressionWriter* result;
634
644
635 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_writer", kwlist,
645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
636 &writer, &outSize)) {
646 &writer, &outSize, &writeReturnRead)) {
637 return NULL;
647 return NULL;
638 }
648 }
639
649
@@ -642,6 +652,10 b' static ZstdDecompressionWriter* Decompre'
642 return NULL;
652 return NULL;
643 }
653 }
644
654
655 if (ensure_dctx(self, 1)) {
656 return NULL;
657 }
658
645 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
659 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
646 if (!result) {
660 if (!result) {
647 return NULL;
661 return NULL;
@@ -654,6 +668,7 b' static ZstdDecompressionWriter* Decompre'
654 Py_INCREF(result->writer);
668 Py_INCREF(result->writer);
655
669
656 result->outSize = outSize;
670 result->outSize = outSize;
671 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
657
672
658 return result;
673 return result;
659 }
674 }
@@ -756,7 +771,7 b' static PyObject* Decompressor_decompress'
756 inBuffer.pos = 0;
771 inBuffer.pos = 0;
757
772
758 Py_BEGIN_ALLOW_THREADS
773 Py_BEGIN_ALLOW_THREADS
759 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
774 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
760 Py_END_ALLOW_THREADS
775 Py_END_ALLOW_THREADS
761 if (ZSTD_isError(zresult)) {
776 if (ZSTD_isError(zresult)) {
762 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
777 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
@@ -852,7 +867,7 b' static PyObject* Decompressor_decompress'
852 outBuffer.pos = 0;
867 outBuffer.pos = 0;
853
868
854 Py_BEGIN_ALLOW_THREADS
869 Py_BEGIN_ALLOW_THREADS
855 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
870 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
856 Py_END_ALLOW_THREADS
871 Py_END_ALLOW_THREADS
857 if (ZSTD_isError(zresult)) {
872 if (ZSTD_isError(zresult)) {
858 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
873 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
@@ -892,7 +907,7 b' static PyObject* Decompressor_decompress'
892 outBuffer.pos = 0;
907 outBuffer.pos = 0;
893
908
894 Py_BEGIN_ALLOW_THREADS
909 Py_BEGIN_ALLOW_THREADS
895 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
910 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
896 Py_END_ALLOW_THREADS
911 Py_END_ALLOW_THREADS
897 if (ZSTD_isError(zresult)) {
912 if (ZSTD_isError(zresult)) {
898 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
913 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
@@ -1176,7 +1191,7 b' static void decompress_worker(WorkerStat'
1176 inBuffer.size = sourceSize;
1191 inBuffer.size = sourceSize;
1177 inBuffer.pos = 0;
1192 inBuffer.pos = 0;
1178
1193
1179 zresult = ZSTD_decompress_generic(state->dctx, &outBuffer, &inBuffer);
1194 zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
1180 if (ZSTD_isError(zresult)) {
1195 if (ZSTD_isError(zresult)) {
1181 state->error = WorkerError_zstd;
1196 state->error = WorkerError_zstd;
1182 state->zresult = zresult;
1197 state->zresult = zresult;
@@ -57,7 +57,7 b' static DecompressorIteratorResult read_d'
57 self->output.pos = 0;
57 self->output.pos = 0;
58
58
59 Py_BEGIN_ALLOW_THREADS
59 Py_BEGIN_ALLOW_THREADS
60 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &self->output, &self->input);
60 zresult = ZSTD_decompressStream(self->decompressor->dctx, &self->output, &self->input);
61 Py_END_ALLOW_THREADS
61 Py_END_ALLOW_THREADS
62
62
63 /* We're done with the pointer. Nullify to prevent anyone from getting a
63 /* We're done with the pointer. Nullify to prevent anyone from getting a
@@ -16,7 +16,7 b''
16 #include <zdict.h>
16 #include <zdict.h>
17
17
18 /* Remember to change the string in zstandard/__init__ as well */
18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.10.1"
19 #define PYTHON_ZSTANDARD_VERSION "0.11.0"
20
20
21 typedef enum {
21 typedef enum {
22 compressorobj_flush_finish,
22 compressorobj_flush_finish,
@@ -31,27 +31,6 b' typedef enum {'
31 typedef struct {
31 typedef struct {
32 PyObject_HEAD
32 PyObject_HEAD
33 ZSTD_CCtx_params* params;
33 ZSTD_CCtx_params* params;
34 unsigned format;
35 int compressionLevel;
36 unsigned windowLog;
37 unsigned hashLog;
38 unsigned chainLog;
39 unsigned searchLog;
40 unsigned minMatch;
41 unsigned targetLength;
42 unsigned compressionStrategy;
43 unsigned contentSizeFlag;
44 unsigned checksumFlag;
45 unsigned dictIDFlag;
46 unsigned threads;
47 unsigned jobSize;
48 unsigned overlapSizeLog;
49 unsigned forceMaxWindow;
50 unsigned enableLongDistanceMatching;
51 unsigned ldmHashLog;
52 unsigned ldmMinMatch;
53 unsigned ldmBucketSizeLog;
54 unsigned ldmHashEveryLog;
55 } ZstdCompressionParametersObject;
34 } ZstdCompressionParametersObject;
56
35
57 extern PyTypeObject ZstdCompressionParametersType;
36 extern PyTypeObject ZstdCompressionParametersType;
@@ -129,9 +108,11 b' typedef struct {'
129
108
130 ZstdCompressor* compressor;
109 ZstdCompressor* compressor;
131 PyObject* writer;
110 PyObject* writer;
132 unsigned long long sourceSize;
111 ZSTD_outBuffer output;
133 size_t outSize;
112 size_t outSize;
134 int entered;
113 int entered;
114 int closed;
115 int writeReturnRead;
135 unsigned long long bytesCompressed;
116 unsigned long long bytesCompressed;
136 } ZstdCompressionWriter;
117 } ZstdCompressionWriter;
137
118
@@ -235,6 +216,8 b' typedef struct {'
235 PyObject* reader;
216 PyObject* reader;
236 /* Size for read() operations on reader. */
217 /* Size for read() operations on reader. */
237 size_t readSize;
218 size_t readSize;
219 /* Whether a read() can return data spanning multiple zstd frames. */
220 int readAcrossFrames;
238 /* Buffer to read from (if reading from a buffer). */
221 /* Buffer to read from (if reading from a buffer). */
239 Py_buffer buffer;
222 Py_buffer buffer;
240
223
@@ -267,6 +250,8 b' typedef struct {'
267 PyObject* writer;
250 PyObject* writer;
268 size_t outSize;
251 size_t outSize;
269 int entered;
252 int entered;
253 int closed;
254 int writeReturnRead;
270 } ZstdDecompressionWriter;
255 } ZstdDecompressionWriter;
271
256
272 extern PyTypeObject ZstdDecompressionWriterType;
257 extern PyTypeObject ZstdDecompressionWriterType;
@@ -360,8 +345,9 b' typedef struct {'
360
345
361 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
362
347
363 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
364 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
365 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
366 int ensure_ddict(ZstdCompressionDict* dict);
352 int ensure_ddict(ZstdCompressionDict* dict);
367 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
@@ -36,7 +36,9 b" SOURCES = ['zstd/%s' % p for p in ("
36 'compress/zstd_opt.c',
36 'compress/zstd_opt.c',
37 'compress/zstdmt_compress.c',
37 'compress/zstdmt_compress.c',
38 'decompress/huf_decompress.c',
38 'decompress/huf_decompress.c',
39 'decompress/zstd_ddict.c',
39 'decompress/zstd_decompress.c',
40 'decompress/zstd_decompress.c',
41 'decompress/zstd_decompress_block.c',
40 'dictBuilder/cover.c',
42 'dictBuilder/cover.c',
41 'dictBuilder/fastcover.c',
43 'dictBuilder/fastcover.c',
42 'dictBuilder/divsufsort.c',
44 'dictBuilder/divsufsort.c',
@@ -5,12 +5,32 b''
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 from __future__ import print_function
9
10 from distutils.version import LooseVersion
8 import os
11 import os
9 import sys
12 import sys
10 from setuptools import setup
13 from setuptools import setup
11
14
15 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
16 # (like memoryview).
17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
18 # garbage collection pitfalls.
19 MINIMUM_CFFI_VERSION = '1.11'
20
12 try:
21 try:
13 import cffi
22 import cffi
23
24 # PyPy (and possibly other distros) have CFFI distributed as part of
25 # them. The install_requires for CFFI below won't work. We need to sniff
26 # out the CFFI version here and reject CFFI if it is too old.
27 cffi_version = LooseVersion(cffi.__version__)
28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
29 print('CFFI 1.11 or newer required (%s found); '
30 'not building CFFI backend' % cffi_version,
31 file=sys.stderr)
32 cffi = None
33
14 except ImportError:
34 except ImportError:
15 cffi = None
35 cffi = None
16
36
@@ -49,12 +69,7 b' install_requires = []'
49 if cffi:
69 if cffi:
50 import make_cffi
70 import make_cffi
51 extensions.append(make_cffi.ffi.distutils_extension())
71 extensions.append(make_cffi.ffi.distutils_extension())
52
72 install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION)
53 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
54 # (like memoryview).
55 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
56 # garbage collection pitfalls.
57 install_requires.append('cffi>=1.11')
58
73
59 version = None
74 version = None
60
75
@@ -88,6 +103,7 b' setup('
88 'Programming Language :: Python :: 3.4',
103 'Programming Language :: Python :: 3.4',
89 'Programming Language :: Python :: 3.5',
104 'Programming Language :: Python :: 3.5',
90 'Programming Language :: Python :: 3.6',
105 'Programming Language :: Python :: 3.6',
106 'Programming Language :: Python :: 3.7',
91 ],
107 ],
92 keywords='zstandard zstd compression',
108 keywords='zstandard zstd compression',
93 packages=['zstandard'],
109 packages=['zstandard'],
@@ -30,7 +30,9 b" zstd_sources = ['zstd/%s' % p for p in ("
30 'compress/zstd_opt.c',
30 'compress/zstd_opt.c',
31 'compress/zstdmt_compress.c',
31 'compress/zstdmt_compress.c',
32 'decompress/huf_decompress.c',
32 'decompress/huf_decompress.c',
33 'decompress/zstd_ddict.c',
33 'decompress/zstd_decompress.c',
34 'decompress/zstd_decompress.c',
35 'decompress/zstd_decompress_block.c',
34 'dictBuilder/cover.c',
36 'dictBuilder/cover.c',
35 'dictBuilder/divsufsort.c',
37 'dictBuilder/divsufsort.c',
36 'dictBuilder/fastcover.c',
38 'dictBuilder/fastcover.c',
@@ -79,12 +79,37 b' def make_cffi(cls):'
79 return cls
79 return cls
80
80
81
81
82 class OpCountingBytesIO(io.BytesIO):
82 class NonClosingBytesIO(io.BytesIO):
83 """BytesIO that saves the underlying buffer on close().
84
85 This allows us to access written data after close().
86 """
83 def __init__(self, *args, **kwargs):
87 def __init__(self, *args, **kwargs):
88 super(NonClosingBytesIO, self).__init__(*args, **kwargs)
89 self._saved_buffer = None
90
91 def close(self):
92 self._saved_buffer = self.getvalue()
93 return super(NonClosingBytesIO, self).close()
94
95 def getvalue(self):
96 if self.closed:
97 return self._saved_buffer
98 else:
99 return super(NonClosingBytesIO, self).getvalue()
100
101
102 class OpCountingBytesIO(NonClosingBytesIO):
103 def __init__(self, *args, **kwargs):
104 self._flush_count = 0
84 self._read_count = 0
105 self._read_count = 0
85 self._write_count = 0
106 self._write_count = 0
86 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
107 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
87
108
109 def flush(self):
110 self._flush_count += 1
111 return super(OpCountingBytesIO, self).flush()
112
88 def read(self, *args):
113 def read(self, *args):
89 self._read_count += 1
114 self._read_count += 1
90 return super(OpCountingBytesIO, self).read(*args)
115 return super(OpCountingBytesIO, self).read(*args)
@@ -117,6 +142,13 b' def random_input_data():'
117 except OSError:
142 except OSError:
118 pass
143 pass
119
144
145 # Also add some actual random data.
146 _source_files.append(os.urandom(100))
147 _source_files.append(os.urandom(1000))
148 _source_files.append(os.urandom(10000))
149 _source_files.append(os.urandom(100000))
150 _source_files.append(os.urandom(1000000))
151
120 return _source_files
152 return _source_files
121
153
122
154
@@ -140,12 +172,14 b' def generate_samples():'
140
172
141
173
142 if hypothesis:
174 if hypothesis:
143 default_settings = hypothesis.settings()
175 default_settings = hypothesis.settings(deadline=10000)
144 hypothesis.settings.register_profile('default', default_settings)
176 hypothesis.settings.register_profile('default', default_settings)
145
177
146 ci_settings = hypothesis.settings(max_examples=2500,
178 ci_settings = hypothesis.settings(deadline=20000, max_examples=1000)
147 max_iterations=2500)
148 hypothesis.settings.register_profile('ci', ci_settings)
179 hypothesis.settings.register_profile('ci', ci_settings)
149
180
181 expensive_settings = hypothesis.settings(deadline=None, max_examples=10000)
182 hypothesis.settings.register_profile('expensive', expensive_settings)
183
150 hypothesis.settings.load_profile(
184 hypothesis.settings.load_profile(
151 os.environ.get('HYPOTHESIS_PROFILE', 'default'))
185 os.environ.get('HYPOTHESIS_PROFILE', 'default'))
@@ -8,6 +8,9 b" ss = struct.Struct('=QQ')"
8
8
9 class TestBufferWithSegments(unittest.TestCase):
9 class TestBufferWithSegments(unittest.TestCase):
10 def test_arguments(self):
10 def test_arguments(self):
11 if not hasattr(zstd, 'BufferWithSegments'):
12 self.skipTest('BufferWithSegments not available')
13
11 with self.assertRaises(TypeError):
14 with self.assertRaises(TypeError):
12 zstd.BufferWithSegments()
15 zstd.BufferWithSegments()
13
16
@@ -19,10 +22,16 b' class TestBufferWithSegments(unittest.Te'
19 zstd.BufferWithSegments(b'foo', b'\x00\x00')
22 zstd.BufferWithSegments(b'foo', b'\x00\x00')
20
23
21 def test_invalid_offset(self):
24 def test_invalid_offset(self):
25 if not hasattr(zstd, 'BufferWithSegments'):
26 self.skipTest('BufferWithSegments not available')
27
22 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
28 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
23 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
29 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
24
30
25 def test_invalid_getitem(self):
31 def test_invalid_getitem(self):
32 if not hasattr(zstd, 'BufferWithSegments'):
33 self.skipTest('BufferWithSegments not available')
34
26 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
35 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
27
36
28 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
37 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
@@ -35,6 +44,9 b' class TestBufferWithSegments(unittest.Te'
35 test = b[2]
44 test = b[2]
36
45
37 def test_single(self):
46 def test_single(self):
47 if not hasattr(zstd, 'BufferWithSegments'):
48 self.skipTest('BufferWithSegments not available')
49
38 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
50 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
39 self.assertEqual(len(b), 1)
51 self.assertEqual(len(b), 1)
40 self.assertEqual(b.size, 3)
52 self.assertEqual(b.size, 3)
@@ -45,6 +57,9 b' class TestBufferWithSegments(unittest.Te'
45 self.assertEqual(b[0].tobytes(), b'foo')
57 self.assertEqual(b[0].tobytes(), b'foo')
46
58
47 def test_multiple(self):
59 def test_multiple(self):
60 if not hasattr(zstd, 'BufferWithSegments'):
61 self.skipTest('BufferWithSegments not available')
62
48 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
63 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
49 ss.pack(3, 4),
64 ss.pack(3, 4),
50 ss.pack(7, 5)]))
65 ss.pack(7, 5)]))
@@ -59,10 +74,16 b' class TestBufferWithSegments(unittest.Te'
59
74
60 class TestBufferWithSegmentsCollection(unittest.TestCase):
75 class TestBufferWithSegmentsCollection(unittest.TestCase):
61 def test_empty_constructor(self):
76 def test_empty_constructor(self):
77 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
78 self.skipTest('BufferWithSegmentsCollection not available')
79
62 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
80 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
63 zstd.BufferWithSegmentsCollection()
81 zstd.BufferWithSegmentsCollection()
64
82
65 def test_argument_validation(self):
83 def test_argument_validation(self):
84 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
85 self.skipTest('BufferWithSegmentsCollection not available')
86
66 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
87 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
67 zstd.BufferWithSegmentsCollection(None)
88 zstd.BufferWithSegmentsCollection(None)
68
89
@@ -74,6 +95,9 b' class TestBufferWithSegmentsCollection(u'
74 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
95 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
75
96
76 def test_length(self):
97 def test_length(self):
98 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
99 self.skipTest('BufferWithSegmentsCollection not available')
100
77 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
101 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
78 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
102 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
79 ss.pack(3, 3)]))
103 ss.pack(3, 3)]))
@@ -91,6 +115,9 b' class TestBufferWithSegmentsCollection(u'
91 self.assertEqual(c.size(), 9)
115 self.assertEqual(c.size(), 9)
92
116
93 def test_getitem(self):
117 def test_getitem(self):
118 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
119 self.skipTest('BufferWithSegmentsCollection not available')
120
94 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
121 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
95 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
122 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
96 ss.pack(3, 3)]))
123 ss.pack(3, 3)]))
@@ -1,14 +1,17 b''
1 import hashlib
1 import hashlib
2 import io
2 import io
3 import os
3 import struct
4 import struct
4 import sys
5 import sys
5 import tarfile
6 import tarfile
7 import tempfile
6 import unittest
8 import unittest
7
9
8 import zstandard as zstd
10 import zstandard as zstd
9
11
10 from .common import (
12 from .common import (
11 make_cffi,
13 make_cffi,
14 NonClosingBytesIO,
12 OpCountingBytesIO,
15 OpCountingBytesIO,
13 )
16 )
14
17
@@ -272,7 +275,7 b' class TestCompressor_compressobj(unittes'
272
275
273 params = zstd.get_frame_parameters(result)
276 params = zstd.get_frame_parameters(result)
274 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
277 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
275 self.assertEqual(params.window_size, 1048576)
278 self.assertEqual(params.window_size, 2097152)
276 self.assertEqual(params.dict_id, 0)
279 self.assertEqual(params.dict_id, 0)
277 self.assertFalse(params.has_checksum)
280 self.assertFalse(params.has_checksum)
278
281
@@ -321,7 +324,7 b' class TestCompressor_compressobj(unittes'
321 cobj.compress(b'foo')
324 cobj.compress(b'foo')
322 cobj.flush()
325 cobj.flush()
323
326
324 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'):
327 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
325 cobj.compress(b'foo')
328 cobj.compress(b'foo')
326
329
327 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
330 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
@@ -453,7 +456,7 b' class TestCompressor_copy_stream(unittes'
453
456
454 params = zstd.get_frame_parameters(dest.getvalue())
457 params = zstd.get_frame_parameters(dest.getvalue())
455 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
458 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
456 self.assertEqual(params.window_size, 1048576)
459 self.assertEqual(params.window_size, 2097152)
457 self.assertEqual(params.dict_id, 0)
460 self.assertEqual(params.dict_id, 0)
458 self.assertFalse(params.has_checksum)
461 self.assertFalse(params.has_checksum)
459
462
@@ -605,10 +608,6 b' class TestCompressor_stream_reader(unitt'
605 with self.assertRaises(io.UnsupportedOperation):
608 with self.assertRaises(io.UnsupportedOperation):
606 reader.readlines()
609 reader.readlines()
607
610
608 # This could probably be implemented someday.
609 with self.assertRaises(NotImplementedError):
610 reader.readall()
611
612 with self.assertRaises(io.UnsupportedOperation):
611 with self.assertRaises(io.UnsupportedOperation):
613 iter(reader)
612 iter(reader)
614
613
@@ -644,15 +643,16 b' class TestCompressor_stream_reader(unitt'
644 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
643 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
645 reader.read(10)
644 reader.read(10)
646
645
647 def test_read_bad_size(self):
646 def test_read_sizes(self):
648 cctx = zstd.ZstdCompressor()
647 cctx = zstd.ZstdCompressor()
648 foo = cctx.compress(b'foo')
649
649
650 with cctx.stream_reader(b'foo') as reader:
650 with cctx.stream_reader(b'foo') as reader:
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
652 reader.read(-1)
652 reader.read(-2)
653
653
654 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
654 self.assertEqual(reader.read(0), b'')
655 reader.read(0)
655 self.assertEqual(reader.read(), foo)
656
656
657 def test_read_buffer(self):
657 def test_read_buffer(self):
658 cctx = zstd.ZstdCompressor()
658 cctx = zstd.ZstdCompressor()
@@ -746,11 +746,202 b' class TestCompressor_stream_reader(unitt'
746 with cctx.stream_reader(source, size=42):
746 with cctx.stream_reader(source, size=42):
747 pass
747 pass
748
748
749 def test_readall(self):
750 cctx = zstd.ZstdCompressor()
751 frame = cctx.compress(b'foo' * 1024)
752
753 reader = cctx.stream_reader(b'foo' * 1024)
754 self.assertEqual(reader.readall(), frame)
755
756 def test_readinto(self):
757 cctx = zstd.ZstdCompressor()
758 foo = cctx.compress(b'foo')
759
760 reader = cctx.stream_reader(b'foo')
761 with self.assertRaises(Exception):
762 reader.readinto(b'foobar')
763
764 # readinto() with sufficiently large destination.
765 b = bytearray(1024)
766 reader = cctx.stream_reader(b'foo')
767 self.assertEqual(reader.readinto(b), len(foo))
768 self.assertEqual(b[0:len(foo)], foo)
769 self.assertEqual(reader.readinto(b), 0)
770 self.assertEqual(b[0:len(foo)], foo)
771
772 # readinto() with small reads.
773 b = bytearray(1024)
774 reader = cctx.stream_reader(b'foo', read_size=1)
775 self.assertEqual(reader.readinto(b), len(foo))
776 self.assertEqual(b[0:len(foo)], foo)
777
778 # Too small destination buffer.
779 b = bytearray(2)
780 reader = cctx.stream_reader(b'foo')
781 self.assertEqual(reader.readinto(b), 2)
782 self.assertEqual(b[:], foo[0:2])
783 self.assertEqual(reader.readinto(b), 2)
784 self.assertEqual(b[:], foo[2:4])
785 self.assertEqual(reader.readinto(b), 2)
786 self.assertEqual(b[:], foo[4:6])
787
788 def test_readinto1(self):
789 cctx = zstd.ZstdCompressor()
790 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
791
792 reader = cctx.stream_reader(b'foo')
793 with self.assertRaises(Exception):
794 reader.readinto1(b'foobar')
795
796 b = bytearray(1024)
797 source = OpCountingBytesIO(b'foo')
798 reader = cctx.stream_reader(source)
799 self.assertEqual(reader.readinto1(b), len(foo))
800 self.assertEqual(b[0:len(foo)], foo)
801 self.assertEqual(source._read_count, 2)
802
803 # readinto1() with small reads.
804 b = bytearray(1024)
805 source = OpCountingBytesIO(b'foo')
806 reader = cctx.stream_reader(source, read_size=1)
807 self.assertEqual(reader.readinto1(b), len(foo))
808 self.assertEqual(b[0:len(foo)], foo)
809 self.assertEqual(source._read_count, 4)
810
811 def test_read1(self):
812 cctx = zstd.ZstdCompressor()
813 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
814
815 b = OpCountingBytesIO(b'foo')
816 reader = cctx.stream_reader(b)
817
818 self.assertEqual(reader.read1(), foo)
819 self.assertEqual(b._read_count, 2)
820
821 b = OpCountingBytesIO(b'foo')
822 reader = cctx.stream_reader(b)
823
824 self.assertEqual(reader.read1(0), b'')
825 self.assertEqual(reader.read1(2), foo[0:2])
826 self.assertEqual(b._read_count, 2)
827 self.assertEqual(reader.read1(2), foo[2:4])
828 self.assertEqual(reader.read1(1024), foo[4:])
829
749
830
750 @make_cffi
831 @make_cffi
751 class TestCompressor_stream_writer(unittest.TestCase):
832 class TestCompressor_stream_writer(unittest.TestCase):
833 def test_io_api(self):
834 buffer = io.BytesIO()
835 cctx = zstd.ZstdCompressor()
836 writer = cctx.stream_writer(buffer)
837
838 self.assertFalse(writer.isatty())
839 self.assertFalse(writer.readable())
840
841 with self.assertRaises(io.UnsupportedOperation):
842 writer.readline()
843
844 with self.assertRaises(io.UnsupportedOperation):
845 writer.readline(42)
846
847 with self.assertRaises(io.UnsupportedOperation):
848 writer.readline(size=42)
849
850 with self.assertRaises(io.UnsupportedOperation):
851 writer.readlines()
852
853 with self.assertRaises(io.UnsupportedOperation):
854 writer.readlines(42)
855
856 with self.assertRaises(io.UnsupportedOperation):
857 writer.readlines(hint=42)
858
859 with self.assertRaises(io.UnsupportedOperation):
860 writer.seek(0)
861
862 with self.assertRaises(io.UnsupportedOperation):
863 writer.seek(10, os.SEEK_SET)
864
865 self.assertFalse(writer.seekable())
866
867 with self.assertRaises(io.UnsupportedOperation):
868 writer.truncate()
869
870 with self.assertRaises(io.UnsupportedOperation):
871 writer.truncate(42)
872
873 with self.assertRaises(io.UnsupportedOperation):
874 writer.truncate(size=42)
875
876 self.assertTrue(writer.writable())
877
878 with self.assertRaises(NotImplementedError):
879 writer.writelines([])
880
881 with self.assertRaises(io.UnsupportedOperation):
882 writer.read()
883
884 with self.assertRaises(io.UnsupportedOperation):
885 writer.read(42)
886
887 with self.assertRaises(io.UnsupportedOperation):
888 writer.read(size=42)
889
890 with self.assertRaises(io.UnsupportedOperation):
891 writer.readall()
892
893 with self.assertRaises(io.UnsupportedOperation):
894 writer.readinto(None)
895
896 with self.assertRaises(io.UnsupportedOperation):
897 writer.fileno()
898
899 self.assertFalse(writer.closed)
900
901 def test_fileno_file(self):
902 with tempfile.TemporaryFile('wb') as tf:
903 cctx = zstd.ZstdCompressor()
904 writer = cctx.stream_writer(tf)
905
906 self.assertEqual(writer.fileno(), tf.fileno())
907
908 def test_close(self):
909 buffer = NonClosingBytesIO()
910 cctx = zstd.ZstdCompressor(level=1)
911 writer = cctx.stream_writer(buffer)
912
913 writer.write(b'foo' * 1024)
914 self.assertFalse(writer.closed)
915 self.assertFalse(buffer.closed)
916 writer.close()
917 self.assertTrue(writer.closed)
918 self.assertTrue(buffer.closed)
919
920 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
921 writer.write(b'foo')
922
923 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
924 writer.flush()
925
926 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
927 with writer:
928 pass
929
930 self.assertEqual(buffer.getvalue(),
931 b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f'
932 b'\x6f\x01\x00\xfa\xd3\x77\x43')
933
934 # Context manager exit should close stream.
935 buffer = io.BytesIO()
936 writer = cctx.stream_writer(buffer)
937
938 with writer:
939 writer.write(b'foo')
940
941 self.assertTrue(writer.closed)
942
752 def test_empty(self):
943 def test_empty(self):
753 buffer = io.BytesIO()
944 buffer = NonClosingBytesIO()
754 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
945 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
755 with cctx.stream_writer(buffer) as compressor:
946 with cctx.stream_writer(buffer) as compressor:
756 compressor.write(b'')
947 compressor.write(b'')
@@ -764,6 +955,25 b' class TestCompressor_stream_writer(unitt'
764 self.assertEqual(params.dict_id, 0)
955 self.assertEqual(params.dict_id, 0)
765 self.assertFalse(params.has_checksum)
956 self.assertFalse(params.has_checksum)
766
957
958 # Test without context manager.
959 buffer = io.BytesIO()
960 compressor = cctx.stream_writer(buffer)
961 self.assertEqual(compressor.write(b''), 0)
962 self.assertEqual(buffer.getvalue(), b'')
963 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
964 result = buffer.getvalue()
965 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
966
967 params = zstd.get_frame_parameters(result)
968 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
969 self.assertEqual(params.window_size, 524288)
970 self.assertEqual(params.dict_id, 0)
971 self.assertFalse(params.has_checksum)
972
973 # Test write_return_read=True
974 compressor = cctx.stream_writer(buffer, write_return_read=True)
975 self.assertEqual(compressor.write(b''), 0)
976
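The write_return_read=True keyword exercised here flips what write() returns: by default it reports compressed bytes emitted to the destination (often 0 while output sits in internal buffers); with the flag it reports input bytes consumed, matching io.RawIOBase semantics. Sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=1)

    # Default: write() returns compressed bytes emitted (0 while buffered).
    writer = cctx.stream_writer(io.BytesIO())
    assert writer.write(b'foo') == 0

    # write_return_read=True: write() returns input bytes consumed instead.
    writer = cctx.stream_writer(io.BytesIO(), write_return_read=True)
    assert writer.write(b'foo') == 3
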
767 def test_input_types(self):
977 def test_input_types(self):
768 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
978 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
769 cctx = zstd.ZstdCompressor(level=1)
979 cctx = zstd.ZstdCompressor(level=1)
@@ -778,14 +988,17 b' class TestCompressor_stream_writer(unitt'
778 ]
988 ]
779
989
780 for source in sources:
990 for source in sources:
781 buffer = io.BytesIO()
991 buffer = NonClosingBytesIO()
782 with cctx.stream_writer(buffer) as compressor:
992 with cctx.stream_writer(buffer) as compressor:
783 compressor.write(source)
993 compressor.write(source)
784
994
785 self.assertEqual(buffer.getvalue(), expected)
995 self.assertEqual(buffer.getvalue(), expected)
786
996
997 compressor = cctx.stream_writer(buffer, write_return_read=True)
998 self.assertEqual(compressor.write(source), len(source))
999
787 def test_multiple_compress(self):
1000 def test_multiple_compress(self):
788 buffer = io.BytesIO()
1001 buffer = NonClosingBytesIO()
789 cctx = zstd.ZstdCompressor(level=5)
1002 cctx = zstd.ZstdCompressor(level=5)
790 with cctx.stream_writer(buffer) as compressor:
1003 with cctx.stream_writer(buffer) as compressor:
791 self.assertEqual(compressor.write(b'foo'), 0)
1004 self.assertEqual(compressor.write(b'foo'), 0)
@@ -794,9 +1007,27 b' class TestCompressor_stream_writer(unitt'
794
1007
795 result = buffer.getvalue()
1008 result = buffer.getvalue()
796 self.assertEqual(result,
1009 self.assertEqual(result,
797 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
1010 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
798 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1011 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
799
1012
1013 # Test without context manager.
1014 buffer = io.BytesIO()
1015 compressor = cctx.stream_writer(buffer)
1016 self.assertEqual(compressor.write(b'foo'), 0)
1017 self.assertEqual(compressor.write(b'bar'), 0)
1018 self.assertEqual(compressor.write(b'x' * 8192), 0)
1019 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1020 result = buffer.getvalue()
1021 self.assertEqual(result,
1022 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1023 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1024
1025 # Test with write_return_read=True.
1026 compressor = cctx.stream_writer(buffer, write_return_read=True)
1027 self.assertEqual(compressor.write(b'foo'), 3)
1028 self.assertEqual(compressor.write(b'barbiz'), 6)
1029 self.assertEqual(compressor.write(b'x' * 8192), 8192)
1030
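Without a context manager the caller must end frames explicitly: flush(zstd.FLUSH_FRAME) finishes the current frame and returns the number of compressed bytes the flush wrote. The pattern from the hunk above, condensed:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=5)
    buffer = io.BytesIO()

    compressor = cctx.stream_writer(buffer)
    compressor.write(b'foo')
    compressor.write(b'bar')
    emitted = compressor.flush(zstd.FLUSH_FRAME)  # compressed bytes written

    assert emitted > 0
    assert buffer.getvalue()  # buffer now holds one complete zstd frame
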
800 def test_dictionary(self):
1031 def test_dictionary(self):
801 samples = []
1032 samples = []
802 for i in range(128):
1033 for i in range(128):
@@ -807,9 +1038,9 b' class TestCompressor_stream_writer(unitt'
807 d = zstd.train_dictionary(8192, samples)
1038 d = zstd.train_dictionary(8192, samples)
808
1039
809 h = hashlib.sha1(d.as_bytes()).hexdigest()
1040 h = hashlib.sha1(d.as_bytes()).hexdigest()
810 self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79')
1041 self.assertEqual(h, '88ca0d38332aff379d4ced166a51c280a7679aad')
811
1042
812 buffer = io.BytesIO()
1043 buffer = NonClosingBytesIO()
813 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1044 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
814 with cctx.stream_writer(buffer) as compressor:
1045 with cctx.stream_writer(buffer) as compressor:
815 self.assertEqual(compressor.write(b'foo'), 0)
1046 self.assertEqual(compressor.write(b'foo'), 0)
@@ -825,7 +1056,7 b' class TestCompressor_stream_writer(unitt'
825 self.assertFalse(params.has_checksum)
1056 self.assertFalse(params.has_checksum)
826
1057
827 h = hashlib.sha1(compressed).hexdigest()
1058 h = hashlib.sha1(compressed).hexdigest()
828 self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786')
1059 self.assertEqual(h, '8703b4316f274d26697ea5dd480f29c08e85d940')
829
1060
830 source = b'foo' + b'bar' + (b'foo' * 16384)
1061 source = b'foo' + b'bar' + (b'foo' * 16384)
831
1062
@@ -842,9 +1073,9 b' class TestCompressor_stream_writer(unitt'
842 min_match=5,
1073 min_match=5,
843 search_log=4,
1074 search_log=4,
844 target_length=10,
1075 target_length=10,
845 compression_strategy=zstd.STRATEGY_FAST)
1076 strategy=zstd.STRATEGY_FAST)
846
1077
847 buffer = io.BytesIO()
1078 buffer = NonClosingBytesIO()
848 cctx = zstd.ZstdCompressor(compression_params=params)
1079 cctx = zstd.ZstdCompressor(compression_params=params)
849 with cctx.stream_writer(buffer) as compressor:
1080 with cctx.stream_writer(buffer) as compressor:
850 self.assertEqual(compressor.write(b'foo'), 0)
1081 self.assertEqual(compressor.write(b'foo'), 0)
@@ -863,12 +1094,12 b' class TestCompressor_stream_writer(unitt'
863 self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef')
1094 self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef')
864
1095
865 def test_write_checksum(self):
1096 def test_write_checksum(self):
866 no_checksum = io.BytesIO()
1097 no_checksum = NonClosingBytesIO()
867 cctx = zstd.ZstdCompressor(level=1)
1098 cctx = zstd.ZstdCompressor(level=1)
868 with cctx.stream_writer(no_checksum) as compressor:
1099 with cctx.stream_writer(no_checksum) as compressor:
869 self.assertEqual(compressor.write(b'foobar'), 0)
1100 self.assertEqual(compressor.write(b'foobar'), 0)
870
1101
871 with_checksum = io.BytesIO()
1102 with_checksum = NonClosingBytesIO()
872 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1103 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
873 with cctx.stream_writer(with_checksum) as compressor:
1104 with cctx.stream_writer(with_checksum) as compressor:
874 self.assertEqual(compressor.write(b'foobar'), 0)
1105 self.assertEqual(compressor.write(b'foobar'), 0)
@@ -886,12 +1117,12 b' class TestCompressor_stream_writer(unitt'
886 len(no_checksum.getvalue()) + 4)
1117 len(no_checksum.getvalue()) + 4)
887
1118
888 def test_write_content_size(self):
1119 def test_write_content_size(self):
889 no_size = io.BytesIO()
1120 no_size = NonClosingBytesIO()
890 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1121 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
891 with cctx.stream_writer(no_size) as compressor:
1122 with cctx.stream_writer(no_size) as compressor:
892 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1123 self.assertEqual(compressor.write(b'foobar' * 256), 0)
893
1124
894 with_size = io.BytesIO()
1125 with_size = NonClosingBytesIO()
895 cctx = zstd.ZstdCompressor(level=1)
1126 cctx = zstd.ZstdCompressor(level=1)
896 with cctx.stream_writer(with_size) as compressor:
1127 with cctx.stream_writer(with_size) as compressor:
897 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1128 self.assertEqual(compressor.write(b'foobar' * 256), 0)
@@ -902,7 +1133,7 b' class TestCompressor_stream_writer(unitt'
902 len(no_size.getvalue()))
1133 len(no_size.getvalue()))
903
1134
904 # Declaring size will write the header.
1135 # Declaring size will write the header.
905 with_size = io.BytesIO()
1136 with_size = NonClosingBytesIO()
906 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
1137 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
907 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1138 self.assertEqual(compressor.write(b'foobar' * 256), 0)
908
1139
@@ -927,7 +1158,7 b' class TestCompressor_stream_writer(unitt'
927
1158
928 d = zstd.train_dictionary(1024, samples)
1159 d = zstd.train_dictionary(1024, samples)
929
1160
930 with_dict_id = io.BytesIO()
1161 with_dict_id = NonClosingBytesIO()
931 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1162 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
932 with cctx.stream_writer(with_dict_id) as compressor:
1163 with cctx.stream_writer(with_dict_id) as compressor:
933 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1164 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
@@ -935,7 +1166,7 b' class TestCompressor_stream_writer(unitt'
935 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
1166 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
936
1167
937 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1168 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
938 no_dict_id = io.BytesIO()
1169 no_dict_id = NonClosingBytesIO()
939 with cctx.stream_writer(no_dict_id) as compressor:
1170 with cctx.stream_writer(no_dict_id) as compressor:
940 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1171 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
941
1172
@@ -1009,8 +1240,32 b' class TestCompressor_stream_writer(unitt'
1009 header = trailing[0:3]
1240 header = trailing[0:3]
1010 self.assertEqual(header, b'\x01\x00\x00')
1241 self.assertEqual(header, b'\x01\x00\x00')
1011
1242
1243 def test_flush_frame(self):
1244 cctx = zstd.ZstdCompressor(level=3)
1245 dest = OpCountingBytesIO()
1246
1247 with cctx.stream_writer(dest) as compressor:
1248 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1249 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1250 compressor.write(b'biz' * 16384)
1251
1252 self.assertEqual(dest.getvalue(),
1253 # Frame 1.
1254 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f'
1255 b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08'
1256 # Frame 2.
1257 b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a'
1258 b'\x01\x00\xfa\x3f\x75\x37\x04')
1259
1260 def test_bad_flush_mode(self):
1261 cctx = zstd.ZstdCompressor()
1262 dest = io.BytesIO()
1263 with cctx.stream_writer(dest) as compressor:
1264 with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'):
1265 compressor.flush(flush_mode=42)
1266
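flush() on a compression writer takes a mode: zstd.FLUSH_BLOCK (assumed to be the default) emits buffered data mid-frame, while zstd.FLUSH_FRAME ends the frame so the next write starts a new one, which is why test_flush_frame asserts two complete frames in the output. Unknown modes raise ValueError. Sketch:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=3)
    dest = io.BytesIO()

    compressor = cctx.stream_writer(dest)
    compressor.write(b'foobar' * 8192)
    compressor.flush(zstd.FLUSH_FRAME)   # ends frame 1
    compressor.write(b'biz' * 16384)     # implicitly starts frame 2
    compressor.flush(zstd.FLUSH_FRAME)   # ends frame 2

    try:
        compressor.flush(flush_mode=42)  # unknown modes raise ValueError
    except ValueError:
        pass
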
1012 def test_multithreaded(self):
1267 def test_multithreaded(self):
1013 dest = io.BytesIO()
1268 dest = NonClosingBytesIO()
1014 cctx = zstd.ZstdCompressor(threads=2)
1269 cctx = zstd.ZstdCompressor(threads=2)
1015 with cctx.stream_writer(dest) as compressor:
1270 with cctx.stream_writer(dest) as compressor:
1016 compressor.write(b'a' * 1048576)
1271 compressor.write(b'a' * 1048576)
@@ -1043,22 +1298,21 b' class TestCompressor_stream_writer(unitt'
1043 pass
1298 pass
1044
1299
1045 def test_tarfile_compat(self):
1300 def test_tarfile_compat(self):
1046 raise unittest.SkipTest('not yet fully working')
1301 dest = NonClosingBytesIO()
1047
1048 dest = io.BytesIO()
1049 cctx = zstd.ZstdCompressor()
1302 cctx = zstd.ZstdCompressor()
1050 with cctx.stream_writer(dest) as compressor:
1303 with cctx.stream_writer(dest) as compressor:
1051 with tarfile.open('tf', mode='w', fileobj=compressor) as tf:
1304 with tarfile.open('tf', mode='w|', fileobj=compressor) as tf:
1052 tf.add(__file__, 'test_compressor.py')
1305 tf.add(__file__, 'test_compressor.py')
1053
1306
1054 dest.seek(0)
1307 dest = io.BytesIO(dest.getvalue())
1055
1308
1056 dctx = zstd.ZstdDecompressor()
1309 dctx = zstd.ZstdDecompressor()
1057 with dctx.stream_reader(dest) as reader:
1310 with dctx.stream_reader(dest) as reader:
1058 with tarfile.open(mode='r:', fileobj=reader) as tf:
1311 with tarfile.open(mode='r|', fileobj=reader) as tf:
1059 for member in tf:
1312 for member in tf:
1060 self.assertEqual(member.name, 'test_compressor.py')
1313 self.assertEqual(member.name, 'test_compressor.py')
1061
1314
1315
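The tarfile fix is the mode strings: the zstandard writer and reader are non-seekable, so the random-access modes 'w'/'r:' give way to the streaming modes 'w|'/'r|'. A hedged round-trip sketch (flushing instead of closing so the BytesIO stays readable):

    import io
    import tarfile
    import zstandard as zstd

    dest = io.BytesIO()
    cctx = zstd.ZstdCompressor()
    compressor = cctx.stream_writer(dest)
    with tarfile.open(mode='w|', fileobj=compressor) as tf:
        tf.add(__file__, 'example.py')
    compressor.flush(zstd.FLUSH_FRAME)  # finish the frame without closing dest

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(io.BytesIO(dest.getvalue())) as reader:
        with tarfile.open(mode='r|', fileobj=reader) as tf:
            assert [m.name for m in tf] == ['example.py']
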
1062 @make_cffi
1316 @make_cffi
1063 class TestCompressor_read_to_iter(unittest.TestCase):
1317 class TestCompressor_read_to_iter(unittest.TestCase):
1064 def test_type_validation(self):
1318 def test_type_validation(self):
@@ -1192,7 +1446,7 b' class TestCompressor_chunker(unittest.Te'
1192
1446
1193 it = chunker.finish()
1447 it = chunker.finish()
1194
1448
1195 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00')
1449 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00')
1196
1450
1197 with self.assertRaises(StopIteration):
1451 with self.assertRaises(StopIteration):
1198 next(it)
1452 next(it)
@@ -1214,7 +1468,7 b' class TestCompressor_chunker(unittest.Te'
1214 it = chunker.finish()
1468 it = chunker.finish()
1215
1469
1216 self.assertEqual(next(it),
1470 self.assertEqual(next(it),
1217 b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f'
1471 b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f'
1218 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1472 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1219
1473
1220 with self.assertRaises(StopIteration):
1474 with self.assertRaises(StopIteration):
@@ -1258,7 +1512,7 b' class TestCompressor_chunker(unittest.Te'
1258
1512
1259 self.assertEqual(
1513 self.assertEqual(
1260 b''.join(chunks),
1514 b''.join(chunks),
1261 b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1515 b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1262 b'\xfa\xd3\x77\x43')
1516 b'\xfa\xd3\x77\x43')
1263
1517
1264 dctx = zstd.ZstdDecompressor()
1518 dctx = zstd.ZstdDecompressor()
@@ -1283,7 +1537,7 b' class TestCompressor_chunker(unittest.Te'
1283
1537
1284 self.assertEqual(list(chunker.compress(source)), [])
1538 self.assertEqual(list(chunker.compress(source)), [])
1285 self.assertEqual(list(chunker.finish()), [
1539 self.assertEqual(list(chunker.finish()), [
1286 b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f'
1540 b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f'
1287 ])
1541 ])
1288
1542
1289 def test_flush(self):
1543 def test_flush(self):
@@ -1296,7 +1550,7 b' class TestCompressor_chunker(unittest.Te'
1296 chunks1 = list(chunker.flush())
1550 chunks1 = list(chunker.flush())
1297
1551
1298 self.assertEqual(chunks1, [
1552 self.assertEqual(chunks1, [
1299 b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1553 b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1300 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1554 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1301 ])
1555 ])
1302
1556
@@ -1326,7 +1580,7 b' class TestCompressor_chunker(unittest.Te'
1326
1580
1327 with self.assertRaisesRegexp(
1581 with self.assertRaisesRegexp(
1328 zstd.ZstdError,
1582 zstd.ZstdError,
1329 'cannot call compress\(\) after compression finished'):
1583 r'cannot call compress\(\) after compression finished'):
1330 list(chunker.compress(b'foo'))
1584 list(chunker.compress(b'foo'))
1331
1585
1332 def test_flush_after_finish(self):
1586 def test_flush_after_finish(self):
@@ -1338,7 +1592,7 b' class TestCompressor_chunker(unittest.Te'
1338
1592
1339 with self.assertRaisesRegexp(
1593 with self.assertRaisesRegexp(
1340 zstd.ZstdError,
1594 zstd.ZstdError,
1341 'cannot call flush\(\) after compression finished'):
1595 r'cannot call flush\(\) after compression finished'):
1342 list(chunker.flush())
1596 list(chunker.flush())
1343
1597
1344 def test_finish_after_finish(self):
1598 def test_finish_after_finish(self):
@@ -1350,7 +1604,7 b' class TestCompressor_chunker(unittest.Te'
1350
1604
1351 with self.assertRaisesRegexp(
1605 with self.assertRaisesRegexp(
1352 zstd.ZstdError,
1606 zstd.ZstdError,
1353 'cannot call finish\(\) after compression finished'):
1607 r'cannot call finish\(\) after compression finished'):
1354 list(chunker.finish())
1608 list(chunker.finish())
1355
1609
1356
1610
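The three regex fixes above only add raw-string prefixes (the escaped parens were tripping DeprecationWarnings on newer Pythons); the behavior they pin down is that a chunker is single-use after finish(). For orientation, a hedged sketch of normal chunker use; chunk_size is an assumed constructor keyword, not shown in these hunks:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=32768)  # chunk_size is assumed

    chunks = []
    for part in (b'foo', b'bar' * 100):
        chunks.extend(chunker.compress(part))  # may yield nothing until a chunk fills

    chunks.extend(chunker.finish())  # single-use: later calls raise ZstdError
    frame = b''.join(chunks)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame, max_output_size=1024) == b'foo' + b'bar' * 100
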
@@ -1358,6 +1612,9 b' class TestCompressor_multi_compress_to_b'
1358 def test_invalid_inputs(self):
1612 def test_invalid_inputs(self):
1359 cctx = zstd.ZstdCompressor()
1613 cctx = zstd.ZstdCompressor()
1360
1614
1615 if not hasattr(cctx, 'multi_compress_to_buffer'):
1616 self.skipTest('multi_compress_to_buffer not available')
1617
1361 with self.assertRaises(TypeError):
1618 with self.assertRaises(TypeError):
1362 cctx.multi_compress_to_buffer(True)
1619 cctx.multi_compress_to_buffer(True)
1363
1620
@@ -1370,6 +1627,9 b' class TestCompressor_multi_compress_to_b'
1370 def test_empty_input(self):
1627 def test_empty_input(self):
1371 cctx = zstd.ZstdCompressor()
1628 cctx = zstd.ZstdCompressor()
1372
1629
1630 if not hasattr(cctx, 'multi_compress_to_buffer'):
1631 self.skipTest('multi_compress_to_buffer not available')
1632
1373 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1633 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1374 cctx.multi_compress_to_buffer([])
1634 cctx.multi_compress_to_buffer([])
1375
1635
@@ -1379,6 +1639,9 b' class TestCompressor_multi_compress_to_b'
1379 def test_list_input(self):
1639 def test_list_input(self):
1380 cctx = zstd.ZstdCompressor(write_checksum=True)
1640 cctx = zstd.ZstdCompressor(write_checksum=True)
1381
1641
1642 if not hasattr(cctx, 'multi_compress_to_buffer'):
1643 self.skipTest('multi_compress_to_buffer not available')
1644
1382 original = [b'foo' * 12, b'bar' * 6]
1645 original = [b'foo' * 12, b'bar' * 6]
1383 frames = [cctx.compress(c) for c in original]
1646 frames = [cctx.compress(c) for c in original]
1384 b = cctx.multi_compress_to_buffer(original)
1647 b = cctx.multi_compress_to_buffer(original)
@@ -1394,6 +1657,9 b' class TestCompressor_multi_compress_to_b'
1394 def test_buffer_with_segments_input(self):
1657 def test_buffer_with_segments_input(self):
1395 cctx = zstd.ZstdCompressor(write_checksum=True)
1658 cctx = zstd.ZstdCompressor(write_checksum=True)
1396
1659
1660 if not hasattr(cctx, 'multi_compress_to_buffer'):
1661 self.skipTest('multi_compress_to_buffer not available')
1662
1397 original = [b'foo' * 4, b'bar' * 6]
1663 original = [b'foo' * 4, b'bar' * 6]
1398 frames = [cctx.compress(c) for c in original]
1664 frames = [cctx.compress(c) for c in original]
1399
1665
@@ -1412,6 +1678,9 b' class TestCompressor_multi_compress_to_b'
1412 def test_buffer_with_segments_collection_input(self):
1678 def test_buffer_with_segments_collection_input(self):
1413 cctx = zstd.ZstdCompressor(write_checksum=True)
1679 cctx = zstd.ZstdCompressor(write_checksum=True)
1414
1680
1681 if not hasattr(cctx, 'multi_compress_to_buffer'):
1682 self.skipTest('multi_compress_to_buffer not available')
1683
1415 original = [
1684 original = [
1416 b'foo1',
1685 b'foo1',
1417 b'foo2' * 2,
1686 b'foo2' * 2,
@@ -1449,6 +1718,9 b' class TestCompressor_multi_compress_to_b'
1449
1718
1450 cctx = zstd.ZstdCompressor(write_checksum=True)
1719 cctx = zstd.ZstdCompressor(write_checksum=True)
1451
1720
1721 if not hasattr(cctx, 'multi_compress_to_buffer'):
1722 self.skipTest('multi_compress_to_buffer not available')
1723
1452 frames = []
1724 frames = []
1453 frames.extend(b'x' * 64 for i in range(256))
1725 frames.extend(b'x' * 64 for i in range(256))
1454 frames.extend(b'y' * 64 for i in range(256))
1726 frames.extend(b'y' * 64 for i in range(256))
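The repeated hasattr() guards reflect that multi_compress_to_buffer is not implemented by every backend (the CFFI one lacks it), so the tests skip rather than fail there. The same guard works in application code:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(write_checksum=True)
    items = [b'foo' * 12, b'bar' * 6]

    if hasattr(cctx, 'multi_compress_to_buffer'):
        result = cctx.multi_compress_to_buffer(items, threads=-1)
        assert len(result) == len(items)
    else:
        # Backend without the API (e.g. CFFI): compress items one at a time.
        frames = [cctx.compress(item) for item in items]
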
@@ -12,6 +12,7 b' import zstandard as zstd'
12
12
13 from . common import (
13 from . common import (
14 make_cffi,
14 make_cffi,
15 NonClosingBytesIO,
15 random_input_data,
16 random_input_data,
16 )
17 )
17
18
@@ -19,6 +20,62 b' from . common import ('
19 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
20 @make_cffi
21 @make_cffi
21 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
22 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
23 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 level=strategies.integers(min_value=1, max_value=5),
27 source_read_size=strategies.integers(1, 16384),
28 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
29 def test_stream_source_read(self, original, level, source_read_size,
30 read_size):
31 if read_size == 0:
32 read_size = -1
33
34 refctx = zstd.ZstdCompressor(level=level)
35 ref_frame = refctx.compress(original)
36
37 cctx = zstd.ZstdCompressor(level=level)
38 with cctx.stream_reader(io.BytesIO(original), size=len(original),
39 read_size=source_read_size) as reader:
40 chunks = []
41 while True:
42 chunk = reader.read(read_size)
43 if not chunk:
44 break
45
46 chunks.append(chunk)
47
48 self.assertEqual(b''.join(chunks), ref_frame)
49
50 @hypothesis.settings(
51 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
52 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
53 level=strategies.integers(min_value=1, max_value=5),
54 source_read_size=strategies.integers(1, 16384),
55 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
56 def test_buffer_source_read(self, original, level, source_read_size,
57 read_size):
58 if read_size == 0:
59 read_size = -1
60
61 refctx = zstd.ZstdCompressor(level=level)
62 ref_frame = refctx.compress(original)
63
64 cctx = zstd.ZstdCompressor(level=level)
65 with cctx.stream_reader(original, size=len(original),
66 read_size=source_read_size) as reader:
67 chunks = []
68 while True:
69 chunk = reader.read(read_size)
70 if not chunk:
71 break
72
73 chunks.append(chunk)
74
75 self.assertEqual(b''.join(chunks), ref_frame)
76
77 @hypothesis.settings(
78 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
22 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
79 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
23 level=strategies.integers(min_value=1, max_value=5),
80 level=strategies.integers(min_value=1, max_value=5),
24 source_read_size=strategies.integers(1, 16384),
81 source_read_size=strategies.integers(1, 16384),
@@ -33,15 +90,17 b' class TestCompressor_stream_reader_fuzzi'
33 read_size=source_read_size) as reader:
90 read_size=source_read_size) as reader:
34 chunks = []
91 chunks = []
35 while True:
92 while True:
36 read_size = read_sizes.draw(strategies.integers(1, 16384))
93 read_size = read_sizes.draw(strategies.integers(-1, 16384))
37 chunk = reader.read(read_size)
94 chunk = reader.read(read_size)
95 if not chunk and read_size:
96 break
38
97
39 if not chunk:
40 break
41 chunks.append(chunk)
98 chunks.append(chunk)
42
99
43 self.assertEqual(b''.join(chunks), ref_frame)
100 self.assertEqual(b''.join(chunks), ref_frame)
44
101
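The widened strategy range is the point of this hunk: read(-1) now means read to EOF and read(0) is a legal no-op returning b'', so an empty chunk only signals EOF when the request size was nonzero. A hedged sketch of the loop the fuzzers use, with a fixed size sequence standing in for hypothesis draws:

    import io
    import zstandard as zstd

    def drain(reader, sizes):
        # Collect a stream reader's output using a varying sequence of read sizes.
        chunks = []
        for read_size in sizes:
            chunk = reader.read(read_size)
            if not chunk and read_size:  # b'' is EOF only for nonzero requests
                break
            chunks.append(chunk)
        return b''.join(chunks)

    ref = zstd.ZstdCompressor(level=1).compress(b'hello world')
    reader = zstd.ZstdCompressor(level=1).stream_reader(io.BytesIO(b'hello world'))
    assert drain(reader, [0, 4, 0, -1, 1]) == ref
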
102 @hypothesis.settings(
103 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
45 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
46 level=strategies.integers(min_value=1, max_value=5),
105 level=strategies.integers(min_value=1, max_value=5),
47 source_read_size=strategies.integers(1, 16384),
106 source_read_size=strategies.integers(1, 16384),
@@ -57,14 +116,343 b' class TestCompressor_stream_reader_fuzzi'
57 read_size=source_read_size) as reader:
116 read_size=source_read_size) as reader:
58 chunks = []
117 chunks = []
59 while True:
118 while True:
119 read_size = read_sizes.draw(strategies.integers(-1, 16384))
120 chunk = reader.read(read_size)
121 if not chunk and read_size:
122 break
123
124 chunks.append(chunk)
125
126 self.assertEqual(b''.join(chunks), ref_frame)
127
128 @hypothesis.settings(
129 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
130 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
131 level=strategies.integers(min_value=1, max_value=5),
132 source_read_size=strategies.integers(1, 16384),
133 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
134 def test_stream_source_readinto(self, original, level,
135 source_read_size, read_size):
136 refctx = zstd.ZstdCompressor(level=level)
137 ref_frame = refctx.compress(original)
138
139 cctx = zstd.ZstdCompressor(level=level)
140 with cctx.stream_reader(io.BytesIO(original), size=len(original),
141 read_size=source_read_size) as reader:
142 chunks = []
143 while True:
144 b = bytearray(read_size)
145 count = reader.readinto(b)
146
147 if not count:
148 break
149
150 chunks.append(bytes(b[0:count]))
151
152 self.assertEqual(b''.join(chunks), ref_frame)
153
154 @hypothesis.settings(
155 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
156 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
157 level=strategies.integers(min_value=1, max_value=5),
158 source_read_size=strategies.integers(1, 16384),
159 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
160 def test_buffer_source_readinto(self, original, level,
161 source_read_size, read_size):
162
163 refctx = zstd.ZstdCompressor(level=level)
164 ref_frame = refctx.compress(original)
165
166 cctx = zstd.ZstdCompressor(level=level)
167 with cctx.stream_reader(original, size=len(original),
168 read_size=source_read_size) as reader:
169 chunks = []
170 while True:
171 b = bytearray(read_size)
172 count = reader.readinto(b)
173
174 if not count:
175 break
176
177 chunks.append(bytes(b[0:count]))
178
179 self.assertEqual(b''.join(chunks), ref_frame)
180
181 @hypothesis.settings(
182 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
183 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
184 level=strategies.integers(min_value=1, max_value=5),
185 source_read_size=strategies.integers(1, 16384),
186 read_sizes=strategies.data())
187 def test_stream_source_readinto_variance(self, original, level,
188 source_read_size, read_sizes):
189 refctx = zstd.ZstdCompressor(level=level)
190 ref_frame = refctx.compress(original)
191
192 cctx = zstd.ZstdCompressor(level=level)
193 with cctx.stream_reader(io.BytesIO(original), size=len(original),
194 read_size=source_read_size) as reader:
195 chunks = []
196 while True:
60 read_size = read_sizes.draw(strategies.integers(1, 16384))
197 read_size = read_sizes.draw(strategies.integers(1, 16384))
61 chunk = reader.read(read_size)
198 b = bytearray(read_size)
199 count = reader.readinto(b)
200
201 if not count:
202 break
203
204 chunks.append(bytes(b[0:count]))
205
206 self.assertEqual(b''.join(chunks), ref_frame)
207
208 @hypothesis.settings(
209 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
210 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
211 level=strategies.integers(min_value=1, max_value=5),
212 source_read_size=strategies.integers(1, 16384),
213 read_sizes=strategies.data())
214 def test_buffer_source_readinto_variance(self, original, level,
215 source_read_size, read_sizes):
216
217 refctx = zstd.ZstdCompressor(level=level)
218 ref_frame = refctx.compress(original)
219
220 cctx = zstd.ZstdCompressor(level=level)
221 with cctx.stream_reader(original, size=len(original),
222 read_size=source_read_size) as reader:
223 chunks = []
224 while True:
225 read_size = read_sizes.draw(strategies.integers(1, 16384))
226 b = bytearray(read_size)
227 count = reader.readinto(b)
228
229 if not count:
230 break
231
232 chunks.append(bytes(b[0:count]))
233
234 self.assertEqual(b''.join(chunks), ref_frame)
235
236 @hypothesis.settings(
237 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
238 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
239 level=strategies.integers(min_value=1, max_value=5),
240 source_read_size=strategies.integers(1, 16384),
241 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
242 def test_stream_source_read1(self, original, level, source_read_size,
243 read_size):
244 if read_size == 0:
245 read_size = -1
246
247 refctx = zstd.ZstdCompressor(level=level)
248 ref_frame = refctx.compress(original)
249
250 cctx = zstd.ZstdCompressor(level=level)
251 with cctx.stream_reader(io.BytesIO(original), size=len(original),
252 read_size=source_read_size) as reader:
253 chunks = []
254 while True:
255 chunk = reader.read1(read_size)
62 if not chunk:
256 if not chunk:
63 break
257 break
258
64 chunks.append(chunk)
259 chunks.append(chunk)
65
260
66 self.assertEqual(b''.join(chunks), ref_frame)
261 self.assertEqual(b''.join(chunks), ref_frame)
67
262
263 @hypothesis.settings(
264 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
265 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
266 level=strategies.integers(min_value=1, max_value=5),
267 source_read_size=strategies.integers(1, 16384),
268 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
269 def test_buffer_source_read1(self, original, level, source_read_size,
270 read_size):
271 if read_size == 0:
272 read_size = -1
273
274 refctx = zstd.ZstdCompressor(level=level)
275 ref_frame = refctx.compress(original)
276
277 cctx = zstd.ZstdCompressor(level=level)
278 with cctx.stream_reader(original, size=len(original),
279 read_size=source_read_size) as reader:
280 chunks = []
281 while True:
282 chunk = reader.read1(read_size)
283 if not chunk:
284 break
285
286 chunks.append(chunk)
287
288 self.assertEqual(b''.join(chunks), ref_frame)
289
290 @hypothesis.settings(
291 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
292 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
293 level=strategies.integers(min_value=1, max_value=5),
294 source_read_size=strategies.integers(1, 16384),
295 read_sizes=strategies.data())
296 def test_stream_source_read1_variance(self, original, level, source_read_size,
297 read_sizes):
298 refctx = zstd.ZstdCompressor(level=level)
299 ref_frame = refctx.compress(original)
300
301 cctx = zstd.ZstdCompressor(level=level)
302 with cctx.stream_reader(io.BytesIO(original), size=len(original),
303 read_size=source_read_size) as reader:
304 chunks = []
305 while True:
306 read_size = read_sizes.draw(strategies.integers(-1, 16384))
307 chunk = reader.read1(read_size)
308 if not chunk and read_size:
309 break
310
311 chunks.append(chunk)
312
313 self.assertEqual(b''.join(chunks), ref_frame)
314
315 @hypothesis.settings(
316 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
317 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
318 level=strategies.integers(min_value=1, max_value=5),
319 source_read_size=strategies.integers(1, 16384),
320 read_sizes=strategies.data())
321 def test_buffer_source_read1_variance(self, original, level, source_read_size,
322 read_sizes):
323
324 refctx = zstd.ZstdCompressor(level=level)
325 ref_frame = refctx.compress(original)
326
327 cctx = zstd.ZstdCompressor(level=level)
328 with cctx.stream_reader(original, size=len(original),
329 read_size=source_read_size) as reader:
330 chunks = []
331 while True:
332 read_size = read_sizes.draw(strategies.integers(-1, 16384))
333 chunk = reader.read1(read_size)
334 if not chunk and read_size:
335 break
336
337 chunks.append(chunk)
338
339 self.assertEqual(b''.join(chunks), ref_frame)
340
341
342 @hypothesis.settings(
343 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
344 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
345 level=strategies.integers(min_value=1, max_value=5),
346 source_read_size=strategies.integers(1, 16384),
347 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
348 def test_stream_source_readinto1(self, original, level, source_read_size,
349 read_size):
350 if read_size == 0:
351 read_size = -1
352
353 refctx = zstd.ZstdCompressor(level=level)
354 ref_frame = refctx.compress(original)
355
356 cctx = zstd.ZstdCompressor(level=level)
357 with cctx.stream_reader(io.BytesIO(original), size=len(original),
358 read_size=source_read_size) as reader:
359 chunks = []
360 while True:
361 b = bytearray(read_size)
362 count = reader.readinto1(b)
363
364 if not count:
365 break
366
367 chunks.append(bytes(b[0:count]))
368
369 self.assertEqual(b''.join(chunks), ref_frame)
370
371 @hypothesis.settings(
372 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
373 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
374 level=strategies.integers(min_value=1, max_value=5),
375 source_read_size=strategies.integers(1, 16384),
376 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
377 def test_buffer_source_readinto1(self, original, level, source_read_size,
378 read_size):
379 if read_size == 0:
380 read_size = -1
381
382 refctx = zstd.ZstdCompressor(level=level)
383 ref_frame = refctx.compress(original)
384
385 cctx = zstd.ZstdCompressor(level=level)
386 with cctx.stream_reader(original, size=len(original),
387 read_size=source_read_size) as reader:
388 chunks = []
389 while True:
390 b = bytearray(read_size)
391 count = reader.readinto1(b)
392
393 if not count:
394 break
395
396 chunks.append(bytes(b[0:count]))
397
398 self.assertEqual(b''.join(chunks), ref_frame)
399
400 @hypothesis.settings(
401 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
402 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
403 level=strategies.integers(min_value=1, max_value=5),
404 source_read_size=strategies.integers(1, 16384),
405 read_sizes=strategies.data())
406 def test_stream_source_readinto1_variance(self, original, level, source_read_size,
407 read_sizes):
408 refctx = zstd.ZstdCompressor(level=level)
409 ref_frame = refctx.compress(original)
410
411 cctx = zstd.ZstdCompressor(level=level)
412 with cctx.stream_reader(io.BytesIO(original), size=len(original),
413 read_size=source_read_size) as reader:
414 chunks = []
415 while True:
416 read_size = read_sizes.draw(strategies.integers(1, 16384))
417 b = bytearray(read_size)
418 count = reader.readinto1(b)
419
420 if not count:
421 break
422
423 chunks.append(bytes(b[0:count]))
424
425 self.assertEqual(b''.join(chunks), ref_frame)
426
427 @hypothesis.settings(
428 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
429 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
430 level=strategies.integers(min_value=1, max_value=5),
431 source_read_size=strategies.integers(1, 16384),
432 read_sizes=strategies.data())
433 def test_buffer_source_readinto1_variance(self, original, level, source_read_size,
434 read_sizes):
435
436 refctx = zstd.ZstdCompressor(level=level)
437 ref_frame = refctx.compress(original)
438
439 cctx = zstd.ZstdCompressor(level=level)
440 with cctx.stream_reader(original, size=len(original),
441 read_size=source_read_size) as reader:
442 chunks = []
443 while True:
444 read_size = read_sizes.draw(strategies.integers(1, 16384))
445 b = bytearray(read_size)
446 count = reader.readinto1(b)
447
448 if not count:
449 break
450
451 chunks.append(bytes(b[0:count]))
452
453 self.assertEqual(b''.join(chunks), ref_frame)
454
455
68
456
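The new readinto()/readinto1() fuzzers all share one consumption pattern: fill a caller-supplied bytearray, stop on a zero count, and keep only the filled prefix. A minimal sketch of that pattern outside hypothesis:

    import io
    import zstandard as zstd

    def drain_into(reader, buf_size=8192):
        # readinto() fills a caller-provided buffer and returns the byte count.
        chunks = []
        while True:
            b = bytearray(buf_size)
            count = reader.readinto(b)
            if not count:
                break
            chunks.append(bytes(b[:count]))
        return b''.join(chunks)

    ref = zstd.ZstdCompressor(level=1).compress(b'data' * 4096)
    reader = zstd.ZstdCompressor(level=1).stream_reader(io.BytesIO(b'data' * 4096))
    assert drain_into(reader) == ref
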
69 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
457 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
70 @make_cffi
458 @make_cffi
@@ -77,7 +465,7 b' class TestCompressor_stream_writer_fuzzi'
77 ref_frame = refctx.compress(original)
465 ref_frame = refctx.compress(original)
78
466
79 cctx = zstd.ZstdCompressor(level=level)
467 cctx = zstd.ZstdCompressor(level=level)
80 b = io.BytesIO()
468 b = NonClosingBytesIO()
81 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
469 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
82 compressor.write(original)
470 compressor.write(original)
83
471
@@ -219,6 +607,9 b' class TestCompressor_multi_compress_to_b'
219 write_checksum=True,
607 write_checksum=True,
220 **kwargs)
608 **kwargs)
221
609
610 if not hasattr(cctx, 'multi_compress_to_buffer'):
611 self.skipTest('multi_compress_to_buffer not available')
612
222 result = cctx.multi_compress_to_buffer(original, threads=-1)
613 result = cctx.multi_compress_to_buffer(original, threads=-1)
223
614
224 self.assertEqual(len(result), len(original))
615 self.assertEqual(len(result), len(original))
@@ -15,17 +15,17 b' class TestCompressionParameters(unittest'
15 chain_log=zstd.CHAINLOG_MIN,
15 chain_log=zstd.CHAINLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
18 min_match=zstd.SEARCHLENGTH_MIN + 1,
18 min_match=zstd.MINMATCH_MIN + 1,
19 target_length=zstd.TARGETLENGTH_MIN,
19 target_length=zstd.TARGETLENGTH_MIN,
20 compression_strategy=zstd.STRATEGY_FAST)
20 strategy=zstd.STRATEGY_FAST)
21
21
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
23 chain_log=zstd.CHAINLOG_MAX,
23 chain_log=zstd.CHAINLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
26 min_match=zstd.SEARCHLENGTH_MAX - 1,
26 min_match=zstd.MINMATCH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
27 target_length=zstd.TARGETLENGTH_MAX,
28 compression_strategy=zstd.STRATEGY_BTULTRA)
28 strategy=zstd.STRATEGY_BTULTRA2)
29
29
30 def test_from_level(self):
30 def test_from_level(self):
31 p = zstd.ZstdCompressionParameters.from_level(1)
31 p = zstd.ZstdCompressionParameters.from_level(1)
@@ -43,7 +43,7 b' class TestCompressionParameters(unittest'
43 search_log=4,
43 search_log=4,
44 min_match=5,
44 min_match=5,
45 target_length=8,
45 target_length=8,
46 compression_strategy=1)
46 strategy=1)
47 self.assertEqual(p.window_log, 10)
47 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6)
48 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7)
49 self.assertEqual(p.hash_log, 7)
@@ -59,9 +59,10 b' class TestCompressionParameters(unittest'
59 self.assertEqual(p.threads, 4)
59 self.assertEqual(p.threads, 4)
60
60
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
62 overlap_size_log=6)
62 overlap_log=6)
63 self.assertEqual(p.threads, 2)
63 self.assertEqual(p.threads, 2)
64 self.assertEqual(p.job_size, 1048576)
64 self.assertEqual(p.job_size, 1048576)
65 self.assertEqual(p.overlap_log, 6)
65 self.assertEqual(p.overlap_size_log, 6)
66 self.assertEqual(p.overlap_size_log, 6)
66
67
67 p = zstd.ZstdCompressionParameters(compression_level=-1)
68 p = zstd.ZstdCompressionParameters(compression_level=-1)
@@ -85,8 +86,9 b' class TestCompressionParameters(unittest'
85 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
86 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
86 self.assertEqual(p.ldm_bucket_size_log, 7)
87 self.assertEqual(p.ldm_bucket_size_log, 7)
87
88
88 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=8)
89 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
89 self.assertEqual(p.ldm_hash_every_log, 8)
90 self.assertEqual(p.ldm_hash_every_log, 8)
91 self.assertEqual(p.ldm_hash_rate_log, 8)
90
92
91 def test_estimated_compression_context_size(self):
93 def test_estimated_compression_context_size(self):
92 p = zstd.ZstdCompressionParameters(window_log=20,
94 p = zstd.ZstdCompressionParameters(window_log=20,
@@ -95,12 +97,44 b' class TestCompressionParameters(unittest'
95 search_log=1,
97 search_log=1,
96 min_match=5,
98 min_match=5,
97 target_length=16,
99 target_length=16,
98 compression_strategy=zstd.STRATEGY_DFAST)
100 strategy=zstd.STRATEGY_DFAST)
99
101
100 # 32-bit has slightly different values from 64-bit.
102 # 32-bit has slightly different values from 64-bit.
101 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072,
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072,
102 delta=250)
104 delta=250)
103
105
106 def test_strategy(self):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'):
108 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
109
110 p = zstd.ZstdCompressionParameters(strategy=2)
111 self.assertEqual(p.compression_strategy, 2)
112
113 p = zstd.ZstdCompressionParameters(strategy=3)
114 self.assertEqual(p.compression_strategy, 3)
115
116 def test_ldm_hash_rate_log(self):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'):
118 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
119
120 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
121 self.assertEqual(p.ldm_hash_every_log, 8)
122
123 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
124 self.assertEqual(p.ldm_hash_every_log, 16)
125
126 def test_overlap_log(self):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'):
128 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
129
130 p = zstd.ZstdCompressionParameters(overlap_log=2)
131 self.assertEqual(p.overlap_log, 2)
132 self.assertEqual(p.overlap_size_log, 2)
133
134 p = zstd.ZstdCompressionParameters(overlap_size_log=4)
135 self.assertEqual(p.overlap_log, 4)
136 self.assertEqual(p.overlap_size_log, 4)
137
104
138
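The hunks above track 0.11's parameter renames: compression_strategy becomes strategy, overlap_size_log becomes overlap_log, ldm_hash_every_log becomes ldm_hash_rate_log, and the SEARCHLENGTH_* constants become MINMATCH_*. The old spellings survive as read-back aliases, and supplying both spellings at once raises ValueError. A sketch (combining all three renamed keywords in one constructor call is an assumption; the tests exercise them separately):

    import zstandard as zstd

    p = zstd.ZstdCompressionParameters(strategy=zstd.STRATEGY_FAST,
                                       overlap_log=6,
                                       ldm_hash_rate_log=8)

    # The old attribute spellings remain readable aliases.
    assert p.compression_strategy == zstd.STRATEGY_FAST
    assert p.overlap_size_log == 6
    assert p.ldm_hash_every_log == 8

    try:
        zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
    except ValueError:
        pass  # mixing old and new spellings of the same knob is rejected
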
105 @make_cffi
139 @make_cffi
106 class TestFrameParameters(unittest.TestCase):
140 class TestFrameParameters(unittest.TestCase):
@@ -24,8 +24,8 b' s_hashlog = strategies.integers(min_valu'
24 max_value=zstd.HASHLOG_MAX)
24 max_value=zstd.HASHLOG_MAX)
25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
26 max_value=zstd.SEARCHLOG_MAX)
26 max_value=zstd.SEARCHLOG_MAX)
27 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
27 s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN,
28 max_value=zstd.SEARCHLENGTH_MAX)
28 max_value=zstd.MINMATCH_MAX)
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
30 max_value=zstd.TARGETLENGTH_MAX)
30 max_value=zstd.TARGETLENGTH_MAX)
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
@@ -35,41 +35,42 b' s_strategy = strategies.sampled_from((zs'
35 zstd.STRATEGY_LAZY2,
35 zstd.STRATEGY_LAZY2,
36 zstd.STRATEGY_BTLAZY2,
36 zstd.STRATEGY_BTLAZY2,
37 zstd.STRATEGY_BTOPT,
37 zstd.STRATEGY_BTOPT,
38 zstd.STRATEGY_BTULTRA))
38 zstd.STRATEGY_BTULTRA,
39 zstd.STRATEGY_BTULTRA2))
39
40
40
41
41 @make_cffi
42 @make_cffi
42 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
43 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
43 class TestCompressionParametersHypothesis(unittest.TestCase):
44 class TestCompressionParametersHypothesis(unittest.TestCase):
44 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
45 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
45 s_searchlength, s_targetlength, s_strategy)
46 s_minmatch, s_targetlength, s_strategy)
46 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
47 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
47 searchlength, targetlength, strategy):
48 minmatch, targetlength, strategy):
48 zstd.ZstdCompressionParameters(window_log=windowlog,
49 zstd.ZstdCompressionParameters(window_log=windowlog,
49 chain_log=chainlog,
50 chain_log=chainlog,
50 hash_log=hashlog,
51 hash_log=hashlog,
51 search_log=searchlog,
52 search_log=searchlog,
52 min_match=searchlength,
53 min_match=minmatch,
53 target_length=targetlength,
54 target_length=targetlength,
54 compression_strategy=strategy)
55 strategy=strategy)
55
56
56 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
57 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
57 s_searchlength, s_targetlength, s_strategy)
58 s_minmatch, s_targetlength, s_strategy)
58 def test_estimated_compression_context_size(self, windowlog, chainlog,
59 def test_estimated_compression_context_size(self, windowlog, chainlog,
59 hashlog, searchlog,
60 hashlog, searchlog,
60 searchlength, targetlength,
61 minmatch, targetlength,
61 strategy):
62 strategy):
62 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
63 if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
63 searchlength += 1
64 minmatch += 1
64 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
65 elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
65 searchlength -= 1
66 minmatch -= 1
66
67
67 p = zstd.ZstdCompressionParameters(window_log=windowlog,
68 p = zstd.ZstdCompressionParameters(window_log=windowlog,
68 chain_log=chainlog,
69 chain_log=chainlog,
69 hash_log=hashlog,
70 hash_log=hashlog,
70 search_log=searchlog,
71 search_log=searchlog,
71 min_match=searchlength,
72 min_match=minmatch,
72 target_length=targetlength,
73 target_length=targetlength,
73 compression_strategy=strategy)
74 strategy=strategy)
74 size = p.estimated_compression_context_size()
75 size = p.estimated_compression_context_size()
75
76
@@ -3,6 +3,7 b' import os'
3 import random
3 import random
4 import struct
4 import struct
5 import sys
5 import sys
6 import tempfile
6 import unittest
7 import unittest
7
8
8 import zstandard as zstd
9 import zstandard as zstd
@@ -10,6 +11,7 b' import zstandard as zstd'
10 from .common import (
11 from .common import (
11 generate_samples,
12 generate_samples,
12 make_cffi,
13 make_cffi,
14 NonClosingBytesIO,
13 OpCountingBytesIO,
15 OpCountingBytesIO,
14 )
16 )
15
17
@@ -219,7 +221,7 b' class TestDecompressor_decompress(unitte'
219 cctx = zstd.ZstdCompressor(write_content_size=False)
221 cctx = zstd.ZstdCompressor(write_content_size=False)
220 frame = cctx.compress(source)
222 frame = cctx.compress(source)
221
223
222 dctx = zstd.ZstdDecompressor(max_window_size=1)
224 dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN)
223
225
224 with self.assertRaisesRegexp(
226 with self.assertRaisesRegexp(
225 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
227 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
@@ -302,19 +304,16 b' class TestDecompressor_stream_reader(uni'
302 dctx = zstd.ZstdDecompressor()
304 dctx = zstd.ZstdDecompressor()
303
305
304 with dctx.stream_reader(b'foo') as reader:
306 with dctx.stream_reader(b'foo') as reader:
305 with self.assertRaises(NotImplementedError):
307 with self.assertRaises(io.UnsupportedOperation):
306 reader.readline()
308 reader.readline()
307
309
308 with self.assertRaises(NotImplementedError):
310 with self.assertRaises(io.UnsupportedOperation):
309 reader.readlines()
311 reader.readlines()
310
312
311 with self.assertRaises(NotImplementedError):
313 with self.assertRaises(io.UnsupportedOperation):
312 reader.readall()
313
314 with self.assertRaises(NotImplementedError):
315 iter(reader)
314 iter(reader)
316
315
317 with self.assertRaises(NotImplementedError):
316 with self.assertRaises(io.UnsupportedOperation):
318 next(reader)
317 next(reader)
319
318
320 with self.assertRaises(io.UnsupportedOperation):
319 with self.assertRaises(io.UnsupportedOperation):
@@ -347,15 +346,18 b' class TestDecompressor_stream_reader(uni'
347 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
346 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
348 reader.read(1)
347 reader.read(1)
349
348
350 def test_bad_read_size(self):
349 def test_read_sizes(self):
350 cctx = zstd.ZstdCompressor()
351 foo = cctx.compress(b'foo')
352
351 dctx = zstd.ZstdDecompressor()
353 dctx = zstd.ZstdDecompressor()
352
354
353 with dctx.stream_reader(b'foo') as reader:
355 with dctx.stream_reader(foo) as reader:
354 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
356 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
355 reader.read(-1)
357 reader.read(-2)
356
358
357 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
359 self.assertEqual(reader.read(0), b'')
358 reader.read(0)
360 self.assertEqual(reader.read(), b'foo')
359
361
360 def test_read_buffer(self):
362 def test_read_buffer(self):
361 cctx = zstd.ZstdCompressor()
363 cctx = zstd.ZstdCompressor()
@@ -524,13 +526,243 b' class TestDecompressor_stream_reader(uni'
524 reader = dctx.stream_reader(source)
526 reader = dctx.stream_reader(source)
525
527
526 with reader:
528 with reader:
527 with self.assertRaises(TypeError):
529 reader.read(0)
528 reader.read()
529
530
530 with reader:
531 with reader:
531 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
532 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
532 reader.read(100)
533 reader.read(100)
533
534
535 def test_partial_read(self):
536 # Inspired by https://github.com/indygreg/python-zstandard/issues/71.
537 buffer = io.BytesIO()
538 cctx = zstd.ZstdCompressor()
539 writer = cctx.stream_writer(buffer)
540 writer.write(bytearray(os.urandom(1000000)))
541 writer.flush(zstd.FLUSH_FRAME)
542 buffer.seek(0)
543
544 dctx = zstd.ZstdDecompressor()
545 reader = dctx.stream_reader(buffer)
546
547 while True:
548 chunk = reader.read(8192)
549 if not chunk:
550 break
551
552 def test_read_multiple_frames(self):
553 cctx = zstd.ZstdCompressor()
554 source = io.BytesIO()
555 writer = cctx.stream_writer(source)
556 writer.write(b'foo')
557 writer.flush(zstd.FLUSH_FRAME)
558 writer.write(b'bar')
559 writer.flush(zstd.FLUSH_FRAME)
560
561 dctx = zstd.ZstdDecompressor()
562
563 reader = dctx.stream_reader(source.getvalue())
564 self.assertEqual(reader.read(2), b'fo')
565 self.assertEqual(reader.read(2), b'o')
566 self.assertEqual(reader.read(2), b'ba')
567 self.assertEqual(reader.read(2), b'r')
568
569 source.seek(0)
570 reader = dctx.stream_reader(source)
571 self.assertEqual(reader.read(2), b'fo')
572 self.assertEqual(reader.read(2), b'o')
573 self.assertEqual(reader.read(2), b'ba')
574 self.assertEqual(reader.read(2), b'r')
575
576 reader = dctx.stream_reader(source.getvalue())
577 self.assertEqual(reader.read(3), b'foo')
578 self.assertEqual(reader.read(3), b'bar')
579
580 source.seek(0)
581 reader = dctx.stream_reader(source)
582 self.assertEqual(reader.read(3), b'foo')
583 self.assertEqual(reader.read(3), b'bar')
584
585 reader = dctx.stream_reader(source.getvalue())
586 self.assertEqual(reader.read(4), b'foo')
587 self.assertEqual(reader.read(4), b'bar')
588
589 source.seek(0)
590 reader = dctx.stream_reader(source)
591 self.assertEqual(reader.read(4), b'foo')
592 self.assertEqual(reader.read(4), b'bar')
593
594 reader = dctx.stream_reader(source.getvalue())
595 self.assertEqual(reader.read(128), b'foo')
596 self.assertEqual(reader.read(128), b'bar')
597
598 source.seek(0)
599 reader = dctx.stream_reader(source)
600 self.assertEqual(reader.read(128), b'foo')
601 self.assertEqual(reader.read(128), b'bar')
602
603 # Now tests for reads spanning frames.
604 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
605 self.assertEqual(reader.read(3), b'foo')
606 self.assertEqual(reader.read(3), b'bar')
607
608 source.seek(0)
609 reader = dctx.stream_reader(source, read_across_frames=True)
610 self.assertEqual(reader.read(3), b'foo')
611 self.assertEqual(reader.read(3), b'bar')
612
613 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
614 self.assertEqual(reader.read(6), b'foobar')
615
616 source.seek(0)
617 reader = dctx.stream_reader(source, read_across_frames=True)
618 self.assertEqual(reader.read(6), b'foobar')
619
620 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
621 self.assertEqual(reader.read(7), b'foobar')
622
623 source.seek(0)
624 reader = dctx.stream_reader(source, read_across_frames=True)
625 self.assertEqual(reader.read(7), b'foobar')
626
627 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
628 self.assertEqual(reader.read(128), b'foobar')
629
630 source.seek(0)
631 reader = dctx.stream_reader(source, read_across_frames=True)
632 self.assertEqual(reader.read(128), b'foobar')
633
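test_read_multiple_frames pins down the default frame-at-a-time behavior: a read stops at a frame boundary even when fewer bytes than requested were returned, and the next read resumes in the following frame; read_across_frames=True opts into reads that span frames. Condensed:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO()
    writer = cctx.stream_writer(source)
    writer.write(b'foo')
    writer.flush(zstd.FLUSH_FRAME)
    writer.write(b'bar')
    writer.flush(zstd.FLUSH_FRAME)

    dctx = zstd.ZstdDecompressor()

    # Default: reads stop at frame boundaries.
    reader = dctx.stream_reader(source.getvalue())
    assert reader.read(6) == b'foo'
    assert reader.read(6) == b'bar'

    # Opt in to reads spanning frames.
    reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
    assert reader.read(6) == b'foobar'
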
634 def test_readinto(self):
635 cctx = zstd.ZstdCompressor()
636 foo = cctx.compress(b'foo')
637
638 dctx = zstd.ZstdDecompressor()
639
640 # Attempting to readinto() a non-writable buffer fails.
641 # The exact exception varies based on the backend.
642 reader = dctx.stream_reader(foo)
643 with self.assertRaises(Exception):
644 reader.readinto(b'foobar')
645
646 # readinto() with sufficiently large destination.
647 b = bytearray(1024)
648 reader = dctx.stream_reader(foo)
649 self.assertEqual(reader.readinto(b), 3)
650 self.assertEqual(b[0:3], b'foo')
651 self.assertEqual(reader.readinto(b), 0)
652 self.assertEqual(b[0:3], b'foo')
653
654 # readinto() with small reads.
655 b = bytearray(1024)
656 reader = dctx.stream_reader(foo, read_size=1)
657 self.assertEqual(reader.readinto(b), 3)
658 self.assertEqual(b[0:3], b'foo')
659
660 # Too small destination buffer.
661 b = bytearray(2)
662 reader = dctx.stream_reader(foo)
663 self.assertEqual(reader.readinto(b), 2)
664 self.assertEqual(b[:], b'fo')
665
666 def test_readinto1(self):
667 cctx = zstd.ZstdCompressor()
668 foo = cctx.compress(b'foo')
669
670 dctx = zstd.ZstdDecompressor()
671
672 reader = dctx.stream_reader(foo)
673 with self.assertRaises(Exception):
674 reader.readinto1(b'foobar')
675
676 # Sufficiently large destination.
677 b = bytearray(1024)
678 reader = dctx.stream_reader(foo)
679 self.assertEqual(reader.readinto1(b), 3)
680 self.assertEqual(b[0:3], b'foo')
681 self.assertEqual(reader.readinto1(b), 0)
682 self.assertEqual(b[0:3], b'foo')
683
684 # readinto1() with small reads.
685 b = bytearray(1024)
686 reader = dctx.stream_reader(foo, read_size=1)
687 self.assertEqual(reader.readinto1(b), 3)
688 self.assertEqual(b[0:3], b'foo')
689
690 # Too small destination buffer.
691 b = bytearray(2)
692 reader = dctx.stream_reader(foo)
693 self.assertEqual(reader.readinto1(b), 2)
694 self.assertEqual(b[:], b'fo')
695
696 def test_readall(self):
697 cctx = zstd.ZstdCompressor()
698 foo = cctx.compress(b'foo')
699
700 dctx = zstd.ZstdDecompressor()
701 reader = dctx.stream_reader(foo)
702
703 self.assertEqual(reader.readall(), b'foo')
704
705 def test_read1(self):
706 cctx = zstd.ZstdCompressor()
707 foo = cctx.compress(b'foo')
708
709 dctx = zstd.ZstdDecompressor()
710
711 b = OpCountingBytesIO(foo)
712 reader = dctx.stream_reader(b)
713
714 self.assertEqual(reader.read1(), b'foo')
715 self.assertEqual(b._read_count, 1)
716
717 b = OpCountingBytesIO(foo)
718 reader = dctx.stream_reader(b)
719
720 self.assertEqual(reader.read1(0), b'')
721 self.assertEqual(reader.read1(2), b'fo')
722 self.assertEqual(b._read_count, 1)
723 self.assertEqual(reader.read1(1), b'o')
724 self.assertEqual(b._read_count, 1)
725 self.assertEqual(reader.read1(1), b'')
726 self.assertEqual(b._read_count, 2)
727
728 def test_read_lines(self):
729 cctx = zstd.ZstdCompressor()
730 source = b'\n'.join(('line %d' % i).encode('ascii') for i in range(1024))
731
732 frame = cctx.compress(source)
733
734 dctx = zstd.ZstdDecompressor()
735 reader = dctx.stream_reader(frame)
736 tr = io.TextIOWrapper(reader, encoding='utf-8')
737
738 lines = []
739 for line in tr:
740 lines.append(line.encode('utf-8'))
741
742 self.assertEqual(len(lines), 1024)
743 self.assertEqual(b''.join(lines), source)
744
745 reader = dctx.stream_reader(frame)
746 tr = io.TextIOWrapper(reader, encoding='utf-8')
747
748 lines = tr.readlines()
749 self.assertEqual(len(lines), 1024)
750 self.assertEqual(''.join(lines).encode('utf-8'), source)
751
752 reader = dctx.stream_reader(frame)
753 tr = io.TextIOWrapper(reader, encoding='utf-8')
754
755 lines = []
756 while True:
757 line = tr.readline()
758 if not line:
759 break
760
761 lines.append(line.encode('utf-8'))
762
763 self.assertEqual(len(lines), 1024)
764 self.assertEqual(b''.join(lines), source)
765
534
766
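Because stream_reader now behaves like an io.RawIOBase (readinto(), readable(), read1(), and friends), it can be handed straight to io.TextIOWrapper for line-oriented reads over decompressed text, which is what test_read_lines exercises. A minimal sketch:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b'line one\nline two\n')

    reader = zstd.ZstdDecompressor().stream_reader(frame)
    text = io.TextIOWrapper(reader, encoding='utf-8')

    assert text.readlines() == ['line one\n', 'line two\n']
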
535 @make_cffi
767 @make_cffi
536 class TestDecompressor_decompressobj(unittest.TestCase):
768 class TestDecompressor_decompressobj(unittest.TestCase):
@@ -540,6 +772,9 b' class TestDecompressor_decompressobj(uni'
540 dctx = zstd.ZstdDecompressor()
772 dctx = zstd.ZstdDecompressor()
541 dobj = dctx.decompressobj()
773 dobj = dctx.decompressobj()
542 self.assertEqual(dobj.decompress(data), b'foobar')
774 self.assertEqual(dobj.decompress(data), b'foobar')
775 self.assertIsNone(dobj.flush())
776 self.assertIsNone(dobj.flush(10))
777 self.assertIsNone(dobj.flush(length=100))
543
778
544 def test_input_types(self):
779 def test_input_types(self):
545 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
780 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
@@ -557,7 +792,11 b' class TestDecompressor_decompressobj(uni'
557
792
558 for source in sources:
793 for source in sources:
559 dobj = dctx.decompressobj()
794 dobj = dctx.decompressobj()
795 self.assertIsNone(dobj.flush())
796 self.assertIsNone(dobj.flush(10))
797 self.assertIsNone(dobj.flush(length=100))
560 self.assertEqual(dobj.decompress(source), b'foo')
798 self.assertEqual(dobj.decompress(source), b'foo')
799 self.assertIsNone(dobj.flush())
561
800
562 def test_reuse(self):
801 def test_reuse(self):
563 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
802 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
@@ -568,6 +807,7 b' class TestDecompressor_decompressobj(uni'
568
807
569 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
808 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
570 dobj.decompress(data)
809 dobj.decompress(data)
810 self.assertIsNone(dobj.flush())
571
811
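
The new assertions codify that ZstdDecompressionObj.flush() is a no-op kept for API compatibility with zlib-style decompression objects: it returns None regardless of arguments and may be called before, after, or between decompress() calls. A small sketch:

    import zstandard as zstd

    data = zstd.ZstdCompressor().compress(b'foobar')

    dobj = zstd.ZstdDecompressor().decompressobj()
    assert dobj.decompress(data) == b'foobar'
    # Safe wherever zlib-style code calls flush(); the return value is None.
    dobj.flush()
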
572 def test_bad_write_size(self):
812 def test_bad_write_size(self):
573 dctx = zstd.ZstdDecompressor()
813 dctx = zstd.ZstdDecompressor()
@@ -585,16 +825,141 b' class TestDecompressor_decompressobj(uni'
585 dobj = dctx.decompressobj(write_size=i + 1)
825 dobj = dctx.decompressobj(write_size=i + 1)
586 self.assertEqual(dobj.decompress(data), source)
826 self.assertEqual(dobj.decompress(data), source)
587
827
828
588 def decompress_via_writer(data):
829 def decompress_via_writer(data):
589 buffer = io.BytesIO()
830 buffer = io.BytesIO()
590 dctx = zstd.ZstdDecompressor()
831 dctx = zstd.ZstdDecompressor()
591 with dctx.stream_writer(buffer) as decompressor:
832 decompressor = dctx.stream_writer(buffer)
592 decompressor.write(data)
833 decompressor.write(data)
834
593 return buffer.getvalue()
835 return buffer.getvalue()
594
836
595
837
596 @make_cffi
838 @make_cffi
597 class TestDecompressor_stream_writer(unittest.TestCase):
839 class TestDecompressor_stream_writer(unittest.TestCase):
840 def test_io_api(self):
841 buffer = io.BytesIO()
842 dctx = zstd.ZstdDecompressor()
843 writer = dctx.stream_writer(buffer)
844
845 self.assertFalse(writer.closed)
846 self.assertFalse(writer.isatty())
847 self.assertFalse(writer.readable())
848
849 with self.assertRaises(io.UnsupportedOperation):
850 writer.readline()
851
852 with self.assertRaises(io.UnsupportedOperation):
853 writer.readline(42)
854
855 with self.assertRaises(io.UnsupportedOperation):
856 writer.readline(size=42)
857
858 with self.assertRaises(io.UnsupportedOperation):
859 writer.readlines()
860
861 with self.assertRaises(io.UnsupportedOperation):
862 writer.readlines(42)
863
864 with self.assertRaises(io.UnsupportedOperation):
865 writer.readlines(hint=42)
866
867 with self.assertRaises(io.UnsupportedOperation):
868 writer.seek(0)
869
870 with self.assertRaises(io.UnsupportedOperation):
871 writer.seek(10, os.SEEK_SET)
872
873 self.assertFalse(writer.seekable())
874
875 with self.assertRaises(io.UnsupportedOperation):
876 writer.tell()
877
878 with self.assertRaises(io.UnsupportedOperation):
879 writer.truncate()
880
881 with self.assertRaises(io.UnsupportedOperation):
882 writer.truncate(42)
883
884 with self.assertRaises(io.UnsupportedOperation):
885 writer.truncate(size=42)
886
887 self.assertTrue(writer.writable())
888
889 with self.assertRaises(io.UnsupportedOperation):
890 writer.writelines([])
891
892 with self.assertRaises(io.UnsupportedOperation):
893 writer.read()
894
895 with self.assertRaises(io.UnsupportedOperation):
896 writer.read(42)
897
898 with self.assertRaises(io.UnsupportedOperation):
899 writer.read(size=42)
900
901 with self.assertRaises(io.UnsupportedOperation):
902 writer.readall()
903
904 with self.assertRaises(io.UnsupportedOperation):
905 writer.readinto(None)
906
907 with self.assertRaises(io.UnsupportedOperation):
908 writer.fileno()
909
910 def test_fileno_file(self):
911 with tempfile.TemporaryFile('wb') as tf:
912 dctx = zstd.ZstdDecompressor()
913 writer = dctx.stream_writer(tf)
914
915 self.assertEqual(writer.fileno(), tf.fileno())
916
917 def test_close(self):
918 foo = zstd.ZstdCompressor().compress(b'foo')
919
920 buffer = NonClosingBytesIO()
921 dctx = zstd.ZstdDecompressor()
922 writer = dctx.stream_writer(buffer)
923
924 writer.write(foo)
925 self.assertFalse(writer.closed)
926 self.assertFalse(buffer.closed)
927 writer.close()
928 self.assertTrue(writer.closed)
929 self.assertTrue(buffer.closed)
930
931 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
932 writer.write(b'')
933
934 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
935 writer.flush()
936
937 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
938 with writer:
939 pass
940
941 self.assertEqual(buffer.getvalue(), b'foo')
942
943 # Context manager exit should close stream.
944 buffer = NonClosingBytesIO()
945 writer = dctx.stream_writer(buffer)
946
947 with writer:
948 writer.write(foo)
949
950 self.assertTrue(writer.closed)
951 self.assertEqual(buffer.getvalue(), b'foo')
952
953 def test_flush(self):
954 buffer = OpCountingBytesIO()
955 dctx = zstd.ZstdDecompressor()
956 writer = dctx.stream_writer(buffer)
957
958 writer.flush()
959 self.assertEqual(buffer._flush_count, 1)
960 writer.flush()
961 self.assertEqual(buffer._flush_count, 2)
962
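
These tests establish that the decompression stream_writer now behaves like an io.RawIOBase-style object: write-only, non-seekable, and its close() also closes the wrapped stream (which is why the tests switch to a NonClosingBytesIO helper). A short lifecycle sketch using only behavior asserted above:

    import io
    import zstandard as zstd

    foo = zstd.ZstdCompressor().compress(b'foo')

    buffer = io.BytesIO()
    dctx = zstd.ZstdDecompressor()
    with dctx.stream_writer(buffer) as writer:
        assert writer.writable() and not writer.readable()
        writer.write(foo)
    # Exiting the context manager closes writer *and* buffer, so
    # capture getvalue() before close (or use a non-closing wrapper,
    # as the tests do).
    assert writer.closed and buffer.closed
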
598 def test_empty_roundtrip(self):
963 def test_empty_roundtrip(self):
599 cctx = zstd.ZstdCompressor()
964 cctx = zstd.ZstdCompressor()
600 empty = cctx.compress(b'')
965 empty = cctx.compress(b'')
@@ -616,9 +981,21 b' class TestDecompressor_stream_writer(uni'
616 dctx = zstd.ZstdDecompressor()
981 dctx = zstd.ZstdDecompressor()
617 for source in sources:
982 for source in sources:
618 buffer = io.BytesIO()
983 buffer = io.BytesIO()
984
985 decompressor = dctx.stream_writer(buffer)
986 decompressor.write(source)
987 self.assertEqual(buffer.getvalue(), b'foo')
988
989 buffer = NonClosingBytesIO()
990
619 with dctx.stream_writer(buffer) as decompressor:
991 with dctx.stream_writer(buffer) as decompressor:
620 decompressor.write(source)
992 self.assertEqual(decompressor.write(source), 3)
993
994 self.assertEqual(buffer.getvalue(), b'foo')
621
995
996 buffer = io.BytesIO()
997 writer = dctx.stream_writer(buffer, write_return_read=True)
998 self.assertEqual(writer.write(source), len(source))
622 self.assertEqual(buffer.getvalue(), b'foo')
999 self.assertEqual(buffer.getvalue(), b'foo')
623
1000
624 def test_large_roundtrip(self):
1001 def test_large_roundtrip(self):
@@ -641,7 +1018,7 b' class TestDecompressor_stream_writer(uni'
641 cctx = zstd.ZstdCompressor()
1018 cctx = zstd.ZstdCompressor()
642 compressed = cctx.compress(orig)
1019 compressed = cctx.compress(orig)
643
1020
644 buffer = io.BytesIO()
1021 buffer = NonClosingBytesIO()
645 dctx = zstd.ZstdDecompressor()
1022 dctx = zstd.ZstdDecompressor()
646 with dctx.stream_writer(buffer) as decompressor:
1023 with dctx.stream_writer(buffer) as decompressor:
647 pos = 0
1024 pos = 0
@@ -651,6 +1028,17 b' class TestDecompressor_stream_writer(uni'
651 pos += 8192
1028 pos += 8192
652 self.assertEqual(buffer.getvalue(), orig)
1029 self.assertEqual(buffer.getvalue(), orig)
653
1030
1031 # Again with write_return_read=True
1032 buffer = io.BytesIO()
1033 writer = dctx.stream_writer(buffer, write_return_read=True)
1034 pos = 0
1035 while pos < len(compressed):
1036 pos2 = pos + 8192
1037 chunk = compressed[pos:pos2]
1038 self.assertEqual(writer.write(chunk), len(chunk))
1039 pos += 8192
1040 self.assertEqual(buffer.getvalue(), orig)
1041
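
write_return_read=True switches write() from returning the number of bytes emitted to the destination to returning the number of input bytes consumed, which is what io.RawIOBase.write() callers (e.g. io.BufferedWriter) expect. A sketch:

    import io
    import zstandard as zstd

    compressed = zstd.ZstdCompressor().compress(b'foobar' * 256)

    dctx = zstd.ZstdDecompressor()
    dest = io.BytesIO()
    writer = dctx.stream_writer(dest, write_return_read=True)
    # The return value counts consumed input bytes, so it equals
    # len(compressed) for a fully accepted write.
    assert writer.write(compressed) == len(compressed)
    assert dest.getvalue() == b'foobar' * 256
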
654 def test_dictionary(self):
1042 def test_dictionary(self):
655 samples = []
1043 samples = []
656 for i in range(128):
1044 for i in range(128):
@@ -661,7 +1049,7 b' class TestDecompressor_stream_writer(uni'
661 d = zstd.train_dictionary(8192, samples)
1049 d = zstd.train_dictionary(8192, samples)
662
1050
663 orig = b'foobar' * 16384
1051 orig = b'foobar' * 16384
664 buffer = io.BytesIO()
1052 buffer = NonClosingBytesIO()
665 cctx = zstd.ZstdCompressor(dict_data=d)
1053 cctx = zstd.ZstdCompressor(dict_data=d)
666 with cctx.stream_writer(buffer) as compressor:
1054 with cctx.stream_writer(buffer) as compressor:
667 self.assertEqual(compressor.write(orig), 0)
1055 self.assertEqual(compressor.write(orig), 0)
@@ -670,6 +1058,12 b' class TestDecompressor_stream_writer(uni'
670 buffer = io.BytesIO()
1058 buffer = io.BytesIO()
671
1059
672 dctx = zstd.ZstdDecompressor(dict_data=d)
1060 dctx = zstd.ZstdDecompressor(dict_data=d)
1061 decompressor = dctx.stream_writer(buffer)
1062 self.assertEqual(decompressor.write(compressed), len(orig))
1063 self.assertEqual(buffer.getvalue(), orig)
1064
1065 buffer = NonClosingBytesIO()
1066
673 with dctx.stream_writer(buffer) as decompressor:
1067 with dctx.stream_writer(buffer) as decompressor:
674 self.assertEqual(decompressor.write(compressed), len(orig))
1068 self.assertEqual(decompressor.write(compressed), len(orig))
675
1069
@@ -678,6 +1072,11 b' class TestDecompressor_stream_writer(uni'
678 def test_memory_size(self):
1072 def test_memory_size(self):
679 dctx = zstd.ZstdDecompressor()
1073 dctx = zstd.ZstdDecompressor()
680 buffer = io.BytesIO()
1074 buffer = io.BytesIO()
1075
1076 decompressor = dctx.stream_writer(buffer)
1077 size = decompressor.memory_size()
1078 self.assertGreater(size, 100000)
1079
681 with dctx.stream_writer(buffer) as decompressor:
1080 with dctx.stream_writer(buffer) as decompressor:
682 size = decompressor.memory_size()
1081 size = decompressor.memory_size()
683
1082
@@ -810,7 +1209,7 b' class TestDecompressor_read_to_iter(unit'
810 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
1209 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
811 def test_large_input(self):
1210 def test_large_input(self):
812 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
1211 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
813 compressed = io.BytesIO()
1212 compressed = NonClosingBytesIO()
814 input_size = 0
1213 input_size = 0
815 cctx = zstd.ZstdCompressor(level=1)
1214 cctx = zstd.ZstdCompressor(level=1)
816 with cctx.stream_writer(compressed) as compressor:
1215 with cctx.stream_writer(compressed) as compressor:
@@ -823,7 +1222,7 b' class TestDecompressor_read_to_iter(unit'
823 if have_compressed and have_raw:
1222 if have_compressed and have_raw:
824 break
1223 break
825
1224
826 compressed.seek(0)
1225 compressed = io.BytesIO(compressed.getvalue())
827 self.assertGreater(len(compressed.getvalue()),
1226 self.assertGreater(len(compressed.getvalue()),
828 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
1227 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
829
1228
@@ -861,7 +1260,7 b' class TestDecompressor_read_to_iter(unit'
861
1260
862 source = io.BytesIO()
1261 source = io.BytesIO()
863
1262
864 compressed = io.BytesIO()
1263 compressed = NonClosingBytesIO()
865 with cctx.stream_writer(compressed) as compressor:
1264 with cctx.stream_writer(compressed) as compressor:
866 for i in range(256):
1265 for i in range(256):
867 chunk = b'\0' * 1024
1266 chunk = b'\0' * 1024
@@ -874,7 +1273,7 b' class TestDecompressor_read_to_iter(unit'
874 max_output_size=len(source.getvalue()))
1273 max_output_size=len(source.getvalue()))
875 self.assertEqual(simple, source.getvalue())
1274 self.assertEqual(simple, source.getvalue())
876
1275
877 compressed.seek(0)
1276 compressed = io.BytesIO(compressed.getvalue())
878 streamed = b''.join(dctx.read_to_iter(compressed))
1277 streamed = b''.join(dctx.read_to_iter(compressed))
879 self.assertEqual(streamed, source.getvalue())
1278 self.assertEqual(streamed, source.getvalue())
880
1279
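
Several tests above replace io.BytesIO with NonClosingBytesIO (from the tests' common module) because stream_writer.close() now closes the inner stream; the helper evidently keeps the accumulated bytes readable after close. A minimal sketch of such a helper, assuming that behavior (the real implementation may differ):

    import io

    class NonClosingBytesIO(io.BytesIO):
        # Illustrative sketch: snapshot the buffer on close() so
        # getvalue() keeps working after the stream is closed.
        def __init__(self, *args, **kwargs):
            super(NonClosingBytesIO, self).__init__(*args, **kwargs)
            self._saved_value = None

        def close(self):
            self._saved_value = super(NonClosingBytesIO, self).getvalue()
            return super(NonClosingBytesIO, self).close()

        def getvalue(self):
            if self.closed:
                return self._saved_value
            return super(NonClosingBytesIO, self).getvalue()
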
@@ -1001,6 +1400,9 b' class TestDecompressor_multi_decompress_'
1001 def test_invalid_inputs(self):
1400 def test_invalid_inputs(self):
1002 dctx = zstd.ZstdDecompressor()
1401 dctx = zstd.ZstdDecompressor()
1003
1402
1403 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1404 self.skipTest('multi_decompress_to_buffer not available')
1405
1004 with self.assertRaises(TypeError):
1406 with self.assertRaises(TypeError):
1005 dctx.multi_decompress_to_buffer(True)
1407 dctx.multi_decompress_to_buffer(True)
1006
1408
@@ -1020,6 +1422,10 b' class TestDecompressor_multi_decompress_'
1020 frames = [cctx.compress(d) for d in original]
1422 frames = [cctx.compress(d) for d in original]
1021
1423
1022 dctx = zstd.ZstdDecompressor()
1424 dctx = zstd.ZstdDecompressor()
1425
1426 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1427 self.skipTest('multi_decompress_to_buffer not available')
1428
1023 result = dctx.multi_decompress_to_buffer(frames)
1429 result = dctx.multi_decompress_to_buffer(frames)
1024
1430
1025 self.assertEqual(len(result), len(frames))
1431 self.assertEqual(len(result), len(frames))
@@ -1041,6 +1447,10 b' class TestDecompressor_multi_decompress_'
1041 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1447 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1042
1448
1043 dctx = zstd.ZstdDecompressor()
1449 dctx = zstd.ZstdDecompressor()
1450
1451 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1452 self.skipTest('multi_decompress_to_buffer not available')
1453
1044 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1454 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1045
1455
1046 self.assertEqual(len(result), len(frames))
1456 self.assertEqual(len(result), len(frames))
@@ -1057,6 +1467,9 b' class TestDecompressor_multi_decompress_'
1057
1467
1058 dctx = zstd.ZstdDecompressor()
1468 dctx = zstd.ZstdDecompressor()
1059
1469
1470 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1471 self.skipTest('multi_decompress_to_buffer not available')
1472
1060 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
1473 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
1061 b = zstd.BufferWithSegments(b''.join(frames), segments)
1474 b = zstd.BufferWithSegments(b''.join(frames), segments)
1062
1475
@@ -1074,12 +1487,16 b' class TestDecompressor_multi_decompress_'
1074 frames = [cctx.compress(d) for d in original]
1487 frames = [cctx.compress(d) for d in original]
1075 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1488 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1076
1489
1490 dctx = zstd.ZstdDecompressor()
1491
1492 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1493 self.skipTest('multi_decompress_to_buffer not available')
1494
1077 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
1495 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
1078 len(frames[0]), len(frames[1]),
1496 len(frames[0]), len(frames[1]),
1079 len(frames[0]) + len(frames[1]), len(frames[2]))
1497 len(frames[0]) + len(frames[1]), len(frames[2]))
1080 b = zstd.BufferWithSegments(b''.join(frames), segments)
1498 b = zstd.BufferWithSegments(b''.join(frames), segments)
1081
1499
1082 dctx = zstd.ZstdDecompressor()
1083 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1500 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1084
1501
1085 self.assertEqual(len(result), len(frames))
1502 self.assertEqual(len(result), len(frames))
@@ -1099,10 +1516,14 b' class TestDecompressor_multi_decompress_'
1099 b'foo4' * 6,
1516 b'foo4' * 6,
1100 ]
1517 ]
1101
1518
1519 if not hasattr(cctx, 'multi_compress_to_buffer'):
1520 self.skipTest('multi_compress_to_buffer not available')
1521
1102 frames = cctx.multi_compress_to_buffer(original)
1522 frames = cctx.multi_compress_to_buffer(original)
1103
1523
1104 # Check round trip.
1524 # Check round trip.
1105 dctx = zstd.ZstdDecompressor()
1525 dctx = zstd.ZstdDecompressor()
1526
1106 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
1527 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
1107
1528
1108 self.assertEqual(len(decompressed), len(original))
1529 self.assertEqual(len(decompressed), len(original))
@@ -1138,7 +1559,12 b' class TestDecompressor_multi_decompress_'
1138 frames = [cctx.compress(s) for s in generate_samples()]
1559 frames = [cctx.compress(s) for s in generate_samples()]
1139
1560
1140 dctx = zstd.ZstdDecompressor(dict_data=d)
1561 dctx = zstd.ZstdDecompressor(dict_data=d)
1562
1563 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1564 self.skipTest('multi_decompress_to_buffer not available')
1565
1141 result = dctx.multi_decompress_to_buffer(frames)
1566 result = dctx.multi_decompress_to_buffer(frames)
1567
1142 self.assertEqual([o.tobytes() for o in result], generate_samples())
1568 self.assertEqual([o.tobytes() for o in result], generate_samples())
1143
1569
1144 def test_multiple_threads(self):
1570 def test_multiple_threads(self):
@@ -1149,6 +1575,10 b' class TestDecompressor_multi_decompress_'
1149 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
1575 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
1150
1576
1151 dctx = zstd.ZstdDecompressor()
1577 dctx = zstd.ZstdDecompressor()
1578
1579 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1580 self.skipTest('multi_decompress_to_buffer not available')
1581
1152 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1582 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1153
1583
1154 self.assertEqual(len(result), len(frames))
1584 self.assertEqual(len(result), len(frames))
@@ -1164,6 +1594,9 b' class TestDecompressor_multi_decompress_'
1164
1594
1165 dctx = zstd.ZstdDecompressor()
1595 dctx = zstd.ZstdDecompressor()
1166
1596
1597 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1598 self.skipTest('multi_decompress_to_buffer not available')
1599
1167 with self.assertRaisesRegexp(zstd.ZstdError,
1600 with self.assertRaisesRegexp(zstd.ZstdError,
1168 'error decompressing item 1: ('
1601 'error decompressing item 1: ('
1169 'Corrupted block|'
1602 'Corrupted block|'
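
The repeated hasattr() guards reflect that multi_decompress_to_buffer() (and multi_compress_to_buffer()) are implemented only by the C extension; the CFFI backend used on PyPy does not provide them, so the tests skip rather than fail. Application code can feature-detect the same way (note the multi_* API returns a BufferWithSegmentsCollection rather than a list of bytes):

    import zstandard as zstd

    frames = [zstd.ZstdCompressor().compress(b'foo')]
    dctx = zstd.ZstdDecompressor()

    if hasattr(dctx, 'multi_decompress_to_buffer'):
        result = dctx.multi_decompress_to_buffer(frames)
    else:
        # Fall back to one-at-a-time decompression on backends
        # (e.g. CFFI) that lack the multi_* APIs.
        result = [dctx.decompress(f) for f in frames]
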
@@ -12,6 +12,7 b' import zstandard as zstd'
12
12
13 from . common import (
13 from . common import (
14 make_cffi,
14 make_cffi,
15 NonClosingBytesIO,
15 random_input_data,
16 random_input_data,
16 )
17 )
17
18
@@ -23,22 +24,200 b' class TestDecompressor_stream_reader_fuz'
23 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
24 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
25 level=strategies.integers(min_value=1, max_value=5),
26 level=strategies.integers(min_value=1, max_value=5),
26 source_read_size=strategies.integers(1, 16384),
27 streaming=strategies.booleans(),
28 source_read_size=strategies.integers(1, 1048576),
27 read_sizes=strategies.data())
29 read_sizes=strategies.data())
28 def test_stream_source_read_variance(self, original, level, source_read_size,
30 def test_stream_source_read_variance(self, original, level, streaming,
29 read_sizes):
31 source_read_size, read_sizes):
30 cctx = zstd.ZstdCompressor(level=level)
32 cctx = zstd.ZstdCompressor(level=level)
31 frame = cctx.compress(original)
33
34 if streaming:
35 source = io.BytesIO()
36 writer = cctx.stream_writer(source)
37 writer.write(original)
38 writer.flush(zstd.FLUSH_FRAME)
39 source.seek(0)
40 else:
41 frame = cctx.compress(original)
42 source = io.BytesIO(frame)
32
43
33 dctx = zstd.ZstdDecompressor()
44 dctx = zstd.ZstdDecompressor()
34 source = io.BytesIO(frame)
35
45
36 chunks = []
46 chunks = []
37 with dctx.stream_reader(source, read_size=source_read_size) as reader:
47 with dctx.stream_reader(source, read_size=source_read_size) as reader:
38 while True:
48 while True:
39 read_size = read_sizes.draw(strategies.integers(1, 16384))
49 read_size = read_sizes.draw(strategies.integers(-1, 131072))
50 chunk = reader.read(read_size)
51 if not chunk and read_size:
52 break
53
54 chunks.append(chunk)
55
56 self.assertEqual(b''.join(chunks), original)
57
58 # Similar to above except we have a constant read() size.
59 @hypothesis.settings(
60 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
61 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
62 level=strategies.integers(min_value=1, max_value=5),
63 streaming=strategies.booleans(),
64 source_read_size=strategies.integers(1, 1048576),
65 read_size=strategies.integers(-1, 131072))
66 def test_stream_source_read_size(self, original, level, streaming,
67 source_read_size, read_size):
68 if read_size == 0:
69 read_size = 1
70
71 cctx = zstd.ZstdCompressor(level=level)
72
73 if streaming:
74 source = io.BytesIO()
75 writer = cctx.stream_writer(source)
76 writer.write(original)
77 writer.flush(zstd.FLUSH_FRAME)
78 source.seek(0)
79 else:
80 frame = cctx.compress(original)
81 source = io.BytesIO(frame)
82
83 dctx = zstd.ZstdDecompressor()
84
85 chunks = []
86 reader = dctx.stream_reader(source, read_size=source_read_size)
87 while True:
88 chunk = reader.read(read_size)
89 if not chunk and read_size:
90 break
91
92 chunks.append(chunk)
93
94 self.assertEqual(b''.join(chunks), original)
95
96 @hypothesis.settings(
97 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
98 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
99 level=strategies.integers(min_value=1, max_value=5),
100 streaming=strategies.booleans(),
101 source_read_size=strategies.integers(1, 1048576),
102 read_sizes=strategies.data())
103 def test_buffer_source_read_variance(self, original, level, streaming,
104 source_read_size, read_sizes):
105 cctx = zstd.ZstdCompressor(level=level)
106
107 if streaming:
108 source = io.BytesIO()
109 writer = cctx.stream_writer(source)
110 writer.write(original)
111 writer.flush(zstd.FLUSH_FRAME)
112 frame = source.getvalue()
113 else:
114 frame = cctx.compress(original)
115
116 dctx = zstd.ZstdDecompressor()
117 chunks = []
118
119 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
120 while True:
121 read_size = read_sizes.draw(strategies.integers(-1, 131072))
40 chunk = reader.read(read_size)
122 chunk = reader.read(read_size)
41 if not chunk:
123 if not chunk and read_size:
124 break
125
126 chunks.append(chunk)
127
128 self.assertEqual(b''.join(chunks), original)
129
130 # Similar to above except we have a constant read() size.
131 @hypothesis.settings(
132 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
133 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
134 level=strategies.integers(min_value=1, max_value=5),
135 streaming=strategies.booleans(),
136 source_read_size=strategies.integers(1, 1048576),
137 read_size=strategies.integers(-1, 131072))
138 def test_buffer_source_constant_read_size(self, original, level, streaming,
139 source_read_size, read_size):
140 if read_size == 0:
141 read_size = -1
142
143 cctx = zstd.ZstdCompressor(level=level)
144
145 if streaming:
146 source = io.BytesIO()
147 writer = cctx.stream_writer(source)
148 writer.write(original)
149 writer.flush(zstd.FLUSH_FRAME)
150 frame = source.getvalue()
151 else:
152 frame = cctx.compress(original)
153
154 dctx = zstd.ZstdDecompressor()
155 chunks = []
156
157 reader = dctx.stream_reader(frame, read_size=source_read_size)
158 while True:
159 chunk = reader.read(read_size)
160 if not chunk and read_size:
161 break
162
163 chunks.append(chunk)
164
165 self.assertEqual(b''.join(chunks), original)
166
167 @hypothesis.settings(
168 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
169 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
170 level=strategies.integers(min_value=1, max_value=5),
171 streaming=strategies.booleans(),
172 source_read_size=strategies.integers(1, 1048576))
173 def test_stream_source_readall(self, original, level, streaming,
174 source_read_size):
175 cctx = zstd.ZstdCompressor(level=level)
176
177 if streaming:
178 source = io.BytesIO()
179 writer = cctx.stream_writer(source)
180 writer.write(original)
181 writer.flush(zstd.FLUSH_FRAME)
182 source.seek(0)
183 else:
184 frame = cctx.compress(original)
185 source = io.BytesIO(frame)
186
187 dctx = zstd.ZstdDecompressor()
188
189 data = dctx.stream_reader(source, read_size=source_read_size).readall()
190 self.assertEqual(data, original)
191
192 @hypothesis.settings(
193 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
194 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
195 level=strategies.integers(min_value=1, max_value=5),
196 streaming=strategies.booleans(),
197 source_read_size=strategies.integers(1, 1048576),
198 read_sizes=strategies.data())
199 def test_stream_source_read1_variance(self, original, level, streaming,
200 source_read_size, read_sizes):
201 cctx = zstd.ZstdCompressor(level=level)
202
203 if streaming:
204 source = io.BytesIO()
205 writer = cctx.stream_writer(source)
206 writer.write(original)
207 writer.flush(zstd.FLUSH_FRAME)
208 source.seek(0)
209 else:
210 frame = cctx.compress(original)
211 source = io.BytesIO(frame)
212
213 dctx = zstd.ZstdDecompressor()
214
215 chunks = []
216 with dctx.stream_reader(source, read_size=source_read_size) as reader:
217 while True:
218 read_size = read_sizes.draw(strategies.integers(-1, 131072))
219 chunk = reader.read1(read_size)
220 if not chunk and read_size:
42 break
221 break
43
222
44 chunks.append(chunk)
223 chunks.append(chunk)
@@ -49,24 +228,36 b' class TestDecompressor_stream_reader_fuz'
49 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
228 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
50 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
229 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
51 level=strategies.integers(min_value=1, max_value=5),
230 level=strategies.integers(min_value=1, max_value=5),
52 source_read_size=strategies.integers(1, 16384),
231 streaming=strategies.booleans(),
232 source_read_size=strategies.integers(1, 1048576),
53 read_sizes=strategies.data())
233 read_sizes=strategies.data())
54 def test_buffer_source_read_variance(self, original, level, source_read_size,
234 def test_stream_source_readinto1_variance(self, original, level, streaming,
55 read_sizes):
235 source_read_size, read_sizes):
56 cctx = zstd.ZstdCompressor(level=level)
236 cctx = zstd.ZstdCompressor(level=level)
57 frame = cctx.compress(original)
237
238 if streaming:
239 source = io.BytesIO()
240 writer = cctx.stream_writer(source)
241 writer.write(original)
242 writer.flush(zstd.FLUSH_FRAME)
243 source.seek(0)
244 else:
245 frame = cctx.compress(original)
246 source = io.BytesIO(frame)
58
247
59 dctx = zstd.ZstdDecompressor()
248 dctx = zstd.ZstdDecompressor()
249
60 chunks = []
250 chunks = []
61
251 with dctx.stream_reader(source, read_size=source_read_size) as reader:
62 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
63 while True:
252 while True:
64 read_size = read_sizes.draw(strategies.integers(1, 16384))
253 read_size = read_sizes.draw(strategies.integers(1, 131072))
65 chunk = reader.read(read_size)
254 b = bytearray(read_size)
66 if not chunk:
255 count = reader.readinto1(b)
256
257 if not count:
67 break
258 break
68
259
69 chunks.append(chunk)
260 chunks.append(bytes(b[0:count]))
70
261
71 self.assertEqual(b''.join(chunks), original)
262 self.assertEqual(b''.join(chunks), original)
72
263
@@ -75,7 +266,7 b' class TestDecompressor_stream_reader_fuz'
75 @hypothesis.given(
266 @hypothesis.given(
76 original=strategies.sampled_from(random_input_data()),
267 original=strategies.sampled_from(random_input_data()),
77 level=strategies.integers(min_value=1, max_value=5),
268 level=strategies.integers(min_value=1, max_value=5),
78 source_read_size=strategies.integers(1, 16384),
269 source_read_size=strategies.integers(1, 1048576),
79 seek_amounts=strategies.data(),
270 seek_amounts=strategies.data(),
80 read_sizes=strategies.data())
271 read_sizes=strategies.data())
81 def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
272 def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
@@ -99,6 +290,46 b' class TestDecompressor_stream_reader_fuz'
99
290
100 self.assertEqual(original[offset:offset + len(chunk)], chunk)
291 self.assertEqual(original[offset:offset + len(chunk)], chunk)
101
292
293 @hypothesis.settings(
294 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
295 @hypothesis.given(
296 originals=strategies.data(),
297 frame_count=strategies.integers(min_value=2, max_value=10),
298 level=strategies.integers(min_value=1, max_value=5),
299 source_read_size=strategies.integers(1, 1048576),
300 read_sizes=strategies.data())
301 def test_multiple_frames(self, originals, frame_count, level,
302 source_read_size, read_sizes):
303
304 cctx = zstd.ZstdCompressor(level=level)
305 source = io.BytesIO()
306 buffer = io.BytesIO()
307 writer = cctx.stream_writer(buffer)
308
309 for i in range(frame_count):
310 data = originals.draw(strategies.sampled_from(random_input_data()))
311 source.write(data)
312 writer.write(data)
313 writer.flush(zstd.FLUSH_FRAME)
314
315 dctx = zstd.ZstdDecompressor()
316 buffer.seek(0)
317 reader = dctx.stream_reader(buffer, read_size=source_read_size,
318 read_across_frames=True)
319
320 chunks = []
321
322 while True:
323 read_amount = read_sizes.draw(strategies.integers(-1, 16384))
324 chunk = reader.read(read_amount)
325
326 if not chunk and read_amount:
327 break
328
329 chunks.append(chunk)
330
331 self.assertEqual(source.getvalue(), b''.join(chunks))
332
102
333
103 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
334 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
104 @make_cffi
335 @make_cffi
@@ -113,7 +344,7 b' class TestDecompressor_stream_writer_fuz'
113
344
114 dctx = zstd.ZstdDecompressor()
345 dctx = zstd.ZstdDecompressor()
115 source = io.BytesIO(frame)
346 source = io.BytesIO(frame)
116 dest = io.BytesIO()
347 dest = NonClosingBytesIO()
117
348
118 with dctx.stream_writer(dest, write_size=write_size) as decompressor:
349 with dctx.stream_writer(dest, write_size=write_size) as decompressor:
119 while True:
350 while True:
@@ -234,10 +465,12 b' class TestDecompressor_multi_decompress_'
234 write_checksum=True,
465 write_checksum=True,
235 **kwargs)
466 **kwargs)
236
467
468 if not hasattr(cctx, 'multi_compress_to_buffer'):
469 self.skipTest('multi_compress_to_buffer not available')
470
237 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
471 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
238
472
239 dctx = zstd.ZstdDecompressor(**kwargs)
473 dctx = zstd.ZstdDecompressor(**kwargs)
240
241 result = dctx.multi_decompress_to_buffer(frames_buffer)
474 result = dctx.multi_decompress_to_buffer(frames_buffer)
242
475
243 self.assertEqual(len(result), len(original))
476 self.assertEqual(len(result), len(original))
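
The new streaming variants in these fuzz tests exercise two 0.11 additions together: ZstdCompressionWriter.flush(zstd.FLUSH_FRAME) to end a frame mid-stream, and stream_reader(..., read_across_frames=True) to keep reading past frame boundaries. A sketch of the round trip:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    buffer = io.BytesIO()
    writer = cctx.stream_writer(buffer)
    for chunk in (b'first frame', b'second frame'):
        writer.write(chunk)
        writer.flush(zstd.FLUSH_FRAME)   # end the current zstd frame

    buffer.seek(0)
    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(buffer, read_across_frames=True)
    assert reader.readall() == b'first framesecond frame'
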
@@ -12,9 +12,9 b' from . common import ('
12 @make_cffi
12 @make_cffi
13 class TestModuleAttributes(unittest.TestCase):
13 class TestModuleAttributes(unittest.TestCase):
14 def test_version(self):
14 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 6))
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 8))
16
16
17 self.assertEqual(zstd.__version__, '0.10.1')
17 self.assertEqual(zstd.__version__, '0.11.0')
18
18
19 def test_constants(self):
19 def test_constants(self):
20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -29,6 +29,8 b' class TestModuleAttributes(unittest.Test'
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
31 'MAGIC_NUMBER',
31 'MAGIC_NUMBER',
32 'FLUSH_BLOCK',
33 'FLUSH_FRAME',
32 'BLOCKSIZELOG_MAX',
34 'BLOCKSIZELOG_MAX',
33 'BLOCKSIZE_MAX',
35 'BLOCKSIZE_MAX',
34 'WINDOWLOG_MIN',
36 'WINDOWLOG_MIN',
@@ -38,6 +40,8 b' class TestModuleAttributes(unittest.Test'
38 'HASHLOG_MIN',
40 'HASHLOG_MIN',
39 'HASHLOG_MAX',
41 'HASHLOG_MAX',
40 'HASHLOG3_MAX',
42 'HASHLOG3_MAX',
43 'MINMATCH_MIN',
44 'MINMATCH_MAX',
41 'SEARCHLOG_MIN',
45 'SEARCHLOG_MIN',
42 'SEARCHLOG_MAX',
46 'SEARCHLOG_MAX',
43 'SEARCHLENGTH_MIN',
47 'SEARCHLENGTH_MIN',
@@ -55,6 +59,7 b' class TestModuleAttributes(unittest.Test'
55 'STRATEGY_BTLAZY2',
59 'STRATEGY_BTLAZY2',
56 'STRATEGY_BTOPT',
60 'STRATEGY_BTOPT',
57 'STRATEGY_BTULTRA',
61 'STRATEGY_BTULTRA',
62 'STRATEGY_BTULTRA2',
58 'DICT_TYPE_AUTO',
63 'DICT_TYPE_AUTO',
59 'DICT_TYPE_RAWCONTENT',
64 'DICT_TYPE_RAWCONTENT',
60 'DICT_TYPE_FULLDICT',
65 'DICT_TYPE_FULLDICT',
@@ -35,31 +35,31 b" if _module_policy == 'default':"
35 from zstd import *
35 from zstd import *
36 backend = 'cext'
36 backend = 'cext'
37 elif platform.python_implementation() in ('PyPy',):
37 elif platform.python_implementation() in ('PyPy',):
38 from zstd_cffi import *
38 from .cffi import *
39 backend = 'cffi'
39 backend = 'cffi'
40 else:
40 else:
41 try:
41 try:
42 from zstd import *
42 from zstd import *
43 backend = 'cext'
43 backend = 'cext'
44 except ImportError:
44 except ImportError:
45 from zstd_cffi import *
45 from .cffi import *
46 backend = 'cffi'
46 backend = 'cffi'
47 elif _module_policy == 'cffi_fallback':
47 elif _module_policy == 'cffi_fallback':
48 try:
48 try:
49 from zstd import *
49 from zstd import *
50 backend = 'cext'
50 backend = 'cext'
51 except ImportError:
51 except ImportError:
52 from zstd_cffi import *
52 from .cffi import *
53 backend = 'cffi'
53 backend = 'cffi'
54 elif _module_policy == 'cext':
54 elif _module_policy == 'cext':
55 from zstd import *
55 from zstd import *
56 backend = 'cext'
56 backend = 'cext'
57 elif _module_policy == 'cffi':
57 elif _module_policy == 'cffi':
58 from zstd_cffi import *
58 from .cffi import *
59 backend = 'cffi'
59 backend = 'cffi'
60 else:
60 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 'cext, or cffi' % _module_policy)
62 'cext, or cffi' % _module_policy)
63
63
64 # Keep this in sync with python-zstandard.h.
64 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.10.1'
65 __version__ = '0.11.0'
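
After import, the selected implementation is observable via the module-level `backend` attribute set in each branch above, which is how downstream code can tell whether it got the C extension or the CFFI implementation:

    import zstandard

    # 'cext' when the C extension loaded, 'cffi' otherwise
    # (e.g. on PyPy or under a cffi import policy).
    print(zstandard.backend)
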
@@ -28,6 +28,8 b' from __future__ import absolute_import, '
28 'train_dictionary',
28 'train_dictionary',
29
29
30 # Constants.
30 # Constants.
31 'FLUSH_BLOCK',
32 'FLUSH_FRAME',
31 'COMPRESSOBJ_FLUSH_FINISH',
33 'COMPRESSOBJ_FLUSH_FINISH',
32 'COMPRESSOBJ_FLUSH_BLOCK',
34 'COMPRESSOBJ_FLUSH_BLOCK',
33 'ZSTD_VERSION',
35 'ZSTD_VERSION',
@@ -49,6 +51,8 b' from __future__ import absolute_import, '
49 'HASHLOG_MIN',
51 'HASHLOG_MIN',
50 'HASHLOG_MAX',
52 'HASHLOG_MAX',
51 'HASHLOG3_MAX',
53 'HASHLOG3_MAX',
54 'MINMATCH_MIN',
55 'MINMATCH_MAX',
52 'SEARCHLOG_MIN',
56 'SEARCHLOG_MIN',
53 'SEARCHLOG_MAX',
57 'SEARCHLOG_MAX',
54 'SEARCHLENGTH_MIN',
58 'SEARCHLENGTH_MIN',
@@ -66,6 +70,7 b' from __future__ import absolute_import, '
66 'STRATEGY_BTLAZY2',
70 'STRATEGY_BTLAZY2',
67 'STRATEGY_BTOPT',
71 'STRATEGY_BTOPT',
68 'STRATEGY_BTULTRA',
72 'STRATEGY_BTULTRA',
73 'STRATEGY_BTULTRA2',
69 'DICT_TYPE_AUTO',
74 'DICT_TYPE_AUTO',
70 'DICT_TYPE_RAWCONTENT',
75 'DICT_TYPE_RAWCONTENT',
71 'DICT_TYPE_FULLDICT',
76 'DICT_TYPE_FULLDICT',
@@ -114,10 +119,12 b' CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX'
114 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
119 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
115 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
120 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
116 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
121 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
122 MINMATCH_MIN = lib.ZSTD_MINMATCH_MIN
123 MINMATCH_MAX = lib.ZSTD_MINMATCH_MAX
117 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
124 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
118 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
125 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
119 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
126 SEARCHLENGTH_MIN = lib.ZSTD_MINMATCH_MIN
120 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
127 SEARCHLENGTH_MAX = lib.ZSTD_MINMATCH_MAX
121 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
128 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
122 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
129 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
123 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
130 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
@@ -132,6 +139,7 b' STRATEGY_LAZY2 = lib.ZSTD_lazy2'
132 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
139 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
133 STRATEGY_BTOPT = lib.ZSTD_btopt
140 STRATEGY_BTOPT = lib.ZSTD_btopt
134 STRATEGY_BTULTRA = lib.ZSTD_btultra
141 STRATEGY_BTULTRA = lib.ZSTD_btultra
142 STRATEGY_BTULTRA2 = lib.ZSTD_btultra2
135
143
136 DICT_TYPE_AUTO = lib.ZSTD_dct_auto
144 DICT_TYPE_AUTO = lib.ZSTD_dct_auto
137 DICT_TYPE_RAWCONTENT = lib.ZSTD_dct_rawContent
145 DICT_TYPE_RAWCONTENT = lib.ZSTD_dct_rawContent
@@ -140,6 +148,9 b' DICT_TYPE_FULLDICT = lib.ZSTD_dct_fullDi'
140 FORMAT_ZSTD1 = lib.ZSTD_f_zstd1
148 FORMAT_ZSTD1 = lib.ZSTD_f_zstd1
141 FORMAT_ZSTD1_MAGICLESS = lib.ZSTD_f_zstd1_magicless
149 FORMAT_ZSTD1_MAGICLESS = lib.ZSTD_f_zstd1_magicless
142
150
151 FLUSH_BLOCK = 0
152 FLUSH_FRAME = 1
153
143 COMPRESSOBJ_FLUSH_FINISH = 0
154 COMPRESSOBJ_FLUSH_FINISH = 0
144 COMPRESSOBJ_FLUSH_BLOCK = 1
155 COMPRESSOBJ_FLUSH_BLOCK = 1
145
156
@@ -182,27 +193,27 b' def _make_cctx_params(params):'
182 res = ffi.gc(res, lib.ZSTD_freeCCtxParams)
193 res = ffi.gc(res, lib.ZSTD_freeCCtxParams)
183
194
184 attrs = [
195 attrs = [
185 (lib.ZSTD_p_format, params.format),
196 (lib.ZSTD_c_format, params.format),
186 (lib.ZSTD_p_compressionLevel, params.compression_level),
197 (lib.ZSTD_c_compressionLevel, params.compression_level),
187 (lib.ZSTD_p_windowLog, params.window_log),
198 (lib.ZSTD_c_windowLog, params.window_log),
188 (lib.ZSTD_p_hashLog, params.hash_log),
199 (lib.ZSTD_c_hashLog, params.hash_log),
189 (lib.ZSTD_p_chainLog, params.chain_log),
200 (lib.ZSTD_c_chainLog, params.chain_log),
190 (lib.ZSTD_p_searchLog, params.search_log),
201 (lib.ZSTD_c_searchLog, params.search_log),
191 (lib.ZSTD_p_minMatch, params.min_match),
202 (lib.ZSTD_c_minMatch, params.min_match),
192 (lib.ZSTD_p_targetLength, params.target_length),
203 (lib.ZSTD_c_targetLength, params.target_length),
193 (lib.ZSTD_p_compressionStrategy, params.compression_strategy),
204 (lib.ZSTD_c_strategy, params.compression_strategy),
194 (lib.ZSTD_p_contentSizeFlag, params.write_content_size),
205 (lib.ZSTD_c_contentSizeFlag, params.write_content_size),
195 (lib.ZSTD_p_checksumFlag, params.write_checksum),
206 (lib.ZSTD_c_checksumFlag, params.write_checksum),
196 (lib.ZSTD_p_dictIDFlag, params.write_dict_id),
207 (lib.ZSTD_c_dictIDFlag, params.write_dict_id),
197 (lib.ZSTD_p_nbWorkers, params.threads),
208 (lib.ZSTD_c_nbWorkers, params.threads),
198 (lib.ZSTD_p_jobSize, params.job_size),
209 (lib.ZSTD_c_jobSize, params.job_size),
199 (lib.ZSTD_p_overlapSizeLog, params.overlap_size_log),
210 (lib.ZSTD_c_overlapLog, params.overlap_log),
200 (lib.ZSTD_p_forceMaxWindow, params.force_max_window),
211 (lib.ZSTD_c_forceMaxWindow, params.force_max_window),
201 (lib.ZSTD_p_enableLongDistanceMatching, params.enable_ldm),
212 (lib.ZSTD_c_enableLongDistanceMatching, params.enable_ldm),
202 (lib.ZSTD_p_ldmHashLog, params.ldm_hash_log),
213 (lib.ZSTD_c_ldmHashLog, params.ldm_hash_log),
203 (lib.ZSTD_p_ldmMinMatch, params.ldm_min_match),
214 (lib.ZSTD_c_ldmMinMatch, params.ldm_min_match),
204 (lib.ZSTD_p_ldmBucketSizeLog, params.ldm_bucket_size_log),
215 (lib.ZSTD_c_ldmBucketSizeLog, params.ldm_bucket_size_log),
205 (lib.ZSTD_p_ldmHashEveryLog, params.ldm_hash_every_log),
216 (lib.ZSTD_c_ldmHashRateLog, params.ldm_hash_rate_log),
206 ]
217 ]
207
218
208 for param, value in attrs:
219 for param, value in attrs:
@@ -220,7 +231,7 b' class ZstdCompressionParameters(object):'
220 'chain_log': 'chainLog',
231 'chain_log': 'chainLog',
221 'hash_log': 'hashLog',
232 'hash_log': 'hashLog',
222 'search_log': 'searchLog',
233 'search_log': 'searchLog',
223 'min_match': 'searchLength',
234 'min_match': 'minMatch',
224 'target_length': 'targetLength',
235 'target_length': 'targetLength',
225 'compression_strategy': 'strategy',
236 'compression_strategy': 'strategy',
226 }
237 }
@@ -233,41 +244,170 b' class ZstdCompressionParameters(object):'
233
244
234 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
245 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
235 chain_log=0, search_log=0, min_match=0, target_length=0,
246 chain_log=0, search_log=0, min_match=0, target_length=0,
236 compression_strategy=0, write_content_size=1, write_checksum=0,
247 strategy=-1, compression_strategy=-1,
237 write_dict_id=0, job_size=0, overlap_size_log=0,
248 write_content_size=1, write_checksum=0,
238 force_max_window=0, enable_ldm=0, ldm_hash_log=0,
249 write_dict_id=0, job_size=0, overlap_log=-1,
239 ldm_min_match=0, ldm_bucket_size_log=0, ldm_hash_every_log=0,
250 overlap_size_log=-1, force_max_window=0, enable_ldm=0,
240 threads=0):
251 ldm_hash_log=0, ldm_min_match=0, ldm_bucket_size_log=0,
252 ldm_hash_rate_log=-1, ldm_hash_every_log=-1, threads=0):
253
254 params = lib.ZSTD_createCCtxParams()
255 if params == ffi.NULL:
256 raise MemoryError()
257
258 params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
259
260 self._params = params
241
261
242 if threads < 0:
262 if threads < 0:
243 threads = _cpu_count()
263 threads = _cpu_count()
244
264
245 self.format = format
265 # We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
246 self.compression_level = compression_level
266 # because setting ZSTD_c_nbWorkers resets the other parameters.
247 self.window_log = window_log
267 _set_compression_parameter(params, lib.ZSTD_c_nbWorkers, threads)
248 self.hash_log = hash_log
268
249 self.chain_log = chain_log
269 _set_compression_parameter(params, lib.ZSTD_c_format, format)
250 self.search_log = search_log
270 _set_compression_parameter(params, lib.ZSTD_c_compressionLevel, compression_level)
251 self.min_match = min_match
271 _set_compression_parameter(params, lib.ZSTD_c_windowLog, window_log)
252 self.target_length = target_length
272 _set_compression_parameter(params, lib.ZSTD_c_hashLog, hash_log)
253 self.compression_strategy = compression_strategy
273 _set_compression_parameter(params, lib.ZSTD_c_chainLog, chain_log)
254 self.write_content_size = write_content_size
274 _set_compression_parameter(params, lib.ZSTD_c_searchLog, search_log)
255 self.write_checksum = write_checksum
275 _set_compression_parameter(params, lib.ZSTD_c_minMatch, min_match)
256 self.write_dict_id = write_dict_id
276 _set_compression_parameter(params, lib.ZSTD_c_targetLength, target_length)
257 self.job_size = job_size
277
258 self.overlap_size_log = overlap_size_log
278 if strategy != -1 and compression_strategy != -1:
259 self.force_max_window = force_max_window
279 raise ValueError('cannot specify both compression_strategy and strategy')
260 self.enable_ldm = enable_ldm
280
261 self.ldm_hash_log = ldm_hash_log
281 if compression_strategy != -1:
262 self.ldm_min_match = ldm_min_match
282 strategy = compression_strategy
263 self.ldm_bucket_size_log = ldm_bucket_size_log
283 elif strategy == -1:
264 self.ldm_hash_every_log = ldm_hash_every_log
284 strategy = 0
265 self.threads = threads
285
266
286 _set_compression_parameter(params, lib.ZSTD_c_strategy, strategy)
267 self.params = _make_cctx_params(self)
287 _set_compression_parameter(params, lib.ZSTD_c_contentSizeFlag, write_content_size)
288 _set_compression_parameter(params, lib.ZSTD_c_checksumFlag, write_checksum)
289 _set_compression_parameter(params, lib.ZSTD_c_dictIDFlag, write_dict_id)
290 _set_compression_parameter(params, lib.ZSTD_c_jobSize, job_size)
291
292 if overlap_log != -1 and overlap_size_log != -1:
293 raise ValueError('cannot specify both overlap_log and overlap_size_log')
294
295 if overlap_size_log != -1:
296 overlap_log = overlap_size_log
297 elif overlap_log == -1:
298 overlap_log = 0
299
300 _set_compression_parameter(params, lib.ZSTD_c_overlapLog, overlap_log)
301 _set_compression_parameter(params, lib.ZSTD_c_forceMaxWindow, force_max_window)
302 _set_compression_parameter(params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm)
303 _set_compression_parameter(params, lib.ZSTD_c_ldmHashLog, ldm_hash_log)
304 _set_compression_parameter(params, lib.ZSTD_c_ldmMinMatch, ldm_min_match)
305 _set_compression_parameter(params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log)
306
307 if ldm_hash_rate_log != -1 and ldm_hash_every_log != -1:
308 raise ValueError('cannot specify both ldm_hash_rate_log and ldm_hash_every_log')
309
310 if ldm_hash_every_log != -1:
311 ldm_hash_rate_log = ldm_hash_every_log
312 elif ldm_hash_rate_log == -1:
313 ldm_hash_rate_log = 0
314
315 _set_compression_parameter(params, lib.ZSTD_c_ldmHashRateLog, ldm_hash_rate_log)
316
317 @property
318 def format(self):
319 return _get_compression_parameter(self._params, lib.ZSTD_c_format)
320
321 @property
322 def compression_level(self):
323 return _get_compression_parameter(self._params, lib.ZSTD_c_compressionLevel)
324
325 @property
326 def window_log(self):
327 return _get_compression_parameter(self._params, lib.ZSTD_c_windowLog)
328
329 @property
330 def hash_log(self):
331 return _get_compression_parameter(self._params, lib.ZSTD_c_hashLog)
332
333 @property
334 def chain_log(self):
335 return _get_compression_parameter(self._params, lib.ZSTD_c_chainLog)
336
337 @property
338 def search_log(self):
339 return _get_compression_parameter(self._params, lib.ZSTD_c_searchLog)
340
341 @property
342 def min_match(self):
343 return _get_compression_parameter(self._params, lib.ZSTD_c_minMatch)
344
345 @property
346 def target_length(self):
347 return _get_compression_parameter(self._params, lib.ZSTD_c_targetLength)
348
349 @property
350 def compression_strategy(self):
351 return _get_compression_parameter(self._params, lib.ZSTD_c_strategy)
352
353 @property
354 def write_content_size(self):
355 return _get_compression_parameter(self._params, lib.ZSTD_c_contentSizeFlag)
356
357 @property
358 def write_checksum(self):
359 return _get_compression_parameter(self._params, lib.ZSTD_c_checksumFlag)
360
361 @property
362 def write_dict_id(self):
363 return _get_compression_parameter(self._params, lib.ZSTD_c_dictIDFlag)
364
365 @property
366 def job_size(self):
367 return _get_compression_parameter(self._params, lib.ZSTD_c_jobSize)
368
369 @property
370 def overlap_log(self):
371 return _get_compression_parameter(self._params, lib.ZSTD_c_overlapLog)
372
373 @property
374 def overlap_size_log(self):
375 return self.overlap_log
376
377 @property
378 def force_max_window(self):
379 return _get_compression_parameter(self._params, lib.ZSTD_c_forceMaxWindow)
380
381 @property
382 def enable_ldm(self):
383 return _get_compression_parameter(self._params, lib.ZSTD_c_enableLongDistanceMatching)
384
385 @property
386 def ldm_hash_log(self):
387 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashLog)
388
389 @property
390 def ldm_min_match(self):
391 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmMinMatch)
392
393 @property
394 def ldm_bucket_size_log(self):
395 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmBucketSizeLog)
396
397 @property
398 def ldm_hash_rate_log(self):
399 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashRateLog)
400
401 @property
402 def ldm_hash_every_log(self):
403 return self.ldm_hash_rate_log
404
405 @property
406 def threads(self):
407 return _get_compression_parameter(self._params, lib.ZSTD_c_nbWorkers)
268
408
269 def estimated_compression_context_size(self):
409 def estimated_compression_context_size(self):
270 return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self.params)
410 return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self._params)
271
411
272 CompressionParameters = ZstdCompressionParameters
412 CompressionParameters = ZstdCompressionParameters
273
413
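
With this rewrite the CFFI ZstdCompressionParameters stores everything in a native ZSTD_CCtx_params object and exposes read-only properties, keeping the deprecated names (compression_strategy, overlap_size_log, ldm_hash_every_log) as aliases of the new ones (strategy, overlap_log, ldm_hash_rate_log). A usage sketch, assuming ZstdCompressor accepts these via compression_params as in prior releases:

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters(
        compression_level=3,
        window_log=20,
        strategy=zstd.STRATEGY_BTULTRA2,  # new in zstd 1.3.8
        overlap_log=6,                    # replaces overlap_size_log
    )

    # Old and new names resolve to the same underlying parameter.
    assert params.overlap_size_log == params.overlap_log

    cctx = zstd.ZstdCompressor(compression_params=params)
    frame = cctx.compress(b'data' * 1024)
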
@@ -276,31 +416,53 b' def estimate_decompression_context_size('
276
416
277
417
278 def _set_compression_parameter(params, param, value):
418 def _set_compression_parameter(params, param, value):
279 zresult = lib.ZSTD_CCtxParam_setParameter(params, param,
419 zresult = lib.ZSTD_CCtxParam_setParameter(params, param, value)
280 ffi.cast('unsigned', value))
281 if lib.ZSTD_isError(zresult):
420 if lib.ZSTD_isError(zresult):
282 raise ZstdError('unable to set compression context parameter: %s' %
421 raise ZstdError('unable to set compression context parameter: %s' %
283 _zstd_error(zresult))
422 _zstd_error(zresult))
284
423
424
425 def _get_compression_parameter(params, param):
426 result = ffi.new('int *')
427
428 zresult = lib.ZSTD_CCtxParam_getParameter(params, param, result)
429 if lib.ZSTD_isError(zresult):
430 raise ZstdError('unable to get compression context parameter: %s' %
431 _zstd_error(zresult))
432
433 return result[0]
434
435
285 class ZstdCompressionWriter(object):
436 class ZstdCompressionWriter(object):
286 def __init__(self, compressor, writer, source_size, write_size):
437 def __init__(self, compressor, writer, source_size, write_size,
438 write_return_read):
287 self._compressor = compressor
439 self._compressor = compressor
288 self._writer = writer
440 self._writer = writer
289 self._source_size = source_size
290 self._write_size = write_size
441 self._write_size = write_size
442 self._write_return_read = bool(write_return_read)
291 self._entered = False
443 self._entered = False
444 self._closed = False
292 self._bytes_compressed = 0
445 self._bytes_compressed = 0
293
446
294 def __enter__(self):
447 self._dst_buffer = ffi.new('char[]', write_size)
295 if self._entered:
448 self._out_buffer = ffi.new('ZSTD_outBuffer *')
296 raise ZstdError('cannot __enter__ multiple times')
449 self._out_buffer.dst = self._dst_buffer
297
450 self._out_buffer.size = len(self._dst_buffer)
298 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._compressor._cctx,
451 self._out_buffer.pos = 0
299 self._source_size)
452
453 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx,
454 source_size)
300 if lib.ZSTD_isError(zresult):
455 if lib.ZSTD_isError(zresult):
301 raise ZstdError('error setting source size: %s' %
456 raise ZstdError('error setting source size: %s' %
302 _zstd_error(zresult))
457 _zstd_error(zresult))
303
458
459 def __enter__(self):
460 if self._closed:
461 raise ValueError('stream is closed')
462
463 if self._entered:
464 raise ZstdError('cannot __enter__ multiple times')
465
304 self._entered = True
466 self._entered = True
305 return self
467 return self
306
468
@@ -308,50 +470,79 b' class ZstdCompressionWriter(object):'
308 self._entered = False
470 self._entered = False
309
471
310 if not exc_type and not exc_value and not exc_tb:
472 if not exc_type and not exc_value and not exc_tb:
311 dst_buffer = ffi.new('char[]', self._write_size)
473 self.close()
312
313 out_buffer = ffi.new('ZSTD_outBuffer *')
314 in_buffer = ffi.new('ZSTD_inBuffer *')
315
316 out_buffer.dst = dst_buffer
317 out_buffer.size = len(dst_buffer)
318 out_buffer.pos = 0
319
320 in_buffer.src = ffi.NULL
321 in_buffer.size = 0
322 in_buffer.pos = 0
323
324 while True:
325 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
326 out_buffer, in_buffer,
327 lib.ZSTD_e_end)
328
329 if lib.ZSTD_isError(zresult):
330 raise ZstdError('error ending compression stream: %s' %
331 _zstd_error(zresult))
332
333 if out_buffer.pos:
334 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
335 out_buffer.pos = 0
336
337 if zresult == 0:
338 break
339
474
340 self._compressor = None
475 self._compressor = None
341
476
342 return False
477 return False
343
478
344 def memory_size(self):
479 def memory_size(self):
345 if not self._entered:
346 raise ZstdError('cannot determine size of an inactive compressor; '
347 'call when a context manager is active')
348
349 return lib.ZSTD_sizeof_CCtx(self._compressor._cctx)
480 return lib.ZSTD_sizeof_CCtx(self._compressor._cctx)
350
481
482 def fileno(self):
483 f = getattr(self._writer, 'fileno', None)
484 if f:
485 return f()
486 else:
487 raise OSError('fileno not available on underlying writer')
488
489 def close(self):
490 if self._closed:
491 return
492
493 try:
494 self.flush(FLUSH_FRAME)
495 finally:
496 self._closed = True
497
498 # Call close() on underlying stream as well.
499 f = getattr(self._writer, 'close', None)
500 if f:
501 f()
502
503 @property
504 def closed(self):
505 return self._closed
506
507 def isatty(self):
508 return False
509
510 def readable(self):
511 return False
512
513 def readline(self, size=-1):
514 raise io.UnsupportedOperation()
515
516 def readlines(self, hint=-1):
517 raise io.UnsupportedOperation()
518
519 def seek(self, offset, whence=None):
520 raise io.UnsupportedOperation()
521
522 def seekable(self):
523 return False
524
525 def truncate(self, size=None):
526 raise io.UnsupportedOperation()
527
528 def writable(self):
529 return True
530
531 def writelines(self, lines):
532 raise NotImplementedError('writelines() is not yet implemented')
533
534 def read(self, size=-1):
535 raise io.UnsupportedOperation()
536
537 def readall(self):
538 raise io.UnsupportedOperation()
539
540 def readinto(self, b):
541 raise io.UnsupportedOperation()
542
351 def write(self, data):
543 def write(self, data):
352 if not self._entered:
544 if self._closed:
353 raise ZstdError('write() must be called from an active context '
545 raise ValueError('stream is closed')
354 'manager')
355
546
356 total_write = 0
547 total_write = 0
357
548
@@ -362,16 +553,13 b' class ZstdCompressionWriter(object):'
362 in_buffer.size = len(data_buffer)
553 in_buffer.size = len(data_buffer)
363 in_buffer.pos = 0
554 in_buffer.pos = 0
364
555
365 out_buffer = ffi.new('ZSTD_outBuffer *')
556 out_buffer = self._out_buffer
366 dst_buffer = ffi.new('char[]', self._write_size)
367 out_buffer.dst = dst_buffer
368 out_buffer.size = self._write_size
369 out_buffer.pos = 0
557 out_buffer.pos = 0
370
558
371 while in_buffer.pos < in_buffer.size:
559 while in_buffer.pos < in_buffer.size:
372 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
560 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
373 out_buffer, in_buffer,
561 out_buffer, in_buffer,
374 lib.ZSTD_e_continue)
562 lib.ZSTD_e_continue)
375 if lib.ZSTD_isError(zresult):
563 if lib.ZSTD_isError(zresult):
376 raise ZstdError('zstd compress error: %s' %
564 raise ZstdError('zstd compress error: %s' %
377 _zstd_error(zresult))
565 _zstd_error(zresult))
@@ -382,18 +570,25 b' class ZstdCompressionWriter(object):'
382 self._bytes_compressed += out_buffer.pos
570 self._bytes_compressed += out_buffer.pos
383 out_buffer.pos = 0
571 out_buffer.pos = 0
384
572
385 return total_write
573 if self._write_return_read:
386
574 return in_buffer.pos
387 def flush(self):
575 else:
388 if not self._entered:
576 return total_write
389 raise ZstdError('flush must be called from an active context manager')
577
578 def flush(self, flush_mode=FLUSH_BLOCK):
579 if flush_mode == FLUSH_BLOCK:
580 flush = lib.ZSTD_e_flush
581 elif flush_mode == FLUSH_FRAME:
582 flush = lib.ZSTD_e_end
583 else:
584 raise ValueError('unknown flush_mode: %r' % flush_mode)
585
586 if self._closed:
587 raise ValueError('stream is closed')
390
588
391 total_write = 0
589 total_write = 0
392
590
393 out_buffer = ffi.new('ZSTD_outBuffer *')
591 out_buffer = self._out_buffer
394 dst_buffer = ffi.new('char[]', self._write_size)
395 out_buffer.dst = dst_buffer
396 out_buffer.size = self._write_size
397 out_buffer.pos = 0
592 out_buffer.pos = 0
398
593
399 in_buffer = ffi.new('ZSTD_inBuffer *')
594 in_buffer = ffi.new('ZSTD_inBuffer *')
@@ -402,9 +597,9 b' class ZstdCompressionWriter(object):'
402 in_buffer.pos = 0
597 in_buffer.pos = 0
403
598
404 while True:
599 while True:
405 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
600 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
406 out_buffer, in_buffer,
601 out_buffer, in_buffer,
407 lib.ZSTD_e_flush)
602 flush)
408 if lib.ZSTD_isError(zresult):
603 if lib.ZSTD_isError(zresult):
409 raise ZstdError('zstd compress error: %s' %
604 raise ZstdError('zstd compress error: %s' %
410 _zstd_error(zresult))
605 _zstd_error(zresult))
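
flush() on the compression writer now takes a flush_mode: FLUSH_BLOCK (the old behavior) emits a complete block so everything written so far can be decompressed, while FLUSH_FRAME ends the current frame via ZSTD_e_end; close() performs a FLUSH_FRAME implicitly. A sketch:

    import io
    import zstandard as zstd

    buffer = io.BytesIO()
    cctx = zstd.ZstdCompressor()
    writer = cctx.stream_writer(buffer)

    writer.write(b'chunk one')
    writer.flush(zstd.FLUSH_BLOCK)  # data so far is now decompressable
    writer.write(b'chunk two')
    writer.flush(zstd.FLUSH_FRAME)  # finishes the zstd frame

    data = buffer.getvalue()
    # The streamed frame carries no content size header, so read it
    # back via a stream reader rather than one-shot decompress().
    assert zstd.ZstdDecompressor().stream_reader(data).readall() == \
        b'chunk onechunk two'
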
@@ -438,10 +633,10 b' class ZstdCompressionObj(object):'
438 chunks = []
633 chunks = []
439
634
440 while source.pos < len(data):
635 while source.pos < len(data):
441 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
636 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
442 self._out,
637 self._out,
443 source,
638 source,
444 lib.ZSTD_e_continue)
639 lib.ZSTD_e_continue)
445 if lib.ZSTD_isError(zresult):
640 if lib.ZSTD_isError(zresult):
446 raise ZstdError('zstd compress error: %s' %
641 raise ZstdError('zstd compress error: %s' %
447 _zstd_error(zresult))
642 _zstd_error(zresult))
@@ -477,10 +672,10 b' class ZstdCompressionObj(object):'
477 chunks = []
672 chunks = []
478
673
479 while True:
674 while True:
480 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
675 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
481 self._out,
676 self._out,
482 in_buffer,
677 in_buffer,
483 z_flush_mode)
678 z_flush_mode)
484 if lib.ZSTD_isError(zresult):
679 if lib.ZSTD_isError(zresult):
485 raise ZstdError('error ending compression stream: %s' %
680 raise ZstdError('error ending compression stream: %s' %
486 _zstd_error(zresult))
681 _zstd_error(zresult))
@@ -528,10 +723,10 b' class ZstdCompressionChunker(object):'
528 self._in.pos = 0
723 self._in.pos = 0
529
724
530 while self._in.pos < self._in.size:
725 while self._in.pos < self._in.size:
531 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
726 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
532 self._out,
727 self._out,
533 self._in,
728 self._in,
534 lib.ZSTD_e_continue)
729 lib.ZSTD_e_continue)
535
730
536 if self._in.pos == self._in.size:
731 if self._in.pos == self._in.size:
537 self._in.src = ffi.NULL
732 self._in.src = ffi.NULL
@@ -555,9 +750,9 b' class ZstdCompressionChunker(object):'
555 'previous operation')
750 'previous operation')
556
751
557 while True:
752 while True:
558 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
753 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
559 self._out, self._in,
754 self._out, self._in,
560 lib.ZSTD_e_flush)
755 lib.ZSTD_e_flush)
561 if lib.ZSTD_isError(zresult):
756 if lib.ZSTD_isError(zresult):
562 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
757 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
563
758
@@ -577,9 +772,9 b' class ZstdCompressionChunker(object):'
577 'previous operation')
772 'previous operation')
578
773
579 while True:
774 while True:
580 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
775 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
581 self._out, self._in,
776 self._out, self._in,
582 lib.ZSTD_e_end)
777 lib.ZSTD_e_end)
583 if lib.ZSTD_isError(zresult):
778 if lib.ZSTD_isError(zresult):
584 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
779 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
585
780
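The chunker hunks above only swap ZSTD_compress_generic for ZSTD_compressStream2; the compress/flush/finish protocol is unchanged. Roughly, under the documented API (inputs and chunk size are illustrative):

    import zstandard

    cctx = zstandard.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=16384)

    out = []
    for raw in (b'foo' * 1024, b'bar' * 1024):
        out.extend(chunker.compress(raw))   # zero or more full-size chunks
    out.extend(chunker.flush())             # force out buffered data
    out.extend(chunker.finish())            # end the frame, emit the tail
    compressed = b''.join(out)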
@@ -592,7 +787,7 b' class ZstdCompressionChunker(object):'
592 return
787 return
593
788
594
789
595 class CompressionReader(object):
790 class ZstdCompressionReader(object):
596 def __init__(self, compressor, source, read_size):
791 def __init__(self, compressor, source, read_size):
597 self._compressor = compressor
792 self._compressor = compressor
598 self._source = source
793 self._source = source
@@ -661,7 +856,16 b' class CompressionReader(object):'
661 return self._bytes_compressed
856 return self._bytes_compressed
662
857
663 def readall(self):
858 def readall(self):
664 raise NotImplementedError()
859 chunks = []
860
861 while True:
862 chunk = self.read(1048576)
863 if not chunk:
864 break
865
866 chunks.append(chunk)
867
868 return b''.join(chunks)
665
869
666 def __iter__(self):
870 def __iter__(self):
667 raise io.UnsupportedOperation()
871 raise io.UnsupportedOperation()
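readall() above now loops read() in 1 MiB steps instead of raising NotImplementedError, so the compressing reader can be drained in one call, e.g. (source bytes are illustrative):

    import io
    import zstandard

    cctx = zstandard.ZstdCompressor()
    with cctx.stream_reader(io.BytesIO(b'data' * 4096)) as reader:
        compressed = reader.readall()   # drives read() until EOF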
@@ -671,16 +875,67 b' class CompressionReader(object):'
671
875
672 next = __next__
876 next = __next__
673
877
878 def _read_input(self):
879 if self._finished_input:
880 return
881
882 if hasattr(self._source, 'read'):
883 data = self._source.read(self._read_size)
884
885 if not data:
886 self._finished_input = True
887 return
888
889 self._source_buffer = ffi.from_buffer(data)
890 self._in_buffer.src = self._source_buffer
891 self._in_buffer.size = len(self._source_buffer)
892 self._in_buffer.pos = 0
893 else:
894 self._source_buffer = ffi.from_buffer(self._source)
895 self._in_buffer.src = self._source_buffer
896 self._in_buffer.size = len(self._source_buffer)
897 self._in_buffer.pos = 0
898
899 def _compress_into_buffer(self, out_buffer):
900 if self._in_buffer.pos >= self._in_buffer.size:
901 return
902
903 old_pos = out_buffer.pos
904
905 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
906 out_buffer, self._in_buffer,
907 lib.ZSTD_e_continue)
908
909 self._bytes_compressed += out_buffer.pos - old_pos
910
911 if self._in_buffer.pos == self._in_buffer.size:
912 self._in_buffer.src = ffi.NULL
913 self._in_buffer.pos = 0
914 self._in_buffer.size = 0
915 self._source_buffer = None
916
917 if not hasattr(self._source, 'read'):
918 self._finished_input = True
919
920 if lib.ZSTD_isError(zresult):
921 raise ZstdError('zstd compress error: %s' %
922 _zstd_error(zresult))
923
924 return out_buffer.pos and out_buffer.pos == out_buffer.size
925
674 def read(self, size=-1):
926 def read(self, size=-1):
675 if self._closed:
927 if self._closed:
676 raise ValueError('stream is closed')
928 raise ValueError('stream is closed')
677
929
678 if self._finished_output:
930 if size < -1:
931 raise ValueError('cannot read negative amounts less than -1')
932
933 if size == -1:
934 return self.readall()
935
936 if self._finished_output or size == 0:
679 return b''
937 return b''
680
938
681 if size < 1:
682 raise ValueError('cannot read negative or size 0 amounts')
683
684 # Need a dedicated ref to dest buffer otherwise it gets collected.
939 # Need a dedicated ref to dest buffer otherwise it gets collected.
685 dst_buffer = ffi.new('char[]', size)
940 dst_buffer = ffi.new('char[]', size)
686 out_buffer = ffi.new('ZSTD_outBuffer *')
941 out_buffer = ffi.new('ZSTD_outBuffer *')
@@ -688,71 +943,21 b' class CompressionReader(object):'
688 out_buffer.size = size
943 out_buffer.size = size
689 out_buffer.pos = 0
944 out_buffer.pos = 0
690
945
691 def compress_input():
946 if self._compress_into_buffer(out_buffer):
692 if self._in_buffer.pos >= self._in_buffer.size:
947 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
693 return
694
695 old_pos = out_buffer.pos
696
697 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
698 out_buffer, self._in_buffer,
699 lib.ZSTD_e_continue)
700
701 self._bytes_compressed += out_buffer.pos - old_pos
702
703 if self._in_buffer.pos == self._in_buffer.size:
704 self._in_buffer.src = ffi.NULL
705 self._in_buffer.pos = 0
706 self._in_buffer.size = 0
707 self._source_buffer = None
708
709 if not hasattr(self._source, 'read'):
710 self._finished_input = True
711
712 if lib.ZSTD_isError(zresult):
713 raise ZstdError('zstd compress error: %s',
714 _zstd_error(zresult))
715
716 if out_buffer.pos and out_buffer.pos == out_buffer.size:
717 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
718
719 def get_input():
720 if self._finished_input:
721 return
722
723 if hasattr(self._source, 'read'):
724 data = self._source.read(self._read_size)
725
726 if not data:
727 self._finished_input = True
728 return
729
730 self._source_buffer = ffi.from_buffer(data)
731 self._in_buffer.src = self._source_buffer
732 self._in_buffer.size = len(self._source_buffer)
733 self._in_buffer.pos = 0
734 else:
735 self._source_buffer = ffi.from_buffer(self._source)
736 self._in_buffer.src = self._source_buffer
737 self._in_buffer.size = len(self._source_buffer)
738 self._in_buffer.pos = 0
739
740 result = compress_input()
741 if result:
742 return result
743
948
744 while not self._finished_input:
949 while not self._finished_input:
745 get_input()
950 self._read_input()
746 result = compress_input()
951
747 if result:
952 if self._compress_into_buffer(out_buffer):
748 return result
953 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
749
954
750 # EOF
955 # EOF
751 old_pos = out_buffer.pos
956 old_pos = out_buffer.pos
752
957
753 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
958 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
754 out_buffer, self._in_buffer,
959 out_buffer, self._in_buffer,
755 lib.ZSTD_e_end)
960 lib.ZSTD_e_end)
756
961
757 self._bytes_compressed += out_buffer.pos - old_pos
962 self._bytes_compressed += out_buffer.pos - old_pos
758
963
@@ -765,6 +970,159 b' class CompressionReader(object):'
765
970
766 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
971 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
767
972
973 def read1(self, size=-1):
974 if self._closed:
975 raise ValueError('stream is closed')
976
977 if size < -1:
978 raise ValueError('cannot read negative amounts less than -1')
979
980 if self._finished_output or size == 0:
981 return b''
982
983 # -1 returns arbitrary number of bytes.
984 if size == -1:
985 size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
986
987 dst_buffer = ffi.new('char[]', size)
988 out_buffer = ffi.new('ZSTD_outBuffer *')
989 out_buffer.dst = dst_buffer
990 out_buffer.size = size
991 out_buffer.pos = 0
992
993 # read1() dictates that we can perform at most 1 call to the
994 # underlying stream to get input. However, we can't satisfy this
995 # restriction with compression because not all input generates output.
996 # It is possible to perform a block flush in order to ensure output.
997 # But this may not be desirable behavior. So we allow multiple read()
998 # calls to the underlying stream. But unlike read(), we stop once we
999 # have any output.
1000
1001 self._compress_into_buffer(out_buffer)
1002 if out_buffer.pos:
1003 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1004
1005 while not self._finished_input:
1006 self._read_input()
1007
1008 # If we've filled the output buffer, return immediately.
1009 if self._compress_into_buffer(out_buffer):
1010 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1011
1012 # If we've populated the output buffer and we're not at EOF,
1013 # also return, as we've satisfied the read1() limits.
1014 if out_buffer.pos and not self._finished_input:
1015 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1016
1017 # Else if we're at EOS and we have room left in the buffer,
1018 # fall through to below and try to add more data to the output.
1019
1020 # EOF.
1021 old_pos = out_buffer.pos
1022
1023 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1024 out_buffer, self._in_buffer,
1025 lib.ZSTD_e_end)
1026
1027 self._bytes_compressed += out_buffer.pos - old_pos
1028
1029 if lib.ZSTD_isError(zresult):
1030 raise ZstdError('error ending compression stream: %s' %
1031 _zstd_error(zresult))
1032
1033 if zresult == 0:
1034 self._finished_output = True
1035
1036 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1037
1038 def readinto(self, b):
1039 if self._closed:
1040 raise ValueError('stream is closed')
1041
1042 if self._finished_output:
1043 return 0
1044
1045 # TODO use writable=True once we require CFFI >= 1.12.
1046 dest_buffer = ffi.from_buffer(b)
1047 ffi.memmove(b, b'', 0)
1048 out_buffer = ffi.new('ZSTD_outBuffer *')
1049 out_buffer.dst = dest_buffer
1050 out_buffer.size = len(dest_buffer)
1051 out_buffer.pos = 0
1052
1053 if self._compress_into_buffer(out_buffer):
1054 return out_buffer.pos
1055
1056 while not self._finished_input:
1057 self._read_input()
1058 if self._compress_into_buffer(out_buffer):
1059 return out_buffer.pos
1060
1061 # EOF.
1062 old_pos = out_buffer.pos
1063 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1064 out_buffer, self._in_buffer,
1065 lib.ZSTD_e_end)
1066
1067 self._bytes_compressed += out_buffer.pos - old_pos
1068
1069 if lib.ZSTD_isError(zresult):
1070 raise ZstdError('error ending compression stream: %s' %
1071 _zstd_error(zresult))
1072
1073 if zresult == 0:
1074 self._finished_output = True
1075
1076 return out_buffer.pos
1077
1078 def readinto1(self, b):
1079 if self._closed:
1080 raise ValueError('stream is closed')
1081
1082 if self._finished_output:
1083 return 0
1084
1085 # TODO use writable=True once we require CFFI >= 1.12.
1086 dest_buffer = ffi.from_buffer(b)
1087 ffi.memmove(b, b'', 0)
1088
1089 out_buffer = ffi.new('ZSTD_outBuffer *')
1090 out_buffer.dst = dest_buffer
1091 out_buffer.size = len(dest_buffer)
1092 out_buffer.pos = 0
1093
1094 self._compress_into_buffer(out_buffer)
1095 if out_buffer.pos:
1096 return out_buffer.pos
1097
1098 while not self._finished_input:
1099 self._read_input()
1100
1101 if self._compress_into_buffer(out_buffer):
1102 return out_buffer.pos
1103
1104 if out_buffer.pos and not self._finished_input:
1105 return out_buffer.pos
1106
1107 # EOF.
1108 old_pos = out_buffer.pos
1109
1110 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1111 out_buffer, self._in_buffer,
1112 lib.ZSTD_e_end)
1113
1114 self._bytes_compressed += out_buffer.pos - old_pos
1115
1116 if lib.ZSTD_isError(zresult):
1117 raise ZstdError('error ending compression stream: %s' %
1118 _zstd_error(zresult))
1119
1120 if zresult == 0:
1121 self._finished_output = True
1122
1123 return out_buffer.pos
1124
1125
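The _read_input()/_compress_into_buffer() helpers extracted above let read(), read1(), readinto() and readinto1() share one code path, giving the reader the usual io.BufferedIOBase surface. A sketch of the resulting contract (sizes and payload are arbitrary):

    import io
    import zstandard

    cctx = zstandard.ZstdCompressor()
    reader = cctx.stream_reader(io.BytesIO(b'payload' * 1000))

    first = reader.read1()      # returns as soon as any output exists
    buf = bytearray(8192)
    n = reader.readinto(buf)    # fills buf from the remaining stream
    rest = reader.read()        # size=-1 now delegates to readall()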
768 class ZstdCompressor(object):
1126 class ZstdCompressor(object):
769 def __init__(self, level=3, dict_data=None, compression_params=None,
1127 def __init__(self, level=3, dict_data=None, compression_params=None,
770 write_checksum=None, write_content_size=None,
1128 write_checksum=None, write_content_size=None,
@@ -803,25 +1161,25 b' class ZstdCompressor(object):'
803 self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
1161 self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
804
1162
805 _set_compression_parameter(self._params,
1163 _set_compression_parameter(self._params,
806 lib.ZSTD_p_compressionLevel,
1164 lib.ZSTD_c_compressionLevel,
807 level)
1165 level)
808
1166
809 _set_compression_parameter(
1167 _set_compression_parameter(
810 self._params,
1168 self._params,
811 lib.ZSTD_p_contentSizeFlag,
1169 lib.ZSTD_c_contentSizeFlag,
812 write_content_size if write_content_size is not None else 1)
1170 write_content_size if write_content_size is not None else 1)
813
1171
814 _set_compression_parameter(self._params,
1172 _set_compression_parameter(self._params,
815 lib.ZSTD_p_checksumFlag,
1173 lib.ZSTD_c_checksumFlag,
816 1 if write_checksum else 0)
1174 1 if write_checksum else 0)
817
1175
818 _set_compression_parameter(self._params,
1176 _set_compression_parameter(self._params,
819 lib.ZSTD_p_dictIDFlag,
1177 lib.ZSTD_c_dictIDFlag,
820 1 if write_dict_id else 0)
1178 1 if write_dict_id else 0)
821
1179
822 if threads:
1180 if threads:
823 _set_compression_parameter(self._params,
1181 _set_compression_parameter(self._params,
824 lib.ZSTD_p_nbWorkers,
1182 lib.ZSTD_c_nbWorkers,
825 threads)
1183 threads)
826
1184
827 cctx = lib.ZSTD_createCCtx()
1185 cctx = lib.ZSTD_createCCtx()
@@ -864,7 +1222,7 b' class ZstdCompressor(object):'
864 return lib.ZSTD_sizeof_CCtx(self._cctx)
1222 return lib.ZSTD_sizeof_CCtx(self._cctx)
865
1223
866 def compress(self, data):
1224 def compress(self, data):
867 lib.ZSTD_CCtx_reset(self._cctx)
1225 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
868
1226
869 data_buffer = ffi.from_buffer(data)
1227 data_buffer = ffi.from_buffer(data)
870
1228
@@ -887,10 +1245,10 b' class ZstdCompressor(object):'
887 in_buffer.size = len(data_buffer)
1245 in_buffer.size = len(data_buffer)
888 in_buffer.pos = 0
1246 in_buffer.pos = 0
889
1247
890 zresult = lib.ZSTD_compress_generic(self._cctx,
1248 zresult = lib.ZSTD_compressStream2(self._cctx,
891 out_buffer,
1249 out_buffer,
892 in_buffer,
1250 in_buffer,
893 lib.ZSTD_e_end)
1251 lib.ZSTD_e_end)
894
1252
895 if lib.ZSTD_isError(zresult):
1253 if lib.ZSTD_isError(zresult):
896 raise ZstdError('cannot compress: %s' %
1254 raise ZstdError('cannot compress: %s' %
@@ -901,7 +1259,7 b' class ZstdCompressor(object):'
901 return ffi.buffer(out, out_buffer.pos)[:]
1259 return ffi.buffer(out, out_buffer.pos)[:]
902
1260
903 def compressobj(self, size=-1):
1261 def compressobj(self, size=-1):
904 lib.ZSTD_CCtx_reset(self._cctx)
1262 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
905
1263
906 if size < 0:
1264 if size < 0:
907 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1265 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -923,7 +1281,7 b' class ZstdCompressor(object):'
923 return cobj
1281 return cobj
924
1282
925 def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1283 def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
926 lib.ZSTD_CCtx_reset(self._cctx)
1284 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
927
1285
928 if size < 0:
1286 if size < 0:
929 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1287 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -944,7 +1302,7 b' class ZstdCompressor(object):'
944 if not hasattr(ofh, 'write'):
1302 if not hasattr(ofh, 'write'):
945 raise ValueError('second argument must have a write() method')
1303 raise ValueError('second argument must have a write() method')
946
1304
947 lib.ZSTD_CCtx_reset(self._cctx)
1305 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
948
1306
949 if size < 0:
1307 if size < 0:
950 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1308 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -976,10 +1334,10 b' class ZstdCompressor(object):'
976 in_buffer.pos = 0
1334 in_buffer.pos = 0
977
1335
978 while in_buffer.pos < in_buffer.size:
1336 while in_buffer.pos < in_buffer.size:
979 zresult = lib.ZSTD_compress_generic(self._cctx,
1337 zresult = lib.ZSTD_compressStream2(self._cctx,
980 out_buffer,
1338 out_buffer,
981 in_buffer,
1339 in_buffer,
982 lib.ZSTD_e_continue)
1340 lib.ZSTD_e_continue)
983 if lib.ZSTD_isError(zresult):
1341 if lib.ZSTD_isError(zresult):
984 raise ZstdError('zstd compress error: %s' %
1342 raise ZstdError('zstd compress error: %s' %
985 _zstd_error(zresult))
1343 _zstd_error(zresult))
@@ -991,10 +1349,10 b' class ZstdCompressor(object):'
991
1349
992 # We've finished reading. Flush the compressor.
1350 # We've finished reading. Flush the compressor.
993 while True:
1351 while True:
994 zresult = lib.ZSTD_compress_generic(self._cctx,
1352 zresult = lib.ZSTD_compressStream2(self._cctx,
995 out_buffer,
1353 out_buffer,
996 in_buffer,
1354 in_buffer,
997 lib.ZSTD_e_end)
1355 lib.ZSTD_e_end)
998 if lib.ZSTD_isError(zresult):
1356 if lib.ZSTD_isError(zresult):
999 raise ZstdError('error ending compression stream: %s' %
1357 raise ZstdError('error ending compression stream: %s' %
1000 _zstd_error(zresult))
1358 _zstd_error(zresult))
@@ -1011,7 +1369,7 b' class ZstdCompressor(object):'
1011
1369
1012 def stream_reader(self, source, size=-1,
1370 def stream_reader(self, source, size=-1,
1013 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
1371 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
1014 lib.ZSTD_CCtx_reset(self._cctx)
1372 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1015
1373
1016 try:
1374 try:
1017 size = len(source)
1375 size = len(source)
@@ -1026,20 +1384,22 b' class ZstdCompressor(object):'
1026 raise ZstdError('error setting source size: %s' %
1384 raise ZstdError('error setting source size: %s' %
1027 _zstd_error(zresult))
1385 _zstd_error(zresult))
1028
1386
1029 return CompressionReader(self, source, read_size)
1387 return ZstdCompressionReader(self, source, read_size)
1030
1388
1031 def stream_writer(self, writer, size=-1,
1389 def stream_writer(self, writer, size=-1,
1032 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1390 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1391 write_return_read=False):
1033
1392
1034 if not hasattr(writer, 'write'):
1393 if not hasattr(writer, 'write'):
1035 raise ValueError('must pass an object with a write() method')
1394 raise ValueError('must pass an object with a write() method')
1036
1395
1037 lib.ZSTD_CCtx_reset(self._cctx)
1396 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1038
1397
1039 if size < 0:
1398 if size < 0:
1040 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1399 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1041
1400
1042 return ZstdCompressionWriter(self, writer, size, write_size)
1401 return ZstdCompressionWriter(self, writer, size, write_size,
1402 write_return_read)
1043
1403
1044 write_to = stream_writer
1404 write_to = stream_writer
1045
1405
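stream_writer() now threads through a write_return_read flag; when set, write() reports how much input was consumed (the io.RawIOBase convention) instead of how many compressed bytes were emitted:

    import io
    import zstandard

    cctx = zstandard.ZstdCompressor()
    dest = io.BytesIO()
    with cctx.stream_writer(dest, write_return_read=True) as writer:
        consumed = writer.write(b'x' * 1000)
        assert consumed == 1000   # input accepted, not output produced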
@@ -1056,7 +1416,7 b' class ZstdCompressor(object):'
1056 raise ValueError('must pass an object with a read() method or '
1416 raise ValueError('must pass an object with a read() method or '
1057 'conforms to buffer protocol')
1417 'conforms to buffer protocol')
1058
1418
1059 lib.ZSTD_CCtx_reset(self._cctx)
1419 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1060
1420
1061 if size < 0:
1421 if size < 0:
1062 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1422 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -1104,8 +1464,8 b' class ZstdCompressor(object):'
1104 in_buffer.pos = 0
1464 in_buffer.pos = 0
1105
1465
1106 while in_buffer.pos < in_buffer.size:
1466 while in_buffer.pos < in_buffer.size:
1107 zresult = lib.ZSTD_compress_generic(self._cctx, out_buffer, in_buffer,
1467 zresult = lib.ZSTD_compressStream2(self._cctx, out_buffer, in_buffer,
1108 lib.ZSTD_e_continue)
1468 lib.ZSTD_e_continue)
1109 if lib.ZSTD_isError(zresult):
1469 if lib.ZSTD_isError(zresult):
1110 raise ZstdError('zstd compress error: %s' %
1470 raise ZstdError('zstd compress error: %s' %
1111 _zstd_error(zresult))
1471 _zstd_error(zresult))
@@ -1124,10 +1484,10 b' class ZstdCompressor(object):'
1124 # remains.
1484 # remains.
1125 while True:
1485 while True:
1126 assert out_buffer.pos == 0
1486 assert out_buffer.pos == 0
1127 zresult = lib.ZSTD_compress_generic(self._cctx,
1487 zresult = lib.ZSTD_compressStream2(self._cctx,
1128 out_buffer,
1488 out_buffer,
1129 in_buffer,
1489 in_buffer,
1130 lib.ZSTD_e_end)
1490 lib.ZSTD_e_end)
1131 if lib.ZSTD_isError(zresult):
1491 if lib.ZSTD_isError(zresult):
1132 raise ZstdError('error ending compression stream: %s' %
1492 raise ZstdError('error ending compression stream: %s' %
1133 _zstd_error(zresult))
1493 _zstd_error(zresult))
@@ -1234,7 +1594,7 b' class ZstdCompressionDict(object):'
1234 cparams = ffi.new('ZSTD_compressionParameters')
1594 cparams = ffi.new('ZSTD_compressionParameters')
1235 cparams.chainLog = compression_params.chain_log
1595 cparams.chainLog = compression_params.chain_log
1236 cparams.hashLog = compression_params.hash_log
1596 cparams.hashLog = compression_params.hash_log
1237 cparams.searchLength = compression_params.min_match
1597 cparams.minMatch = compression_params.min_match
1238 cparams.searchLog = compression_params.search_log
1598 cparams.searchLog = compression_params.search_log
1239 cparams.strategy = compression_params.compression_strategy
1599 cparams.strategy = compression_params.compression_strategy
1240 cparams.targetLength = compression_params.target_length
1600 cparams.targetLength = compression_params.target_length
@@ -1345,6 +1705,10 b' class ZstdDecompressionObj(object):'
1345 out_buffer = ffi.new('ZSTD_outBuffer *')
1705 out_buffer = ffi.new('ZSTD_outBuffer *')
1346
1706
1347 data_buffer = ffi.from_buffer(data)
1707 data_buffer = ffi.from_buffer(data)
1708
1709 if len(data_buffer) == 0:
1710 return b''
1711
1348 in_buffer.src = data_buffer
1712 in_buffer.src = data_buffer
1349 in_buffer.size = len(data_buffer)
1713 in_buffer.size = len(data_buffer)
1350 in_buffer.pos = 0
1714 in_buffer.pos = 0
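The new length check short-circuits ZstdDecompressionObj.decompress() on empty input rather than handing a zero-length buffer to the decompressor. Round-trip sketch:

    import zstandard

    compressed = zstandard.ZstdCompressor().compress(b'hello' * 100)

    dobj = zstandard.ZstdDecompressor().decompressobj()
    assert dobj.decompress(b'') == b''        # empty input now returns early
    assert dobj.decompress(compressed) == b'hello' * 100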
@@ -1357,8 +1721,8 b' class ZstdDecompressionObj(object):'
1357 chunks = []
1721 chunks = []
1358
1722
1359 while True:
1723 while True:
1360 zresult = lib.ZSTD_decompress_generic(self._decompressor._dctx,
1724 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1361 out_buffer, in_buffer)
1725 out_buffer, in_buffer)
1362 if lib.ZSTD_isError(zresult):
1726 if lib.ZSTD_isError(zresult):
1363 raise ZstdError('zstd decompressor error: %s' %
1727 raise ZstdError('zstd decompressor error: %s' %
1364 _zstd_error(zresult))
1728 _zstd_error(zresult))
@@ -1378,12 +1742,16 b' class ZstdDecompressionObj(object):'
1378
1742
1379 return b''.join(chunks)
1743 return b''.join(chunks)
1380
1744
1381
1745 def flush(self, length=0):
1382 class DecompressionReader(object):
1746 pass
1383 def __init__(self, decompressor, source, read_size):
1747
1748
1749 class ZstdDecompressionReader(object):
1750 def __init__(self, decompressor, source, read_size, read_across_frames):
1384 self._decompressor = decompressor
1751 self._decompressor = decompressor
1385 self._source = source
1752 self._source = source
1386 self._read_size = read_size
1753 self._read_size = read_size
1754 self._read_across_frames = bool(read_across_frames)
1387 self._entered = False
1755 self._entered = False
1388 self._closed = False
1756 self._closed = False
1389 self._bytes_decompressed = 0
1757 self._bytes_decompressed = 0
@@ -1418,10 +1786,10 b' class DecompressionReader(object):'
1418 return True
1786 return True
1419
1787
1420 def readline(self):
1788 def readline(self):
1421 raise NotImplementedError()
1789 raise io.UnsupportedOperation()
1422
1790
1423 def readlines(self):
1791 def readlines(self):
1424 raise NotImplementedError()
1792 raise io.UnsupportedOperation()
1425
1793
1426 def write(self, data):
1794 def write(self, data):
1427 raise io.UnsupportedOperation()
1795 raise io.UnsupportedOperation()
@@ -1447,25 +1815,158 b' class DecompressionReader(object):'
1447 return self._bytes_decompressed
1815 return self._bytes_decompressed
1448
1816
1449 def readall(self):
1817 def readall(self):
1450 raise NotImplementedError()
1818 chunks = []
1819
1820 while True:
1821 chunk = self.read(1048576)
1822 if not chunk:
1823 break
1824
1825 chunks.append(chunk)
1826
1827 return b''.join(chunks)
1451
1828
1452 def __iter__(self):
1829 def __iter__(self):
1453 raise NotImplementedError()
1830 raise io.UnsupportedOperation()
1454
1831
1455 def __next__(self):
1832 def __next__(self):
1456 raise NotImplementedError()
1833 raise io.UnsupportedOperation()
1457
1834
1458 next = __next__
1835 next = __next__
1459
1836
1460 def read(self, size):
1837 def _read_input(self):
1838 # We have data left over in the input buffer. Use it.
1839 if self._in_buffer.pos < self._in_buffer.size:
1840 return
1841
1842 # All input data exhausted. Nothing to do.
1843 if self._finished_input:
1844 return
1845
1846 # Else populate the input buffer from our source.
1847 if hasattr(self._source, 'read'):
1848 data = self._source.read(self._read_size)
1849
1850 if not data:
1851 self._finished_input = True
1852 return
1853
1854 self._source_buffer = ffi.from_buffer(data)
1855 self._in_buffer.src = self._source_buffer
1856 self._in_buffer.size = len(self._source_buffer)
1857 self._in_buffer.pos = 0
1858 else:
1859 self._source_buffer = ffi.from_buffer(self._source)
1860 self._in_buffer.src = self._source_buffer
1861 self._in_buffer.size = len(self._source_buffer)
1862 self._in_buffer.pos = 0
1863
1864 def _decompress_into_buffer(self, out_buffer):
1865 """Decompress available input into an output buffer.
1866
1867 Returns True if data in output buffer should be emitted.
1868 """
1869 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1870 out_buffer, self._in_buffer)
1871
1872 if self._in_buffer.pos == self._in_buffer.size:
1873 self._in_buffer.src = ffi.NULL
1874 self._in_buffer.pos = 0
1875 self._in_buffer.size = 0
1876 self._source_buffer = None
1877
1878 if not hasattr(self._source, 'read'):
1879 self._finished_input = True
1880
1881 if lib.ZSTD_isError(zresult):
1882 raise ZstdError('zstd decompress error: %s' %
1883 _zstd_error(zresult))
1884
1885 # Emit data if there is data AND either:
1886 # a) output buffer is full (read amount is satisfied)
1887 # b) we're at end of a frame and not in frame spanning mode
1888 return (out_buffer.pos and
1889 (out_buffer.pos == out_buffer.size or
1890 zresult == 0 and not self._read_across_frames))
1891
1892 def read(self, size=-1):
1893 if self._closed:
1894 raise ValueError('stream is closed')
1895
1896 if size < -1:
1897 raise ValueError('cannot read negative amounts less than -1')
1898
1899 if size == -1:
1900 # This is recursive. But it gets the job done.
1901 return self.readall()
1902
1903 if self._finished_output or size == 0:
1904 return b''
1905
1906 # We /could/ call into readinto() here. But that introduces more
1907 # overhead.
1908 dst_buffer = ffi.new('char[]', size)
1909 out_buffer = ffi.new('ZSTD_outBuffer *')
1910 out_buffer.dst = dst_buffer
1911 out_buffer.size = size
1912 out_buffer.pos = 0
1913
1914 self._read_input()
1915 if self._decompress_into_buffer(out_buffer):
1916 self._bytes_decompressed += out_buffer.pos
1917 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1918
1919 while not self._finished_input:
1920 self._read_input()
1921 if self._decompress_into_buffer(out_buffer):
1922 self._bytes_decompressed += out_buffer.pos
1923 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1924
1925 self._bytes_decompressed += out_buffer.pos
1926 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1927
1928 def readinto(self, b):
1461 if self._closed:
1929 if self._closed:
1462 raise ValueError('stream is closed')
1930 raise ValueError('stream is closed')
1463
1931
1464 if self._finished_output:
1932 if self._finished_output:
1933 return 0
1934
1935 # TODO use writable=True once we require CFFI >= 1.12.
1936 dest_buffer = ffi.from_buffer(b)
1937 ffi.memmove(b, b'', 0)
1938 out_buffer = ffi.new('ZSTD_outBuffer *')
1939 out_buffer.dst = dest_buffer
1940 out_buffer.size = len(dest_buffer)
1941 out_buffer.pos = 0
1942
1943 self._read_input()
1944 if self._decompress_into_buffer(out_buffer):
1945 self._bytes_decompressed += out_buffer.pos
1946 return out_buffer.pos
1947
1948 while not self._finished_input:
1949 self._read_input()
1950 if self._decompress_into_buffer(out_buffer):
1951 self._bytes_decompressed += out_buffer.pos
1952 return out_buffer.pos
1953
1954 self._bytes_decompressed += out_buffer.pos
1955 return out_buffer.pos
1956
1957 def read1(self, size=-1):
1958 if self._closed:
1959 raise ValueError('stream is closed')
1960
1961 if size < -1:
1962 raise ValueError('cannot read negative amounts less than -1')
1963
1964 if self._finished_output or size == 0:
1465 return b''
1965 return b''
1466
1966
1467 if size < 1:
1967 # -1 returns arbitrary number of bytes.
1468 raise ValueError('cannot read negative or size 0 amounts')
1968 if size == -1:
1969 size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1469
1970
1470 dst_buffer = ffi.new('char[]', size)
1971 dst_buffer = ffi.new('char[]', size)
1471 out_buffer = ffi.new('ZSTD_outBuffer *')
1972 out_buffer = ffi.new('ZSTD_outBuffer *')
@@ -1473,64 +1974,46 b' class DecompressionReader(object):'
1473 out_buffer.size = size
1974 out_buffer.size = size
1474 out_buffer.pos = 0
1975 out_buffer.pos = 0
1475
1976
1476 def decompress():
1977 # read1() dictates that we can perform at most 1 call to underlying
1477 zresult = lib.ZSTD_decompress_generic(self._decompressor._dctx,
1978 # stream to get input. However, we can't satisfy this restriction with
1478 out_buffer, self._in_buffer)
1979 # decompression because not all input generates output. So we allow
1479
1980 # multiple read() calls. But unlike read(), we stop once we have any output.
1480 if self._in_buffer.pos == self._in_buffer.size:
1481 self._in_buffer.src = ffi.NULL
1482 self._in_buffer.pos = 0
1483 self._in_buffer.size = 0
1484 self._source_buffer = None
1485
1486 if not hasattr(self._source, 'read'):
1487 self._finished_input = True
1488
1489 if lib.ZSTD_isError(zresult):
1490 raise ZstdError('zstd decompress error: %s',
1491 _zstd_error(zresult))
1492 elif zresult == 0:
1493 self._finished_output = True
1494
1495 if out_buffer.pos and out_buffer.pos == out_buffer.size:
1496 self._bytes_decompressed += out_buffer.size
1497 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1498
1499 def get_input():
1500 if self._finished_input:
1501 return
1502
1503 if hasattr(self._source, 'read'):
1504 data = self._source.read(self._read_size)
1505
1506 if not data:
1507 self._finished_input = True
1508 return
1509
1510 self._source_buffer = ffi.from_buffer(data)
1511 self._in_buffer.src = self._source_buffer
1512 self._in_buffer.size = len(self._source_buffer)
1513 self._in_buffer.pos = 0
1514 else:
1515 self._source_buffer = ffi.from_buffer(self._source)
1516 self._in_buffer.src = self._source_buffer
1517 self._in_buffer.size = len(self._source_buffer)
1518 self._in_buffer.pos = 0
1519
1520 get_input()
1521 result = decompress()
1522 if result:
1523 return result
1524
1525 while not self._finished_input:
1981 while not self._finished_input:
1526 get_input()
1982 self._read_input()
1527 result = decompress()
1983 self._decompress_into_buffer(out_buffer)
1528 if result:
1984
1529 return result
1985 if out_buffer.pos:
1986 break
1530
1987
1531 self._bytes_decompressed += out_buffer.pos
1988 self._bytes_decompressed += out_buffer.pos
1532 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1989 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1533
1990
1991 def readinto1(self, b):
1992 if self._closed:
1993 raise ValueError('stream is closed')
1994
1995 if self._finished_output:
1996 return 0
1997
1998 # TODO use writable=True once we require CFFI >= 1.12.
1999 dest_buffer = ffi.from_buffer(b)
2000 ffi.memmove(b, b'', 0)
2001
2002 out_buffer = ffi.new('ZSTD_outBuffer *')
2003 out_buffer.dst = dest_buffer
2004 out_buffer.size = len(dest_buffer)
2005 out_buffer.pos = 0
2006
2007 while not self._finished_input and not self._finished_output:
2008 self._read_input()
2009 self._decompress_into_buffer(out_buffer)
2010
2011 if out_buffer.pos:
2012 break
2013
2014 self._bytes_decompressed += out_buffer.pos
2015 return out_buffer.pos
2016
1534 def seek(self, pos, whence=os.SEEK_SET):
2017 def seek(self, pos, whence=os.SEEK_SET):
1535 if self._closed:
2018 if self._closed:
1536 raise ValueError('stream is closed')
2019 raise ValueError('stream is closed')
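With read() rewritten around _read_input()/_decompress_into_buffer() and the read_across_frames flag, the decompressing reader stops at a frame boundary by default and only continues into the next frame when asked. Over two concatenated frames (payloads are illustrative):

    import io
    import zstandard

    cctx = zstandard.ZstdCompressor()
    frames = cctx.compress(b'a' * 100) + cctx.compress(b'b' * 100)

    dctx = zstandard.ZstdDecompressor()
    with dctx.stream_reader(io.BytesIO(frames),
                            read_across_frames=True) as reader:
        assert reader.readall() == b'a' * 100 + b'b' * 100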
@@ -1569,34 +2052,108 b' class DecompressionReader(object):'
1569 return self._bytes_decompressed
2052 return self._bytes_decompressed
1570
2053
1571 class ZstdDecompressionWriter(object):
2054 class ZstdDecompressionWriter(object):
1572 def __init__(self, decompressor, writer, write_size):
2055 def __init__(self, decompressor, writer, write_size, write_return_read):
2056 decompressor._ensure_dctx()
2057
1573 self._decompressor = decompressor
2058 self._decompressor = decompressor
1574 self._writer = writer
2059 self._writer = writer
1575 self._write_size = write_size
2060 self._write_size = write_size
2061 self._write_return_read = bool(write_return_read)
1576 self._entered = False
2062 self._entered = False
2063 self._closed = False
1577
2064
1578 def __enter__(self):
2065 def __enter__(self):
2066 if self._closed:
2067 raise ValueError('stream is closed')
2068
1579 if self._entered:
2069 if self._entered:
1580 raise ZstdError('cannot __enter__ multiple times')
2070 raise ZstdError('cannot __enter__ multiple times')
1581
2071
1582 self._decompressor._ensure_dctx()
1583 self._entered = True
2072 self._entered = True
1584
2073
1585 return self
2074 return self
1586
2075
1587 def __exit__(self, exc_type, exc_value, exc_tb):
2076 def __exit__(self, exc_type, exc_value, exc_tb):
1588 self._entered = False
2077 self._entered = False
2078 self.close()
1589
2079
1590 def memory_size(self):
2080 def memory_size(self):
1591 if not self._decompressor._dctx:
1592 raise ZstdError('cannot determine size of inactive decompressor '
1593 'call when context manager is active')
1594
1595 return lib.ZSTD_sizeof_DCtx(self._decompressor._dctx)
2081 return lib.ZSTD_sizeof_DCtx(self._decompressor._dctx)
1596
2082
2083 def close(self):
2084 if self._closed:
2085 return
2086
2087 try:
2088 self.flush()
2089 finally:
2090 self._closed = True
2091
2092 f = getattr(self._writer, 'close', None)
2093 if f:
2094 f()
2095
2096 @property
2097 def closed(self):
2098 return self._closed
2099
2100 def fileno(self):
2101 f = getattr(self._writer, 'fileno', None)
2102 if f:
2103 return f()
2104 else:
2105 raise OSError('fileno not available on underlying writer')
2106
2107 def flush(self):
2108 if self._closed:
2109 raise ValueError('stream is closed')
2110
2111 f = getattr(self._writer, 'flush', None)
2112 if f:
2113 return f()
2114
2115 def isatty(self):
2116 return False
2117
2118 def readable(self):
2119 return False
2120
2121 def readline(self, size=-1):
2122 raise io.UnsupportedOperation()
2123
2124 def readlines(self, hint=-1):
2125 raise io.UnsupportedOperation()
2126
2127 def seek(self, offset, whence=None):
2128 raise io.UnsupportedOperation()
2129
2130 def seekable(self):
2131 return False
2132
2133 def tell(self):
2134 raise io.UnsupportedOperation()
2135
2136 def truncate(self, size=None):
2137 raise io.UnsupportedOperation()
2138
2139 def writable(self):
2140 return True
2141
2142 def writelines(self, lines):
2143 raise io.UnsupportedOperation()
2144
2145 def read(self, size=-1):
2146 raise io.UnsupportedOperation()
2147
2148 def readall(self):
2149 raise io.UnsupportedOperation()
2150
2151 def readinto(self, b):
2152 raise io.UnsupportedOperation()
2153
1597 def write(self, data):
2154 def write(self, data):
1598 if not self._entered:
2155 if self._closed:
1599 raise ZstdError('write must be called from an active context manager')
2156 raise ValueError('stream is closed')
1600
2157
1601 total_write = 0
2158 total_write = 0
1602
2159
@@ -1616,7 +2173,7 b' class ZstdDecompressionWriter(object):'
1616 dctx = self._decompressor._dctx
2173 dctx = self._decompressor._dctx
1617
2174
1618 while in_buffer.pos < in_buffer.size:
2175 while in_buffer.pos < in_buffer.size:
1619 zresult = lib.ZSTD_decompress_generic(dctx, out_buffer, in_buffer)
2176 zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer)
1620 if lib.ZSTD_isError(zresult):
2177 if lib.ZSTD_isError(zresult):
1621 raise ZstdError('zstd decompress error: %s' %
2178 raise ZstdError('zstd decompress error: %s' %
1622 _zstd_error(zresult))
2179 _zstd_error(zresult))
@@ -1626,7 +2183,10 b' class ZstdDecompressionWriter(object):'
1626 total_write += out_buffer.pos
2183 total_write += out_buffer.pos
1627 out_buffer.pos = 0
2184 out_buffer.pos = 0
1628
2185
1629 return total_write
2186 if self._write_return_read:
2187 return in_buffer.pos
2188 else:
2189 return total_write
1630
2190
1631
2191
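ZstdDecompressionWriter now behaves like a full io.RawIOBase object: close() flushes and also closes the inner writer, and write_return_read makes write() report compressed input consumed. For instance:

    import io
    import zstandard

    compressed = zstandard.ZstdCompressor().compress(b'hello world')

    dctx = zstandard.ZstdDecompressor()
    dest = io.BytesIO()
    writer = dctx.stream_writer(dest, write_return_read=True)
    consumed = writer.write(compressed)    # compressed bytes consumed
    assert dest.getvalue() == b'hello world'
    writer.close()                         # flushes, then closes dest too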
1632 class ZstdDecompressor(object):
2192 class ZstdDecompressor(object):
@@ -1684,7 +2244,7 b' class ZstdDecompressor(object):'
1684 in_buffer.size = len(data_buffer)
2244 in_buffer.size = len(data_buffer)
1685 in_buffer.pos = 0
2245 in_buffer.pos = 0
1686
2246
1687 zresult = lib.ZSTD_decompress_generic(self._dctx, out_buffer, in_buffer)
2247 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
1688 if lib.ZSTD_isError(zresult):
2248 if lib.ZSTD_isError(zresult):
1689 raise ZstdError('decompression error: %s' %
2249 raise ZstdError('decompression error: %s' %
1690 _zstd_error(zresult))
2250 _zstd_error(zresult))
@@ -1696,9 +2256,10 b' class ZstdDecompressor(object):'
1696
2256
1697 return ffi.buffer(result_buffer, out_buffer.pos)[:]
2257 return ffi.buffer(result_buffer, out_buffer.pos)[:]
1698
2258
1699 def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE):
2259 def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2260 read_across_frames=False):
1700 self._ensure_dctx()
2261 self._ensure_dctx()
1701 return DecompressionReader(self, source, read_size)
2262 return ZstdDecompressionReader(self, source, read_size, read_across_frames)
1702
2263
1703 def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2264 def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1704 if write_size < 1:
2265 if write_size < 1:
@@ -1767,7 +2328,7 b' class ZstdDecompressor(object):'
1767 while in_buffer.pos < in_buffer.size:
2328 while in_buffer.pos < in_buffer.size:
1768 assert out_buffer.pos == 0
2329 assert out_buffer.pos == 0
1769
2330
1770 zresult = lib.ZSTD_decompress_generic(self._dctx, out_buffer, in_buffer)
2331 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
1771 if lib.ZSTD_isError(zresult):
2332 if lib.ZSTD_isError(zresult):
1772 raise ZstdError('zstd decompress error: %s' %
2333 raise ZstdError('zstd decompress error: %s' %
1773 _zstd_error(zresult))
2334 _zstd_error(zresult))
@@ -1787,11 +2348,13 b' class ZstdDecompressor(object):'
1787
2348
1788 read_from = read_to_iter
2349 read_from = read_to_iter
1789
2350
1790 def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2351 def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2352 write_return_read=False):
1791 if not hasattr(writer, 'write'):
2353 if not hasattr(writer, 'write'):
1792 raise ValueError('must pass an object with a write() method')
2354 raise ValueError('must pass an object with a write() method')
1793
2355
1794 return ZstdDecompressionWriter(self, writer, write_size)
2356 return ZstdDecompressionWriter(self, writer, write_size,
2357 write_return_read)
1795
2358
1796 write_to = stream_writer
2359 write_to = stream_writer
1797
2360
@@ -1829,7 +2392,7 b' class ZstdDecompressor(object):'
1829
2392
1830 # Flush all read data to output.
2393 # Flush all read data to output.
1831 while in_buffer.pos < in_buffer.size:
2394 while in_buffer.pos < in_buffer.size:
1832 zresult = lib.ZSTD_decompress_generic(self._dctx, out_buffer, in_buffer)
2395 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
1833 if lib.ZSTD_isError(zresult):
2396 if lib.ZSTD_isError(zresult):
1834 raise ZstdError('zstd decompressor error: %s' %
2397 raise ZstdError('zstd decompressor error: %s' %
1835 _zstd_error(zresult))
2398 _zstd_error(zresult))
@@ -1881,7 +2444,7 b' class ZstdDecompressor(object):'
1881 in_buffer.size = len(chunk_buffer)
2444 in_buffer.size = len(chunk_buffer)
1882 in_buffer.pos = 0
2445 in_buffer.pos = 0
1883
2446
1884 zresult = lib.ZSTD_decompress_generic(self._dctx, out_buffer, in_buffer)
2447 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
1885 if lib.ZSTD_isError(zresult):
2448 if lib.ZSTD_isError(zresult):
1886 raise ZstdError('could not decompress chunk 0: %s' %
2449 raise ZstdError('could not decompress chunk 0: %s' %
1887 _zstd_error(zresult))
2450 _zstd_error(zresult))
@@ -1918,7 +2481,7 b' class ZstdDecompressor(object):'
1918 in_buffer.size = len(chunk_buffer)
2481 in_buffer.size = len(chunk_buffer)
1919 in_buffer.pos = 0
2482 in_buffer.pos = 0
1920
2483
1921 zresult = lib.ZSTD_decompress_generic(self._dctx, out_buffer, in_buffer)
2484 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
1922 if lib.ZSTD_isError(zresult):
2485 if lib.ZSTD_isError(zresult):
1923 raise ZstdError('could not decompress chunk %d: %s' %
2486 raise ZstdError('could not decompress chunk %d: %s' %
1924 _zstd_error(zresult))
2487 _zstd_error(zresult))
@@ -1931,7 +2494,7 b' class ZstdDecompressor(object):'
1931 return ffi.buffer(last_buffer, len(last_buffer))[:]
2494 return ffi.buffer(last_buffer, len(last_buffer))[:]
1932
2495
1933 def _ensure_dctx(self, load_dict=True):
2496 def _ensure_dctx(self, load_dict=True):
1934 lib.ZSTD_DCtx_reset(self._dctx)
2497 lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only)
1935
2498
1936 if self._max_window_size:
2499 if self._max_window_size:
1937 zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx,
2500 zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx,
@@ -210,7 +210,7 b' void zstd_module_init(PyObject* m) {'
210 We detect this mismatch here and refuse to load the module if this
210 We detect this mismatch here and refuse to load the module if this
211 scenario is detected.
211 scenario is detected.
212 */
212 */
213 if (ZSTD_VERSION_NUMBER != 10306 || ZSTD_versionNumber() != 10306) {
213 if (ZSTD_VERSION_NUMBER != 10308 || ZSTD_versionNumber() != 10308) {
214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
215 return;
215 return;
216 }
216 }
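The import-time guard now pins the bundled zstd at 1.3.8 (version number 10308). The Python-visible counterpart of that check is the module's version tuple (assuming the standalone zstandard package):

    import zstandard

    # Mirrors the 10308 gate above; fails if a different libzstd was linked.
    assert zstandard.ZSTD_VERSION == (1, 3, 8)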
@@ -339,17 +339,10 b' MEM_STATIC size_t BIT_getUpperBits(size_'
339
339
340 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
340 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
341 {
341 {
342 #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
342 U32 const regMask = sizeof(bitContainer)*8 - 1;
343 # if defined(__x86_64__)
343 /* if start > regMask, bitstream is corrupted, and result is undefined */
344 if (sizeof(bitContainer)==8)
345 return _bextr_u64(bitContainer, start, nbBits);
346 else
347 # endif
348 return _bextr_u32(bitContainer, start, nbBits);
349 #else
350 assert(nbBits < BIT_MASK_SIZE);
344 assert(nbBits < BIT_MASK_SIZE);
351 return (bitContainer >> start) & BIT_mask[nbBits];
345 return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
352 #endif
353 }
346 }
354
347
355 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
348 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
@@ -366,9 +359,13 b' MEM_STATIC size_t BIT_getLowerBits(size_'
366 * @return : value extracted */
359 * @return : value extracted */
367 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
360 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
368 {
361 {
369 #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
362 /* arbitrate between double-shift and shift+mask */
363 #if 1
364 /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
365 * bitstream is likely corrupted, and result is undefined */
370 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
366 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
371 #else
367 #else
368 /* this code path is slower on my os-x laptop */
372 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
369 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
373 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
370 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
374 #endif
371 #endif
@@ -392,7 +389,7 b' MEM_STATIC void BIT_skipBits(BIT_DStream'
392 * Read (consume) next n bits from local register and update.
389 * Read (consume) next n bits from local register and update.
393 * Take care not to read more than nbBits from the local register.
390 * Take care not to read more than nbBits from the local register.
394 * @return : extracted value. */
391 * @return : extracted value. */
395 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
392 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
396 {
393 {
397 size_t const value = BIT_lookBits(bitD, nbBits);
394 size_t const value = BIT_lookBits(bitD, nbBits);
398 BIT_skipBits(bitD, nbBits);
395 BIT_skipBits(bitD, nbBits);
@@ -401,7 +398,7 b' MEM_STATIC size_t BIT_readBits(BIT_DStre'
401
398
402 /*! BIT_readBitsFast() :
399 /*! BIT_readBitsFast() :
403 * unsafe version; only works only if nbBits >= 1 */
400 * unsafe version; only works only if nbBits >= 1 */
404 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
401 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
405 {
402 {
406 size_t const value = BIT_lookBitsFast(bitD, nbBits);
403 size_t const value = BIT_lookBitsFast(bitD, nbBits);
407 assert(nbBits >= 1);
404 assert(nbBits >= 1);
@@ -15,6 +15,8 b''
15 * Compiler specifics
15 * Compiler specifics
16 *********************************************************/
16 *********************************************************/
17 /* force inlining */
17 /* force inlining */
18
19 #if !defined(ZSTD_NO_INLINE)
18 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
20 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
19 # define INLINE_KEYWORD inline
21 # define INLINE_KEYWORD inline
20 #else
22 #else
@@ -29,6 +31,13 b''
29 # define FORCE_INLINE_ATTR
31 # define FORCE_INLINE_ATTR
30 #endif
32 #endif
31
33
34 #else
35
36 #define INLINE_KEYWORD
37 #define FORCE_INLINE_ATTR
38
39 #endif
40
32 /**
41 /**
33 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
42 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
34 * parameters. They must be inlined for the compiler to eliminate the constant
43 * parameters. They must be inlined for the compiler to eliminate the constant
@@ -89,23 +98,21 b''
89 #endif
98 #endif
90
99
91 /* prefetch
100 /* prefetch
92 * can be disabled, by declaring NO_PREFETCH macro
101 * can be disabled, by declaring NO_PREFETCH build macro */
93 * All prefetch invocations use a single default locality 2,
94 * generating instruction prefetcht1,
95 * which, according to Intel, means "load data into L2 cache".
96 * This is a good enough "middle ground" for the time being,
97 * though in theory, it would be better to specialize locality depending on data being prefetched.
98 * Tests could not determine any sensible difference based on locality value. */
99 #if defined(NO_PREFETCH)
102 #if defined(NO_PREFETCH)
100 # define PREFETCH(ptr) (void)(ptr) /* disabled */
103 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
104 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
101 #else
105 #else
102 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
106 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
103 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
107 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
104 # define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
108 # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
109 # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
105 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
110 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
106 # define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
111 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
107 # else
113 # else
108 # define PREFETCH(ptr) (void)(ptr) /* disabled */
114 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
109 # endif
116 # endif
110 #endif /* NO_PREFETCH */
117 #endif /* NO_PREFETCH */
111
118
@@ -116,7 +123,7 b''
116 size_t const _size = (size_t)(s); \
123 size_t const _size = (size_t)(s); \
117 size_t _pos; \
124 size_t _pos; \
118 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
125 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
119 PREFETCH(_ptr + _pos); \
126 PREFETCH_L2(_ptr + _pos); \
120 } \
127 } \
121 }
128 }
122
129
@@ -78,7 +78,7 b' MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void)'
78 __asm__(
78 __asm__(
79 "pushl %%ebx\n\t"
79 "pushl %%ebx\n\t"
80 "cpuid\n\t"
80 "cpuid\n\t"
81 "movl %%ebx, %%eax\n\r"
81 "movl %%ebx, %%eax\n\t"
82 "popl %%ebx"
82 "popl %%ebx"
83 : "=a"(f7b), "=c"(f7c)
83 : "=a"(f7b), "=c"(f7c)
84 : "a"(7), "c"(0)
84 : "a"(7), "c"(0)
@@ -57,9 +57,9 b' extern "C" {'
57 #endif
57 #endif
58
58
59
59
60 /* static assert is triggered at compile time, leaving no runtime artefact,
60 /* static assert is triggered at compile time, leaving no runtime artefact.
61 * but can only work with compile-time constants.
61 * static assert only works with compile-time constants.
62 * This variant can only be used inside a function. */
62 * Also, this variant can only be used inside a function. */
63 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
63 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
64
64
65
65
@@ -70,9 +70,19 b' extern "C" {'
70 # define DEBUGLEVEL 0
70 # define DEBUGLEVEL 0
71 #endif
71 #endif
72
72
73
74 /* DEBUGFILE can be defined externally,
75 * typically through compiler command line.
76 * note : currently useless.
77 * Value must be stderr or stdout */
78 #ifndef DEBUGFILE
79 # define DEBUGFILE stderr
80 #endif
81
82
73 /* recommended values for DEBUGLEVEL :
83 /* recommended values for DEBUGLEVEL :
74 * 0 : no debug, all run-time functions disabled
84 * 0 : release mode, no debug, all run-time checks disabled
75 * 1 : no display, enables assert() only
85 * 1 : enables assert() only, no display
76 * 2 : reserved, for currently active debug path
86 * 2 : reserved, for currently active debug path
77 * 3 : events once per object lifetime (CCtx, CDict, etc.)
87 * 3 : events once per object lifetime (CCtx, CDict, etc.)
78 * 4 : events once per frame
88 * 4 : events once per frame
@@ -81,7 +91,7 b' extern "C" {'
81 * 7+: events at every position (*very* verbose)
91 * 7+: events at every position (*very* verbose)
82 *
92 *
83 * It's generally inconvenient to output traces > 5.
93 * It's generally inconvenient to output traces > 5.
84 * In which case, it's possible to selectively enable higher verbosity levels
94 * In which case, it's possible to selectively trigger high verbosity levels
85 * by modifying g_debug_level.
95 * by modifying g_debug_level.
86 */
96 */
87
97
@@ -95,11 +105,12 b' extern "C" {'
95
105
96 #if (DEBUGLEVEL>=2)
106 #if (DEBUGLEVEL>=2)
97 # include <stdio.h>
107 # include <stdio.h>
98 extern int g_debuglevel; /* here, this variable is only declared,
108 extern int g_debuglevel; /* the variable is only declared,
99 it actually lives in debug.c,
109 it actually lives in debug.c,
100 and is shared by the whole process.
110 and is shared by the whole process.
101 It's typically used to enable very verbose levels
111 It's not thread-safe.
102 on selective conditions (such as position in src) */
112 It's useful when enabling very verbose levels
113 on selective conditions (such as position in src) */
103
114
104 # define RAWLOG(l, ...) { \
115 # define RAWLOG(l, ...) { \
105 if (l<=g_debuglevel) { \
116 if (l<=g_debuglevel) { \
@@ -14,6 +14,10 b''
14
14
15 const char* ERR_getErrorString(ERR_enum code)
15 const char* ERR_getErrorString(ERR_enum code)
16 {
16 {
17 #ifdef ZSTD_STRIP_ERROR_STRINGS
18 (void)code;
19 return "Error strings stripped";
20 #else
17 static const char* const notErrorCode = "Unspecified error code";
21 static const char* const notErrorCode = "Unspecified error code";
18 switch( code )
22 switch( code )
19 {
23 {
@@ -39,10 +43,12 b' const char* ERR_getErrorString(ERR_enum '
39 case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
43 case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
40 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
44 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
41 case PREFIX(srcSize_wrong): return "Src size is incorrect";
45 case PREFIX(srcSize_wrong): return "Src size is incorrect";
46 case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
42 /* following error codes are not stable and may be removed or changed in a future version */
47 /* following error codes are not stable and may be removed or changed in a future version */
43 case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
48 case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
44 case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
49 case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
45 case PREFIX(maxCode):
50 case PREFIX(maxCode):
46 default: return notErrorCode;
51 default: return notErrorCode;
47 }
52 }
53 #endif
48 }
54 }
@@ -512,7 +512,7 b' MEM_STATIC void FSE_initCState(FSE_CStat'
512 const U32 tableLog = MEM_read16(ptr);
512 const U32 tableLog = MEM_read16(ptr);
513 statePtr->value = (ptrdiff_t)1<<tableLog;
513 statePtr->value = (ptrdiff_t)1<<tableLog;
514 statePtr->stateTable = u16ptr+2;
514 statePtr->stateTable = u16ptr+2;
515 statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
515 statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
516 statePtr->stateLog = tableLog;
516 statePtr->stateLog = tableLog;
517 }
517 }
518
518
@@ -531,7 +531,7 b' MEM_STATIC void FSE_initCState2(FSE_CSta'
531 }
531 }
532 }
532 }
533
533
534 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
534 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
535 {
535 {
536 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
536 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
537 const U16* const stateTable = (const U16*)(statePtr->stateTable);
537 const U16* const stateTable = (const U16*)(statePtr->stateTable);
@@ -173,15 +173,19 b' typedef U32 HUF_DTable;'
173 * Advanced decompression functions
173 * Advanced decompression functions
174 ******************************************/
174 ******************************************/
175 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
175 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
176 #ifndef HUF_FORCE_DECOMPRESS_X1
176 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
177 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
178 #endif
177
179
178 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
180 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
179 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
181 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
180 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
182 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
181 size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
183 size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
182 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
184 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
185 #ifndef HUF_FORCE_DECOMPRESS_X1
183 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
186 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
184 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
187 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
188 #endif
185
189
186
190
187 /* ****************************************
191 /* ****************************************
@@ -228,7 +232,7 b' size_t HUF_compress4X_repeat(void* dst, '
228 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
232 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
229 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
233 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
230 size_t HUF_buildCTable_wksp (HUF_CElt* tree,
234 size_t HUF_buildCTable_wksp (HUF_CElt* tree,
231 const U32* count, U32 maxSymbolValue, U32 maxNbBits,
235 const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
232 void* workSpace, size_t wkspSize);
236 void* workSpace, size_t wkspSize);
233
237
234 /*! HUF_readStats() :
238 /*! HUF_readStats() :
@@ -277,14 +281,22 b' U32 HUF_selectDecoder (size_t dstSize, s'
277 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
281 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
278 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
282 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
279
283
284 #ifndef HUF_FORCE_DECOMPRESS_X2
280 size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
285 size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
281 size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
286 size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
287 #endif
288 #ifndef HUF_FORCE_DECOMPRESS_X1
282 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
289 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
283 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
290 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
291 #endif
284
292
285 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
293 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
294 #ifndef HUF_FORCE_DECOMPRESS_X2
286 size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
295 size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
296 #endif
297 #ifndef HUF_FORCE_DECOMPRESS_X1
287 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
298 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
299 #endif
288
300
289
301
290 /* ====================== */
302 /* ====================== */
@@ -306,24 +318,36 b' size_t HUF_compress1X_repeat(void* dst, '
306 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
318 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
307
319
308 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
320 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
321 #ifndef HUF_FORCE_DECOMPRESS_X1
309 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
322 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
323 #endif
310
324
311 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
325 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
312 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
326 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
327 #ifndef HUF_FORCE_DECOMPRESS_X2
313 size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
328 size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
314 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
329 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
330 #endif
331 #ifndef HUF_FORCE_DECOMPRESS_X1
315 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
332 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
316 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
333 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
334 #endif
317
335
318 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
336 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
318 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
336 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
337 #ifndef HUF_FORCE_DECOMPRESS_X2
319 size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
338 size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
339 #endif
340 #ifndef HUF_FORCE_DECOMPRESS_X1
320 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
341 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
342 #endif
321
343
322 /* BMI2 variants.
344 /* BMI2 variants.
323 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
345 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
324 */
346 */
325 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
347 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
348 #ifndef HUF_FORCE_DECOMPRESS_X2
326 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
349 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
350 #endif
327 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
351 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
328 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
352 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
329
353
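
The new HUF_FORCE_DECOMPRESS_X1 / HUF_FORCE_DECOMPRESS_X2 guards let a build compile only one of the two Huffman decoders to save code size. A sketch of a caller that stays valid under either flag by routing through a generic entry point (decode_block is a hypothetical wrapper, not zstd API):

#define HUF_STATIC_LINKING_ONLY
#include "huf.h"

size_t decode_block(HUF_DTable* dt, void* dst, size_t dstSize,
                    const void* src, size_t srcSize,
                    void* wksp, size_t wkspSize)
{
    /* the generic entry point selects X1 or X2 internally, so it remains
       declared whichever HUF_FORCE_DECOMPRESS_* flag the build sets */
    return HUF_decompress4X_hufOnly_wksp(dt, dst, dstSize, src, srcSize,
                                         wksp, wkspSize);
}
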
@@ -39,6 +39,10 b' extern "C" {'
39 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
39 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
40 #endif
40 #endif
41
41
42 #ifndef __has_builtin
43 # define __has_builtin(x) 0 /* compat. with non-clang compilers */
44 #endif
45
42 /* code only tested on 32 and 64 bits systems */
46 /* code only tested on 32 and 64 bits systems */
43 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
47 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
44 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
48 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
@@ -198,7 +202,8 b' MEM_STATIC U32 MEM_swap32(U32 in)'
198 {
202 {
199 #if defined(_MSC_VER) /* Visual Studio */
203 #if defined(_MSC_VER) /* Visual Studio */
200 return _byteswap_ulong(in);
204 return _byteswap_ulong(in);
201 #elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
205 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
206 || (defined(__clang__) && __has_builtin(__builtin_bswap32))
202 return __builtin_bswap32(in);
207 return __builtin_bswap32(in);
203 #else
208 #else
204 return ((in << 24) & 0xff000000 ) |
209 return ((in << 24) & 0xff000000 ) |
@@ -212,7 +217,8 b' MEM_STATIC U64 MEM_swap64(U64 in)'
212 {
217 {
213 #if defined(_MSC_VER) /* Visual Studio */
218 #if defined(_MSC_VER) /* Visual Studio */
214 return _byteswap_uint64(in);
219 return _byteswap_uint64(in);
215 #elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
220 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
221 || (defined(__clang__) && __has_builtin(__builtin_bswap64))
216 return __builtin_bswap64(in);
222 return __builtin_bswap64(in);
217 #else
223 #else
218 return ((in << 56) & 0xff00000000000000ULL) |
224 return ((in << 56) & 0xff00000000000000ULL) |
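
Without the shim above, `__has_builtin(...)` inside an #if would be a preprocessor error on compilers that lack it; defining it to 0 makes the probe simply answer "no". The same pattern, reduced to a standalone example (popcount32 is illustrative, not zstd code):

#ifndef __has_builtin
#  define __has_builtin(x) 0   /* compat. with non-clang compilers */
#endif

static unsigned popcount32(unsigned v)
{
#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
    || (defined(__clang__) && __has_builtin(__builtin_popcount))
    return (unsigned)__builtin_popcount(v);
#else
    unsigned n = 0;
    while (v) { v &= v - 1; n++; }   /* clear the lowest set bit */
    return n;
#endif
}
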
@@ -88,8 +88,8 b' static void* POOL_thread(void* opaque) {'
88 ctx->numThreadsBusy++;
88 ctx->numThreadsBusy++;
89 ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
89 ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
90 /* Unlock the mutex, signal a pusher, and run the job */
90 /* Unlock the mutex, signal a pusher, and run the job */
91 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
91 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
92 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
92 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
93
93
94 job.function(job.opaque);
94 job.function(job.opaque);
95
95
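
The reorder above (unlock before signal) avoids waking a pusher that would immediately block trying to acquire the still-held queueMutex; POSIX permits signaling a condition variable without holding the mutex. A minimal sketch of the new ordering (pop_and_run is a hypothetical reduction of POOL_thread):

#include <pthread.h>

static void pop_and_run(pthread_mutex_t* mtx, pthread_cond_t* pushCond,
                        void (*fn)(void*), void* arg)
{
    pthread_mutex_lock(mtx);
    /* ... dequeue one job while holding the lock ... */
    pthread_mutex_unlock(mtx);
    pthread_cond_signal(pushCond);   /* was: signal first, then unlock */
    fn(arg);                         /* run the job outside the lock */
}
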
@@ -30,8 +30,10 b' const char* ZSTD_versionString(void) { r'
30 /*-****************************************
30 /*-****************************************
31 * ZSTD Error Management
31 * ZSTD Error Management
32 ******************************************/
32 ******************************************/
33 #undef ZSTD_isError /* defined within zstd_internal.h */
33 /*! ZSTD_isError() :
34 /*! ZSTD_isError() :
34 * tells if a return value is an error code */
35 * tells if a return value is an error code
36 * symbol is required for external callers */
35 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
37 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
36
38
37 /*! ZSTD_getErrorName() :
39 /*! ZSTD_getErrorName() :
@@ -72,6 +72,7 b' typedef enum {'
72 ZSTD_error_workSpace_tooSmall= 66,
72 ZSTD_error_workSpace_tooSmall= 66,
73 ZSTD_error_dstSize_tooSmall = 70,
73 ZSTD_error_dstSize_tooSmall = 70,
74 ZSTD_error_srcSize_wrong = 72,
74 ZSTD_error_srcSize_wrong = 72,
75 ZSTD_error_dstBuffer_null = 74,
75 /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
76 /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
76 ZSTD_error_frameIndex_tooLarge = 100,
77 ZSTD_error_frameIndex_tooLarge = 100,
77 ZSTD_error_seekableIO = 102,
78 ZSTD_error_seekableIO = 102,
@@ -41,6 +41,9 b' extern "C" {'
41
41
42 /* ---- static assert (debug) --- */
42 /* ---- static assert (debug) --- */
43 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
43 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
44 #define ZSTD_isError ERR_isError /* for inlining */
45 #define FSE_isError ERR_isError
46 #define HUF_isError ERR_isError
44
47
45
48
46 /*-*************************************
49 /*-*************************************
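
Together with the #undef in zstd_common.c above, this gives internal call sites an inlinable macro while still exporting one real symbol for external callers. The split, reduced to a single hypothetical translation unit (MY_isError and ERR_isError_ are illustrative names; the threshold is arbitrary):

#include <stddef.h>

/* inline predicate, normally supplied by error_private.h */
static unsigned ERR_isError_(size_t code) { return code > (size_t)-100; }

/* internal header: callers inside the library get the macro */
#define MY_isError ERR_isError_

/* public definition: restore the name and emit a real linkable symbol */
#undef MY_isError
unsigned MY_isError(size_t code) { return ERR_isError_(code); }
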
@@ -75,7 +78,6 b' static const U32 repStartValue[ZSTD_REP_'
75 #define BIT0 1
78 #define BIT0 1
76
79
77 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
80 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
78 #define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */
79 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
81 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
80 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
82 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
81
83
@@ -242,7 +244,7 b' typedef struct {'
242 blockType_e blockType;
244 blockType_e blockType;
243 U32 lastBlock;
245 U32 lastBlock;
244 U32 origSize;
246 U32 origSize;
245 } blockProperties_t;
247 } blockProperties_t; /* declared here for decompress and fullbench */
246
248
247 /*! ZSTD_getcBlockSize() :
249 /*! ZSTD_getcBlockSize() :
248 * Provides the size of compressed block from block header `src` */
250 * Provides the size of compressed block from block header `src` */
@@ -250,6 +252,13 b' typedef struct {'
250 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
252 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
251 blockProperties_t* bpPtr);
253 blockProperties_t* bpPtr);
252
254
255 /*! ZSTD_decodeSeqHeaders() :
256 * decode sequence header from src */
257 /* Used by: decompress, fullbench (does not get its definition from here) */
258 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
259 const void* src, size_t srcSize);
260
261
253 #if defined (__cplusplus)
262 #if defined (__cplusplus)
254 }
263 }
255 #endif
264 #endif
@@ -115,7 +115,7 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
115 /* symbol start positions */
115 /* symbol start positions */
116 { U32 u;
116 { U32 u;
117 cumul[0] = 0;
117 cumul[0] = 0;
118 for (u=1; u<=maxSymbolValue+1; u++) {
118 for (u=1; u <= maxSymbolValue+1; u++) {
119 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
119 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
120 cumul[u] = cumul[u-1] + 1;
120 cumul[u] = cumul[u-1] + 1;
121 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
121 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
@@ -658,7 +658,7 b' size_t FSE_compress_wksp (void* dst, siz'
658 BYTE* op = ostart;
658 BYTE* op = ostart;
659 BYTE* const oend = ostart + dstSize;
659 BYTE* const oend = ostart + dstSize;
660
660
661 U32 count[FSE_MAX_SYMBOL_VALUE+1];
661 unsigned count[FSE_MAX_SYMBOL_VALUE+1];
662 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
662 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
663 FSE_CTable* CTable = (FSE_CTable*)workSpace;
663 FSE_CTable* CTable = (FSE_CTable*)workSpace;
664 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
664 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
@@ -672,7 +672,7 b' size_t FSE_compress_wksp (void* dst, siz'
672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
673
673
674 /* Scan input and build symbol stats */
674 /* Scan input and build symbol stats */
675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
@@ -73,6 +73,7 b' unsigned HIST_count_simple(unsigned* cou'
73 return largestCount;
73 return largestCount;
74 }
74 }
75
75
76 typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
76
77
77 /* HIST_count_parallel_wksp() :
78 /* HIST_count_parallel_wksp() :
78 * store histogram into 4 intermediate tables, recombined at the end.
79 * store histogram into 4 intermediate tables, recombined at the end.
@@ -85,8 +86,8 b' unsigned HIST_count_simple(unsigned* cou'
85 static size_t HIST_count_parallel_wksp(
86 static size_t HIST_count_parallel_wksp(
86 unsigned* count, unsigned* maxSymbolValuePtr,
87 unsigned* count, unsigned* maxSymbolValuePtr,
87 const void* source, size_t sourceSize,
88 const void* source, size_t sourceSize,
88 unsigned checkMax,
89 HIST_checkInput_e check,
89 unsigned* const workSpace)
90 U32* const workSpace)
90 {
91 {
91 const BYTE* ip = (const BYTE*)source;
92 const BYTE* ip = (const BYTE*)source;
92 const BYTE* const iend = ip+sourceSize;
93 const BYTE* const iend = ip+sourceSize;
@@ -137,7 +138,7 b' static size_t HIST_count_parallel_wksp('
137 /* finish last symbols */
138 /* finish last symbols */
138 while (ip<iend) Counting1[*ip++]++;
139 while (ip<iend) Counting1[*ip++]++;
139
140
140 if (checkMax) { /* verify stats will fit into destination table */
141 if (check) { /* verify stats will fit into destination table */
141 U32 s; for (s=255; s>maxSymbolValue; s--) {
142 U32 s; for (s=255; s>maxSymbolValue; s--) {
142 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
143 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
143 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
144 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
@@ -157,14 +158,18 b' static size_t HIST_count_parallel_wksp('
157
158
158 /* HIST_countFast_wksp() :
159 /* HIST_countFast_wksp() :
159 * Same as HIST_countFast(), but using an externally provided scratch buffer.
160 * Same as HIST_countFast(), but using an externally provided scratch buffer.
160 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
161 * `workSpace` is a writable buffer which must be 4-bytes aligned,
162 * `workSpaceSize` must be >= HIST_WKSP_SIZE
163 */
161 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
164 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
162 const void* source, size_t sourceSize,
165 const void* source, size_t sourceSize,
163 unsigned* workSpace)
166 void* workSpace, size_t workSpaceSize)
164 {
167 {
165 if (sourceSize < 1500) /* heuristic threshold */
168 if (sourceSize < 1500) /* heuristic threshold */
166 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
169 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
167 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
170 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
171 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
172 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
168 }
173 }
169
174
170 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
175 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
@@ -172,24 +177,27 b' size_t HIST_countFast(unsigned* count, u'
172 const void* source, size_t sourceSize)
177 const void* source, size_t sourceSize)
173 {
178 {
174 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
179 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
175 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
180 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
176 }
181 }
177
182
178 /* HIST_count_wksp() :
183 /* HIST_count_wksp() :
179 * Same as HIST_count(), but using an externally provided scratch buffer.
184 * Same as HIST_count(), but using an externally provided scratch buffer.
180 * `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
185 * `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
181 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
186 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
182 const void* source, size_t sourceSize, unsigned* workSpace)
187 const void* source, size_t sourceSize,
188 void* workSpace, size_t workSpaceSize)
183 {
189 {
190 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
191 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
184 if (*maxSymbolValuePtr < 255)
192 if (*maxSymbolValuePtr < 255)
185 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
193 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
186 *maxSymbolValuePtr = 255;
194 *maxSymbolValuePtr = 255;
187 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
195 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
188 }
196 }
189
197
190 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
198 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
191 const void* src, size_t srcSize)
199 const void* src, size_t srcSize)
192 {
200 {
193 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
201 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
194 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
202 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
195 }
203 }
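
With the new signature, callers pass the scratch buffer as a void* plus its size in bytes, and the function itself rejects misaligned or undersized buffers. A minimal usage sketch (histogram_demo is a hypothetical caller):

#include "hist.h"

size_t histogram_demo(const void* src, size_t srcSize, unsigned count[256])
{
    unsigned wksp[HIST_WKSP_SIZE_U32];   /* unsigned[] is 4-byte aligned */
    unsigned maxSymbolValue = 255;
    /* new calling convention: buffer pointer + byte size */
    return HIST_count_wksp(count, &maxSymbolValue, src, srcSize,
                           wksp, sizeof(wksp));
}
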
@@ -41,11 +41,11 b''
41
41
42 /*! HIST_count():
42 /*! HIST_count():
43 * Provides the precise count of each byte within a table 'count'.
43 * Provides the precise count of each byte within a table 'count'.
44 * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
44 * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
45 * Updates *maxSymbolValuePtr with actual largest symbol value detected.
45 * Updates *maxSymbolValuePtr with actual largest symbol value detected.
46 * @return : count of the most frequent symbol (which isn't identified).
46 * @return : count of the most frequent symbol (which isn't identified).
47 * or an error code, which can be tested using HIST_isError().
47 * or an error code, which can be tested using HIST_isError().
48 * note : if return == srcSize, there is only one symbol.
48 * note : if return == srcSize, there is only one symbol.
49 */
49 */
50 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
50 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
51 const void* src, size_t srcSize);
51 const void* src, size_t srcSize);
@@ -56,14 +56,16 b' unsigned HIST_isError(size_t code); /**'
56 /* --- advanced histogram functions --- */
56 /* --- advanced histogram functions --- */
57
57
58 #define HIST_WKSP_SIZE_U32 1024
58 #define HIST_WKSP_SIZE_U32 1024
59 #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
59 /** HIST_count_wksp() :
60 /** HIST_count_wksp() :
60 * Same as HIST_count(), but using an externally provided scratch buffer.
61 * Same as HIST_count(), but using an externally provided scratch buffer.
61 * Benefit is this function will use very little stack space.
62 * Benefit is this function will use very little stack space.
62 * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
63 * `workSpace` is a writable buffer which must be 4-bytes aligned,
64 * `workSpaceSize` must be >= HIST_WKSP_SIZE
63 */
65 */
64 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
66 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
65 const void* src, size_t srcSize,
67 const void* src, size_t srcSize,
66 unsigned* workSpace);
68 void* workSpace, size_t workSpaceSize);
67
69
68 /** HIST_countFast() :
70 /** HIST_countFast() :
69 * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
71 * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
@@ -74,11 +76,12 b' size_t HIST_countFast(unsigned* count, u'
74
76
75 /** HIST_countFast_wksp() :
77 /** HIST_countFast_wksp() :
76 * Same as HIST_countFast(), but using an externally provided scratch buffer.
78 * Same as HIST_countFast(), but using an externally provided scratch buffer.
77 * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
79 * `workSpace` is a writable buffer which must be 4-bytes aligned,
80 * `workSpaceSize` must be >= HIST_WKSP_SIZE
78 */
81 */
79 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
82 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
80 const void* src, size_t srcSize,
83 const void* src, size_t srcSize,
81 unsigned* workSpace);
84 void* workSpace, size_t workSpaceSize);
82
85
83 /*! HIST_count_simple() :
86 /*! HIST_count_simple() :
84 * Same as HIST_countFast(), this function is unsafe,
87 * Same as HIST_countFast(), this function is unsafe,
@@ -88,13 +88,13 b' static size_t HUF_compressWeights (void*'
88 BYTE* op = ostart;
88 BYTE* op = ostart;
89 BYTE* const oend = ostart + dstSize;
89 BYTE* const oend = ostart + dstSize;
90
90
91 U32 maxSymbolValue = HUF_TABLELOG_MAX;
91 unsigned maxSymbolValue = HUF_TABLELOG_MAX;
92 U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
92 U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
93
93
94 FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
94 FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
95 BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
95 BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
96
96
97 U32 count[HUF_TABLELOG_MAX+1];
97 unsigned count[HUF_TABLELOG_MAX+1];
98 S16 norm[HUF_TABLELOG_MAX+1];
98 S16 norm[HUF_TABLELOG_MAX+1];
99
99
100 /* init conditions */
100 /* init conditions */
@@ -134,7 +134,7 b' struct HUF_CElt_s {'
134 `CTable` : Huffman tree to save, using huf representation.
134 `CTable` : Huffman tree to save, using huf representation.
135 @return : size of saved CTable */
135 @return : size of saved CTable */
136 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
136 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
137 const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
137 const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
138 {
138 {
139 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
139 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
140 BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
140 BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
@@ -169,7 +169,7 b' size_t HUF_writeCTable (void* dst, size_'
169 }
169 }
170
170
171
171
172 size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
172 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
173 {
173 {
174 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
174 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
175 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
175 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
@@ -315,7 +315,7 b' typedef struct {'
315 U32 current;
315 U32 current;
316 } rankPos;
316 } rankPos;
317
317
318 static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
318 static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
319 {
319 {
320 rankPos rank[32];
320 rankPos rank[32];
321 U32 n;
321 U32 n;
@@ -347,7 +347,7 b' static void HUF_sort(nodeElt* huffNode, '
347 */
347 */
348 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
348 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
349 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
349 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
350 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
350 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
351 {
351 {
352 nodeElt* const huffNode0 = (nodeElt*)workSpace;
352 nodeElt* const huffNode0 = (nodeElt*)workSpace;
353 nodeElt* const huffNode = huffNode0+1;
353 nodeElt* const huffNode = huffNode0+1;
@@ -421,7 +421,7 b' size_t HUF_buildCTable_wksp (HUF_CElt* t'
421 * @return : maxNbBits
421 * @return : maxNbBits
422 * Note : count is used before tree is written, so they can safely overlap
422 * Note : count is used before tree is written, so they can safely overlap
423 */
423 */
424 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
424 size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
425 {
425 {
426 huffNodeTable nodeTable;
426 huffNodeTable nodeTable;
427 return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
427 return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
@@ -610,13 +610,14 b' size_t HUF_compress4X_usingCTable(void* '
610 return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
610 return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
611 }
611 }
612
612
613 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
613
614
614 static size_t HUF_compressCTable_internal(
615 static size_t HUF_compressCTable_internal(
615 BYTE* const ostart, BYTE* op, BYTE* const oend,
616 BYTE* const ostart, BYTE* op, BYTE* const oend,
616 const void* src, size_t srcSize,
617 const void* src, size_t srcSize,
617 unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
618 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
618 {
619 {
619 size_t const cSize = singleStream ?
620 size_t const cSize = (nbStreams==HUF_singleStream) ?
620 HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
621 HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
621 HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
622 HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
622 if (HUF_isError(cSize)) { return cSize; }
623 if (HUF_isError(cSize)) { return cSize; }
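
Replacing the old `unsigned singleStream` flag with HUF_nbStreams_e makes call sites self-documenting: a bare 0/1 no longer appears in argument lists. The idiom as a standalone sketch (demo_* names are illustrative, chosen to avoid clashing with the real enum):

typedef enum { demo_singleStream, demo_fourStreams } demo_nbStreams_e;

/* the enum constant carries the intent that a 0/1 flag used to hide */
static int stream_count(demo_nbStreams_e nbStreams)
{
    return (nbStreams == demo_singleStream) ? 1 : 4;
}
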
@@ -628,21 +629,21 b' static size_t HUF_compressCTable_interna'
628 }
629 }
629
630
630 typedef struct {
631 typedef struct {
631 U32 count[HUF_SYMBOLVALUE_MAX + 1];
632 unsigned count[HUF_SYMBOLVALUE_MAX + 1];
632 HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
633 HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
633 huffNodeTable nodeTable;
634 huffNodeTable nodeTable;
634 } HUF_compress_tables_t;
635 } HUF_compress_tables_t;
635
636
636 /* HUF_compress_internal() :
637 /* HUF_compress_internal() :
637 * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
638 * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
638 static size_t HUF_compress_internal (
639 static size_t
639 void* dst, size_t dstSize,
640 HUF_compress_internal (void* dst, size_t dstSize,
640 const void* src, size_t srcSize,
641 const void* src, size_t srcSize,
641 unsigned maxSymbolValue, unsigned huffLog,
642 unsigned maxSymbolValue, unsigned huffLog,
642 unsigned singleStream,
643 HUF_nbStreams_e nbStreams,
643 void* workSpace, size_t wkspSize,
644 void* workSpace, size_t wkspSize,
644 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
645 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
645 const int bmi2)
646 const int bmi2)
646 {
647 {
647 HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
648 HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
648 BYTE* const ostart = (BYTE*)dst;
649 BYTE* const ostart = (BYTE*)dst;
@@ -651,7 +652,7 b' static size_t HUF_compress_internal ('
651
652
652 /* checks & inits */
653 /* checks & inits */
653 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
654 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
654 if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
655 if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
655 if (!srcSize) return 0; /* Uncompressed */
656 if (!srcSize) return 0; /* Uncompressed */
656 if (!dstSize) return 0; /* cannot fit anything within dst budget */
657 if (!dstSize) return 0; /* cannot fit anything within dst budget */
657 if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
658 if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -664,11 +665,11 b' static size_t HUF_compress_internal ('
664 if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
665 if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
665 return HUF_compressCTable_internal(ostart, op, oend,
666 return HUF_compressCTable_internal(ostart, op, oend,
666 src, srcSize,
667 src, srcSize,
667 singleStream, oldHufTable, bmi2);
668 nbStreams, oldHufTable, bmi2);
668 }
669 }
669
670
670 /* Scan input and build symbol stats */
671 /* Scan input and build symbol stats */
671 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
672 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
672 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
673 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
673 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
674 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
674 }
675 }
@@ -683,14 +684,15 b' static size_t HUF_compress_internal ('
683 if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
684 if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
684 return HUF_compressCTable_internal(ostart, op, oend,
685 return HUF_compressCTable_internal(ostart, op, oend,
685 src, srcSize,
686 src, srcSize,
686 singleStream, oldHufTable, bmi2);
687 nbStreams, oldHufTable, bmi2);
687 }
688 }
688
689
689 /* Build Huffman Tree */
690 /* Build Huffman Tree */
690 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
691 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
691 { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
692 { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
692 maxSymbolValue, huffLog,
693 maxSymbolValue, huffLog,
693 table->nodeTable, sizeof(table->nodeTable)) );
694 table->nodeTable, sizeof(table->nodeTable));
695 CHECK_F(maxBits);
694 huffLog = (U32)maxBits;
696 huffLog = (U32)maxBits;
695 /* Zero unused symbols in CTable, so we can check it for validity */
697 /* Zero unused symbols in CTable, so we can check it for validity */
696 memset(table->CTable + (maxSymbolValue + 1), 0,
698 memset(table->CTable + (maxSymbolValue + 1), 0,
@@ -706,7 +708,7 b' static size_t HUF_compress_internal ('
706 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
708 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
707 return HUF_compressCTable_internal(ostart, op, oend,
709 return HUF_compressCTable_internal(ostart, op, oend,
708 src, srcSize,
710 src, srcSize,
709 singleStream, oldHufTable, bmi2);
711 nbStreams, oldHufTable, bmi2);
710 } }
712 } }
711
713
712 /* Use the new huffman table */
714 /* Use the new huffman table */
@@ -718,7 +720,7 b' static size_t HUF_compress_internal ('
718 }
720 }
719 return HUF_compressCTable_internal(ostart, op, oend,
721 return HUF_compressCTable_internal(ostart, op, oend,
720 src, srcSize,
722 src, srcSize,
721 singleStream, table->CTable, bmi2);
723 nbStreams, table->CTable, bmi2);
722 }
724 }
723
725
724
726
@@ -728,7 +730,7 b' size_t HUF_compress1X_wksp (void* dst, s'
728 void* workSpace, size_t wkspSize)
730 void* workSpace, size_t wkspSize)
729 {
731 {
730 return HUF_compress_internal(dst, dstSize, src, srcSize,
732 return HUF_compress_internal(dst, dstSize, src, srcSize,
731 maxSymbolValue, huffLog, 1 /*single stream*/,
733 maxSymbolValue, huffLog, HUF_singleStream,
732 workSpace, wkspSize,
734 workSpace, wkspSize,
733 NULL, NULL, 0, 0 /*bmi2*/);
735 NULL, NULL, 0, 0 /*bmi2*/);
734 }
736 }
@@ -740,7 +742,7 b' size_t HUF_compress1X_repeat (void* dst,'
740 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
742 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
741 {
743 {
742 return HUF_compress_internal(dst, dstSize, src, srcSize,
744 return HUF_compress_internal(dst, dstSize, src, srcSize,
743 maxSymbolValue, huffLog, 1 /*single stream*/,
745 maxSymbolValue, huffLog, HUF_singleStream,
744 workSpace, wkspSize, hufTable,
746 workSpace, wkspSize, hufTable,
745 repeat, preferRepeat, bmi2);
747 repeat, preferRepeat, bmi2);
746 }
748 }
@@ -762,7 +764,7 b' size_t HUF_compress4X_wksp (void* dst, s'
762 void* workSpace, size_t wkspSize)
764 void* workSpace, size_t wkspSize)
763 {
765 {
764 return HUF_compress_internal(dst, dstSize, src, srcSize,
766 return HUF_compress_internal(dst, dstSize, src, srcSize,
765 maxSymbolValue, huffLog, 0 /*4 streams*/,
767 maxSymbolValue, huffLog, HUF_fourStreams,
766 workSpace, wkspSize,
768 workSpace, wkspSize,
767 NULL, NULL, 0, 0 /*bmi2*/);
769 NULL, NULL, 0, 0 /*bmi2*/);
768 }
770 }
@@ -777,7 +779,7 b' size_t HUF_compress4X_repeat (void* dst,'
777 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
779 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
778 {
780 {
779 return HUF_compress_internal(dst, dstSize, src, srcSize,
781 return HUF_compress_internal(dst, dstSize, src, srcSize,
780 maxSymbolValue, huffLog, 0 /* 4 streams */,
782 maxSymbolValue, huffLog, HUF_fourStreams,
781 workSpace, wkspSize,
783 workSpace, wkspSize,
782 hufTable, repeat, preferRepeat, bmi2);
784 hufTable, repeat, preferRepeat, bmi2);
783 }
785 }
This diff has been collapsed as it changes many lines (994 lines changed).
@@ -11,6 +11,7 b''
11 /*-*************************************
11 /*-*************************************
12 * Dependencies
12 * Dependencies
13 ***************************************/
13 ***************************************/
14 #include <limits.h> /* INT_MAX */
14 #include <string.h> /* memset */
15 #include <string.h> /* memset */
15 #include "cpu.h"
16 #include "cpu.h"
16 #include "mem.h"
17 #include "mem.h"
@@ -61,7 +62,7 b' static void ZSTD_initCCtx(ZSTD_CCtx* cct'
61 memset(cctx, 0, sizeof(*cctx));
62 memset(cctx, 0, sizeof(*cctx));
62 cctx->customMem = memManager;
63 cctx->customMem = memManager;
63 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
64 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
64 { size_t const err = ZSTD_CCtx_resetParameters(cctx);
65 { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
65 assert(!ZSTD_isError(err));
66 assert(!ZSTD_isError(err));
66 (void)err;
67 (void)err;
67 }
68 }
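
The old ZSTD_CCtx_resetParameters() call is folded into ZSTD_CCtx_reset(), which now takes a directive. A sketch of the directives this zstd version defines (reset_demo is a hypothetical helper; only ZSTD_reset_parameters appears in the hunk above, the other two are its documented companions):

#include "zstd.h"

static void reset_demo(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);           /* abandon current frame */
    ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);             /* parameters back to defaults */
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); /* both at once */
}
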
@@ -128,7 +129,7 b' static size_t ZSTD_sizeof_mtctx(const ZS'
128 #ifdef ZSTD_MULTITHREAD
129 #ifdef ZSTD_MULTITHREAD
129 return ZSTDMT_sizeof_CCtx(cctx->mtctx);
130 return ZSTDMT_sizeof_CCtx(cctx->mtctx);
130 #else
131 #else
131 (void) cctx;
132 (void)cctx;
132 return 0;
133 return 0;
133 #endif
134 #endif
134 }
135 }
@@ -226,9 +227,160 b' static ZSTD_CCtx_params ZSTD_assignParam'
226 return ret;
227 return ret;
227 }
228 }
228
229
229 #define CLAMPCHECK(val,min,max) { \
230 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
230 if (((val)<(min)) | ((val)>(max))) { \
231 {
231 return ERROR(parameter_outOfBound); \
232 ZSTD_bounds bounds = { 0, 0, 0 };
233
234 switch(param)
235 {
236 case ZSTD_c_compressionLevel:
237 bounds.lowerBound = ZSTD_minCLevel();
238 bounds.upperBound = ZSTD_maxCLevel();
239 return bounds;
240
241 case ZSTD_c_windowLog:
242 bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
243 bounds.upperBound = ZSTD_WINDOWLOG_MAX;
244 return bounds;
245
246 case ZSTD_c_hashLog:
247 bounds.lowerBound = ZSTD_HASHLOG_MIN;
248 bounds.upperBound = ZSTD_HASHLOG_MAX;
249 return bounds;
250
251 case ZSTD_c_chainLog:
252 bounds.lowerBound = ZSTD_CHAINLOG_MIN;
253 bounds.upperBound = ZSTD_CHAINLOG_MAX;
254 return bounds;
255
256 case ZSTD_c_searchLog:
257 bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
258 bounds.upperBound = ZSTD_SEARCHLOG_MAX;
259 return bounds;
260
261 case ZSTD_c_minMatch:
262 bounds.lowerBound = ZSTD_MINMATCH_MIN;
263 bounds.upperBound = ZSTD_MINMATCH_MAX;
264 return bounds;
265
266 case ZSTD_c_targetLength:
267 bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
268 bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
269 return bounds;
270
271 case ZSTD_c_strategy:
272 bounds.lowerBound = ZSTD_STRATEGY_MIN;
273 bounds.upperBound = ZSTD_STRATEGY_MAX;
274 return bounds;
275
276 case ZSTD_c_contentSizeFlag:
277 bounds.lowerBound = 0;
278 bounds.upperBound = 1;
279 return bounds;
280
281 case ZSTD_c_checksumFlag:
282 bounds.lowerBound = 0;
283 bounds.upperBound = 1;
284 return bounds;
285
286 case ZSTD_c_dictIDFlag:
287 bounds.lowerBound = 0;
288 bounds.upperBound = 1;
289 return bounds;
290
291 case ZSTD_c_nbWorkers:
292 bounds.lowerBound = 0;
293 #ifdef ZSTD_MULTITHREAD
294 bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
295 #else
296 bounds.upperBound = 0;
297 #endif
298 return bounds;
299
300 case ZSTD_c_jobSize:
301 bounds.lowerBound = 0;
302 #ifdef ZSTD_MULTITHREAD
303 bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
304 #else
305 bounds.upperBound = 0;
306 #endif
307 return bounds;
308
309 case ZSTD_c_overlapLog:
310 bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
311 bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
312 return bounds;
313
314 case ZSTD_c_enableLongDistanceMatching:
315 bounds.lowerBound = 0;
316 bounds.upperBound = 1;
317 return bounds;
318
319 case ZSTD_c_ldmHashLog:
320 bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
321 bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
322 return bounds;
323
324 case ZSTD_c_ldmMinMatch:
325 bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
326 bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
327 return bounds;
328
329 case ZSTD_c_ldmBucketSizeLog:
330 bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
331 bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
332 return bounds;
333
334 case ZSTD_c_ldmHashRateLog:
335 bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
336 bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
337 return bounds;
338
339 /* experimental parameters */
340 case ZSTD_c_rsyncable:
341 bounds.lowerBound = 0;
342 bounds.upperBound = 1;
343 return bounds;
344
345 case ZSTD_c_forceMaxWindow :
346 bounds.lowerBound = 0;
347 bounds.upperBound = 1;
348 return bounds;
349
350 case ZSTD_c_format:
351 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
352 bounds.lowerBound = ZSTD_f_zstd1;
353 bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */
354 return bounds;
355
356 case ZSTD_c_forceAttachDict:
357 ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
358 bounds.lowerBound = ZSTD_dictDefaultAttach;
359 bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */
360 return bounds;
361
362 default:
363 { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
364 return boundError;
365 }
366 }
367 }
368
369 /* ZSTD_cParam_withinBounds:
370 * @return 1 if value is within cParam bounds,
371 * 0 otherwise */
372 static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
373 {
374 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
375 if (ZSTD_isError(bounds.error)) return 0;
376 if (value < bounds.lowerBound) return 0;
377 if (value > bounds.upperBound) return 0;
378 return 1;
379 }
380
381 #define BOUNDCHECK(cParam, val) { \
382 if (!ZSTD_cParam_withinBounds(cParam,val)) { \
383 return ERROR(parameter_outOfBound); \
232 } }
384 } }
233
385
234
386
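
A usage sketch for the new bounds API: query the limits, clamp, then set. Since the setter now takes int, negative (fast) compression levels pass through without the old cast-through-unsigned trick (set_level_checked is a hypothetical helper; the advanced API may require ZSTD_STATIC_LINKING_ONLY in this version):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t set_level_checked(ZSTD_CCtx* cctx, int level)
{
    ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
    if (ZSTD_isError(b.error)) return b.error;
    if (level < b.lowerBound) level = b.lowerBound;
    if (level > b.upperBound) level = b.upperBound;
    /* the setter takes int now, so negative fast levels are legal input */
    return ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
}
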
@@ -236,38 +388,39 b' static int ZSTD_isUpdateAuthorized(ZSTD_'
236 {
388 {
237 switch(param)
389 switch(param)
238 {
390 {
239 case ZSTD_p_compressionLevel:
391 case ZSTD_c_compressionLevel:
240 case ZSTD_p_hashLog:
392 case ZSTD_c_hashLog:
241 case ZSTD_p_chainLog:
393 case ZSTD_c_chainLog:
242 case ZSTD_p_searchLog:
394 case ZSTD_c_searchLog:
243 case ZSTD_p_minMatch:
395 case ZSTD_c_minMatch:
244 case ZSTD_p_targetLength:
396 case ZSTD_c_targetLength:
245 case ZSTD_p_compressionStrategy:
397 case ZSTD_c_strategy:
246 return 1;
398 return 1;
247
399
248 case ZSTD_p_format:
400 case ZSTD_c_format:
249 case ZSTD_p_windowLog:
401 case ZSTD_c_windowLog:
250 case ZSTD_p_contentSizeFlag:
402 case ZSTD_c_contentSizeFlag:
251 case ZSTD_p_checksumFlag:
403 case ZSTD_c_checksumFlag:
252 case ZSTD_p_dictIDFlag:
404 case ZSTD_c_dictIDFlag:
253 case ZSTD_p_forceMaxWindow :
405 case ZSTD_c_forceMaxWindow :
254 case ZSTD_p_nbWorkers:
406 case ZSTD_c_nbWorkers:
255 case ZSTD_p_jobSize:
407 case ZSTD_c_jobSize:
256 case ZSTD_p_overlapSizeLog:
408 case ZSTD_c_overlapLog:
257 case ZSTD_p_enableLongDistanceMatching:
409 case ZSTD_c_rsyncable:
258 case ZSTD_p_ldmHashLog:
410 case ZSTD_c_enableLongDistanceMatching:
259 case ZSTD_p_ldmMinMatch:
411 case ZSTD_c_ldmHashLog:
260 case ZSTD_p_ldmBucketSizeLog:
412 case ZSTD_c_ldmMinMatch:
261 case ZSTD_p_ldmHashEveryLog:
413 case ZSTD_c_ldmBucketSizeLog:
262 case ZSTD_p_forceAttachDict:
414 case ZSTD_c_ldmHashRateLog:
415 case ZSTD_c_forceAttachDict:
263 default:
416 default:
264 return 0;
417 return 0;
265 }
418 }
266 }
419 }
267
420
268 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
421 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
269 {
422 {
270 DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value);
423 DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
271 if (cctx->streamStage != zcss_init) {
424 if (cctx->streamStage != zcss_init) {
272 if (ZSTD_isUpdateAuthorized(param)) {
425 if (ZSTD_isUpdateAuthorized(param)) {
273 cctx->cParamsChanged = 1;
426 cctx->cParamsChanged = 1;
@@ -277,51 +430,52 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*'
277
430
278 switch(param)
431 switch(param)
279 {
432 {
280 case ZSTD_p_format :
433 case ZSTD_c_format :
281 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
434 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
282
435
283 case ZSTD_p_compressionLevel:
436 case ZSTD_c_compressionLevel:
284 if (cctx->cdict) return ERROR(stage_wrong);
437 if (cctx->cdict) return ERROR(stage_wrong);
285 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
438 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
286
439
287 case ZSTD_p_windowLog:
440 case ZSTD_c_windowLog:
288 case ZSTD_p_hashLog:
441 case ZSTD_c_hashLog:
289 case ZSTD_p_chainLog:
442 case ZSTD_c_chainLog:
290 case ZSTD_p_searchLog:
443 case ZSTD_c_searchLog:
291 case ZSTD_p_minMatch:
444 case ZSTD_c_minMatch:
292 case ZSTD_p_targetLength:
445 case ZSTD_c_targetLength:
293 case ZSTD_p_compressionStrategy:
446 case ZSTD_c_strategy:
294 if (cctx->cdict) return ERROR(stage_wrong);
447 if (cctx->cdict) return ERROR(stage_wrong);
295 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
448 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
296
449
297 case ZSTD_p_contentSizeFlag:
450 case ZSTD_c_contentSizeFlag:
298 case ZSTD_p_checksumFlag:
451 case ZSTD_c_checksumFlag:
299 case ZSTD_p_dictIDFlag:
452 case ZSTD_c_dictIDFlag:
300 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
453 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
301
454
302 case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize,
455 case ZSTD_c_forceMaxWindow : /* Force back-references to remain < windowSize,
303 * even when referencing into Dictionary content.
456 * even when referencing into Dictionary content.
304 * default : 0 when using a CDict, 1 when using a Prefix */
457 * default : 0 when using a CDict, 1 when using a Prefix */
305 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
458 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
306
459
307 case ZSTD_p_forceAttachDict:
460 case ZSTD_c_forceAttachDict:
308 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
461 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
309
462
310 case ZSTD_p_nbWorkers:
463 case ZSTD_c_nbWorkers:
311 if ((value>0) && cctx->staticSize) {
464 if ((value!=0) && cctx->staticSize) {
312 return ERROR(parameter_unsupported); /* MT not compatible with static alloc */
465 return ERROR(parameter_unsupported); /* MT not compatible with static alloc */
313 }
466 }
314 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
467 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
315
468
316 case ZSTD_p_jobSize:
469 case ZSTD_c_jobSize:
317 case ZSTD_p_overlapSizeLog:
470 case ZSTD_c_overlapLog:
471 case ZSTD_c_rsyncable:
318 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
472 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
319
473
320 case ZSTD_p_enableLongDistanceMatching:
474 case ZSTD_c_enableLongDistanceMatching:
321 case ZSTD_p_ldmHashLog:
475 case ZSTD_c_ldmHashLog:
322 case ZSTD_p_ldmMinMatch:
476 case ZSTD_c_ldmMinMatch:
323 case ZSTD_p_ldmBucketSizeLog:
477 case ZSTD_c_ldmBucketSizeLog:
324 case ZSTD_p_ldmHashEveryLog:
478 case ZSTD_c_ldmHashRateLog:
325 if (cctx->cdict) return ERROR(stage_wrong);
479 if (cctx->cdict) return ERROR(stage_wrong);
326 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
480 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
327
481
@@ -329,21 +483,21 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*'
329 }
483 }
330 }
484 }
331
485
332 size_t ZSTD_CCtxParam_setParameter(
486 size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams,
333 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned value)
487 ZSTD_cParameter param, int value)
334 {
488 {
335 DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%u, %u)", (U32)param, value);
489 DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%i, %i)", (int)param, value);
336 switch(param)
490 switch(param)
337 {
491 {
338 case ZSTD_p_format :
492 case ZSTD_c_format :
339 if (value > (unsigned)ZSTD_f_zstd1_magicless)
493 BOUNDCHECK(ZSTD_c_format, value);
340 return ERROR(parameter_unsupported);
341 CCtxParams->format = (ZSTD_format_e)value;
494 CCtxParams->format = (ZSTD_format_e)value;
342 return (size_t)CCtxParams->format;
495 return (size_t)CCtxParams->format;
343
496
344 case ZSTD_p_compressionLevel : {
497 case ZSTD_c_compressionLevel : {
345 int cLevel = (int)value; /* cast expected to restore negative sign */
498 int cLevel = value;
346 if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
499 if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
500 if (cLevel < ZSTD_minCLevel()) cLevel = ZSTD_minCLevel();
347 if (cLevel) { /* 0 : does not change current level */
501 if (cLevel) { /* 0 : does not change current level */
348 CCtxParams->compressionLevel = cLevel;
502 CCtxParams->compressionLevel = cLevel;
349 }
503 }
@@ -351,213 +505,229 b' size_t ZSTD_CCtxParam_setParameter('
351 return 0; /* return type (size_t) cannot represent negative values */
505 return 0; /* return type (size_t) cannot represent negative values */
352 }
506 }
353
507
354 case ZSTD_p_windowLog :
508 case ZSTD_c_windowLog :
355 if (value>0) /* 0 => use default */
509 if (value!=0) /* 0 => use default */
356 CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
510 BOUNDCHECK(ZSTD_c_windowLog, value);
357 CCtxParams->cParams.windowLog = value;
511 CCtxParams->cParams.windowLog = value;
358 return CCtxParams->cParams.windowLog;
512 return CCtxParams->cParams.windowLog;
359
513
360 case ZSTD_p_hashLog :
514 case ZSTD_c_hashLog :
361 if (value>0) /* 0 => use default */
515 if (value!=0) /* 0 => use default */
362 CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
516 BOUNDCHECK(ZSTD_c_hashLog, value);
363 CCtxParams->cParams.hashLog = value;
517 CCtxParams->cParams.hashLog = value;
364 return CCtxParams->cParams.hashLog;
518 return CCtxParams->cParams.hashLog;
365
519
366 case ZSTD_p_chainLog :
520 case ZSTD_c_chainLog :
367 if (value>0) /* 0 => use default */
521 if (value!=0) /* 0 => use default */
368 CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
522 BOUNDCHECK(ZSTD_c_chainLog, value);
369 CCtxParams->cParams.chainLog = value;
523 CCtxParams->cParams.chainLog = value;
370 return CCtxParams->cParams.chainLog;
524 return CCtxParams->cParams.chainLog;
371
525
372 case ZSTD_p_searchLog :
526 case ZSTD_c_searchLog :
373 if (value>0) /* 0 => use default */
527 if (value!=0) /* 0 => use default */
374 CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
528 BOUNDCHECK(ZSTD_c_searchLog, value);
375 CCtxParams->cParams.searchLog = value;
529 CCtxParams->cParams.searchLog = value;
376 return value;
530 return value;
377
531
378 case ZSTD_p_minMatch :
532 case ZSTD_c_minMatch :
379 if (value>0) /* 0 => use default */
533 if (value!=0) /* 0 => use default */
380 CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
534 BOUNDCHECK(ZSTD_c_minMatch, value);
381 CCtxParams->cParams.searchLength = value;
535 CCtxParams->cParams.minMatch = value;
382 return CCtxParams->cParams.searchLength;
536 return CCtxParams->cParams.minMatch;
383
537
384 case ZSTD_p_targetLength :
538 case ZSTD_c_targetLength :
385 /* all values are valid. 0 => use default */
539 BOUNDCHECK(ZSTD_c_targetLength, value);
386 CCtxParams->cParams.targetLength = value;
540 CCtxParams->cParams.targetLength = value;
387 return CCtxParams->cParams.targetLength;
541 return CCtxParams->cParams.targetLength;
388
542
389 case ZSTD_p_compressionStrategy :
543 case ZSTD_c_strategy :
390 if (value>0) /* 0 => use default */
544 if (value!=0) /* 0 => use default */
391 CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
545 BOUNDCHECK(ZSTD_c_strategy, value);
392 CCtxParams->cParams.strategy = (ZSTD_strategy)value;
546 CCtxParams->cParams.strategy = (ZSTD_strategy)value;
393 return (size_t)CCtxParams->cParams.strategy;
547 return (size_t)CCtxParams->cParams.strategy;
394
548
395 case ZSTD_p_contentSizeFlag :
549 case ZSTD_c_contentSizeFlag :
396 /* Content size written in frame header _when known_ (default:1) */
550 /* Content size written in frame header _when known_ (default:1) */
397 DEBUGLOG(4, "set content size flag = %u", (value>0));
551 DEBUGLOG(4, "set content size flag = %u", (value!=0));
398 CCtxParams->fParams.contentSizeFlag = value > 0;
552 CCtxParams->fParams.contentSizeFlag = value != 0;
399 return CCtxParams->fParams.contentSizeFlag;
553 return CCtxParams->fParams.contentSizeFlag;
400
554
401 case ZSTD_p_checksumFlag :
555 case ZSTD_c_checksumFlag :
402 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
556 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
403 CCtxParams->fParams.checksumFlag = value > 0;
557 CCtxParams->fParams.checksumFlag = value != 0;
404 return CCtxParams->fParams.checksumFlag;
558 return CCtxParams->fParams.checksumFlag;
405
559
406 case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
560 case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
407 DEBUGLOG(4, "set dictIDFlag = %u", (value>0));
561 DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
408 CCtxParams->fParams.noDictIDFlag = !value;
562 CCtxParams->fParams.noDictIDFlag = !value;
409 return !CCtxParams->fParams.noDictIDFlag;
563 return !CCtxParams->fParams.noDictIDFlag;
410
564
411 case ZSTD_p_forceMaxWindow :
565 case ZSTD_c_forceMaxWindow :
412 CCtxParams->forceWindow = (value > 0);
566 CCtxParams->forceWindow = (value != 0);
413 return CCtxParams->forceWindow;
567 return CCtxParams->forceWindow;
414
568
415 case ZSTD_p_forceAttachDict :
569 case ZSTD_c_forceAttachDict : {
416 CCtxParams->attachDictPref = value ?
570 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
417 (value > 0 ? ZSTD_dictForceAttach : ZSTD_dictForceCopy) :
571 BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
418 ZSTD_dictDefaultAttach;
572 CCtxParams->attachDictPref = pref;
419 return CCtxParams->attachDictPref;
573 return CCtxParams->attachDictPref;
420
574 }
421 case ZSTD_p_nbWorkers :
575
576 case ZSTD_c_nbWorkers :
422 #ifndef ZSTD_MULTITHREAD
577 #ifndef ZSTD_MULTITHREAD
423 if (value>0) return ERROR(parameter_unsupported);
578 if (value!=0) return ERROR(parameter_unsupported);
424 return 0;
579 return 0;
425 #else
580 #else
426 return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value);
581 return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value);
427 #endif
582 #endif
428
583
429 case ZSTD_p_jobSize :
584 case ZSTD_c_jobSize :
430 #ifndef ZSTD_MULTITHREAD
585 #ifndef ZSTD_MULTITHREAD
431 return ERROR(parameter_unsupported);
586 return ERROR(parameter_unsupported);
432 #else
587 #else
433 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value);
588 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value);
434 #endif
589 #endif
435
590
436 case ZSTD_p_overlapSizeLog :
591 case ZSTD_c_overlapLog :
592 #ifndef ZSTD_MULTITHREAD
593 return ERROR(parameter_unsupported);
594 #else
595 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapLog, value);
596 #endif
597
598 case ZSTD_c_rsyncable :
437 #ifndef ZSTD_MULTITHREAD
599 #ifndef ZSTD_MULTITHREAD
438 return ERROR(parameter_unsupported);
600 return ERROR(parameter_unsupported);
439 #else
601 #else
440 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value);
602 return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_rsyncable, value);
441 #endif
603 #endif
442
604
443 case ZSTD_p_enableLongDistanceMatching :
605 case ZSTD_c_enableLongDistanceMatching :
444 CCtxParams->ldmParams.enableLdm = (value>0);
606 CCtxParams->ldmParams.enableLdm = (value!=0);
445 return CCtxParams->ldmParams.enableLdm;
607 return CCtxParams->ldmParams.enableLdm;
446
608
447 case ZSTD_p_ldmHashLog :
609 case ZSTD_c_ldmHashLog :
448 if (value>0) /* 0 ==> auto */
610 if (value!=0) /* 0 ==> auto */
449 CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
611 BOUNDCHECK(ZSTD_c_ldmHashLog, value);
450 CCtxParams->ldmParams.hashLog = value;
612 CCtxParams->ldmParams.hashLog = value;
451 return CCtxParams->ldmParams.hashLog;
613 return CCtxParams->ldmParams.hashLog;
452
614
453 case ZSTD_p_ldmMinMatch :
615 case ZSTD_c_ldmMinMatch :
454 if (value>0) /* 0 ==> default */
616 if (value!=0) /* 0 ==> default */
455 CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX);
617 BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
456 CCtxParams->ldmParams.minMatchLength = value;
618 CCtxParams->ldmParams.minMatchLength = value;
457 return CCtxParams->ldmParams.minMatchLength;
619 return CCtxParams->ldmParams.minMatchLength;
458
620
459 case ZSTD_p_ldmBucketSizeLog :
621 case ZSTD_c_ldmBucketSizeLog :
460 if (value > ZSTD_LDM_BUCKETSIZELOG_MAX)
622 if (value!=0) /* 0 ==> default */
461 return ERROR(parameter_outOfBound);
623 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
462 CCtxParams->ldmParams.bucketSizeLog = value;
624 CCtxParams->ldmParams.bucketSizeLog = value;
463 return CCtxParams->ldmParams.bucketSizeLog;
625 return CCtxParams->ldmParams.bucketSizeLog;
464
626
465 case ZSTD_p_ldmHashEveryLog :
627 case ZSTD_c_ldmHashRateLog :
466 if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
628 if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
467 return ERROR(parameter_outOfBound);
629 return ERROR(parameter_outOfBound);
468 CCtxParams->ldmParams.hashEveryLog = value;
630 CCtxParams->ldmParams.hashRateLog = value;
469 return CCtxParams->ldmParams.hashEveryLog;
631 return CCtxParams->ldmParams.hashRateLog;
470
632
471 default: return ERROR(parameter_unsupported);
633 default: return ERROR(parameter_unsupported);
472 }
634 }
473 }
635 }
474
636
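
Throughout this function the old CLAMPCHECK(value, MIN, MAX) calls give way to BOUNDCHECK(param, value), which consults a per-parameter bounds table. A hedged sketch of the equivalent caller-side check, assuming the ZSTD_cParam_getBounds() API this macro is built on (the helper name is illustrative):

#include <zstd.h>

/* Sketch: validate a candidate value the way BOUNDCHECK does. */
static int value_in_bounds(ZSTD_cParameter param, int value)
{
    ZSTD_bounds const b = ZSTD_cParam_getBounds(param);
    if (ZSTD_isError(b.error)) return 0;   /* unknown parameter */
    return (value >= b.lowerBound) && (value <= b.upperBound);
}
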
475 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value)
637 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
476 {
638 {
477 return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value);
639 return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value);
478 }
640 }
479
641
480 size_t ZSTD_CCtxParam_getParameter(
642 size_t ZSTD_CCtxParam_getParameter(
481 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned* value)
643 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
482 {
644 {
483 switch(param)
645 switch(param)
484 {
646 {
485 case ZSTD_p_format :
647 case ZSTD_c_format :
486 *value = CCtxParams->format;
648 *value = CCtxParams->format;
487 break;
649 break;
488 case ZSTD_p_compressionLevel :
650 case ZSTD_c_compressionLevel :
489 *value = CCtxParams->compressionLevel;
651 *value = CCtxParams->compressionLevel;
490 break;
652 break;
491 case ZSTD_p_windowLog :
653 case ZSTD_c_windowLog :
492 *value = CCtxParams->cParams.windowLog;
654 *value = CCtxParams->cParams.windowLog;
493 break;
655 break;
494 case ZSTD_p_hashLog :
656 case ZSTD_c_hashLog :
495 *value = CCtxParams->cParams.hashLog;
657 *value = CCtxParams->cParams.hashLog;
496 break;
658 break;
497 case ZSTD_p_chainLog :
659 case ZSTD_c_chainLog :
498 *value = CCtxParams->cParams.chainLog;
660 *value = CCtxParams->cParams.chainLog;
499 break;
661 break;
500 case ZSTD_p_searchLog :
662 case ZSTD_c_searchLog :
501 *value = CCtxParams->cParams.searchLog;
663 *value = CCtxParams->cParams.searchLog;
502 break;
664 break;
503 case ZSTD_p_minMatch :
665 case ZSTD_c_minMatch :
504 *value = CCtxParams->cParams.searchLength;
666 *value = CCtxParams->cParams.minMatch;
505 break;
667 break;
506 case ZSTD_p_targetLength :
668 case ZSTD_c_targetLength :
507 *value = CCtxParams->cParams.targetLength;
669 *value = CCtxParams->cParams.targetLength;
508 break;
670 break;
509 case ZSTD_p_compressionStrategy :
671 case ZSTD_c_strategy :
510 *value = (unsigned)CCtxParams->cParams.strategy;
672 *value = (unsigned)CCtxParams->cParams.strategy;
511 break;
673 break;
512 case ZSTD_p_contentSizeFlag :
674 case ZSTD_c_contentSizeFlag :
513 *value = CCtxParams->fParams.contentSizeFlag;
675 *value = CCtxParams->fParams.contentSizeFlag;
514 break;
676 break;
515 case ZSTD_p_checksumFlag :
677 case ZSTD_c_checksumFlag :
516 *value = CCtxParams->fParams.checksumFlag;
678 *value = CCtxParams->fParams.checksumFlag;
517 break;
679 break;
518 case ZSTD_p_dictIDFlag :
680 case ZSTD_c_dictIDFlag :
519 *value = !CCtxParams->fParams.noDictIDFlag;
681 *value = !CCtxParams->fParams.noDictIDFlag;
520 break;
682 break;
521 case ZSTD_p_forceMaxWindow :
683 case ZSTD_c_forceMaxWindow :
522 *value = CCtxParams->forceWindow;
684 *value = CCtxParams->forceWindow;
523 break;
685 break;
524 case ZSTD_p_forceAttachDict :
686 case ZSTD_c_forceAttachDict :
525 *value = CCtxParams->attachDictPref;
687 *value = CCtxParams->attachDictPref;
526 break;
688 break;
527 case ZSTD_p_nbWorkers :
689 case ZSTD_c_nbWorkers :
528 #ifndef ZSTD_MULTITHREAD
690 #ifndef ZSTD_MULTITHREAD
529 assert(CCtxParams->nbWorkers == 0);
691 assert(CCtxParams->nbWorkers == 0);
530 #endif
692 #endif
531 *value = CCtxParams->nbWorkers;
693 *value = CCtxParams->nbWorkers;
532 break;
694 break;
533 case ZSTD_p_jobSize :
695 case ZSTD_c_jobSize :
534 #ifndef ZSTD_MULTITHREAD
696 #ifndef ZSTD_MULTITHREAD
535 return ERROR(parameter_unsupported);
697 return ERROR(parameter_unsupported);
536 #else
698 #else
537 *value = CCtxParams->jobSize;
699 assert(CCtxParams->jobSize <= INT_MAX);
700 *value = (int)CCtxParams->jobSize;
538 break;
701 break;
539 #endif
702 #endif
540 case ZSTD_p_overlapSizeLog :
703 case ZSTD_c_overlapLog :
541 #ifndef ZSTD_MULTITHREAD
704 #ifndef ZSTD_MULTITHREAD
542 return ERROR(parameter_unsupported);
705 return ERROR(parameter_unsupported);
543 #else
706 #else
544 *value = CCtxParams->overlapSizeLog;
707 *value = CCtxParams->overlapLog;
545 break;
708 break;
546 #endif
709 #endif
547 case ZSTD_p_enableLongDistanceMatching :
710 case ZSTD_c_rsyncable :
711 #ifndef ZSTD_MULTITHREAD
712 return ERROR(parameter_unsupported);
713 #else
714 *value = CCtxParams->rsyncable;
715 break;
716 #endif
717 case ZSTD_c_enableLongDistanceMatching :
548 *value = CCtxParams->ldmParams.enableLdm;
718 *value = CCtxParams->ldmParams.enableLdm;
549 break;
719 break;
550 case ZSTD_p_ldmHashLog :
720 case ZSTD_c_ldmHashLog :
551 *value = CCtxParams->ldmParams.hashLog;
721 *value = CCtxParams->ldmParams.hashLog;
552 break;
722 break;
553 case ZSTD_p_ldmMinMatch :
723 case ZSTD_c_ldmMinMatch :
554 *value = CCtxParams->ldmParams.minMatchLength;
724 *value = CCtxParams->ldmParams.minMatchLength;
555 break;
725 break;
556 case ZSTD_p_ldmBucketSizeLog :
726 case ZSTD_c_ldmBucketSizeLog :
557 *value = CCtxParams->ldmParams.bucketSizeLog;
727 *value = CCtxParams->ldmParams.bucketSizeLog;
558 break;
728 break;
559 case ZSTD_p_ldmHashEveryLog :
729 case ZSTD_c_ldmHashRateLog :
560 *value = CCtxParams->ldmParams.hashEveryLog;
730 *value = CCtxParams->ldmParams.hashRateLog;
561 break;
731 break;
562 default: return ERROR(parameter_unsupported);
732 default: return ERROR(parameter_unsupported);
563 }
733 }
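
The getter's out-parameter switches from unsigned* to int*, matching the signed setter so negative compression levels round-trip. A small usage sketch (not from the changeset; the formatting is illustrative):

#include <stdio.h>
#include <zstd.h>

static void show_level(ZSTD_CCtx* cctx)
{
    int level = 0;
    size_t const err = ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &level);
    if (!ZSTD_isError(err))
        printf("compression level: %d\n", level);  /* may be negative */
}
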
@@ -655,34 +825,35 @@ size_t ZSTD_CCtx_refPrefix_advanced(
655
825
656 /*! ZSTD_CCtx_reset() :
826 /*! ZSTD_CCtx_reset() :
657 * Also dumps dictionary */
827 * Also dumps dictionary */
658 void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
828 size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
659 {
829 {
660 cctx->streamStage = zcss_init;
830 if ( (reset == ZSTD_reset_session_only)
661 cctx->pledgedSrcSizePlusOne = 0;
831 || (reset == ZSTD_reset_session_and_parameters) ) {
832 cctx->streamStage = zcss_init;
833 cctx->pledgedSrcSizePlusOne = 0;
834 }
835 if ( (reset == ZSTD_reset_parameters)
836 || (reset == ZSTD_reset_session_and_parameters) ) {
837 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
838 cctx->cdict = NULL;
839 return ZSTD_CCtxParams_reset(&cctx->requestedParams);
840 }
841 return 0;
662 }
842 }
663
843
664 size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx)
665 {
666 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
667 cctx->cdict = NULL;
668 return ZSTD_CCtxParams_reset(&cctx->requestedParams);
669 }
670
844
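
The old pair ZSTD_CCtx_reset() / ZSTD_CCtx_resetParameters() collapses into one directive-driven entry point, as implemented above. A sketch of the directives from the caller's side:

#include <zstd.h>

static void reuse_cctx(ZSTD_CCtx* cctx)
{
    /* keep parameters and dictionary, begin a new frame */
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);

    /* drop parameters and dictionary as well; only legal between
     * frames, otherwise the call returns a stage_wrong error */
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
}
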
671 /** ZSTD_checkCParams() :
845 /** ZSTD_checkCParams() :
672 control CParam values remain within authorized range.
846 control CParam values remain within authorized range.
673 @return : 0, or an error code if one value is beyond authorized range */
847 @return : 0, or an error code if one value is beyond authorized range */
674 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
848 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
675 {
849 {
676 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
850 BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
677 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
851 BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog);
678 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
852 BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog);
679 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
853 BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
680 CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
854 BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch);
681 ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
855 BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength);
682 if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
856 BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);
683 return ERROR(parameter_outOfBound);
684 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
685 return ERROR(parameter_unsupported);
686 return 0;
857 return 0;
687 }
858 }
688
859
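
ZSTD_checkCParams() now routes every field through the same BOUNDCHECK table, so the ad-hoc targetLength and strategy tests disappear. A caller-side sketch, assuming the public ZSTD_getCParams()/ZSTD_checkCParams() pair; the windowLog tweak is hypothetical:

#include <zstd.h>

static int cparams_ok(int level, unsigned long long srcSize)
{
    ZSTD_compressionParameters cp = ZSTD_getCParams(level, srcSize, 0);
    cp.windowLog += 1;   /* hypothetical hand tuning */
    return !ZSTD_isError(ZSTD_checkCParams(cp));
}
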
@@ -692,19 +863,19 @@ size_t ZSTD_checkCParams(ZSTD_compressio
692 static ZSTD_compressionParameters
863 static ZSTD_compressionParameters
693 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
864 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
694 {
865 {
695 # define CLAMP(val,min,max) { \
866 # define CLAMP_TYPE(cParam, val, type) { \
696 if (val<min) val=min; \
867 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
697 else if (val>max) val=max; \
868 if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
869 else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
698 }
870 }
699 CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
871 # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
700 CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
872 CLAMP(ZSTD_c_windowLog, cParams.windowLog);
701 CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
873 CLAMP(ZSTD_c_chainLog, cParams.chainLog);
702 CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
874 CLAMP(ZSTD_c_hashLog, cParams.hashLog);
703 CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
875 CLAMP(ZSTD_c_searchLog, cParams.searchLog);
704 ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
876 CLAMP(ZSTD_c_minMatch, cParams.minMatch);
705 if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
877 CLAMP(ZSTD_c_targetLength,cParams.targetLength);
706 cParams.targetLength = ZSTD_TARGETLENGTH_MAX;
878 CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
707 CLAMP(cParams.strategy, ZSTD_fast, ZSTD_btultra);
708 return cParams;
879 return cParams;
709 }
880 }
710
881
@@ -774,7 +945,7 @@ ZSTD_compressionParameters ZSTD_getCPara
774 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
945 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
775 if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
946 if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
776 if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
947 if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
777 if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
948 if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
778 if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
949 if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
779 if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
950 if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
780 assert(!ZSTD_checkCParams(cParams));
951 assert(!ZSTD_checkCParams(cParams));
@@ -787,13 +958,12 @@ ZSTD_sizeof_matchState(const ZSTD_compre
787 {
958 {
788 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
959 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
789 size_t const hSize = ((size_t)1) << cParams->hashLog;
960 size_t const hSize = ((size_t)1) << cParams->hashLog;
790 U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
961 U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
791 size_t const h3Size = ((size_t)1) << hashLog3;
962 size_t const h3Size = ((size_t)1) << hashLog3;
792 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
963 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
793 size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
964 size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
794 + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
965 + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
795 size_t const optSpace = (forCCtx && ((cParams->strategy == ZSTD_btopt) ||
966 size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
796 (cParams->strategy == ZSTD_btultra)))
797 ? optPotentialSpace
967 ? optPotentialSpace
798 : 0;
968 : 0;
799 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
969 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
@@ -808,7 +978,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxPa
808 { ZSTD_compressionParameters const cParams =
978 { ZSTD_compressionParameters const cParams =
809 ZSTD_getCParamsFromCCtxParams(params, 0, 0);
979 ZSTD_getCParamsFromCCtxParams(params, 0, 0);
810 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
980 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
811 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
981 U32 const divider = (cParams.minMatch==3) ? 3 : 4;
812 size_t const maxNbSeq = blockSize / divider;
982 size_t const maxNbSeq = blockSize / divider;
813 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
983 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
814 size_t const entropySpace = HUF_WORKSPACE_SIZE;
984 size_t const entropySpace = HUF_WORKSPACE_SIZE;
@@ -843,7 +1013,7 @@ size_t ZSTD_estimateCCtxSize(int compres
843 {
1013 {
844 int level;
1014 int level;
845 size_t memBudget = 0;
1015 size_t memBudget = 0;
846 for (level=1; level<=compressionLevel; level++) {
1016 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
847 size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
1017 size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
848 if (newMB > memBudget) memBudget = newMB;
1018 if (newMB > memBudget) memBudget = newMB;
849 }
1019 }
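
Starting the loop at MIN(compressionLevel, 1) makes the estimate meaningful for negative (fast) levels, which previously skipped the loop entirely and yielded 0. Illustrative usage, not from the changeset:

#include <stdio.h>
#include <zstd.h>

int main(void)
{
    size_t const est = ZSTD_estimateCCtxSize(-5);  /* fast level */
    printf("CCtx estimate for level -5: %zu bytes\n", est);
    return 0;
}
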
@@ -879,7 +1049,7 @@ size_t ZSTD_estimateCStreamSize(int comp
879 {
1049 {
880 int level;
1050 int level;
881 size_t memBudget = 0;
1051 size_t memBudget = 0;
882 for (level=1; level<=compressionLevel; level++) {
1052 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
883 size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
1053 size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
884 if (newMB > memBudget) memBudget = newMB;
1054 if (newMB > memBudget) memBudget = newMB;
885 }
1055 }
@@ -933,7 +1103,7 @@ static U32 ZSTD_equivalentCParams(ZSTD_c
933 return (cParams1.hashLog == cParams2.hashLog)
1103 return (cParams1.hashLog == cParams2.hashLog)
934 & (cParams1.chainLog == cParams2.chainLog)
1104 & (cParams1.chainLog == cParams2.chainLog)
935 & (cParams1.strategy == cParams2.strategy) /* opt parser space */
1105 & (cParams1.strategy == cParams2.strategy) /* opt parser space */
936 & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */
1106 & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */
937 }
1107 }
938
1108
939 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
1109 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
@@ -945,7 +1115,7 @@ static void ZSTD_assertEqualCParams(ZSTD
945 assert(cParams1.chainLog == cParams2.chainLog);
1115 assert(cParams1.chainLog == cParams2.chainLog);
946 assert(cParams1.hashLog == cParams2.hashLog);
1116 assert(cParams1.hashLog == cParams2.hashLog);
947 assert(cParams1.searchLog == cParams2.searchLog);
1117 assert(cParams1.searchLog == cParams2.searchLog);
948 assert(cParams1.searchLength == cParams2.searchLength);
1118 assert(cParams1.minMatch == cParams2.minMatch);
949 assert(cParams1.targetLength == cParams2.targetLength);
1119 assert(cParams1.targetLength == cParams2.targetLength);
950 assert(cParams1.strategy == cParams2.strategy);
1120 assert(cParams1.strategy == cParams2.strategy);
951 }
1121 }
@@ -960,7 +1130,7 @@ static U32 ZSTD_equivalentLdmParams(ldmP
960 ldmParams1.hashLog == ldmParams2.hashLog &&
1130 ldmParams1.hashLog == ldmParams2.hashLog &&
961 ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
1131 ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
962 ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
1132 ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
963 ldmParams1.hashEveryLog == ldmParams2.hashEveryLog);
1133 ldmParams1.hashRateLog == ldmParams2.hashRateLog);
964 }
1134 }
965
1135
966 typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
1136 typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
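
ldmHashEveryLog is renamed ldmHashRateLog here and throughout the file. A short sketch of enabling long-distance matching with the renamed knob (values are illustrative; 0 still means auto):

#include <zstd.h>

static void enable_ldm(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 27);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashRateLog, 7);
}
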
@@ -976,7 +1146,7 @@ static U32 ZSTD_sufficientBuff(size_t bu
976 {
1146 {
977 size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
1147 size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
978 size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
1148 size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
979 size_t const maxNbSeq2 = blockSize2 / ((cParams2.searchLength == 3) ? 3 : 4);
1149 size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4);
980 size_t const maxNbLit2 = blockSize2;
1150 size_t const maxNbLit2 = blockSize2;
981 size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
1151 size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
982 DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
1152 DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
@@ -1034,8 +1204,8 @@ static void ZSTD_invalidateMatchState(ZS
1034 {
1204 {
1035 ZSTD_window_clear(&ms->window);
1205 ZSTD_window_clear(&ms->window);
1036
1206
1037 ms->nextToUpdate = ms->window.dictLimit + 1;
1207 ms->nextToUpdate = ms->window.dictLimit;
1038 ms->nextToUpdate3 = ms->window.dictLimit + 1;
1208 ms->nextToUpdate3 = ms->window.dictLimit;
1039 ms->loadedDictEnd = 0;
1209 ms->loadedDictEnd = 0;
1040 ms->opt.litLengthSum = 0; /* force reset of btopt stats */
1210 ms->opt.litLengthSum = 0; /* force reset of btopt stats */
1041 ms->dictMatchState = NULL;
1211 ms->dictMatchState = NULL;
@@ -1080,7 +1250,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t*
1080 {
1250 {
1081 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
1251 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
1082 size_t const hSize = ((size_t)1) << cParams->hashLog;
1252 size_t const hSize = ((size_t)1) << cParams->hashLog;
1083 U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1253 U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1084 size_t const h3Size = ((size_t)1) << hashLog3;
1254 size_t const h3Size = ((size_t)1) << hashLog3;
1085 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
1255 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
1086
1256
@@ -1094,9 +1264,9 @@ ZSTD_reset_matchState(ZSTD_matchState_t*
1094 ZSTD_invalidateMatchState(ms);
1264 ZSTD_invalidateMatchState(ms);
1095
1265
1096 /* opt parser space */
1266 /* opt parser space */
1097 if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) {
1267 if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
1098 DEBUGLOG(4, "reserving optimal parser space");
1268 DEBUGLOG(4, "reserving optimal parser space");
1099 ms->opt.litFreq = (U32*)ptr;
1269 ms->opt.litFreq = (unsigned*)ptr;
1100 ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
1270 ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
1101 ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
1271 ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
1102 ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
1272 ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
@@ -1158,13 +1328,13 @@ static size_t ZSTD_resetCCtx_internal(ZS
1158 /* Adjust long distance matching parameters */
1328 /* Adjust long distance matching parameters */
1159 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
1329 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
1160 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
1330 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
1161 assert(params.ldmParams.hashEveryLog < 32);
1331 assert(params.ldmParams.hashRateLog < 32);
1162 zc->ldmState.hashPower = ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
1332 zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
1163 }
1333 }
1164
1334
1165 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
1335 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
1166 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1336 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1167 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
1337 U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
1168 size_t const maxNbSeq = blockSize / divider;
1338 size_t const maxNbSeq = blockSize / divider;
1169 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
1339 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
1170 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
1340 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
@@ -1227,7 +1397,7 @@ static size_t ZSTD_resetCCtx_internal(ZS
1227 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1397 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1228 zc->appliedParams.fParams.contentSizeFlag = 0;
1398 zc->appliedParams.fParams.contentSizeFlag = 0;
1229 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1399 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1230 (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
1400 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
1231 zc->blockSize = blockSize;
1401 zc->blockSize = blockSize;
1232
1402
1233 XXH64_reset(&zc->xxhState, 0);
1403 XXH64_reset(&zc->xxhState, 0);
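
As the hunk above shows, the content-size flag is cleared whenever no size was pledged, so the frame header never advertises an unknown size. Caller-side sketch, assuming the existing ZSTD_CCtx_setPledgedSrcSize() API:

#include <zstd.h>

static void pledge(ZSTD_CCtx* cctx, unsigned long long srcSize)
{
    ZSTD_CCtx_setPledgedSrcSize(cctx, srcSize);              /* known size */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 1); /* the default */
}
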
@@ -1306,16 +1476,17 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx*
1306 * dictionary tables into the working context is faster than using them
1476 * dictionary tables into the working context is faster than using them
1307 * in-place.
1477 * in-place.
1308 */
1478 */
1309 static const size_t attachDictSizeCutoffs[(unsigned)ZSTD_btultra+1] = {
1479 static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
1310 8 KB, /* unused */
1480 8 KB, /* unused */
1311 8 KB, /* ZSTD_fast */
1481 8 KB, /* ZSTD_fast */
1312 16 KB, /* ZSTD_dfast */
1482 16 KB, /* ZSTD_dfast */
1313 32 KB, /* ZSTD_greedy */
1483 32 KB, /* ZSTD_greedy */
1314 32 KB, /* ZSTD_lazy */
1484 32 KB, /* ZSTD_lazy */
1315 32 KB, /* ZSTD_lazy2 */
1485 32 KB, /* ZSTD_lazy2 */
1316 32 KB, /* ZSTD_btlazy2 */
1486 32 KB, /* ZSTD_btlazy2 */
1317 32 KB, /* ZSTD_btopt */
1487 32 KB, /* ZSTD_btopt */
1318 8 KB /* ZSTD_btultra */
1488 8 KB, /* ZSTD_btultra */
1489 8 KB /* ZSTD_btultra2 */
1319 };
1490 };
1320
1491
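
The cutoff table gains a btultra2 row and is sized by ZSTD_STRATEGY_MAX instead of the hard-coded btultra bound. It is only a heuristic; per the ZSTD_c_forceAttachDict handling earlier in this file, callers can override it. Sketch (helper name illustrative):

#include <zstd.h>

static void force_attach(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
    ZSTD_CCtx_refCDict(cctx, cdict);
}
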
1321 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
1492 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
@@ -1447,7 +1618,8 @@ static size_t ZSTD_resetCCtx_usingCDict(
1447 ZSTD_buffered_policy_e zbuff)
1618 ZSTD_buffered_policy_e zbuff)
1448 {
1619 {
1449
1620
1450 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
1621 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
1622 (unsigned)pledgedSrcSize);
1451
1623
1452 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1624 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1453 return ZSTD_resetCCtx_byAttachingCDict(
1625 return ZSTD_resetCCtx_byAttachingCDict(
@@ -1670,7 +1842,9 @@ static size_t ZSTD_compressRleLiteralsBl
1670 * note : use same formula for both situations */
1842 * note : use same formula for both situations */
1671 static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
1843 static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
1672 {
1844 {
1673 U32 const minlog = (strat==ZSTD_btultra) ? 7 : 6;
1845 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
1846 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
1847 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
1674 return (srcSize >> minlog) + 2;
1848 return (srcSize >> minlog) + 2;
1675 }
1849 }
1676
1850
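
Worked numbers for the new rule above: strategies at or above btultra derive minlog from the enum value itself (btultra = 8 gives minlog 7, btultra2 = 9 gives minlog 8), while everything below keeps minlog 6. For a 16 KB block the required gain is therefore (16384 >> 6) + 2 = 258 bytes up to btopt, (16384 >> 7) + 2 = 130 for btultra, and (16384 >> 8) + 2 = 66 for btultra2.
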
@@ -1679,7 +1853,8 @@ static size_t ZSTD_compressLiterals (ZST
1679 ZSTD_strategy strategy, int disableLiteralCompression,
1853 ZSTD_strategy strategy, int disableLiteralCompression,
1680 void* dst, size_t dstCapacity,
1854 void* dst, size_t dstCapacity,
1681 const void* src, size_t srcSize,
1855 const void* src, size_t srcSize,
1682 U32* workspace, const int bmi2)
1856 void* workspace, size_t wkspSize,
1857 const int bmi2)
1683 {
1858 {
1684 size_t const minGain = ZSTD_minGain(srcSize, strategy);
1859 size_t const minGain = ZSTD_minGain(srcSize, strategy);
1685 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
1860 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -1708,9 +1883,9 @@ static size_t ZSTD_compressLiterals (ZST
1708 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
1883 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
1709 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
1884 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
1710 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
1885 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
1711 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
1886 workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
1712 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
1887 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
1713 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
1888 workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
1714 if (repeat != HUF_repeat_none) {
1889 if (repeat != HUF_repeat_none) {
1715 /* reused the existing table */
1890 /* reused the existing table */
1716 hType = set_repeat;
1891 hType = set_repeat;
@@ -1977,7 +2152,7 @@ ZSTD_selectEncodingType(
1977 assert(!ZSTD_isError(NCountCost));
2152 assert(!ZSTD_isError(NCountCost));
1978 assert(compressedCost < ERROR(maxCode));
2153 assert(compressedCost < ERROR(maxCode));
1979 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
2154 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
1980 (U32)basicCost, (U32)repeatCost, (U32)compressedCost);
2155 (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
1981 if (basicCost <= repeatCost && basicCost <= compressedCost) {
2156 if (basicCost <= repeatCost && basicCost <= compressedCost) {
1982 DEBUGLOG(5, "Selected set_basic");
2157 DEBUGLOG(5, "Selected set_basic");
1983 assert(isDefaultAllowed);
2158 assert(isDefaultAllowed);
@@ -1999,7 +2174,7 @@ ZSTD_selectEncodingType(
1999 MEM_STATIC size_t
2174 MEM_STATIC size_t
2000 ZSTD_buildCTable(void* dst, size_t dstCapacity,
2175 ZSTD_buildCTable(void* dst, size_t dstCapacity,
2001 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
2176 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
2002 U32* count, U32 max,
2177 unsigned* count, U32 max,
2003 const BYTE* codeTable, size_t nbSeq,
2178 const BYTE* codeTable, size_t nbSeq,
2004 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
2179 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
2005 const FSE_CTable* prevCTable, size_t prevCTableSize,
2180 const FSE_CTable* prevCTable, size_t prevCTableSize,
@@ -2007,11 +2182,13 @@ ZSTD_buildCTable(void* dst, size_t dstCa
2007 {
2182 {
2008 BYTE* op = (BYTE*)dst;
2183 BYTE* op = (BYTE*)dst;
2009 const BYTE* const oend = op + dstCapacity;
2184 const BYTE* const oend = op + dstCapacity;
2185 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
2010
2186
2011 switch (type) {
2187 switch (type) {
2012 case set_rle:
2188 case set_rle:
2189 CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max));
2190 if (dstCapacity==0) return ERROR(dstSize_tooSmall);
2013 *op = codeTable[0];
2191 *op = codeTable[0];
2014 CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max));
2015 return 1;
2192 return 1;
2016 case set_repeat:
2193 case set_repeat:
2017 memcpy(nextCTable, prevCTable, prevCTableSize);
2194 memcpy(nextCTable, prevCTable, prevCTableSize);
@@ -2053,6 +2230,9 @@ ZSTD_encodeSequences_body(
2053 FSE_CState_t stateLitLength;
2230 FSE_CState_t stateLitLength;
2054
2231
2055 CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
2232 CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
2233 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
2234 (int)(blockStream.endPtr - blockStream.startPtr),
2235 (unsigned)dstCapacity);
2056
2236
2057 /* first symbols */
2237 /* first symbols */
2058 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
2238 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
@@ -2085,9 +2265,9 @@ ZSTD_encodeSequences_body(
2085 U32 const ofBits = ofCode;
2265 U32 const ofBits = ofCode;
2086 U32 const mlBits = ML_bits[mlCode];
2266 U32 const mlBits = ML_bits[mlCode];
2087 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
2267 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
2088 sequences[n].litLength,
2268 (unsigned)sequences[n].litLength,
2089 sequences[n].matchLength + MINMATCH,
2269 (unsigned)sequences[n].matchLength + MINMATCH,
2090 sequences[n].offset);
2270 (unsigned)sequences[n].offset);
2091 /* 32b*/ /* 64b*/
2271 /* 32b*/ /* 64b*/
2092 /* (7)*/ /* (7)*/
2272 /* (7)*/ /* (7)*/
2093 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
2273 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
@@ -2112,6 +2292,7 @@ ZSTD_encodeSequences_body(
2112 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
2292 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
2113 }
2293 }
2114 BIT_flushBits(&blockStream); /* (7)*/
2294 BIT_flushBits(&blockStream); /* (7)*/
2295 DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
2115 } }
2296 } }
2116
2297
2117 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
2298 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
@@ -2169,6 +2350,7 @@ static size_t ZSTD_encodeSequences(
2169 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
2350 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
2170 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
2351 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
2171 {
2352 {
2353 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
2172 #if DYNAMIC_BMI2
2354 #if DYNAMIC_BMI2
2173 if (bmi2) {
2355 if (bmi2) {
2174 return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
2356 return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
@@ -2186,16 +2368,20 @@ static size_t ZSTD_encodeSequences(
2186 sequences, nbSeq, longOffsets);
2368 sequences, nbSeq, longOffsets);
2187 }
2369 }
2188
2370
2189 MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
2371 /* ZSTD_compressSequences_internal():
2190 ZSTD_entropyCTables_t const* prevEntropy,
2372 * actually compresses both literals and sequences */
2191 ZSTD_entropyCTables_t* nextEntropy,
2373 MEM_STATIC size_t
2192 ZSTD_CCtx_params const* cctxParams,
2374 ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
2193 void* dst, size_t dstCapacity, U32* workspace,
2375 const ZSTD_entropyCTables_t* prevEntropy,
2194 const int bmi2)
2376 ZSTD_entropyCTables_t* nextEntropy,
2377 const ZSTD_CCtx_params* cctxParams,
2378 void* dst, size_t dstCapacity,
2379 void* workspace, size_t wkspSize,
2380 const int bmi2)
2195 {
2381 {
2196 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2382 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2197 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
2383 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
2198 U32 count[MaxSeq+1];
2384 unsigned count[MaxSeq+1];
2199 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
2385 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
2200 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
2386 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
2201 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
2387 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
@@ -2212,6 +2398,7 @@ MEM_STATIC size_t ZSTD_compressSequences
2212 BYTE* lastNCount = NULL;
2398 BYTE* lastNCount = NULL;
2213
2399
2214 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2400 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2401 DEBUGLOG(5, "ZSTD_compressSequences_internal");
2215
2402
2216 /* Compress literals */
2403 /* Compress literals */
2217 { const BYTE* const literals = seqStorePtr->litStart;
2404 { const BYTE* const literals = seqStorePtr->litStart;
@@ -2222,7 +2409,8 @@ MEM_STATIC size_t ZSTD_compressSequences
2222 cctxParams->cParams.strategy, disableLiteralCompression,
2409 cctxParams->cParams.strategy, disableLiteralCompression,
2223 op, dstCapacity,
2410 op, dstCapacity,
2224 literals, litSize,
2411 literals, litSize,
2225 workspace, bmi2);
2412 workspace, wkspSize,
2413 bmi2);
2226 if (ZSTD_isError(cSize))
2414 if (ZSTD_isError(cSize))
2227 return cSize;
2415 return cSize;
2228 assert(cSize <= dstCapacity);
2416 assert(cSize <= dstCapacity);
@@ -2249,51 +2437,63 @@ MEM_STATIC size_t ZSTD_compressSequences
2249 /* convert length/distances into codes */
2437 /* convert length/distances into codes */
2250 ZSTD_seqToCodes(seqStorePtr);
2438 ZSTD_seqToCodes(seqStorePtr);
2251 /* build CTable for Literal Lengths */
2439 /* build CTable for Literal Lengths */
2252 { U32 max = MaxLL;
2440 { unsigned max = MaxLL;
2253 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */
2441 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
2254 DEBUGLOG(5, "Building LL table");
2442 DEBUGLOG(5, "Building LL table");
2255 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
2443 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
2256 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
2444 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
2445 count, max, mostFrequent, nbSeq,
2446 LLFSELog, prevEntropy->fse.litlengthCTable,
2447 LL_defaultNorm, LL_defaultNormLog,
2448 ZSTD_defaultAllowed, strategy);
2257 assert(set_basic < set_compressed && set_rle < set_compressed);
2449 assert(set_basic < set_compressed && set_rle < set_compressed);
2258 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2450 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2259 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
2451 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
2260 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
2452 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
2261 prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
2453 prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
2262 workspace, HUF_WORKSPACE_SIZE);
2454 workspace, wkspSize);
2263 if (ZSTD_isError(countSize)) return countSize;
2455 if (ZSTD_isError(countSize)) return countSize;
2264 if (LLtype == set_compressed)
2456 if (LLtype == set_compressed)
2265 lastNCount = op;
2457 lastNCount = op;
2266 op += countSize;
2458 op += countSize;
2267 } }
2459 } }
2268 /* build CTable for Offsets */
2460 /* build CTable for Offsets */
2269 { U32 max = MaxOff;
2461 { unsigned max = MaxOff;
2270 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */
2462 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
2271 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
2463 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
2272 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
2464 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
2273 DEBUGLOG(5, "Building OF table");
2465 DEBUGLOG(5, "Building OF table");
2274 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
2466 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
2275 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy);
2467 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
2468 count, max, mostFrequent, nbSeq,
2469 OffFSELog, prevEntropy->fse.offcodeCTable,
2470 OF_defaultNorm, OF_defaultNormLog,
2471 defaultPolicy, strategy);
2276 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2472 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2277 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
2473 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
2278 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2474 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2279 prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
2475 prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
2280 workspace, HUF_WORKSPACE_SIZE);
2476 workspace, wkspSize);
2281 if (ZSTD_isError(countSize)) return countSize;
2477 if (ZSTD_isError(countSize)) return countSize;
2282 if (Offtype == set_compressed)
2478 if (Offtype == set_compressed)
2283 lastNCount = op;
2479 lastNCount = op;
2284 op += countSize;
2480 op += countSize;
2285 } }
2481 } }
2286 /* build CTable for MatchLengths */
2482 /* build CTable for MatchLengths */
2287 { U32 max = MaxML;
2483 { unsigned max = MaxML;
2288 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */
2484 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
2289 DEBUGLOG(5, "Building ML table");
2485 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
2290 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
2486 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
2291 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
2487 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
2488 count, max, mostFrequent, nbSeq,
2489 MLFSELog, prevEntropy->fse.matchlengthCTable,
2490 ML_defaultNorm, ML_defaultNormLog,
2491 ZSTD_defaultAllowed, strategy);
2292 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2492 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2293 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
2493 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
2294 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
2494 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
2295 prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
2495 prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
2296 workspace, HUF_WORKSPACE_SIZE);
2496 workspace, wkspSize);
2297 if (ZSTD_isError(countSize)) return countSize;
2497 if (ZSTD_isError(countSize)) return countSize;
2298 if (MLtype == set_compressed)
2498 if (MLtype == set_compressed)
2299 lastNCount = op;
2499 lastNCount = op;
@@ -2328,19 +2528,24 @@ MEM_STATIC size_t ZSTD_compressSequences
2328 }
2528 }
2329 }
2529 }
2330
2530
2531 DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
2331 return op - ostart;
2532 return op - ostart;
2332 }
2533 }
2333
2534
2334 MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
2535 MEM_STATIC size_t
2335 const ZSTD_entropyCTables_t* prevEntropy,
2536 ZSTD_compressSequences(seqStore_t* seqStorePtr,
2336 ZSTD_entropyCTables_t* nextEntropy,
2537 const ZSTD_entropyCTables_t* prevEntropy,
2337 const ZSTD_CCtx_params* cctxParams,
2538 ZSTD_entropyCTables_t* nextEntropy,
2338 void* dst, size_t dstCapacity,
2539 const ZSTD_CCtx_params* cctxParams,
2339 size_t srcSize, U32* workspace, int bmi2)
2540 void* dst, size_t dstCapacity,
2541 size_t srcSize,
2542 void* workspace, size_t wkspSize,
2543 int bmi2)
2340 {
2544 {
2341 size_t const cSize = ZSTD_compressSequences_internal(
2545 size_t const cSize = ZSTD_compressSequences_internal(
2342 seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
2546 seqStorePtr, prevEntropy, nextEntropy, cctxParams,
2343 workspace, bmi2);
2547 dst, dstCapacity,
2548 workspace, wkspSize, bmi2);
2344 if (cSize == 0) return 0;
2549 if (cSize == 0) return 0;
2345 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2550 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2346 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
2551 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
@@ -2362,7 +2567,7 @@ MEM_STATIC size_t ZSTD_compressSequences
2362 * assumption : strat is a valid strategy */
2567 * assumption : strat is a valid strategy */
2363 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
2568 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
2364 {
2569 {
2365 static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = {
2570 static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
2366 { ZSTD_compressBlock_fast /* default for 0 */,
2571 { ZSTD_compressBlock_fast /* default for 0 */,
2367 ZSTD_compressBlock_fast,
2572 ZSTD_compressBlock_fast,
2368 ZSTD_compressBlock_doubleFast,
2573 ZSTD_compressBlock_doubleFast,
@@ -2371,7 +2576,8 @@ ZSTD_blockCompressor ZSTD_selectBlockCom
2371 ZSTD_compressBlock_lazy2,
2576 ZSTD_compressBlock_lazy2,
2372 ZSTD_compressBlock_btlazy2,
2577 ZSTD_compressBlock_btlazy2,
2373 ZSTD_compressBlock_btopt,
2578 ZSTD_compressBlock_btopt,
2374 ZSTD_compressBlock_btultra },
2579 ZSTD_compressBlock_btultra,
2580 ZSTD_compressBlock_btultra2 },
2375 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
2581 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
2376 ZSTD_compressBlock_fast_extDict,
2582 ZSTD_compressBlock_fast_extDict,
2377 ZSTD_compressBlock_doubleFast_extDict,
2583 ZSTD_compressBlock_doubleFast_extDict,
@@ -2380,6 +2586,7 @@ ZSTD_blockCompressor ZSTD_selectBlockCom
2380 ZSTD_compressBlock_lazy2_extDict,
2586 ZSTD_compressBlock_lazy2_extDict,
2381 ZSTD_compressBlock_btlazy2_extDict,
2587 ZSTD_compressBlock_btlazy2_extDict,
2382 ZSTD_compressBlock_btopt_extDict,
2588 ZSTD_compressBlock_btopt_extDict,
2589 ZSTD_compressBlock_btultra_extDict,
2383 ZSTD_compressBlock_btultra_extDict },
2590 ZSTD_compressBlock_btultra_extDict },
2384 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
2591 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
2385 ZSTD_compressBlock_fast_dictMatchState,
2592 ZSTD_compressBlock_fast_dictMatchState,
@@ -2389,14 +2596,14 @@ ZSTD_blockCompressor ZSTD_selectBlockCom
2389 ZSTD_compressBlock_lazy2_dictMatchState,
2596 ZSTD_compressBlock_lazy2_dictMatchState,
2390 ZSTD_compressBlock_btlazy2_dictMatchState,
2597 ZSTD_compressBlock_btlazy2_dictMatchState,
2391 ZSTD_compressBlock_btopt_dictMatchState,
2598 ZSTD_compressBlock_btopt_dictMatchState,
2599 ZSTD_compressBlock_btultra_dictMatchState,
2392 ZSTD_compressBlock_btultra_dictMatchState }
2600 ZSTD_compressBlock_btultra_dictMatchState }
2393 };
2601 };
2394 ZSTD_blockCompressor selectedCompressor;
2602 ZSTD_blockCompressor selectedCompressor;
2395 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
2603 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
2396
2604
2397 assert((U32)strat >= (U32)ZSTD_fast);
2605 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
2398 assert((U32)strat <= (U32)ZSTD_btultra);
2606 selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
2399 selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
2400 assert(selectedCompressor != NULL);
2607 assert(selectedCompressor != NULL);
2401 return selectedCompressor;
2608 return selectedCompressor;
2402 }
2609 }
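
The dispatch table gains a btultra2 entry in all three dictMode rows, and the two range asserts collapse into a single bounds check. Reaching the new strategy from the public API (a sketch, not from the changeset):

#include <zstd.h>

static void use_btultra2(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btultra2);
}
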
@@ -2421,15 +2628,15 @@ static size_t ZSTD_compressBlock_interna
2421 {
2628 {
2422 ZSTD_matchState_t* const ms = &zc->blockState.matchState;
2629 ZSTD_matchState_t* const ms = &zc->blockState.matchState;
2423 size_t cSize;
2630 size_t cSize;
2424 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)",
2631 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
2425 dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
2632 (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate);
2426 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
2633 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
2427
2634
2428 /* Assert that we have correctly flushed the ctx params into the ms's copy */
2635 /* Assert that we have correctly flushed the ctx params into the ms's copy */
2429 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
2636 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
2430
2637
2431 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
2638 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
2432 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
2639 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
2433 cSize = 0;
2640 cSize = 0;
2434 goto out; /* don't even attempt compression below a certain srcSize */
2641 goto out; /* don't even attempt compression below a certain srcSize */
2435 }
2642 }
@@ -2437,8 +2644,8 b' static size_t ZSTD_compressBlock_interna'
2437 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
2644 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
2438
2645
2439 /* a gap between an attached dict and the current window is not safe,
2646 /* a gap between an attached dict and the current window is not safe,
2440 * they must remain adjacent, and when that stops being the case, the dict
2647 * they must remain adjacent,
2441 * must be unset */
2648 * and when that stops being the case, the dict must be unset */
2442 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
2649 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
2443
2650
2444 /* limited update after a very long match */
2651 /* limited update after a very long match */
@@ -2495,7 +2702,9 b' static size_t ZSTD_compressBlock_interna'
2495 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
2702 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
2496 &zc->appliedParams,
2703 &zc->appliedParams,
2497 dst, dstCapacity,
2704 dst, dstCapacity,
2498 srcSize, zc->entropyWorkspace, zc->bmi2);
2705 srcSize,
2706 zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
2707 zc->bmi2);
2499
2708
2500 out:
2709 out:
2501 if (!ZSTD_isError(cSize) && cSize != 0) {
2710 if (!ZSTD_isError(cSize) && cSize != 0) {
@@ -2535,7 +2744,7 b' static size_t ZSTD_compress_frameChunk ('
2535 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
2744 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
2536 assert(cctx->appliedParams.cParams.windowLog <= 31);
2745 assert(cctx->appliedParams.cParams.windowLog <= 31);
2537
2746
2538 DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (U32)blockSize);
2747 DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
2539 if (cctx->appliedParams.fParams.checksumFlag && srcSize)
2748 if (cctx->appliedParams.fParams.checksumFlag && srcSize)
2540 XXH64_update(&cctx->xxhState, src, srcSize);
2749 XXH64_update(&cctx->xxhState, src, srcSize);
2541
2750
@@ -2583,7 +2792,7 b' static size_t ZSTD_compress_frameChunk ('
2583 assert(dstCapacity >= cSize);
2792 assert(dstCapacity >= cSize);
2584 dstCapacity -= cSize;
2793 dstCapacity -= cSize;
2585 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
2794 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
2586 (U32)cSize);
2795 (unsigned)cSize);
2587 } }
2796 } }
2588
2797
2589 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
2798 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
@@ -2606,9 +2815,9 b' static size_t ZSTD_writeFrameHeader(void'
2606 size_t pos=0;
2815 size_t pos=0;
2607
2816
2608 assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
2817 assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
2609 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
2818 if (dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX) return ERROR(dstSize_tooSmall);
2610 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
2819 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
2611 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
2820 !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
2612
2821
2613 if (params.format == ZSTD_f_zstd1) {
2822 if (params.format == ZSTD_f_zstd1) {
2614 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
2823 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
@@ -2672,7 +2881,7 b' static size_t ZSTD_compressContinue_inte'
2672 size_t fhSize = 0;
2881 size_t fhSize = 0;
2673
2882
2674 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
2883 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
2675 cctx->stage, (U32)srcSize);
2884 cctx->stage, (unsigned)srcSize);
2676 if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
2885 if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
2677
2886
2678 if (frame && (cctx->stage==ZSTDcs_init)) {
2887 if (frame && (cctx->stage==ZSTDcs_init)) {
@@ -2709,7 +2918,7 b' static size_t ZSTD_compressContinue_inte'
2709 }
2918 }
2710 }
2919 }
2711
2920
2712 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
2921 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
2713 { size_t const cSize = frame ?
2922 { size_t const cSize = frame ?
2714 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2923 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2715 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2924 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
@@ -2721,7 +2930,7 b' static size_t ZSTD_compressContinue_inte'
2721 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
2930 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
2722 if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
2931 if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
2723 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
2932 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
2724 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
2933 (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize);
2725 return ERROR(srcSize_wrong);
2934 return ERROR(srcSize_wrong);
2726 }
2935 }
2727 }
2936 }
@@ -2733,7 +2942,7 b' size_t ZSTD_compressContinue (ZSTD_CCtx*'
2733 void* dst, size_t dstCapacity,
2942 void* dst, size_t dstCapacity,
2734 const void* src, size_t srcSize)
2943 const void* src, size_t srcSize)
2735 {
2944 {
2736 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (U32)srcSize);
2945 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
2737 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
2946 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
2738 }
2947 }
2739
2948
@@ -2791,6 +3000,7 b' static size_t ZSTD_loadDictionaryContent'
2791 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
3000 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
2792 case ZSTD_btopt:
3001 case ZSTD_btopt:
2793 case ZSTD_btultra:
3002 case ZSTD_btultra:
3003 case ZSTD_btultra2:
2794 if (srcSize >= HASH_READ_SIZE)
3004 if (srcSize >= HASH_READ_SIZE)
2795 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
3005 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
2796 break;
3006 break;
@@ -2861,7 +3071,9 b' static size_t ZSTD_loadZstdDictionary(ZS'
2861 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
3071 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2862 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
3072 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
2863 /* fill all offset symbols to avoid garbage at end of table */
3073 /* fill all offset symbols to avoid garbage at end of table */
2864 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
3074 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable,
3075 offcodeNCount, MaxOff, offcodeLog,
3076 workspace, HUF_WORKSPACE_SIZE),
2865 dictionary_corrupted);
3077 dictionary_corrupted);
2866 dictPtr += offcodeHeaderSize;
3078 dictPtr += offcodeHeaderSize;
2867 }
3079 }
@@ -2873,7 +3085,9 b' static size_t ZSTD_loadZstdDictionary(ZS'
2873 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
3085 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2874 /* Every match length code must have non-zero probability */
3086 /* Every match length code must have non-zero probability */
2875 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
3087 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
2876 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
3088 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable,
3089 matchlengthNCount, matchlengthMaxValue, matchlengthLog,
3090 workspace, HUF_WORKSPACE_SIZE),
2877 dictionary_corrupted);
3091 dictionary_corrupted);
2878 dictPtr += matchlengthHeaderSize;
3092 dictPtr += matchlengthHeaderSize;
2879 }
3093 }
@@ -2885,7 +3099,9 b' static size_t ZSTD_loadZstdDictionary(ZS'
2885 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
3099 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2886 /* Every literal length code must have non-zero probability */
3100 /* Every literal length code must have non-zero probability */
2887 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
3101 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
2888 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
3102 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable,
3103 litlengthNCount, litlengthMaxValue, litlengthLog,
3104 workspace, HUF_WORKSPACE_SIZE),
2889 dictionary_corrupted);
3105 dictionary_corrupted);
2890 dictPtr += litlengthHeaderSize;
3106 dictPtr += litlengthHeaderSize;
2891 }
3107 }
@@ -3023,7 +3239,7 b' size_t ZSTD_compressBegin_usingDict(ZSTD'
3023 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
3239 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
3024 ZSTD_CCtx_params const cctxParams =
3240 ZSTD_CCtx_params const cctxParams =
3025 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3241 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3026 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
3242 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
3027 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3243 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3028 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
3244 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
3029 }
3245 }
@@ -3067,7 +3283,7 b' static size_t ZSTD_writeEpilogue(ZSTD_CC'
3067 if (cctx->appliedParams.fParams.checksumFlag) {
3283 if (cctx->appliedParams.fParams.checksumFlag) {
3068 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
3284 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
3069 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
3285 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
3070 DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum);
3286 DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
3071 MEM_writeLE32(op, checksum);
3287 MEM_writeLE32(op, checksum);
3072 op += 4;
3288 op += 4;
3073 }
3289 }
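
The epilogue stores the frame checksum as the low 32 bits of the running XXH64 digest, written little-endian after the last block. A hedged sketch of reproducing that value over a whole buffer with the bundled xxhash:

    #include <stddef.h>
    #include <stdint.h>
    #include "xxhash.h"   /* bundled alongside the zstd sources */

    /* checksum a buffer the way the frame epilogue does:
     * XXH64 with seed 0, truncated to its low 32 bits */
    static uint32_t frame_checksum(const void* content, size_t size)
    {
        return (uint32_t)XXH64(content, size, 0);
    }
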
@@ -3093,7 +3309,7 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx'
3093 DEBUGLOG(4, "end of frame : controlling src size");
3309 DEBUGLOG(4, "end of frame : controlling src size");
3094 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
3310 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
3095 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u",
3311 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u",
3096 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
3312 (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize);
3097 return ERROR(srcSize_wrong);
3313 return ERROR(srcSize_wrong);
3098 } }
3314 } }
3099 return cSize + endResult;
3315 return cSize + endResult;
@@ -3139,7 +3355,7 b' size_t ZSTD_compress_advanced_internal('
3139 const void* dict,size_t dictSize,
3355 const void* dict,size_t dictSize,
3140 ZSTD_CCtx_params params)
3356 ZSTD_CCtx_params params)
3141 {
3357 {
3142 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (U32)srcSize);
3358 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
3143 CHECK_F( ZSTD_compressBegin_internal(cctx,
3359 CHECK_F( ZSTD_compressBegin_internal(cctx,
3144 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3360 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3145 params, srcSize, ZSTDb_not_buffered) );
3361 params, srcSize, ZSTDb_not_buffered) );
@@ -3163,7 +3379,7 b' size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx'
3163 const void* src, size_t srcSize,
3379 const void* src, size_t srcSize,
3164 int compressionLevel)
3380 int compressionLevel)
3165 {
3381 {
3166 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
3382 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
3167 assert(cctx != NULL);
3383 assert(cctx != NULL);
3168 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
3384 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
3169 }
3385 }
@@ -3189,7 +3405,7 b' size_t ZSTD_estimateCDictSize_advanced('
3189 size_t dictSize, ZSTD_compressionParameters cParams,
3405 size_t dictSize, ZSTD_compressionParameters cParams,
3190 ZSTD_dictLoadMethod_e dictLoadMethod)
3406 ZSTD_dictLoadMethod_e dictLoadMethod)
3191 {
3407 {
3192 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
3408 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
3193 return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
3409 return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
3194 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
3410 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
3195 }
3411 }
@@ -3203,7 +3419,7 b' size_t ZSTD_estimateCDictSize(size_t dic'
3203 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3419 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3204 {
3420 {
3205 if (cdict==NULL) return 0; /* support sizeof on NULL */
3421 if (cdict==NULL) return 0; /* support sizeof on NULL */
3206 DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
3422 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
3207 return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
3423 return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
3208 }
3424 }
3209
3425
@@ -3214,7 +3430,7 b' static size_t ZSTD_initCDict_internal('
3214 ZSTD_dictContentType_e dictContentType,
3430 ZSTD_dictContentType_e dictContentType,
3215 ZSTD_compressionParameters cParams)
3431 ZSTD_compressionParameters cParams)
3216 {
3432 {
3217 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (U32)dictContentType);
3433 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
3218 assert(!ZSTD_checkCParams(cParams));
3434 assert(!ZSTD_checkCParams(cParams));
3219 cdict->matchState.cParams = cParams;
3435 cdict->matchState.cParams = cParams;
3220 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
3436 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
@@ -3264,7 +3480,7 b' ZSTD_CDict* ZSTD_createCDict_advanced(co'
3264 ZSTD_dictContentType_e dictContentType,
3480 ZSTD_dictContentType_e dictContentType,
3265 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
3481 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
3266 {
3482 {
3267 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictContentType);
3483 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
3268 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
3484 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
3269
3485
3270 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
3486 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
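
ZSTD_createCDict_advanced() digests the dictionary once so many compressions can reuse it. The common non-advanced path looks like this sketch (error handling simplified to a sentinel; the two API calls are the stable ones from zstd.h):

    #include <stddef.h>
    #include "zstd.h"

    /* compress one message against a pre-digested dictionary */
    static size_t compress_with_dict(ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dictBuf, size_t dictSize)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3 /* level */);
        size_t cSize;
        if (cdict == NULL) return (size_t)-1;   /* simplified error signalling */
        cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
        ZSTD_freeCDict(cdict);   /* in real use, keep the CDict and reuse it */
        return cSize;
    }
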
@@ -3345,7 +3561,7 b' const ZSTD_CDict* ZSTD_initStaticCDict('
3345 void* ptr;
3561 void* ptr;
3346 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
3562 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
3347 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
3563 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
3348 (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
3564 (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
3349 if (workspaceSize < neededSize) return NULL;
3565 if (workspaceSize < neededSize) return NULL;
3350
3566
3351 if (dictLoadMethod == ZSTD_dlm_byCopy) {
3567 if (dictLoadMethod == ZSTD_dlm_byCopy) {
@@ -3505,7 +3721,7 b' static size_t ZSTD_resetCStream_internal'
3505 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3721 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3506 {
3722 {
3507 ZSTD_CCtx_params params = zcs->requestedParams;
3723 ZSTD_CCtx_params params = zcs->requestedParams;
3508 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize);
3724 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
3509 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
3725 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
3510 params.fParams.contentSizeFlag = 1;
3726 params.fParams.contentSizeFlag = 1;
3511 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
3727 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
@@ -3525,7 +3741,7 b' size_t ZSTD_initCStream_internal(ZSTD_CS'
3525 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3741 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3526
3742
3527 if (dict && dictSize >= 8) {
3743 if (dict && dictSize >= 8) {
3528 DEBUGLOG(4, "loading dictionary of size %u", (U32)dictSize);
3744 DEBUGLOG(4, "loading dictionary of size %u", (unsigned)dictSize);
3529 if (zcs->staticSize) { /* static CCtx : never uses malloc */
3745 if (zcs->staticSize) { /* static CCtx : never uses malloc */
3530 /* incompatible with internal cdict creation */
3746 /* incompatible with internal cdict creation */
3531 return ERROR(memory_allocation);
3747 return ERROR(memory_allocation);
@@ -3584,7 +3800,7 b' size_t ZSTD_initCStream_advanced(ZSTD_CS'
3584 ZSTD_parameters params, unsigned long long pledgedSrcSize)
3800 ZSTD_parameters params, unsigned long long pledgedSrcSize)
3585 {
3801 {
3586 DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u",
3802 DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u",
3587 (U32)pledgedSrcSize, params.fParams.contentSizeFlag);
3803 (unsigned)pledgedSrcSize, params.fParams.contentSizeFlag);
3588 CHECK_F( ZSTD_checkCParams(params.cParams) );
3804 CHECK_F( ZSTD_checkCParams(params.cParams) );
3589 if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
3805 if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
3590 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3806 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
@@ -3612,8 +3828,15 b' size_t ZSTD_initCStream(ZSTD_CStream* zc'
3612
3828
3613 /*====== Compression ======*/
3829 /*====== Compression ======*/
3614
3830
3615 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
3616 const void* src, size_t srcSize)
3831 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
3832 {
3833 size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
3834 if (hintInSize==0) hintInSize = cctx->blockSize;
3835 return hintInSize;
3836 }
3837 
3838 static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
3839 const void* src, size_t srcSize)
3617 {
3840 {
3618 size_t const length = MIN(dstCapacity, srcSize);
3841 size_t const length = MIN(dstCapacity, srcSize);
3619 if (length) memcpy(dst, src, length);
3842 if (length) memcpy(dst, src, length);
@@ -3621,7 +3844,7 b' MEM_STATIC size_t ZSTD_limitCopy(void* d'
3621 }
3844 }
3622
3845
3623 /** ZSTD_compressStream_generic():
3846 /** ZSTD_compressStream_generic():
3624 * internal function for all *compressStream*() variants and *compress_generic()
3847 * internal function for all *compressStream*() variants
3625 * non-static, because can be called from zstdmt_compress.c
3848 * non-static, because can be called from zstdmt_compress.c
3626 * @return : hint size for next input */
3849 * @return : hint size for next input */
3627 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
3850 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
@@ -3638,7 +3861,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3638 U32 someMoreWork = 1;
3861 U32 someMoreWork = 1;
3639
3862
3640 /* check expectations */
3863 /* check expectations */
3641 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode);
3864 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
3642 assert(zcs->inBuff != NULL);
3865 assert(zcs->inBuff != NULL);
3643 assert(zcs->inBuffSize > 0);
3866 assert(zcs->inBuffSize > 0);
3644 assert(zcs->outBuff != NULL);
3867 assert(zcs->outBuff != NULL);
@@ -3660,12 +3883,12 b' size_t ZSTD_compressStream_generic(ZSTD_'
3660 /* shortcut to compression pass directly into output buffer */
3883 /* shortcut to compression pass directly into output buffer */
3661 size_t const cSize = ZSTD_compressEnd(zcs,
3884 size_t const cSize = ZSTD_compressEnd(zcs,
3662 op, oend-op, ip, iend-ip);
3885 op, oend-op, ip, iend-ip);
3663 DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize);
3886 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
3664 if (ZSTD_isError(cSize)) return cSize;
3887 if (ZSTD_isError(cSize)) return cSize;
3665 ip = iend;
3888 ip = iend;
3666 op += cSize;
3889 op += cSize;
3667 zcs->frameEnded = 1;
3890 zcs->frameEnded = 1;
3668 ZSTD_CCtx_reset(zcs);
3891 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
3669 someMoreWork = 0; break;
3892 someMoreWork = 0; break;
3670 }
3893 }
3671 /* complete loading into inBuffer */
3894 /* complete loading into inBuffer */
@@ -3709,7 +3932,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3709 if (zcs->inBuffTarget > zcs->inBuffSize)
3932 if (zcs->inBuffTarget > zcs->inBuffSize)
3710 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
3933 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
3711 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
3934 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
3712 (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize);
3935 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
3713 if (!lastBlock)
3936 if (!lastBlock)
3714 assert(zcs->inBuffTarget <= zcs->inBuffSize);
3937 assert(zcs->inBuffTarget <= zcs->inBuffSize);
3715 zcs->inToCompress = zcs->inBuffPos;
3938 zcs->inToCompress = zcs->inBuffPos;
@@ -3718,7 +3941,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3718 if (zcs->frameEnded) {
3941 if (zcs->frameEnded) {
3719 DEBUGLOG(5, "Frame completed directly in outBuffer");
3942 DEBUGLOG(5, "Frame completed directly in outBuffer");
3720 someMoreWork = 0;
3943 someMoreWork = 0;
3721 ZSTD_CCtx_reset(zcs);
3944 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
3722 }
3945 }
3723 break;
3946 break;
3724 }
3947 }
@@ -3733,7 +3956,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3733 size_t const flushed = ZSTD_limitCopy(op, oend-op,
3956 size_t const flushed = ZSTD_limitCopy(op, oend-op,
3734 zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3957 zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3735 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
3958 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
3736 (U32)toFlush, (U32)(oend-op), (U32)flushed);
3959 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
3737 op += flushed;
3960 op += flushed;
3738 zcs->outBuffFlushedSize += flushed;
3961 zcs->outBuffFlushedSize += flushed;
3739 if (toFlush!=flushed) {
3962 if (toFlush!=flushed) {
@@ -3746,7 +3969,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3746 if (zcs->frameEnded) {
3969 if (zcs->frameEnded) {
3747 DEBUGLOG(5, "Frame completed on flush");
3970 DEBUGLOG(5, "Frame completed on flush");
3748 someMoreWork = 0;
3971 someMoreWork = 0;
3749 ZSTD_CCtx_reset(zcs);
3972 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
3750 break;
3973 break;
3751 }
3974 }
3752 zcs->streamStage = zcss_load;
3975 zcs->streamStage = zcss_load;
@@ -3761,28 +3984,34 b' size_t ZSTD_compressStream_generic(ZSTD_'
3761 input->pos = ip - istart;
3984 input->pos = ip - istart;
3762 output->pos = op - ostart;
3985 output->pos = op - ostart;
3763 if (zcs->frameEnded) return 0;
3986 if (zcs->frameEnded) return 0;
3764 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3765 if (hintInSize==0) hintInSize = zcs->blockSize;
3766 return hintInSize;
3767 }
3768 }
3987 return ZSTD_nextInputSizeHint(zcs);
3988 }
3989 
3990 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
3991 {
3992 #ifdef ZSTD_MULTITHREAD
3993 if (cctx->appliedParams.nbWorkers >= 1) {
3994 assert(cctx->mtctx != NULL);
3995 return ZSTDMT_nextInputSizeHint(cctx->mtctx);
3996 }
3997 #endif
3998 return ZSTD_nextInputSizeHint(cctx);
3999 
4000 }
3769
4001
3770 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
4002 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
3771 {
4003 {
3772 /* check conditions */
3773 if (output->pos > output->size) return ERROR(GENERIC);
3774 if (input->pos > input->size) return ERROR(GENERIC);
3775 
3776 return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue);
3777 }
4004 CHECK_F( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) );
4005 return ZSTD_nextInputSizeHint_MTorST(zcs);
4006 }
3778
4007
3779
4008
3780 size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
4009 size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
3781 ZSTD_outBuffer* output,
4010 ZSTD_outBuffer* output,
3782 ZSTD_inBuffer* input,
4011 ZSTD_inBuffer* input,
3783 ZSTD_EndDirective endOp)
4012 ZSTD_EndDirective endOp)
3784 {
4013 {
3785 DEBUGLOG(5, "ZSTD_compress_generic, endOp=%u ", (U32)endOp);
4014 DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
3786 /* check conditions */
4015 /* check conditions */
3787 if (output->pos > output->size) return ERROR(GENERIC);
4016 if (output->pos > output->size) return ERROR(GENERIC);
3788 if (input->pos > input->size) return ERROR(GENERIC);
4017 if (input->pos > input->size) return ERROR(GENERIC);
@@ -3792,9 +4021,9 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3792 if (cctx->streamStage == zcss_init) {
4021 if (cctx->streamStage == zcss_init) {
3793 ZSTD_CCtx_params params = cctx->requestedParams;
4022 ZSTD_CCtx_params params = cctx->requestedParams;
3794 ZSTD_prefixDict const prefixDict = cctx->prefixDict;
4023 ZSTD_prefixDict const prefixDict = cctx->prefixDict;
3795 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
4024 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
3796 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
4025 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
3797 DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage");
4026 DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
3798 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
4027 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
3799 params.cParams = ZSTD_getCParamsFromCCtxParams(
4028 params.cParams = ZSTD_getCParamsFromCCtxParams(
3800 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
4029 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
@@ -3807,7 +4036,7 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3807 if (params.nbWorkers > 0) {
4036 if (params.nbWorkers > 0) {
3808 /* mt context creation */
4037 /* mt context creation */
3809 if (cctx->mtctx == NULL) {
4038 if (cctx->mtctx == NULL) {
3810 DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u",
4039 DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
3811 params.nbWorkers);
4040 params.nbWorkers);
3812 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
4041 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
3813 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
4042 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
@@ -3829,6 +4058,7 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3829 assert(cctx->streamStage == zcss_load);
4058 assert(cctx->streamStage == zcss_load);
3830 assert(cctx->appliedParams.nbWorkers == 0);
4059 assert(cctx->appliedParams.nbWorkers == 0);
3831 } }
4060 } }
4061 /* end of transparent initialization stage */
3832
4062
3833 /* compression stage */
4063 /* compression stage */
3834 #ifdef ZSTD_MULTITHREAD
4064 #ifdef ZSTD_MULTITHREAD
@@ -3840,18 +4070,18 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3840 { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
4070 { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
3841 if ( ZSTD_isError(flushMin)
4071 if ( ZSTD_isError(flushMin)
3842 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
4072 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
3843 ZSTD_CCtx_reset(cctx);
4073 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
3844 }
4074 }
3845 DEBUGLOG(5, "completed ZSTD_compress_generic delegating to ZSTDMT_compressStream_generic");
4075 DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
3846 return flushMin;
4076 return flushMin;
3847 } }
4077 } }
3848 #endif
4078 #endif
3849 CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
4079 CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
3850 DEBUGLOG(5, "completed ZSTD_compress_generic");
4080 DEBUGLOG(5, "completed ZSTD_compressStream2");
3851 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
4081 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
3852 }
4082 }
3853
4083
3854 size_t ZSTD_compress_generic_simpleArgs (
4084 size_t ZSTD_compressStream2_simpleArgs (
3855 ZSTD_CCtx* cctx,
4085 ZSTD_CCtx* cctx,
3856 void* dst, size_t dstCapacity, size_t* dstPos,
4086 void* dst, size_t dstCapacity, size_t* dstPos,
3857 const void* src, size_t srcSize, size_t* srcPos,
4087 const void* src, size_t srcSize, size_t* srcPos,
@@ -3859,13 +4089,33 b' size_t ZSTD_compress_generic_simpleArgs '
3859 {
4089 {
3860 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
4090 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
3861 ZSTD_inBuffer input = { src, srcSize, *srcPos };
4091 ZSTD_inBuffer input = { src, srcSize, *srcPos };
3862 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
4092 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
3863 size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp);
4093 size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
3864 *dstPos = output.pos;
4094 *dstPos = output.pos;
3865 *srcPos = input.pos;
4095 *srcPos = input.pos;
3866 return cErr;
4096 return cErr;
3867 }
4097 }
3868
4098
4099 size_t ZSTD_compress2(ZSTD_CCtx* cctx,
4100 void* dst, size_t dstCapacity,
4101 const void* src, size_t srcSize)
4102 {
4103 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
4104 { size_t oPos = 0;
4105 size_t iPos = 0;
4106 size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
4107 dst, dstCapacity, &oPos,
4108 src, srcSize, &iPos,
4109 ZSTD_e_end);
4110 if (ZSTD_isError(result)) return result;
4111 if (result != 0) { /* compression not completed, due to lack of output space */
4112 assert(oPos == dstCapacity);
4113 return ERROR(dstSize_tooSmall);
4114 }
4115 assert(iPos == srcSize); /* all input is expected to be consumed */
4116 return oPos;
4117 }
4118 }
3869
4119
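
ZSTD_compress2() above is the one-shot entry point of the new advanced API: sticky parameters are set on the context beforehand, the session is reset on entry, and anything short of complete output is reported as dstSize_tooSmall. A usage sketch, assuming the ZSTD_c_* parameter names staged in this version's zstd.h (the advanced API may still require ZSTD_STATIC_LINKING_ONLY here):

    #define ZSTD_STATIC_LINKING_ONLY   /* advanced API still staged as experimental in this vendored version */
    #include "zstd.h"

    static size_t compress_checksummed(void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t cSize;
        if (cctx == NULL) return (size_t)-1;   /* simplified error signalling */
        /* sticky parameters: they persist across frames until reset */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
        cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return cSize;   /* compressed size, or an error code (test with ZSTD_isError) */
    }
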
3870 /*====== Finalize ======*/
4120 /*====== Finalize ======*/
3871
4121
@@ -3874,21 +4124,21 b' size_t ZSTD_compress_generic_simpleArgs '
3874 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
4124 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3875 {
4125 {
3876 ZSTD_inBuffer input = { NULL, 0, 0 };
4126 ZSTD_inBuffer input = { NULL, 0, 0 };
3877 if (output->pos > output->size) return ERROR(GENERIC);
4127 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
3878 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) );
3879 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
3880 }
4128 }
3881
4129
3882
4130
3883 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
4131 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3884 {
4132 {
3885 ZSTD_inBuffer input = { NULL, 0, 0 };
4133 ZSTD_inBuffer input = { NULL, 0, 0 };
3886 if (output->pos > output->size) return ERROR(GENERIC);
3887 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) );
4134 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
4135 CHECK_F( remainingToFlush );
4136 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
4137 /* single thread mode : attempt to calculate remaining to flush more precisely */
3888 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
4138 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
3889 size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
4139 size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
3890 size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize;
4140 size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
3891 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (U32)toFlush);
4141 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
3892 return toFlush;
4142 return toFlush;
3893 }
4143 }
3894 }
4144 }
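
ZSTD_endStream() returns a hint of how many bytes remain to flush (0 once the frame, checksum included, is fully written), so callers with small output buffers drain it in a loop. A hedged sketch:

    #include <stdio.h>
    #include "zstd.h"

    /* finish the current frame into `f`, retrying while bytes remain to flush */
    static int finish_frame(ZSTD_CStream* zcs, FILE* f,
                            void* outBuf, size_t outCapacity)
    {
        size_t remaining;
        do {
            ZSTD_outBuffer out = { outBuf, outCapacity, 0 };
            remaining = ZSTD_endStream(zcs, &out);
            if (ZSTD_isError(remaining)) return -1;
            if (fwrite(out.dst, 1, out.pos, f) != out.pos) return -1;
        } while (remaining != 0);
        return 0;
    }
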
@@ -3905,27 +4155,27 b' static const ZSTD_compressionParameters '
3905 /* W, C, H, S, L, TL, strat */
4155 /* W, C, H, S, L, TL, strat */
3906 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
4156 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
3907 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
4157 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
3908 { 19, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
4158 { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
3909 { 20, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
4159 { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
3910 { 20, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
4160 { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
3911 { 20, 18, 18, 2, 5, 2, ZSTD_greedy }, /* level 5 */
4161 { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */
3912 { 21, 18, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */
4162 { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
3913 { 21, 18, 19, 3, 5, 8, ZSTD_lazy2 }, /* level 7 */
4163 { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */
3914 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
4164 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3915 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
4165 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
3916 { 21, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
4166 { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3917 { 21, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
4167 { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3918 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
4168 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3919 { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
4169 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */
3920 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
4170 { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
3921 { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
4171 { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
3922 { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */
4172 { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
3923 { 23, 22, 22, 4, 4, 64, ZSTD_btopt }, /* level 17 */
4173 { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
3924 { 23, 23, 22, 6, 3,256, ZSTD_btopt }, /* level 18 */
4174 { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
3925 { 23, 24, 22, 7, 3,256, ZSTD_btultra }, /* level 19 */
4175 { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
3926 { 25, 25, 23, 7, 3,256, ZSTD_btultra }, /* level 20 */
4176 { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
3927 { 26, 26, 24, 7, 3,512, ZSTD_btultra }, /* level 21 */
4177 { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
3928 { 27, 27, 25, 9, 3,999, ZSTD_btultra }, /* level 22 */
4178 { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
3929 },
4179 },
3930 { /* for srcSize <= 256 KB */
4180 { /* for srcSize <= 256 KB */
3931 /* W, C, H, S, L, T, strat */
4181 /* W, C, H, S, L, T, strat */
@@ -3940,18 +4190,18 b' static const ZSTD_compressionParameters '
3940 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
4190 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3941 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
4191 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3942 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
4192 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3943 { 18, 18, 19, 5, 4, 16, ZSTD_btlazy2 }, /* level 11.*/
4193 { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
3944 { 18, 19, 19, 6, 4, 16, ZSTD_btlazy2 }, /* level 12.*/
4194 { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
3945 { 18, 19, 19, 8, 4, 16, ZSTD_btlazy2 }, /* level 13 */
4195 { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
3946 { 18, 18, 19, 4, 4, 24, ZSTD_btopt }, /* level 14.*/
4196 { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
3947 { 18, 18, 19, 4, 3, 24, ZSTD_btopt }, /* level 15.*/
4197 { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
3948 { 18, 19, 19, 6, 3, 64, ZSTD_btopt }, /* level 16.*/
4198 { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
3949 { 18, 19, 19, 8, 3,128, ZSTD_btopt }, /* level 17.*/
4199 { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
3950 { 18, 19, 19, 10, 3,256, ZSTD_btopt }, /* level 18.*/
4200 { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
3951 { 18, 19, 19, 10, 3,256, ZSTD_btultra }, /* level 19.*/
4201 { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
3952 { 18, 19, 19, 11, 3,512, ZSTD_btultra }, /* level 20.*/
4202 { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
3953 { 18, 19, 19, 12, 3,512, ZSTD_btultra }, /* level 21.*/
4203 { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
3954 { 18, 19, 19, 13, 3,999, ZSTD_btultra }, /* level 22.*/
4204 { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
3955 },
4205 },
3956 { /* for srcSize <= 128 KB */
4206 { /* for srcSize <= 128 KB */
3957 /* W, C, H, S, L, T, strat */
4207 /* W, C, H, S, L, T, strat */
@@ -3966,26 +4216,26 b' static const ZSTD_compressionParameters '
3966 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
4216 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3967 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
4217 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3968 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
4218 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3969 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
4219 { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
3970 { 17, 18, 17, 6, 4, 16, ZSTD_btlazy2 }, /* level 12 */
4220 { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
3971 { 17, 18, 17, 8, 4, 16, ZSTD_btlazy2 }, /* level 13.*/
4221 { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
3972 { 17, 18, 17, 4, 4, 32, ZSTD_btopt }, /* level 14.*/
4222 { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
3973 { 17, 18, 17, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
4223 { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
3974 { 17, 18, 17, 7, 3,128, ZSTD_btopt }, /* level 16.*/
4224 { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
3975 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 17.*/
4225 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
3976 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 18.*/
4226 { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
3977 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 19.*/
4227 { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
3978 { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
4228 { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
3979 { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
4229 { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
3980 { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
4230 { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
3981 },
4231 },
3982 { /* for srcSize <= 16 KB */
4232 { /* for srcSize <= 16 KB */
3983 /* W, C, H, S, L, T, strat */
4233 /* W, C, H, S, L, T, strat */
3984 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
4234 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3985 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
4235 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
3986 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
4236 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
3987 { 14, 14, 14, 2, 4, 1, ZSTD_dfast }, /* level 3.*/
4237 { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */
3988 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4.*/
4238 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
3989 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
4239 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
3990 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
4240 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
3991 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
4241 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
@@ -3993,17 +4243,17 b' static const ZSTD_compressionParameters '
3993 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
4243 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
3994 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
4244 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
3995 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
4245 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
3996 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
4246 { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
3997 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
4247 { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
3998 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
4248 { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
3999 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
4249 { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
4000 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
4250 { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
4001 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
4251 { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
4002 { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 18.*/
4252 { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
4003 { 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* level 19.*/
4253 { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
4004 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
4254 { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
4005 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
4255 { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
4006 { 14, 15, 15, 10, 3,512, ZSTD_btultra }, /* level 22.*/
4256 { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
4007 },
4257 },
4008 };
4258 };
4009
4259
@@ -4022,8 +4272,8 b' ZSTD_compressionParameters ZSTD_getCPara'
4022 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
4272 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
4023 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
4273 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
4024 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
4274 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
4025 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
4275 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
4026
4276 }
4027 }
4277 }
4028
4278
4029 /*! ZSTD_getParams() :
4279 /*! ZSTD_getParams() :
@@ -48,12 +48,6 b' extern "C" {'
48 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
48 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
49 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
49 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
50
50
51 typedef enum {
52 ZSTD_dictDefaultAttach = 0,
53 ZSTD_dictForceAttach = 1,
54 ZSTD_dictForceCopy = -1,
55 } ZSTD_dictAttachPref_e;
56
57 typedef struct ZSTD_prefixDict_s {
51 typedef struct ZSTD_prefixDict_s {
58 const void* dict;
52 const void* dict;
59 size_t dictSize;
53 size_t dictSize;
@@ -96,10 +90,10 b' typedef enum { zop_dynamic=0, zop_predef'
96
90
97 typedef struct {
91 typedef struct {
98 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
92 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
99 U32* litFreq; /* table of literals statistics, of size 256 */
93 unsigned* litFreq; /* table of literals statistics, of size 256 */
100 U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
94 unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
101 U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
95 unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
102 U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
96 unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
103 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
97 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
104 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
98 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
105
99
@@ -139,7 +133,7 b' struct ZSTD_matchState_t {'
139 U32* hashTable3;
133 U32* hashTable3;
140 U32* chainTable;
134 U32* chainTable;
141 optState_t opt; /* optimal parser state */
135 optState_t opt; /* optimal parser state */
142 const ZSTD_matchState_t *dictMatchState;
136 const ZSTD_matchState_t * dictMatchState;
143 ZSTD_compressionParameters cParams;
137 ZSTD_compressionParameters cParams;
144 };
138 };
145
139
@@ -167,7 +161,7 b' typedef struct {'
167 U32 hashLog; /* Log size of hashTable */
161 U32 hashLog; /* Log size of hashTable */
168 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
162 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
169 U32 minMatchLength; /* Minimum match length */
163 U32 minMatchLength; /* Minimum match length */
170 U32 hashEveryLog; /* Log number of entries to skip */
164 U32 hashRateLog; /* Log number of entries to skip */
171 U32 windowLog; /* Window log for the LDM */
165 U32 windowLog; /* Window log for the LDM */
172 } ldmParams_t;
166 } ldmParams_t;
173
167
@@ -196,9 +190,10 b' struct ZSTD_CCtx_params_s {'
196 ZSTD_dictAttachPref_e attachDictPref;
190 ZSTD_dictAttachPref_e attachDictPref;
197
191
198 /* Multithreading: used to pass parameters to mtctx */
192 /* Multithreading: used to pass parameters to mtctx */
199 unsigned nbWorkers;
193 int nbWorkers;
200 unsigned jobSize;
194 size_t jobSize;
201 unsigned overlapSizeLog;
195 int overlapLog;
196 int rsyncable;
202
197
203 /* Long distance matching parameters */
198 /* Long distance matching parameters */
204 ldmParams_t ldmParams;
199 ldmParams_t ldmParams;
@@ -498,6 +493,64 b' MEM_STATIC size_t ZSTD_hashPtr(const voi'
498 }
493 }
499 }
494 }
500
495
496 /** ZSTD_ipow() :
497 * Return base^exponent.
498 */
499 static U64 ZSTD_ipow(U64 base, U64 exponent)
500 {
501 U64 power = 1;
502 while (exponent) {
503 if (exponent & 1) power *= base;
504 exponent >>= 1;
505 base *= base;
506 }
507 return power;
508 }
509
510 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
511
512 /** ZSTD_rollingHash_append() :
513 * Add the buffer to the hash value.
514 */
515 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
516 {
517 BYTE const* istart = (BYTE const*)buf;
518 size_t pos;
519 for (pos = 0; pos < size; ++pos) {
520 hash *= prime8bytes;
521 hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
522 }
523 return hash;
524 }
525
526 /** ZSTD_rollingHash_compute() :
527 * Compute the rolling hash value of the buffer.
528 */
529 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
530 {
531 return ZSTD_rollingHash_append(0, buf, size);
532 }
533
534 /** ZSTD_rollingHash_primePower() :
535 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
536 * over a window of length bytes.
537 */
538 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
539 {
540 return ZSTD_ipow(prime8bytes, length - 1);
541 }
542
543 /** ZSTD_rollingHash_rotate() :
544 * Rotate the rolling hash by one byte.
545 */
546 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
547 {
548 hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
549 hash *= prime8bytes;
550 hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
551 return hash;
552 }
553
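
The helpers above implement a polynomial rolling hash: over a window x_0..x_{n-1} the value is the sum of (x_i + OFFSET) * prime^(n-1-i) mod 2^64, so rotating drops x_0's term (scaled by primePower = prime^(n-1), which ZSTD_ipow() computes by square-and-multiply), multiplies by prime, and appends the incoming byte. A self-contained check of that identity; the prime constant is copied here for illustration and assumed to match prime8bytes from this header:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define OFFSET 10   /* mirrors ZSTD_ROLL_HASH_CHAR_OFFSET */
    static const uint64_t PRIME = 0xCF1BBCDCB7A56463ULL;   /* assumed value of prime8bytes */

    static uint64_t ipow(uint64_t b, uint64_t e)   /* square-and-multiply, as above */
    { uint64_t p = 1; while (e) { if (e & 1) p *= b; e >>= 1; b *= b; } return p; }

    static uint64_t hash_window(const uint8_t* p, size_t n)
    {   /* Horner evaluation of sum (p[i]+OFFSET) * PRIME^(n-1-i) */
        uint64_t h = 0; size_t i;
        for (i = 0; i < n; ++i) h = h * PRIME + p[i] + OFFSET;
        return h;
    }

    int main(void)
    {
        const uint8_t buf[] = "rolling-hash-demo";
        size_t const n = 8;                         /* window length */
        uint64_t const power = ipow(PRIME, n - 1);  /* primePower for this window */
        uint64_t h = hash_window(buf, n);
        h -= (buf[0] + OFFSET) * power;             /* remove oldest byte */
        h *= PRIME;
        h += buf[n] + OFFSET;                       /* add incoming byte */
        assert(h == hash_window(buf + 1, n));       /* window slid by one */
        return 0;
    }
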
501 /*-*************************************
554 /*-*************************************
502 * Round buffer management
555 * Round buffer management
503 ***************************************/
556 ***************************************/
@@ -626,20 +679,23 b' MEM_STATIC U32 ZSTD_window_correctOverfl'
626 * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
679 * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
627 * is below them. forceWindow and dictMatchState are therefore incompatible.
680 * is below them. forceWindow and dictMatchState are therefore incompatible.
628 */
681 */
629 MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
630 void const* srcEnd, U32 maxDist,
631 U32* loadedDictEndPtr,
632 const ZSTD_matchState_t** dictMatchStatePtr)
682 MEM_STATIC void
683 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
684 void const* srcEnd,
685 U32 maxDist,
686 U32* loadedDictEndPtr,
687 const ZSTD_matchState_t** dictMatchStatePtr)
633 {
688 {
634 U32 const current = (U32)((BYTE const*)srcEnd - window->base);
689 U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
635 U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
690 U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
636 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist);
691 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
637 if (current > maxDist + loadedDictEnd) {
692 (unsigned)blockEndIdx, (unsigned)maxDist);
638 U32 const newLowLimit = current - maxDist;
693 if (blockEndIdx > maxDist + loadedDictEnd) {
694 U32 const newLowLimit = blockEndIdx - maxDist;
639 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
695 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
640 if (window->dictLimit < window->lowLimit) {
696 if (window->dictLimit < window->lowLimit) {
641 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
697 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
642 window->dictLimit, window->lowLimit);
698 (unsigned)window->dictLimit, (unsigned)window->lowLimit);
643 window->dictLimit = window->lowLimit;
699 window->dictLimit = window->lowLimit;
644 }
700 }
645 if (loadedDictEndPtr)
701 if (loadedDictEndPtr)
@@ -690,20 +746,23 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
690
746
691
747
692 /* debug functions */
748 /* debug functions */
749 #if (DEBUGLEVEL>=2)
693
750
694 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
751 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
695 {
752 {
696 U32 const fp_accuracy = 8;
753 U32 const fp_accuracy = 8;
697 U32 const fp_multiplier = (1 << fp_accuracy);
754 U32 const fp_multiplier = (1 << fp_accuracy);
698 U32 const stat = rawStat + 1;
755 U32 const newStat = rawStat + 1;
699 U32 const hb = ZSTD_highbit32(stat);
756 U32 const hb = ZSTD_highbit32(newStat);
700 U32 const BWeight = hb * fp_multiplier;
757 U32 const BWeight = hb * fp_multiplier;
701 U32 const FWeight = (stat << fp_accuracy) >> hb;
758 U32 const FWeight = (newStat << fp_accuracy) >> hb;
702 U32 const weight = BWeight + FWeight;
759 U32 const weight = BWeight + FWeight;
703 assert(hb + fp_accuracy < 31);
760 assert(hb + fp_accuracy < 31);
704 return (double)weight / fp_multiplier;
761 return (double)weight / fp_multiplier;
705 }
762 }
706
763
764 /* display a table content,
765 * listing each element, its frequency, and its predicted bit cost */
707 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
766 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
708 {
767 {
709 unsigned u, sum;
768 unsigned u, sum;
@@ -715,6 +774,9 b' MEM_STATIC void ZSTD_debugTable(const U3'
715 }
774 }
716 }
775 }
717
776
777 #endif
778
779
718 #if defined (__cplusplus)
780 #if defined (__cplusplus)
719 }
781 }
720 #endif
782 #endif
@@ -18,7 +18,7 b' void ZSTD_fillDoubleHashTable(ZSTD_match'
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
19 U32* const hashLarge = ms->hashTable;
19 U32* const hashLarge = ms->hashTable;
20 U32 const hBitsL = cParams->hashLog;
20 U32 const hBitsL = cParams->hashLog;
21 U32 const mls = cParams->searchLength;
21 U32 const mls = cParams->minMatch;
22 U32* const hashSmall = ms->chainTable;
22 U32* const hashSmall = ms->chainTable;
23 U32 const hBitsS = cParams->chainLog;
23 U32 const hBitsS = cParams->chainLog;
24 const BYTE* const base = ms->window.base;
24 const BYTE* const base = ms->window.base;
@@ -309,7 +309,7 b' size_t ZSTD_compressBlock_doubleFast('
309 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
309 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
310 void const* src, size_t srcSize)
310 void const* src, size_t srcSize)
311 {
311 {
312 const U32 mls = ms->cParams.searchLength;
312 const U32 mls = ms->cParams.minMatch;
313 switch(mls)
313 switch(mls)
314 {
314 {
315 default: /* includes case 3 */
315 default: /* includes case 3 */
@@ -329,7 +329,7 b' size_t ZSTD_compressBlock_doubleFast_dic'
329 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
329 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
330 void const* src, size_t srcSize)
330 void const* src, size_t srcSize)
331 {
331 {
332 const U32 mls = ms->cParams.searchLength;
332 const U32 mls = ms->cParams.minMatch;
333 switch(mls)
333 switch(mls)
334 {
334 {
335 default: /* includes case 3 */
335 default: /* includes case 3 */
@@ -483,7 +483,7 b' size_t ZSTD_compressBlock_doubleFast_ext'
483 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
483 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
484 void const* src, size_t srcSize)
484 void const* src, size_t srcSize)
485 {
485 {
486 U32 const mls = ms->cParams.searchLength;
486 U32 const mls = ms->cParams.minMatch;
487 switch(mls)
487 switch(mls)
488 {
488 {
489 default: /* includes case 3 */
489 default: /* includes case 3 */
@@ -18,7 +18,7 b' void ZSTD_fillHashTable(ZSTD_matchState_'
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
19 U32* const hashTable = ms->hashTable;
19 U32* const hashTable = ms->hashTable;
20 U32 const hBits = cParams->hashLog;
20 U32 const hBits = cParams->hashLog;
21 U32 const mls = cParams->searchLength;
21 U32 const mls = cParams->minMatch;
22 const BYTE* const base = ms->window.base;
22 const BYTE* const base = ms->window.base;
23 const BYTE* ip = base + ms->nextToUpdate;
23 const BYTE* ip = base + ms->nextToUpdate;
24 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
24 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
@@ -27,18 +27,18 b' void ZSTD_fillHashTable(ZSTD_matchState_'
27 /* Always insert every fastHashFillStep position into the hash table.
27 /* Always insert every fastHashFillStep position into the hash table.
28 * Insert the other positions if their hash entry is empty.
28 * Insert the other positions if their hash entry is empty.
29 */
29 */
30 for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
30 for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
31 U32 const current = (U32)(ip - base);
31 U32 const current = (U32)(ip - base);
32 U32 i;
32 size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
33 for (i = 0; i < fastHashFillStep; ++i) {
33 hashTable[hash0] = current;
34 size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
34 if (dtlm == ZSTD_dtlm_fast) continue;
35 if (i == 0 || hashTable[hash] == 0)
35 /* Only load extra positions for ZSTD_dtlm_full */
36 hashTable[hash] = current + i;
36 { U32 p;
37 /* Only load extra positions for ZSTD_dtlm_full */
37 for (p = 1; p < fastHashFillStep; ++p) {
38 if (dtlm == ZSTD_dtlm_fast)
38 size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
39 break;
39 if (hashTable[hash] == 0) { /* not yet filled */
40 }
40 hashTable[hash] = current + p;
41 }
41 } } } }
42 }
42 }
43
43
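A minimal standalone sketch of the new fill policy, with hypothetical names (toy_hash, fill_sketch) and a simplified loop bound, to make the always-insert vs. insert-if-empty split explicit:

#include <stddef.h>

static size_t toy_hash(const unsigned char* p)   /* stand-in for ZSTD_hashPtr */
{
    return (size_t)(p[0] * 2654435761u);
}

static void fill_sketch(unsigned* table, size_t tableSize,
                        const unsigned char* base, size_t n,
                        size_t step, int fullMode)
{
    size_t pos;
    for (pos = 0; pos + step <= n; pos += step) {
        table[toy_hash(base + pos) % tableSize] = (unsigned)pos;   /* always inserted */
        if (!fullMode) continue;          /* fast mode : stepped positions only */
        {   size_t p;
            for (p = 1; p < step; p++) {  /* full mode : fill gaps, but only empty slots */
                size_t const h = toy_hash(base + pos + p) % tableSize;
                if (table[h] == 0) table[h] = (unsigned)(pos + p);
    }   }   }
}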
44 FORCE_INLINE_TEMPLATE
44 FORCE_INLINE_TEMPLATE
@@ -235,7 +235,7 b' size_t ZSTD_compressBlock_fast('
235 void const* src, size_t srcSize)
235 void const* src, size_t srcSize)
236 {
236 {
237 ZSTD_compressionParameters const* cParams = &ms->cParams;
237 ZSTD_compressionParameters const* cParams = &ms->cParams;
238 U32 const mls = cParams->searchLength;
238 U32 const mls = cParams->minMatch;
239 assert(ms->dictMatchState == NULL);
239 assert(ms->dictMatchState == NULL);
240 switch(mls)
240 switch(mls)
241 {
241 {
@@ -256,7 +256,7 b' size_t ZSTD_compressBlock_fast_dictMatch'
256 void const* src, size_t srcSize)
256 void const* src, size_t srcSize)
257 {
257 {
258 ZSTD_compressionParameters const* cParams = &ms->cParams;
258 ZSTD_compressionParameters const* cParams = &ms->cParams;
259 U32 const mls = cParams->searchLength;
259 U32 const mls = cParams->minMatch;
260 assert(ms->dictMatchState != NULL);
260 assert(ms->dictMatchState != NULL);
261 switch(mls)
261 switch(mls)
262 {
262 {
@@ -375,7 +375,7 b' size_t ZSTD_compressBlock_fast_extDict('
375 void const* src, size_t srcSize)
375 void const* src, size_t srcSize)
376 {
376 {
377 ZSTD_compressionParameters const* cParams = &ms->cParams;
377 ZSTD_compressionParameters const* cParams = &ms->cParams;
378 U32 const mls = cParams->searchLength;
378 U32 const mls = cParams->minMatch;
379 switch(mls)
379 switch(mls)
380 {
380 {
381 default: /* includes case 3 */
381 default: /* includes case 3 */
@@ -63,12 +63,13 b' ZSTD_updateDUBT(ZSTD_matchState_t* ms,'
63 static void
63 static void
64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65 U32 current, const BYTE* inputEnd,
65 U32 current, const BYTE* inputEnd,
66 U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
66 U32 nbCompares, U32 btLow,
67 const ZSTD_dictMode_e dictMode)
67 {
68 {
68 const ZSTD_compressionParameters* const cParams = &ms->cParams;
69 const ZSTD_compressionParameters* const cParams = &ms->cParams;
69 U32* const bt = ms->chainTable;
70 U32* const bt = ms->chainTable;
70 U32 const btLog = cParams->chainLog - 1;
71 U32 const btLog = cParams->chainLog - 1;
71 U32 const btMask = (1 << btLog) - 1;
72 U32 const btMask = (1 << btLog) - 1;
72 size_t commonLengthSmaller=0, commonLengthLarger=0;
73 size_t commonLengthSmaller=0, commonLengthLarger=0;
73 const BYTE* const base = ms->window.base;
74 const BYTE* const base = ms->window.base;
74 const BYTE* const dictBase = ms->window.dictBase;
75 const BYTE* const dictBase = ms->window.dictBase;
@@ -80,7 +81,7 b' ZSTD_insertDUBT1(ZSTD_matchState_t* ms,'
80 const BYTE* match;
81 const BYTE* match;
81 U32* smallerPtr = bt + 2*(current&btMask);
82 U32* smallerPtr = bt + 2*(current&btMask);
82 U32* largerPtr = smallerPtr + 1;
83 U32* largerPtr = smallerPtr + 1;
83 U32 matchIndex = *smallerPtr;
84 U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
84 U32 dummy32; /* to be nullified at the end */
85 U32 dummy32; /* to be nullified at the end */
85 U32 const windowLow = ms->window.lowLimit;
86 U32 const windowLow = ms->window.lowLimit;
86
87
@@ -93,6 +94,9 b' ZSTD_insertDUBT1(ZSTD_matchState_t* ms,'
93 U32* const nextPtr = bt + 2*(matchIndex & btMask);
94 U32* const nextPtr = bt + 2*(matchIndex & btMask);
94 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
95 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
95 assert(matchIndex < current);
96 assert(matchIndex < current);
97 /* note : all candidates are now supposed sorted,
98 * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
99 * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
96
100
97 if ( (dictMode != ZSTD_extDict)
101 if ( (dictMode != ZSTD_extDict)
98 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
102 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
@@ -108,7 +112,7 b' ZSTD_insertDUBT1(ZSTD_matchState_t* ms,'
108 match = dictBase + matchIndex;
112 match = dictBase + matchIndex;
109 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
113 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
110 if (matchIndex+matchLength >= dictLimit)
114 if (matchIndex+matchLength >= dictLimit)
111 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
115 match = base + matchIndex; /* preparation for next read of match[matchLength] */
112 }
116 }
113
117
114 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
118 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
@@ -147,6 +151,7 b' ZSTD_DUBT_findBetterDictMatch ('
147 ZSTD_matchState_t* ms,
151 ZSTD_matchState_t* ms,
148 const BYTE* const ip, const BYTE* const iend,
152 const BYTE* const ip, const BYTE* const iend,
149 size_t* offsetPtr,
153 size_t* offsetPtr,
154 size_t bestLength,
150 U32 nbCompares,
155 U32 nbCompares,
151 U32 const mls,
156 U32 const mls,
152 const ZSTD_dictMode_e dictMode)
157 const ZSTD_dictMode_e dictMode)
@@ -172,8 +177,7 b' ZSTD_DUBT_findBetterDictMatch ('
172 U32 const btMask = (1 << btLog) - 1;
177 U32 const btMask = (1 << btLog) - 1;
173 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
178 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
174
179
175 size_t commonLengthSmaller=0, commonLengthLarger=0, bestLength=0;
180 size_t commonLengthSmaller=0, commonLengthLarger=0;
176 U32 matchEndIdx = current+8+1;
177
181
178 (void)dictMode;
182 (void)dictMode;
179 assert(dictMode == ZSTD_dictMatchState);
183 assert(dictMode == ZSTD_dictMatchState);
@@ -188,10 +192,8 b' ZSTD_DUBT_findBetterDictMatch ('
188
192
189 if (matchLength > bestLength) {
193 if (matchLength > bestLength) {
190 U32 matchIndex = dictMatchIndex + dictIndexDelta;
194 U32 matchIndex = dictMatchIndex + dictIndexDelta;
191 if (matchLength > matchEndIdx - matchIndex)
192 matchEndIdx = matchIndex + (U32)matchLength;
193 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
195 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
194 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
196 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
195 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
197 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
196 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
198 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
197 }
199 }
@@ -200,7 +202,6 b' ZSTD_DUBT_findBetterDictMatch ('
200 }
202 }
201 }
203 }
202
204
203 DEBUGLOG(2, "matchLength:%6zu, match:%p, prefixStart:%p, ip:%p", matchLength, match, prefixStart, ip);
204 if (match[matchLength] < ip[matchLength]) {
205 if (match[matchLength] < ip[matchLength]) {
205 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
206 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
206 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
207 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
@@ -215,7 +216,7 b' ZSTD_DUBT_findBetterDictMatch ('
215
216
216 if (bestLength >= MINMATCH) {
217 if (bestLength >= MINMATCH) {
217 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
218 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
218 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
219 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
219 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
220 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
220 }
221 }
221 return bestLength;
222 return bestLength;
@@ -261,7 +262,7 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_'
261 && (nbCandidates > 1) ) {
262 && (nbCandidates > 1) ) {
262 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
263 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
263 matchIndex);
264 matchIndex);
265 *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to walk back up to the original position */
265 *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
265 previousCandidate = matchIndex;
266 previousCandidate = matchIndex;
266 matchIndex = *nextCandidate;
267 matchIndex = *nextCandidate;
267 nextCandidate = bt + 2*(matchIndex&btMask);
268 nextCandidate = bt + 2*(matchIndex&btMask);
@@ -269,11 +270,13 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_'
269 nbCandidates --;
270 nbCandidates --;
270 }
271 }
271
272
273 /* nullify last candidate if it's still unsorted
274 * simplification, detrimental to compression ratio, beneficial for speed */
272 if ( (matchIndex > unsortLimit)
275 if ( (matchIndex > unsortLimit)
273 && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
276 && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
274 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
277 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
275 matchIndex);
278 matchIndex);
276 *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
279 *nextCandidate = *unsortedMark = 0;
277 }
280 }
278
281
279 /* batch sort stacked candidates */
282 /* batch sort stacked candidates */
@@ -288,14 +291,14 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_'
288 }
291 }
289
292
290 /* find longest match */
293 /* find longest match */
291 { size_t commonLengthSmaller=0, commonLengthLarger=0;
294 { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
292 const BYTE* const dictBase = ms->window.dictBase;
295 const BYTE* const dictBase = ms->window.dictBase;
293 const U32 dictLimit = ms->window.dictLimit;
296 const U32 dictLimit = ms->window.dictLimit;
294 const BYTE* const dictEnd = dictBase + dictLimit;
297 const BYTE* const dictEnd = dictBase + dictLimit;
295 const BYTE* const prefixStart = base + dictLimit;
298 const BYTE* const prefixStart = base + dictLimit;
296 U32* smallerPtr = bt + 2*(current&btMask);
299 U32* smallerPtr = bt + 2*(current&btMask);
297 U32* largerPtr = bt + 2*(current&btMask) + 1;
300 U32* largerPtr = bt + 2*(current&btMask) + 1;
298 U32 matchEndIdx = current+8+1;
301 U32 matchEndIdx = current + 8 + 1;
299 U32 dummy32; /* to be nullified at the end */
302 U32 dummy32; /* to be nullified at the end */
300 size_t bestLength = 0;
303 size_t bestLength = 0;
301
304
@@ -323,6 +326,11 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_'
323 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
326 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
324 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
327 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
325 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
328 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
329 if (dictMode == ZSTD_dictMatchState) {
330 nbCompares = 0; /* in addition to avoiding checking any
331 * further in this loop, make sure we
332 * skip checking in the dictionary. */
333 }
326 break; /* drop, to guarantee consistency (miss a little bit of compression) */
334 break; /* drop, to guarantee consistency (miss a little bit of compression) */
327 }
335 }
328 }
336 }
@@ -346,7 +354,10 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_'
346 *smallerPtr = *largerPtr = 0;
354 *smallerPtr = *largerPtr = 0;
347
355
348 if (dictMode == ZSTD_dictMatchState && nbCompares) {
356 if (dictMode == ZSTD_dictMatchState && nbCompares) {
349 bestLength = ZSTD_DUBT_findBetterDictMatch(ms, ip, iend, offsetPtr, nbCompares, mls, dictMode);
357 bestLength = ZSTD_DUBT_findBetterDictMatch(
358 ms, ip, iend,
359 offsetPtr, bestLength, nbCompares,
360 mls, dictMode);
350 }
361 }
351
362
352 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
363 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
@@ -381,7 +392,7 b' ZSTD_BtFindBestMatch_selectMLS ( ZSTD_m'
381 const BYTE* ip, const BYTE* const iLimit,
392 const BYTE* ip, const BYTE* const iLimit,
382 size_t* offsetPtr)
393 size_t* offsetPtr)
383 {
394 {
384 switch(ms->cParams.searchLength)
395 switch(ms->cParams.minMatch)
385 {
396 {
386 default : /* includes case 3 */
397 default : /* includes case 3 */
387 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
398 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
@@ -397,7 +408,7 b' static size_t ZSTD_BtFindBestMatch_dictM'
397 const BYTE* ip, const BYTE* const iLimit,
408 const BYTE* ip, const BYTE* const iLimit,
398 size_t* offsetPtr)
409 size_t* offsetPtr)
399 {
410 {
400 switch(ms->cParams.searchLength)
411 switch(ms->cParams.minMatch)
401 {
412 {
402 default : /* includes case 3 */
413 default : /* includes case 3 */
403 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
414 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
@@ -413,7 +424,7 b' static size_t ZSTD_BtFindBestMatch_extDi'
413 const BYTE* ip, const BYTE* const iLimit,
424 const BYTE* ip, const BYTE* const iLimit,
414 size_t* offsetPtr)
425 size_t* offsetPtr)
415 {
426 {
416 switch(ms->cParams.searchLength)
427 switch(ms->cParams.minMatch)
417 {
428 {
418 default : /* includes case 3 */
429 default : /* includes case 3 */
419 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
430 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
@@ -428,7 +439,7 b' static size_t ZSTD_BtFindBestMatch_extDi'
428 /* *********************************
439 /* *********************************
429 * Hash Chain
440 * Hash Chain
430 ***********************************/
441 ***********************************/
431 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
442 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
432
443
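The added parentheses around `mask` matter whenever the macro argument is an expression; with the old expansion, C precedence could regroup it:

/* NEXT_IN_CHAIN(d, a | b) used to expand to chainTable[(d) & a | b],
 * which parses as chainTable[((d) & a) | b] since '&' binds tighter than '|';
 * with (mask) it expands to chainTable[(d) & (a | b)], as intended. */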
433 /* Update chains up to ip (excluded)
444 /* Update chains up to ip (excluded)
434 Assumption : always within prefix (i.e. not within extDict) */
445 Assumption : always within prefix (i.e. not within extDict) */
@@ -458,7 +469,7 b' static U32 ZSTD_insertAndFindFirstIndex_'
458
469
459 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
470 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
460 const ZSTD_compressionParameters* const cParams = &ms->cParams;
471 const ZSTD_compressionParameters* const cParams = &ms->cParams;
461 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.searchLength);
472 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
462 }
473 }
463
474
464
475
@@ -492,6 +503,7 b' size_t ZSTD_HcFindBestMatch_generic ('
492 size_t currentMl=0;
503 size_t currentMl=0;
493 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
504 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
494 const BYTE* const match = base + matchIndex;
505 const BYTE* const match = base + matchIndex;
506 assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
495 if (match[ml] == ip[ml]) /* potentially better */
507 if (match[ml] == ip[ml]) /* potentially better */
496 currentMl = ZSTD_count(ip, match, iLimit);
508 currentMl = ZSTD_count(ip, match, iLimit);
497 } else {
509 } else {
@@ -554,7 +566,7 b' FORCE_INLINE_TEMPLATE size_t ZSTD_HcFind'
554 const BYTE* ip, const BYTE* const iLimit,
566 const BYTE* ip, const BYTE* const iLimit,
555 size_t* offsetPtr)
567 size_t* offsetPtr)
556 {
568 {
557 switch(ms->cParams.searchLength)
569 switch(ms->cParams.minMatch)
558 {
570 {
559 default : /* includes case 3 */
571 default : /* includes case 3 */
560 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
572 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
@@ -570,7 +582,7 b' static size_t ZSTD_HcFindBestMatch_dictM'
570 const BYTE* ip, const BYTE* const iLimit,
582 const BYTE* ip, const BYTE* const iLimit,
571 size_t* offsetPtr)
583 size_t* offsetPtr)
572 {
584 {
573 switch(ms->cParams.searchLength)
585 switch(ms->cParams.minMatch)
574 {
586 {
575 default : /* includes case 3 */
587 default : /* includes case 3 */
576 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
588 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
@@ -586,7 +598,7 b' FORCE_INLINE_TEMPLATE size_t ZSTD_HcFind'
586 const BYTE* ip, const BYTE* const iLimit,
598 const BYTE* ip, const BYTE* const iLimit,
587 size_t* offsetPtr)
599 size_t* offsetPtr)
588 {
600 {
589 switch(ms->cParams.searchLength)
601 switch(ms->cParams.minMatch)
590 {
602 {
591 default : /* includes case 3 */
603 default : /* includes case 3 */
592 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
604 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
@@ -37,8 +37,8 b' void ZSTD_ldm_adjustParameters(ldmParams'
37 params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
37 params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
38 assert(params->hashLog <= ZSTD_HASHLOG_MAX);
38 assert(params->hashLog <= ZSTD_HASHLOG_MAX);
39 }
39 }
40 if (params->hashEveryLog == 0) {
40 if (params->hashRateLog == 0) {
41 params->hashEveryLog = params->windowLog < params->hashLog
41 params->hashRateLog = params->windowLog < params->hashLog
42 ? 0
42 ? 0
43 : params->windowLog - params->hashLog;
43 : params->windowLog - params->hashLog;
44 }
44 }
@@ -119,20 +119,20 b' static void ZSTD_ldm_insertEntry(ldmStat'
119 *
119 *
120 * Gets the small hash, checksum, and tag from the rollingHash.
120 * Gets the small hash, checksum, and tag from the rollingHash.
121 *
121 *
122 * If the tag matches (1 << ldmParams.hashEveryLog)-1, then
122 * If the tag matches (1 << ldmParams.hashRateLog)-1, then
123 * creates an ldmEntry from the offset, and inserts it into the hash table.
123 * creates an ldmEntry from the offset, and inserts it into the hash table.
124 *
124 *
125 * hBits is the length of the small hash, which is the most significant hBits
125 * hBits is the length of the small hash, which is the most significant hBits
126 * of rollingHash. The checksum is the next 32 most significant bits, followed
126 * of rollingHash. The checksum is the next 32 most significant bits, followed
127 * by ldmParams.hashEveryLog bits that make up the tag. */
127 * by ldmParams.hashRateLog bits that make up the tag. */
128 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
128 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
129 U64 const rollingHash,
129 U64 const rollingHash,
130 U32 const hBits,
130 U32 const hBits,
131 U32 const offset,
131 U32 const offset,
132 ldmParams_t const ldmParams)
132 ldmParams_t const ldmParams)
133 {
133 {
134 U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
134 U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
135 U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
135 U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
136 if (tag == tagMask) {
136 if (tag == tagMask) {
137 U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
137 U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
138 U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
138 U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
@@ -143,56 +143,6 b' static void ZSTD_ldm_makeEntryAndInsertB'
143 }
143 }
144 }
144 }
145
145
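The tag test above makes insertion probabilistic; a quick arithmetic example (illustrative parameter value):

/* hashRateLog = 7 => tagMask = 0x7F, so an entry is created only when the
 * low 7 tag bits are all set : on average 1 position in 2^7 = 128 is inserted.
 * Raising hashRateLog therefore thins the LDM table, trading match coverage
 * for speed and memory traffic. */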
146 /** ZSTD_ldm_getRollingHash() :
147 * Get a 64-bit hash using the first len bytes from buf.
148 *
149 * Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
150 * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
151 *
152 * where the constant a is defined to be prime8bytes.
153 *
154 * The implementation adds an offset to each byte, so
155 * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
156 static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
157 {
158 U64 ret = 0;
159 U32 i;
160 for (i = 0; i < len; i++) {
161 ret *= prime8bytes;
162 ret += buf[i] + LDM_HASH_CHAR_OFFSET;
163 }
164 return ret;
165 }
166
167 /** ZSTD_ldm_ipow() :
168 * Return base^exp. */
169 static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
170 {
171 U64 ret = 1;
172 while (exp) {
173 if (exp & 1) { ret *= base; }
174 exp >>= 1;
175 base *= base;
176 }
177 return ret;
178 }
179
180 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
181 DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
182 assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
183 return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
184 }
185
186 /** ZSTD_ldm_updateHash() :
187 * Updates hash by removing toRemove and adding toAdd. */
188 static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
189 {
190 hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
191 hash *= prime8bytes;
192 hash += toAdd + LDM_HASH_CHAR_OFFSET;
193 return hash;
194 }
195
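The ZSTD_rollingHash_rotate() replacement used elsewhere in this patch relies on the standard rolling identity of this polynomial hash (character offsets elided for clarity):

/* H(s_2 .. s_{k+1}) = ( H(s_1 .. s_k) - s_1 * a^(k-1) ) * a + s_{k+1}
 * i.e. subtract the oldest byte times the precomputed a^(k-1) (the
 * "hashPower"), multiply by a, add the newest byte -- exactly the three
 * steps the deleted ZSTD_ldm_updateHash() performed. */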
196 /** ZSTD_ldm_countBackwardsMatch() :
146 /** ZSTD_ldm_countBackwardsMatch() :
197 * Returns the number of bytes that match backwards before pIn and pMatch.
147 * Returns the number of bytes that match backwards before pIn and pMatch.
198 *
148 *
@@ -238,6 +188,7 b' static size_t ZSTD_ldm_fillFastTables(ZS'
238 case ZSTD_btlazy2:
188 case ZSTD_btlazy2:
239 case ZSTD_btopt:
189 case ZSTD_btopt:
240 case ZSTD_btultra:
190 case ZSTD_btultra:
191 case ZSTD_btultra2:
241 break;
192 break;
242 default:
193 default:
243 assert(0); /* not possible : not a valid strategy id */
194 assert(0); /* not possible : not a valid strategy id */
@@ -261,9 +212,9 b' static U64 ZSTD_ldm_fillLdmHashTable(ldm'
261 const BYTE* cur = lastHashed + 1;
212 const BYTE* cur = lastHashed + 1;
262
213
263 while (cur < iend) {
214 while (cur < iend) {
264 rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
215 rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
265 cur[ldmParams.minMatchLength-1],
216 cur[ldmParams.minMatchLength-1],
266 state->hashPower);
217 state->hashPower);
267 ZSTD_ldm_makeEntryAndInsertByTag(state,
218 ZSTD_ldm_makeEntryAndInsertByTag(state,
268 rollingHash, hBits,
219 rollingHash, hBits,
269 (U32)(cur - base), ldmParams);
220 (U32)(cur - base), ldmParams);
@@ -297,8 +248,8 b' static size_t ZSTD_ldm_generateSequences'
297 U64 const hashPower = ldmState->hashPower;
248 U64 const hashPower = ldmState->hashPower;
298 U32 const hBits = params->hashLog - params->bucketSizeLog;
249 U32 const hBits = params->hashLog - params->bucketSizeLog;
299 U32 const ldmBucketSize = 1U << params->bucketSizeLog;
250 U32 const ldmBucketSize = 1U << params->bucketSizeLog;
300 U32 const hashEveryLog = params->hashEveryLog;
251 U32 const hashRateLog = params->hashRateLog;
301 U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
252 U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
302 /* Prefix and extDict parameters */
253 /* Prefix and extDict parameters */
303 U32 const dictLimit = ldmState->window.dictLimit;
254 U32 const dictLimit = ldmState->window.dictLimit;
304 U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
255 U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@@ -324,16 +275,16 b' static size_t ZSTD_ldm_generateSequences'
324 size_t forwardMatchLength = 0, backwardMatchLength = 0;
275 size_t forwardMatchLength = 0, backwardMatchLength = 0;
325 ldmEntry_t* bestEntry = NULL;
276 ldmEntry_t* bestEntry = NULL;
326 if (ip != istart) {
277 if (ip != istart) {
327 rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
278 rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
328 lastHashed[minMatchLength],
279 lastHashed[minMatchLength],
329 hashPower);
280 hashPower);
330 } else {
281 } else {
331 rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
282 rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
332 }
283 }
333 lastHashed = ip;
284 lastHashed = ip;
334
285
335 /* Do not insert and do not look for a match */
286 /* Do not insert and do not look for a match */
336 if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
287 if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
337 ip++;
288 ip++;
338 continue;
289 continue;
339 }
290 }
@@ -593,7 +544,7 b' size_t ZSTD_ldm_blockCompress(rawSeqStor'
593 void const* src, size_t srcSize)
544 void const* src, size_t srcSize)
594 {
545 {
595 const ZSTD_compressionParameters* const cParams = &ms->cParams;
546 const ZSTD_compressionParameters* const cParams = &ms->cParams;
596 unsigned const minMatch = cParams->searchLength;
547 unsigned const minMatch = cParams->minMatch;
597 ZSTD_blockCompressor const blockCompressor =
548 ZSTD_blockCompressor const blockCompressor =
598 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
549 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
599 /* Input bounds */
550 /* Input bounds */
@@ -21,7 +21,7 b' extern "C" {'
21 * Long distance matching
21 * Long distance matching
22 ***************************************/
22 ***************************************/
23
23
24 #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
24 #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
25
25
26 /**
26 /**
27 * ZSTD_ldm_generateSequences():
27 * ZSTD_ldm_generateSequences():
@@ -86,12 +86,8 b' size_t ZSTD_ldm_getTableSize(ldmParams_t'
86 */
86 */
87 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
87 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
88
88
89 /** ZSTD_ldm_getTableSize() :
90 * Return prime8bytes^(minMatchLength-1) */
91 U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
92
93 /** ZSTD_ldm_adjustParameters() :
89 /** ZSTD_ldm_adjustParameters() :
94 * If the params->hashEveryLog is not set, set it to its default value based on
90 * If the params->hashRateLog is not set, set it to its default value based on
95 * windowLog and params->hashLog.
91 * windowLog and params->hashLog.
96 *
92 *
97 * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
93 * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
@@ -17,6 +17,8 b''
17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18 #define ZSTD_MAX_PRICE (1<<30)
18 #define ZSTD_MAX_PRICE (1<<30)
19
19
20 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
21
20
22
21 /*-*************************************
23 /*-*************************************
22 * Price functions for optimal parser
24 * Price functions for optimal parser
@@ -52,11 +54,15 b' MEM_STATIC U32 ZSTD_fracWeight(U32 rawSt'
52 return weight;
54 return weight;
53 }
55 }
54
56
55 /* debugging function, @return price in bytes */
57 #if (DEBUGLEVEL>=2)
58 /* debugging function,
59 * @return price in bytes as fractional value
60 * for debug messages only */
56 MEM_STATIC double ZSTD_fCost(U32 price)
61 MEM_STATIC double ZSTD_fCost(U32 price)
57 {
62 {
58 return (double)price / (BITCOST_MULTIPLIER*8);
63 return (double)price / (BITCOST_MULTIPLIER*8);
59 }
64 }
65 #endif
60
66
61 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
67 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
62 {
68 {
@@ -67,29 +73,44 b' static void ZSTD_setBasePrices(optState_'
67 }
73 }
68
74
69
75
70 static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
76 /* ZSTD_downscaleStat() :
77 * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
78 * return the resulting sum of elements */
79 static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
71 {
80 {
72 U32 s, sum=0;
81 U32 s, sum=0;
82 DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
73 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
83 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
74 for (s=0; s<=lastEltIndex; s++) {
84 for (s=0; s<lastEltIndex+1; s++) {
75 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
85 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
76 sum += table[s];
86 sum += table[s];
77 }
87 }
78 return sum;
88 return sum;
79 }
89 }
80
90
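A quick numeric check of the downscale formula (ZSTD_FREQ_DIV = 4, malus = 0):

/* table[s] = 1 + (table[s] >> 4) : 100 -> 7, 16 -> 2, 0 -> 1.
 * Every symbol keeps a non-zero frequency (its price stays finite) while
 * the previous block's statistics are decayed by roughly 16x before being
 * reused as a seed. */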
81 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
91 /* ZSTD_rescaleFreqs() :
82 const BYTE* const src, size_t const srcSize,
92 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
83 int optLevel)
93 * take hints from dictionary if there is one
94 * or init from zero, using src for literals stats, or flat 1 for match symbols
95 * otherwise downscale existing stats, to be used as seed for next block.
96 */
97 static void
98 ZSTD_rescaleFreqs(optState_t* const optPtr,
99 const BYTE* const src, size_t const srcSize,
100 int const optLevel)
84 {
101 {
102 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
85 optPtr->priceType = zop_dynamic;
103 optPtr->priceType = zop_dynamic;
86
104
87 if (optPtr->litLengthSum == 0) { /* first block : init */
105 if (optPtr->litLengthSum == 0) { /* first block : init */
88 if (srcSize <= 1024) /* heuristic */
106 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
107 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
89 optPtr->priceType = zop_predef;
108 optPtr->priceType = zop_predef;
109 }
90
110
91 assert(optPtr->symbolCosts != NULL);
111 assert(optPtr->symbolCosts != NULL);
92 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
112 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
113 /* huffman table presumed generated by dictionary */
93 optPtr->priceType = zop_dynamic;
114 optPtr->priceType = zop_dynamic;
94
115
95 assert(optPtr->litFreq != NULL);
116 assert(optPtr->litFreq != NULL);
@@ -208,7 +229,9 b' static U32 ZSTD_litLengthPrice(U32 const'
208
229
209 /* dynamic statistics */
230 /* dynamic statistics */
210 { U32 const llCode = ZSTD_LLcode(litLength);
231 { U32 const llCode = ZSTD_LLcode(litLength);
211 return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
232 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
233 + optPtr->litLengthSumBasePrice
234 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
212 }
235 }
213 }
236 }
214
237
@@ -253,7 +276,7 b' static int ZSTD_literalsContribution(con'
253 FORCE_INLINE_TEMPLATE U32
276 FORCE_INLINE_TEMPLATE U32
254 ZSTD_getMatchPrice(U32 const offset,
277 ZSTD_getMatchPrice(U32 const offset,
255 U32 const matchLength,
278 U32 const matchLength,
256 const optState_t* const optPtr,
279 const optState_t* const optPtr,
257 int const optLevel)
280 int const optLevel)
258 {
281 {
259 U32 price;
282 U32 price;
@@ -385,7 +408,6 b' static U32 ZSTD_insertBt1('
385 U32* largerPtr = smallerPtr + 1;
408 U32* largerPtr = smallerPtr + 1;
386 U32 dummy32; /* to be nullified at the end */
409 U32 dummy32; /* to be nullified at the end */
387 U32 const windowLow = ms->window.lowLimit;
410 U32 const windowLow = ms->window.lowLimit;
388 U32 const matchLow = windowLow ? windowLow : 1;
389 U32 matchEndIdx = current+8+1;
411 U32 matchEndIdx = current+8+1;
390 size_t bestLength = 8;
412 size_t bestLength = 8;
391 U32 nbCompares = 1U << cParams->searchLog;
413 U32 nbCompares = 1U << cParams->searchLog;
@@ -401,7 +423,8 b' static U32 ZSTD_insertBt1('
401 assert(ip <= iend-8); /* required for h calculation */
423 assert(ip <= iend-8); /* required for h calculation */
402 hashTable[h] = current; /* Update Hash Table */
424 hashTable[h] = current; /* Update Hash Table */
403
425
404 while (nbCompares-- && (matchIndex >= matchLow)) {
426 assert(windowLow > 0);
427 while (nbCompares-- && (matchIndex >= windowLow)) {
405 U32* const nextPtr = bt + 2*(matchIndex & btMask);
428 U32* const nextPtr = bt + 2*(matchIndex & btMask);
406 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
429 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
407 assert(matchIndex < current);
430 assert(matchIndex < current);
@@ -479,7 +502,7 b' void ZSTD_updateTree_internal('
479 const BYTE* const base = ms->window.base;
502 const BYTE* const base = ms->window.base;
480 U32 const target = (U32)(ip - base);
503 U32 const target = (U32)(ip - base);
481 U32 idx = ms->nextToUpdate;
504 U32 idx = ms->nextToUpdate;
482 DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
505 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
483 idx, target, dictMode);
506 idx, target, dictMode);
484
507
485 while(idx < target)
508 while(idx < target)
@@ -488,15 +511,18 b' void ZSTD_updateTree_internal('
488 }
511 }
489
512
490 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
513 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
491 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
514 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
492 }
515 }
493
516
494 FORCE_INLINE_TEMPLATE
517 FORCE_INLINE_TEMPLATE
495 U32 ZSTD_insertBtAndGetAllMatches (
518 U32 ZSTD_insertBtAndGetAllMatches (
496 ZSTD_matchState_t* ms,
519 ZSTD_matchState_t* ms,
497 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
520 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
498 U32 rep[ZSTD_REP_NUM], U32 const ll0,
521 U32 rep[ZSTD_REP_NUM],
499 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
522 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
523 ZSTD_match_t* matches,
524 const U32 lengthToBeat,
525 U32 const mls /* template */)
500 {
526 {
501 const ZSTD_compressionParameters* const cParams = &ms->cParams;
527 const ZSTD_compressionParameters* const cParams = &ms->cParams;
502 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
528 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@@ -542,6 +568,7 b' U32 ZSTD_insertBtAndGetAllMatches ('
542 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
568 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
543
569
544 /* check repCode */
570 /* check repCode */
571 assert(ll0 <= 1); /* necessarily 1 or 0 */
545 { U32 const lastR = ZSTD_REP_NUM + ll0;
572 { U32 const lastR = ZSTD_REP_NUM + ll0;
546 U32 repCode;
573 U32 repCode;
547 for (repCode = ll0; repCode < lastR; repCode++) {
574 for (repCode = ll0; repCode < lastR; repCode++) {
@@ -724,7 +751,7 b' FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllM'
724 ZSTD_match_t* matches, U32 const lengthToBeat)
751 ZSTD_match_t* matches, U32 const lengthToBeat)
725 {
752 {
726 const ZSTD_compressionParameters* const cParams = &ms->cParams;
753 const ZSTD_compressionParameters* const cParams = &ms->cParams;
727 U32 const matchLengthSearch = cParams->searchLength;
754 U32 const matchLengthSearch = cParams->minMatch;
728 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
755 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
729 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
756 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
730 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
757 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
@@ -774,12 +801,30 b' static U32 ZSTD_totalLen(ZSTD_optimal_t '
774 return sol.litlen + sol.mlen;
801 return sol.litlen + sol.mlen;
775 }
802 }
776
803
804 #if 0 /* debug */
805
806 static void
807 listStats(const U32* table, int lastEltID)
808 {
809 int const nbElts = lastEltID + 1;
810 int enb;
811 for (enb=0; enb < nbElts; enb++) {
812 (void)table;
813 //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
814 RAWLOG(2, "%4i,", table[enb]);
815 }
816 RAWLOG(2, " \n");
817 }
818
819 #endif
820
777 FORCE_INLINE_TEMPLATE size_t
821 FORCE_INLINE_TEMPLATE size_t
778 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
822 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
779 seqStore_t* seqStore,
823 seqStore_t* seqStore,
780 U32 rep[ZSTD_REP_NUM],
824 U32 rep[ZSTD_REP_NUM],
781 const void* src, size_t srcSize,
825 const void* src, size_t srcSize,
782 const int optLevel, const ZSTD_dictMode_e dictMode)
826 const int optLevel,
827 const ZSTD_dictMode_e dictMode)
783 {
828 {
784 optState_t* const optStatePtr = &ms->opt;
829 optState_t* const optStatePtr = &ms->opt;
785 const BYTE* const istart = (const BYTE*)src;
830 const BYTE* const istart = (const BYTE*)src;
@@ -792,14 +837,15 b' ZSTD_compressBlock_opt_generic(ZSTD_matc'
792 const ZSTD_compressionParameters* const cParams = &ms->cParams;
837 const ZSTD_compressionParameters* const cParams = &ms->cParams;
793
838
794 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
839 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
795 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
840 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
796
841
797 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
842 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
798 ZSTD_match_t* const matches = optStatePtr->matchTable;
843 ZSTD_match_t* const matches = optStatePtr->matchTable;
799 ZSTD_optimal_t lastSequence;
844 ZSTD_optimal_t lastSequence;
800
845
801 /* init */
846 /* init */
802 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
847 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
848 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
803 assert(optLevel <= 2);
849 assert(optLevel <= 2);
804 ms->nextToUpdate3 = ms->nextToUpdate;
850 ms->nextToUpdate3 = ms->nextToUpdate;
805 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
851 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
@@ -999,7 +1045,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc'
999 U32 const offCode = opt[storePos].off;
1045 U32 const offCode = opt[storePos].off;
1000 U32 const advance = llen + mlen;
1046 U32 const advance = llen + mlen;
1001 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1047 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1002 anchor - istart, llen, mlen);
1048 anchor - istart, (unsigned)llen, (unsigned)mlen);
1003
1049
1004 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1050 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1005 assert(storePos == storeEnd); /* must be last sequence */
1051 assert(storePos == storeEnd); /* must be last sequence */
@@ -1047,11 +1093,11 b' size_t ZSTD_compressBlock_btopt('
1047
1093
1048
1094
1049 /* used in 2-pass strategy */
1095 /* used in 2-pass strategy */
1050 static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
1096 static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1051 {
1097 {
1052 U32 s, sum=0;
1098 U32 s, sum=0;
1053 assert(ZSTD_FREQ_DIV+bonus > 0);
1099 assert(ZSTD_FREQ_DIV+bonus >= 0);
1054 for (s=0; s<=lastEltIndex; s++) {
1100 for (s=0; s<lastEltIndex+1; s++) {
1055 table[s] <<= ZSTD_FREQ_DIV+bonus;
1101 table[s] <<= ZSTD_FREQ_DIV+bonus;
1056 table[s]--;
1102 table[s]--;
1057 sum += table[s];
1103 sum += table[s];
@@ -1063,9 +1109,43 b' static U32 ZSTD_upscaleStat(U32* table, '
1063 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1109 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1064 {
1110 {
1065 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1111 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1066 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
1112 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1067 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
1113 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1068 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
1114 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1115 }
1116
1117 /* ZSTD_initStats_ultra():
1118 * make a first compression pass, just to seed stats with more accurate starting values.
1119 * only works on first block, with no dictionary and no ldm.
1120 * this function cannot error, hence its contract must be respected.
1121 */
1122 static void
1123 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1124 seqStore_t* seqStore,
1125 U32 rep[ZSTD_REP_NUM],
1126 const void* src, size_t srcSize)
1127 {
1128 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1129 memcpy(tmpRep, rep, sizeof(tmpRep));
1130
1131 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1132 assert(ms->opt.litLengthSum == 0); /* first block */
1133 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1134 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1135 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as two's complement) */
1136
1137 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1138
1139 /* invalidate first scan from history */
1140 ZSTD_resetSeqStore(seqStore);
1141 ms->window.base -= srcSize;
1142 ms->window.dictLimit += (U32)srcSize;
1143 ms->window.lowLimit = ms->window.dictLimit;
1144 ms->nextToUpdate = ms->window.dictLimit;
1145 ms->nextToUpdate3 = ms->window.dictLimit;
1146
1147 /* reinforce weight of collected statistics */
1148 ZSTD_upscaleStats(&ms->opt);
1069 }
1149 }
1070
1150
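A walk-through of the window rewind above, assuming the block starts at index 0 (for intuition only):

/* pass 1 filled the match tables with indices in [0, srcSize).
 * After `base -= srcSize`, the same bytes re-enter pass 2 at fresh indices
 * [srcSize, 2*srcSize) ; raising lowLimit/dictLimit/nextToUpdate to srcSize
 * drops every pass-1 table entry below the valid window, so the match
 * finders ignore them -- only the upscaled ms->opt statistics carry over. */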
1071 size_t ZSTD_compressBlock_btultra(
1151 size_t ZSTD_compressBlock_btultra(
@@ -1073,33 +1153,34 b' size_t ZSTD_compressBlock_btultra('
1073 const void* src, size_t srcSize)
1153 const void* src, size_t srcSize)
1074 {
1154 {
1075 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1155 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1076 #if 0
1156 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1077 /* 2-pass strategy (disabled)
1157 }
1158
1159 size_t ZSTD_compressBlock_btultra2(
1160 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1161 const void* src, size_t srcSize)
1162 {
1163 U32 const current = (U32)((const BYTE*)src - ms->window.base);
1164 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1165
1166 /* 2-pass strategy:
1078 * this strategy makes a first pass over first block to collect statistics
1167 * this strategy makes a first pass over first block to collect statistics
1079 * and seed next round's statistics with it.
1168 * and seed next round's statistics with it.
1169 * After 1st pass, function forgets everything, and starts a new block.
1170 * Consequently, this can only work if no data has been previously loaded in tables,
1171 * aka, no dictionary, no prefix, no ldm preprocessing.
1080 * The compression ratio gain is generally small (~0.5% on first block),
1172 * The compression ratio gain is generally small (~0.5% on first block),
1081 * the cost is 2x cpu time on first block. */
1173 * the cost is 2x cpu time on first block. */
1082 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1174 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1083 if ( (ms->opt.litLengthSum==0) /* first block */
1175 if ( (ms->opt.litLengthSum==0) /* first block */
1084 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1176 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1085 && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
1177 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1086 U32 tmpRep[ZSTD_REP_NUM];
1178 && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1087 DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
1179 && (srcSize > ZSTD_PREDEF_THRESHOLD)
1088 assert(ms->nextToUpdate >= ms->window.dictLimit
1180 ) {
1089 && ms->nextToUpdate <= ms->window.dictLimit + 1);
1181 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1090 memcpy(tmpRep, rep, sizeof(tmpRep));
1091 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1092 ZSTD_resetSeqStore(seqStore);
1093 /* invalidate first scan from history */
1094 ms->window.base -= srcSize;
1095 ms->window.dictLimit += (U32)srcSize;
1096 ms->window.lowLimit = ms->window.dictLimit;
1097 ms->nextToUpdate = ms->window.dictLimit;
1098 ms->nextToUpdate3 = ms->window.dictLimit;
1099 /* re-inforce weight of collected statistics */
1100 ZSTD_upscaleStats(&ms->opt);
1101 }
1182 }
1102 #endif
1183
1103 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1184 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1104 }
1185 }
1105
1186
@@ -1130,3 +1211,7 b' size_t ZSTD_compressBlock_btultra_extDic'
1130 {
1211 {
1131 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1212 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1132 }
1213 }
1214
1215 /* note : no btultra2 variant for extDict nor dictMatchState,
1216 * because btultra2 is not meant to work with dictionaries
1217 * and only applies to the first block (no prefix) */
@@ -26,6 +26,10 b' size_t ZSTD_compressBlock_btopt('
26 size_t ZSTD_compressBlock_btultra(
26 size_t ZSTD_compressBlock_btultra(
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 void const* src, size_t srcSize);
28 void const* src, size_t srcSize);
29 size_t ZSTD_compressBlock_btultra2(
30 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31 void const* src, size_t srcSize);
32
29
33
30 size_t ZSTD_compressBlock_btopt_dictMatchState(
34 size_t ZSTD_compressBlock_btopt_dictMatchState(
31 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
35 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -41,6 +45,10 b' size_t ZSTD_compressBlock_btultra_extDic'
41 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
45 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
42 void const* src, size_t srcSize);
46 void const* src, size_t srcSize);
43
47
48 /* note : no btultra2 variant for extDict nor dictMatchState,
49 * because btultra2 is not meant to work with dictionaries
50 * and only applies to the first block (no prefix) */
51
44 #if defined (__cplusplus)
52 #if defined (__cplusplus)
45 }
53 }
46 #endif
54 #endif
@@ -9,21 +9,19 b''
9 */
9 */
10
10
11
11
12 /* ====== Tuning parameters ====== */
13 #define ZSTDMT_NBWORKERS_MAX 200
14 #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
15 #define ZSTDMT_OVERLAPLOG_DEFAULT 6
16
17
18 /* ====== Compiler specifics ====== */
12 /* ====== Compiler specifics ====== */
19 #if defined(_MSC_VER)
13 #if defined(_MSC_VER)
20 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
14 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
21 #endif
15 #endif
22
16
23
17
18 /* ====== Constants ====== */
19 #define ZSTDMT_OVERLAPLOG_DEFAULT 0
20
21
24 /* ====== Dependencies ====== */
22 /* ====== Dependencies ====== */
25 #include <string.h> /* memcpy, memset */
23 #include <string.h> /* memcpy, memset */
26 #include <limits.h> /* INT_MAX */
24 #include <limits.h> /* INT_MAX, UINT_MAX */
27 #include "pool.h" /* threadpool */
25 #include "pool.h" /* threadpool */
28 #include "threading.h" /* mutex */
26 #include "threading.h" /* mutex */
29 #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27 #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
@@ -57,9 +55,9 b' static unsigned long long GetCurrentCloc'
57 static clock_t _ticksPerSecond = 0;
55 static clock_t _ticksPerSecond = 0;
58 if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
56 if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
59
57
60 { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
58 { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
61 return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
59 return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
62 }
60 } }
63
61
64 #define MUTEX_WAIT_TIME_DLEVEL 6
62 #define MUTEX_WAIT_TIME_DLEVEL 6
65 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
63 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
@@ -342,8 +340,8 b' static ZSTDMT_seqPool* ZSTDMT_expandSeqP'
342
340
343 typedef struct {
341 typedef struct {
344 ZSTD_pthread_mutex_t poolMutex;
342 ZSTD_pthread_mutex_t poolMutex;
345 unsigned totalCCtx;
343 int totalCCtx;
346 unsigned availCCtx;
344 int availCCtx;
347 ZSTD_customMem cMem;
345 ZSTD_customMem cMem;
348 ZSTD_CCtx* cctx[1]; /* variable size */
346 ZSTD_CCtx* cctx[1]; /* variable size */
349 } ZSTDMT_CCtxPool;
347 } ZSTDMT_CCtxPool;
@@ -351,16 +349,16 b' typedef struct {'
351 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
349 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
352 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
350 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
353 {
351 {
354 unsigned u;
352 int cid;
355 for (u=0; u<pool->totalCCtx; u++)
353 for (cid=0; cid<pool->totalCCtx; cid++)
356 ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
354 ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
357 ZSTD_pthread_mutex_destroy(&pool->poolMutex);
355 ZSTD_pthread_mutex_destroy(&pool->poolMutex);
358 ZSTD_free(pool, pool->cMem);
356 ZSTD_free(pool, pool->cMem);
359 }
357 }
360
358
361 /* ZSTDMT_createCCtxPool() :
359 /* ZSTDMT_createCCtxPool() :
362 * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
360 * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
363 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
361 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
364 ZSTD_customMem cMem)
362 ZSTD_customMem cMem)
365 {
363 {
366 ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
364 ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
@@ -381,7 +379,7 b' static ZSTDMT_CCtxPool* ZSTDMT_createCCt'
381 }
379 }
382
380
383 static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
381 static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
384 unsigned nbWorkers)
382 int nbWorkers)
385 {
383 {
386 if (srcPool==NULL) return NULL;
384 if (srcPool==NULL) return NULL;
387 if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
385 if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
@@ -469,9 +467,9 b' static int ZSTDMT_serialState_reset(seri'
469 DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
467 DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
470 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
468 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
471 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
469 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
472 assert(params.ldmParams.hashEveryLog < 32);
470 assert(params.ldmParams.hashRateLog < 32);
473 serialState->ldmState.hashPower =
471 serialState->ldmState.hashPower =
474 ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
472 ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
475 } else {
473 } else {
476 memset(&params.ldmParams, 0, sizeof(params.ldmParams));
474 memset(&params.ldmParams, 0, sizeof(params.ldmParams));
477 }
475 }
@@ -674,7 +672,7 b' static void ZSTDMT_compressionJob(void* '
674 if (ZSTD_isError(initError)) JOB_ERROR(initError);
672 if (ZSTD_isError(initError)) JOB_ERROR(initError);
675 } else { /* srcStart points at reloaded section */
673 } else { /* srcStart points at reloaded section */
676 U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
674 U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
677 { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
675 { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
678 if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
676 if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
679 }
677 }
680 { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
678 { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
@@ -777,6 +775,14 b' typedef struct {'
777
775
778 static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
776 static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
779
777
778 #define RSYNC_LENGTH 32
779
780 typedef struct {
781 U64 hash;
782 U64 hitMask;
783 U64 primePower;
784 } rsyncState_t;
785
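The rsyncState_t fields above implement a polynomial (Rabin-Karp style) rolling hash over the last RSYNC_LENGTH bytes: hash is the current window hash, hitMask decides how often a synchronization point fires, and primePower is prime^(RSYNC_LENGTH-1), precomputed so the oldest byte can be removed in O(1). A minimal self-contained sketch of the append/rotate pair; the prime and byte offset here are illustrative stand-ins, the real constants live next to ZSTD_rollingHash_* in the library:

#include <stdint.h>
#include <stddef.h>

#define K_PRIME  0x9E3779B185EBCA87ULL   /* illustrative constant, not zstd's */
#define K_OFFSET 10                      /* keeps runs of zero bytes from hashing to zero */

/* hash of buf[0..size) == sum of (buf[i]+K_OFFSET) * K_PRIME^(size-1-i), mod 2^64 */
static uint64_t rh_append(uint64_t hash, const uint8_t* buf, size_t size)
{
    size_t i;
    for (i = 0; i < size; ++i)
        hash = hash * K_PRIME + (uint64_t)(buf[i] + K_OFFSET);
    return hash;
}

/* primePower == K_PRIME^(window-1); removes byte `out`, appends byte `in`, in O(1) */
static uint64_t rh_rotate(uint64_t hash, uint8_t out, uint8_t in, uint64_t primePower)
{
    hash -= (uint64_t)(out + K_OFFSET) * primePower;    /* drop the oldest byte */
    return hash * K_PRIME + (uint64_t)(in + K_OFFSET);  /* shift, add the newest byte */
}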
780 struct ZSTDMT_CCtx_s {
786 struct ZSTDMT_CCtx_s {
781 POOL_ctx* factory;
787 POOL_ctx* factory;
782 ZSTDMT_jobDescription* jobs;
788 ZSTDMT_jobDescription* jobs;
@@ -790,6 +796,7 b' struct ZSTDMT_CCtx_s {'
790 inBuff_t inBuff;
796 inBuff_t inBuff;
791 roundBuff_t roundBuff;
797 roundBuff_t roundBuff;
792 serialState_t serial;
798 serialState_t serial;
799 rsyncState_t rsync;
793 unsigned singleBlockingThread;
800 unsigned singleBlockingThread;
794 unsigned jobIDMask;
801 unsigned jobIDMask;
795 unsigned doneJobID;
802 unsigned doneJobID;
@@ -859,7 +866,7 b' size_t ZSTDMT_CCtxParam_setNbWorkers(ZST'
859 {
866 {
860 if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
867 if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
861 params->nbWorkers = nbWorkers;
868 params->nbWorkers = nbWorkers;
862 params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
869 params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
863 params->jobSize = 0;
870 params->jobSize = 0;
864 return nbWorkers;
871 return nbWorkers;
865 }
872 }
@@ -969,52 +976,59 b' size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* m'
969 }
976 }
970
977
971 /* Internal only */
978 /* Internal only */
972 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
979 size_t
973 ZSTDMT_parameter parameter, unsigned value) {
980 ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
981 ZSTDMT_parameter parameter,
982 int value)
983 {
974 DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
984 DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
975 switch(parameter)
985 switch(parameter)
976 {
986 {
977 case ZSTDMT_p_jobSize :
987 case ZSTDMT_p_jobSize :
978 DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
988 DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
979 if ( (value > 0) /* value==0 => automatic job size */
989 if ( value != 0 /* default */
980 & (value < ZSTDMT_JOBSIZE_MIN) )
990 && value < ZSTDMT_JOBSIZE_MIN)
981 value = ZSTDMT_JOBSIZE_MIN;
991 value = ZSTDMT_JOBSIZE_MIN;
982 if (value > ZSTDMT_JOBSIZE_MAX)
992 assert(value >= 0);
983 value = ZSTDMT_JOBSIZE_MAX;
993 if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
984 params->jobSize = value;
994 params->jobSize = value;
985 return value;
995 return value;
986 case ZSTDMT_p_overlapSectionLog :
996
987 if (value > 9) value = 9;
997 case ZSTDMT_p_overlapLog :
988 DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
998 DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
989 params->overlapSizeLog = (value >= 9) ? 9 : value;
999 if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
1000 if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
1001 params->overlapLog = value;
990 return value;
1002 return value;
1003
1004 case ZSTDMT_p_rsyncable :
1005 value = (value != 0);
1006 params->rsyncable = value;
1007 return value;
1008
991 default :
1009 default :
992 return ERROR(parameter_unsupported);
1010 return ERROR(parameter_unsupported);
993 }
1011 }
994 }
1012 }
995
1013
996 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
1014 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
997 {
1015 {
998 DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
1016 DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
999 switch(parameter)
1017 return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1000 {
1001 case ZSTDMT_p_jobSize :
1002 return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1003 case ZSTDMT_p_overlapSectionLog :
1004 return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
1005 default :
1006 return ERROR(parameter_unsupported);
1007 }
1008 }
1018 }
1009
1019
1010 size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
1020 size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
1011 {
1021 {
1012 switch (parameter) {
1022 switch (parameter) {
1013 case ZSTDMT_p_jobSize:
1023 case ZSTDMT_p_jobSize:
1014 *value = mtctx->params.jobSize;
1024 assert(mtctx->params.jobSize <= INT_MAX);
1025 *value = (int)(mtctx->params.jobSize);
1015 break;
1026 break;
1016 case ZSTDMT_p_overlapSectionLog:
1027 case ZSTDMT_p_overlapLog:
1017 *value = mtctx->params.overlapSizeLog;
1028 *value = mtctx->params.overlapLog;
1029 break;
1030 case ZSTDMT_p_rsyncable:
1031 *value = mtctx->params.rsyncable;
1018 break;
1032 break;
1019 default:
1033 default:
1020 return ERROR(parameter_unsupported);
1034 return ERROR(parameter_unsupported);
@@ -1140,22 +1154,66 b' size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt'
1140 /* ===== Multi-threaded compression ===== */
1154 /* ===== Multi-threaded compression ===== */
1141 /* ------------------------------------------ */
1155 /* ------------------------------------------ */
1142
1156
1143 static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1157 static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1144 {
1158 {
1145 if (params.ldmParams.enableLdm)
1159 if (params.ldmParams.enableLdm)
1160 /* In Long Range Mode, the windowLog is typically oversized.
1161 * In which case, it's preferable to determine the jobSize
1162 * based on chainLog instead. */
1146 return MAX(21, params.cParams.chainLog + 4);
1163 return MAX(21, params.cParams.chainLog + 4);
1147 return MAX(20, params.cParams.windowLog + 2);
1164 return MAX(20, params.cParams.windowLog + 2);
1148 }
1165 }
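For example, windowLog = 23 (an 8 MB window) gives a target job log of 25, i.e. roughly 32 MB per job; the MAX(20, ...) floor keeps every job at 1 MB or more even for tiny windows.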
1149
1166
1150 static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
1167 static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
1151 {
1168 {
1152 unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
1169 switch(strat)
1153 if (params.ldmParams.enableLdm)
1170 {
1154 return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
1171 case ZSTD_btultra2:
1155 return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
1172 return 9;
1173 case ZSTD_btultra:
1174 case ZSTD_btopt:
1175 return 8;
1176 case ZSTD_btlazy2:
1177 case ZSTD_lazy2:
1178 return 7;
1179 case ZSTD_lazy:
1180 case ZSTD_greedy:
1181 case ZSTD_dfast:
1182 case ZSTD_fast:
1183 default:;
1184 }
1185 return 6;
1156 }
1186 }
1157
1187
1158 static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
1188 static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
1189 {
1190 assert(0 <= ovlog && ovlog <= 9);
1191 if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
1192 return ovlog;
1193 }
1194
1195 static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
1196 {
1197 int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
1198 int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
1199 assert(0 <= overlapRLog && overlapRLog <= 8);
1200 if (params.ldmParams.enableLdm) {
1201 /* In Long Range Mode, the windowLog is typically oversized.
1202 * In which case, it's preferable to determine the jobSize
1203 * based on chainLog instead.
1204 * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
1205 ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1206 - overlapRLog;
1207 }
1208 assert(0 <= ovLog && ovLog <= 30);
1209 DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
1210 DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
1211 return (ovLog==0) ? 0 : (size_t)1 << ovLog;
1212 }
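Worked example: with strategy ZSTD_lazy2 and overlapLog left at its default of 0, ZSTDMT_overlapLog_default() yields 7, so overlapRLog = 2 and each job reloads windowSize >> 2 (a quarter of the window) from the end of the previous job. overlapLog = 9 reloads the full window, while overlapLog = 1 (overlapRLog = 8) disables the overlap entirely.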
1213
1214 static unsigned
1215 ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
1216 {
1159 assert(nbWorkers>0);
1217 assert(nbWorkers>0);
1160 { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
1218 { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
1161 size_t const jobMaxSize = jobSizeTarget << 2;
1219 size_t const jobMaxSize = jobSizeTarget << 2;
@@ -1178,7 +1236,7 b' static size_t ZSTDMT_compress_advanced_i'
1178 ZSTD_CCtx_params params)
1236 ZSTD_CCtx_params params)
1179 {
1237 {
1180 ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1238 ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1181 size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1239 size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
1182 unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1240 unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1183 size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1241 size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1184 size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
1242 size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
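The constants decode as follows: (proposedJobSize-1) & 0x1FFFF is the proposed size's position within a 128 KB interval, and 0x7FFF is about 32 KB; when the proposed size lands within ~32 KB past a 128 KB boundary, every job is padded by 0xFFFF (64 KB) so the leftover bytes fold into the preceding jobs rather than forming a degenerately small final block, per the comment above.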
@@ -1289,16 +1347,17 b' static size_t ZSTDMT_compress_advanced_i'
1289 }
1347 }
1290
1348
1291 size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
1349 size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
1292 void* dst, size_t dstCapacity,
1350 void* dst, size_t dstCapacity,
1293 const void* src, size_t srcSize,
1351 const void* src, size_t srcSize,
1294 const ZSTD_CDict* cdict,
1352 const ZSTD_CDict* cdict,
1295 ZSTD_parameters params,
1353 ZSTD_parameters params,
1296 unsigned overlapLog)
1354 int overlapLog)
1297 {
1355 {
1298 ZSTD_CCtx_params cctxParams = mtctx->params;
1356 ZSTD_CCtx_params cctxParams = mtctx->params;
1299 cctxParams.cParams = params.cParams;
1357 cctxParams.cParams = params.cParams;
1300 cctxParams.fParams = params.fParams;
1358 cctxParams.fParams = params.fParams;
1301 cctxParams.overlapSizeLog = overlapLog;
1359 assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
1360 cctxParams.overlapLog = overlapLog;
1302 return ZSTDMT_compress_advanced_internal(mtctx,
1361 return ZSTDMT_compress_advanced_internal(mtctx,
1303 dst, dstCapacity,
1362 dst, dstCapacity,
1304 src, srcSize,
1363 src, srcSize,
@@ -1311,8 +1370,8 b' size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* '
1311 const void* src, size_t srcSize,
1370 const void* src, size_t srcSize,
1312 int compressionLevel)
1371 int compressionLevel)
1313 {
1372 {
1314 U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
1315 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
1373 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
1374 int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
1316 params.fParams.contentSizeFlag = 1;
1375 params.fParams.contentSizeFlag = 1;
1317 return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
1376 return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
1318 }
1377 }
@@ -1339,8 +1398,8 b' size_t ZSTDMT_initCStream_internal('
1339 if (params.nbWorkers != mtctx->params.nbWorkers)
1398 if (params.nbWorkers != mtctx->params.nbWorkers)
1340 CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
1399 CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
1341
1400
1342 if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1401 if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1343 if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1402 if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1344
1403
1345 mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1404 mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1346 if (mtctx->singleBlockingThread) {
1405 if (mtctx->singleBlockingThread) {
@@ -1375,14 +1434,24 b' size_t ZSTDMT_initCStream_internal('
1375 mtctx->cdict = cdict;
1434 mtctx->cdict = cdict;
1376 }
1435 }
1377
1436
1378 mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1437 mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
1379 DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
1438 DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
1380 mtctx->targetSectionSize = params.jobSize;
1439 mtctx->targetSectionSize = params.jobSize;
1381 if (mtctx->targetSectionSize == 0) {
1440 if (mtctx->targetSectionSize == 0) {
1382 mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1441 mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1383 }
1442 }
1443 if (params.rsyncable) {
1444 /* Aim for the targetSectionSize as the average job size. */
1445 U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
1446 U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
1447 assert(jobSizeMB >= 1);
1448 DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
1449 mtctx->rsync.hash = 0;
1450 mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
1451 mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
1452 }
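Worked example: targetSectionSize = 32 MB gives jobSizeMB = 32, ZSTD_highbit32(32) = 5, hence rsyncBits = 25. With hitMask = 2^25 - 1, a well-mixed hash matches once every 2^25 bytes on average, so synchronization points arrive about every 32 MB, matching the target job size.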
1384 if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1453 if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1385 DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
1454 DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
1386 DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
1455 DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
1387 ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1456 ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1388 {
1457 {
@@ -1818,6 +1887,89 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1818 return 1;
1887 return 1;
1819 }
1888 }
1820
1889
1890 typedef struct {
1891 size_t toLoad; /* The number of bytes to load from the input. */
1892 int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
1893 } syncPoint_t;
1894
1895 /**
1896 * Searches through the input for a synchronization point. If one is found, we
1897 * will instruct the caller to flush, and return the number of bytes to load.
1898 * Otherwise, we will load as many bytes as possible and instruct the caller
1899 * to continue as normal.
1900 */
1901 static syncPoint_t
1902 findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1903 {
1904 BYTE const* const istart = (BYTE const*)input.src + input.pos;
1905 U64 const primePower = mtctx->rsync.primePower;
1906 U64 const hitMask = mtctx->rsync.hitMask;
1907
1908 syncPoint_t syncPoint;
1909 U64 hash;
1910 BYTE const* prev;
1911 size_t pos;
1912
1913 syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
1914 syncPoint.flush = 0;
1915 if (!mtctx->params.rsyncable)
1916 /* Rsync is disabled. */
1917 return syncPoint;
1918 if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
1919 /* Not enough to compute the hash.
1920 * We will miss any synchronization points in this RSYNC_LENGTH byte
1921 * window. However, since it depends only on the internal buffers, if the
1922 * state is already synchronized, we will remain synchronized.
1923 * Additionally, the probability that we miss a synchronization point is
1924 * low: RSYNC_LENGTH / targetSectionSize.
1925 */
1926 return syncPoint;
1927 /* Initialize the loop variables. */
1928 if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
1929 /* We have enough bytes buffered to initialize the hash.
1930 * Start scanning at the beginning of the input.
1931 */
1932 pos = 0;
1933 prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1934 hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
1935 } else {
1936 /* We don't have enough bytes buffered to initialize the hash, but
1937 * we know we have at least RSYNC_LENGTH bytes total.
1938 * Start scanning after the first RSYNC_LENGTH bytes less the bytes
1939 * already buffered.
1940 */
1941 pos = RSYNC_LENGTH - mtctx->inBuff.filled;
1942 prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
1943 hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
1944 hash = ZSTD_rollingHash_append(hash, istart, pos);
1945 }
1946 /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
1947 * through the input. If we hit a synchronization point, then cut the
1948 * job off, and tell the compressor to flush the job. Otherwise, load
1949 * all the bytes and continue as normal.
1950 * If we go too long without a synchronization point (targetSectionSize)
1951 * then a block will be emitted anyway, but this is okay, since if we
1952 * are already synchronized we will remain synchronized.
1953 */
1954 for (; pos < syncPoint.toLoad; ++pos) {
1955 BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
1956 /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
1957 hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
1958 if ((hash & hitMask) == hitMask) {
1959 syncPoint.toLoad = pos + 1;
1960 syncPoint.flush = 1;
1961 break;
1962 }
1963 }
1964 return syncPoint;
1965 }
1966
1967 size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
1968 {
1969 size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
1970 if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
1971 return hintInSize;
1972 }
1821
1973
1822 /** ZSTDMT_compressStream_generic() :
1974 /** ZSTDMT_compressStream_generic() :
1823 * internal use only - exposed to be invoked from zstd_compress.c
1975 * internal use only - exposed to be invoked from zstd_compress.c
@@ -1844,7 +1996,8 b' size_t ZSTDMT_compressStream_generic(ZST'
1844 }
1996 }
1845
1997
1846 /* single-pass shortcut (note : synchronous-mode) */
1998 /* single-pass shortcut (note : synchronous-mode) */
1847 if ( (mtctx->nextJobID == 0) /* just started */
1999 if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
2000 && (mtctx->nextJobID == 0) /* just started */
1848 && (mtctx->inBuff.filled == 0) /* nothing buffered */
2001 && (mtctx->inBuff.filled == 0) /* nothing buffered */
1849 && (!mtctx->jobReady) /* no job already created */
2002 && (!mtctx->jobReady) /* no job already created */
1850 && (endOp == ZSTD_e_end) /* end order */
2003 && (endOp == ZSTD_e_end) /* end order */
@@ -1876,14 +2029,17 b' size_t ZSTDMT_compressStream_generic(ZST'
1876 DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
2029 DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
1877 }
2030 }
1878 if (mtctx->inBuff.buffer.start != NULL) {
2031 if (mtctx->inBuff.buffer.start != NULL) {
1879 size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
2032 syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
2033 if (syncPoint.flush && endOp == ZSTD_e_continue) {
2034 endOp = ZSTD_e_flush;
2035 }
1880 assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
2036 assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
1881 DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
2037 DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
1882 (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
2038 (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
1883 memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
2039 memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
1884 input->pos += toLoad;
2040 input->pos += syncPoint.toLoad;
1885 mtctx->inBuff.filled += toLoad;
2041 mtctx->inBuff.filled += syncPoint.toLoad;
1886 forwardInputProgress = toLoad>0;
2042 forwardInputProgress = syncPoint.toLoad>0;
1887 }
2043 }
1888 if ((input->pos < input->size) && (endOp == ZSTD_e_end))
2044 if ((input->pos < input->size) && (endOp == ZSTD_e_end))
1889 endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
2045 endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
@@ -28,6 +28,16 b''
28 #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
28 #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
29
29
30
30
31 /* === Constants === */
32 #ifndef ZSTDMT_NBWORKERS_MAX
33 # define ZSTDMT_NBWORKERS_MAX 200
34 #endif
35 #ifndef ZSTDMT_JOBSIZE_MIN
36 # define ZSTDMT_JOBSIZE_MIN (1 MB)
37 #endif
38 #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
39
40
31 /* === Memory management === */
41 /* === Memory management === */
32 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
42 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
33 ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
43 ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
@@ -52,6 +62,7 b' ZSTDLIB_API size_t ZSTDMT_compressCCtx(Z'
52 ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
62 ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
53 ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
63 ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
54
64
65 ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
55 ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
66 ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
56
67
57 ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
68 ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
@@ -60,16 +71,12 b' ZSTDLIB_API size_t ZSTDMT_endStream(ZSTD'
60
71
61 /* === Advanced functions and parameters === */
72 /* === Advanced functions and parameters === */
62
73
63 #ifndef ZSTDMT_JOBSIZE_MIN
64 # define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
65 #endif
66
67 ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
74 ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
68 void* dst, size_t dstCapacity,
75 void* dst, size_t dstCapacity,
69 const void* src, size_t srcSize,
76 const void* src, size_t srcSize,
70 const ZSTD_CDict* cdict,
77 const ZSTD_CDict* cdict,
71 ZSTD_parameters params,
78 ZSTD_parameters params,
72 unsigned overlapLog);
79 int overlapLog);
73
80
74 ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
81 ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
75 const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
82 const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
@@ -84,8 +91,9 b' ZSTDLIB_API size_t ZSTDMT_initCStream_us'
84 /* ZSTDMT_parameter :
91 /* ZSTDMT_parameter :
85 * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
92 * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
86 typedef enum {
93 typedef enum {
87 ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
94 ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
88 ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
95 ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
96 ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
89 } ZSTDMT_parameter;
97 } ZSTDMT_parameter;
90
98
91 /* ZSTDMT_setMTCtxParameter() :
99 /* ZSTDMT_setMTCtxParameter() :
@@ -93,12 +101,12 b' typedef enum {'
93 * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
101 * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
94 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
102 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
95 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
103 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
96 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
104 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
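A usage sketch under assumptions (the chosen values are arbitrary; teardown assumes the matching ZSTDMT_freeCCtx from this header). Sticky parameters are set after creating the context and before ZSTDMT_init*():

ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);   /* 4 worker threads */
if (mtctx != NULL) {
    size_t r = ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_rsyncable, 1);
    if (!ZSTD_isError(r))
        r = ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_jobSize, 2 << 20);   /* 2 MB jobs */
    if (!ZSTD_isError(r))
        r = ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_overlapLog, 6);      /* 1/8th of window */
    /* ... ZSTDMT_initCStream*() and streaming calls go here ... */
    ZSTDMT_freeCCtx(mtctx);
}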
97
105
98 /* ZSTDMT_getMTCtxParameter() :
106 /* ZSTDMT_getMTCtxParameter() :
99 * Query the ZSTDMT_CCtx for a parameter value.
107 * Query the ZSTDMT_CCtx for a parameter value.
100 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
108 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
101 ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
109 ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
102
110
103
111
104 /*! ZSTDMT_compressStream_generic() :
112 /*! ZSTDMT_compressStream_generic() :
@@ -129,7 +137,7 b' size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt'
129
137
130 /*! ZSTDMT_CCtxParam_setMTCtxParameter()
138 /*! ZSTDMT_CCtxParam_setMTCtxParameter()
131 * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
139 * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
132 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
140 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
133
141
134 /*! ZSTDMT_CCtxParam_setNbWorkers()
142 /*! ZSTDMT_CCtxParam_setNbWorkers()
135 * Set nbWorkers, and clamp it.
143 * Set nbWorkers, and clamp it.
@@ -43,6 +43,19 b''
43 #include "huf.h"
43 #include "huf.h"
44 #include "error_private.h"
44 #include "error_private.h"
45
45
46 /* **************************************************************
47 * Macros
48 ****************************************************************/
49
50 /* These two optional macros force the use one way or another of the two
51 * Huffman decompression implementations. You can't force in both directions
52 * at the same time.
53 */
54 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
55 defined(HUF_FORCE_DECOMPRESS_X2)
56 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
57 #endif
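These are build-time switches; a size-constrained build could keep only one decoder, e.g. (an illustrative configuration, not a shipped default):

/* in the build flags or a local config header, define exactly one: */
#define HUF_FORCE_DECOMPRESS_X1        /* single-symbol (X1) decoder only */
/* #define HUF_FORCE_DECOMPRESS_X2 */  /* or: double-symbol (X2) decoder only */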
58
46
59
47 /* **************************************************************
60 /* **************************************************************
48 * Error Management
61 * Error Management
@@ -58,6 +71,51 b''
58 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
71 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
59
72
60
73
74 /* **************************************************************
75 * BMI2 Variant Wrappers
76 ****************************************************************/
77 #if DYNAMIC_BMI2
78
79 #define HUF_DGEN(fn) \
80 \
81 static size_t fn##_default( \
82 void* dst, size_t dstSize, \
83 const void* cSrc, size_t cSrcSize, \
84 const HUF_DTable* DTable) \
85 { \
86 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
87 } \
88 \
89 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
90 void* dst, size_t dstSize, \
91 const void* cSrc, size_t cSrcSize, \
92 const HUF_DTable* DTable) \
93 { \
94 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
95 } \
96 \
97 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
98 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
99 { \
100 if (bmi2) { \
101 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
102 } \
103 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
104 }
105
106 #else
107
108 #define HUF_DGEN(fn) \
109 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
110 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
111 { \
112 (void)bmi2; \
113 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
114 }
115
116 #endif
117
118
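The HUF_DGEN macros above are ordinary function multi-versioning: the same fn##_body is compiled twice, once under a bmi2 target attribute so the compiler may emit BMI2 instructions, plus a dispatcher keyed on a runtime flag. A minimal sketch of the shape (GCC/Clang attribute syntax, illustrative names):

static int sum_body(const int* a, int n)
{
    int s = 0, i;
    for (i = 0; i < n; i++) s += a[i];
    return s;
}
static int sum_default(const int* a, int n) { return sum_body(a, n); }
static __attribute__((target("bmi2"))) int sum_bmi2(const int* a, int n) { return sum_body(a, n); }
/* bmi2: runtime CPU-feature flag, detected once (e.g. at context creation) */
static int sum(const int* a, int n, int bmi2)
{
    return bmi2 ? sum_bmi2(a, n) : sum_default(a, n);
}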
61 /*-***************************/
119 /*-***************************/
62 /* generic DTableDesc */
120 /* generic DTableDesc */
63 /*-***************************/
121 /*-***************************/
@@ -71,6 +129,8 b' static DTableDesc HUF_getDTableDesc(cons'
71 }
129 }
72
130
73
131
132 #ifndef HUF_FORCE_DECOMPRESS_X2
133
74 /*-***************************/
134 /*-***************************/
75 /* single-symbol decoding */
135 /* single-symbol decoding */
76 /*-***************************/
136 /*-***************************/
@@ -307,46 +367,6 b' typedef size_t (*HUF_decompress_usingDTa'
307 const void *cSrc,
367 const void *cSrc,
308 size_t cSrcSize,
368 size_t cSrcSize,
309 const HUF_DTable *DTable);
369 const HUF_DTable *DTable);
310 #if DYNAMIC_BMI2
311
312 #define HUF_DGEN(fn) \
313 \
314 static size_t fn##_default( \
315 void* dst, size_t dstSize, \
316 const void* cSrc, size_t cSrcSize, \
317 const HUF_DTable* DTable) \
318 { \
319 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
320 } \
321 \
322 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
323 void* dst, size_t dstSize, \
324 const void* cSrc, size_t cSrcSize, \
325 const HUF_DTable* DTable) \
326 { \
327 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
328 } \
329 \
330 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
331 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
332 { \
333 if (bmi2) { \
334 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
335 } \
336 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
337 }
338
339 #else
340
341 #define HUF_DGEN(fn) \
342 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
343 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
344 { \
345 (void)bmi2; \
346 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
347 }
348
349 #endif
350
370
351 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
371 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
352 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
372 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
@@ -437,6 +457,10 b' size_t HUF_decompress4X1 (void* dst, siz'
437 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
457 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
438 }
458 }
439
459
460 #endif /* HUF_FORCE_DECOMPRESS_X2 */
461
462
463 #ifndef HUF_FORCE_DECOMPRESS_X1
440
464
441 /* *************************/
465 /* *************************/
442 /* double-symbols decoding */
466 /* double-symbols decoding */
@@ -911,6 +935,8 b' size_t HUF_decompress4X2 (void* dst, siz'
911 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
935 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
912 }
936 }
913
937
938 #endif /* HUF_FORCE_DECOMPRESS_X1 */
939
914
940
915 /* ***********************************/
941 /* ***********************************/
916 /* Universal decompression selectors */
942 /* Universal decompression selectors */
@@ -921,8 +947,18 b' size_t HUF_decompress1X_usingDTable(void'
921 const HUF_DTable* DTable)
947 const HUF_DTable* DTable)
922 {
948 {
923 DTableDesc const dtd = HUF_getDTableDesc(DTable);
949 DTableDesc const dtd = HUF_getDTableDesc(DTable);
950 #if defined(HUF_FORCE_DECOMPRESS_X1)
951 (void)dtd;
952 assert(dtd.tableType == 0);
953 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
954 #elif defined(HUF_FORCE_DECOMPRESS_X2)
955 (void)dtd;
956 assert(dtd.tableType == 1);
957 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
958 #else
924 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
959 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
960 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
961 #endif
926 }
962 }
927
963
928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
964 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -930,11 +966,22 b' size_t HUF_decompress4X_usingDTable(void'
930 const HUF_DTable* DTable)
966 const HUF_DTable* DTable)
931 {
967 {
932 DTableDesc const dtd = HUF_getDTableDesc(DTable);
968 DTableDesc const dtd = HUF_getDTableDesc(DTable);
969 #if defined(HUF_FORCE_DECOMPRESS_X1)
970 (void)dtd;
971 assert(dtd.tableType == 0);
972 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
973 #elif defined(HUF_FORCE_DECOMPRESS_X2)
974 (void)dtd;
975 assert(dtd.tableType == 1);
976 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
977 #else
933 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
978 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
979 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
980 #endif
935 }
981 }
936
982
937
983
984 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
938 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
985 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
939 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
986 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
940 {
987 {
@@ -956,6 +1003,7 b' static const algo_time_t algoTime[16 /* '
956 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1003 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
957 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1004 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
958 };
1005 };
1006 #endif
959
1007
960 /** HUF_selectDecoder() :
1008 /** HUF_selectDecoder() :
961 * Tells which decoder is likely to decode faster,
1009 * Tells which decoder is likely to decode faster,
@@ -966,6 +1014,15 b' U32 HUF_selectDecoder (size_t dstSize, s'
966 {
1014 {
967 assert(dstSize > 0);
1015 assert(dstSize > 0);
968 assert(dstSize <= 128*1024);
1016 assert(dstSize <= 128*1024);
1017 #if defined(HUF_FORCE_DECOMPRESS_X1)
1018 (void)dstSize;
1019 (void)cSrcSize;
1020 return 0;
1021 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1022 (void)dstSize;
1023 (void)cSrcSize;
1024 return 1;
1025 #else
969 /* decoder timing evaluation */
1026 /* decoder timing evaluation */
970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1027 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
971 U32 const D256 = (U32)(dstSize >> 8);
1028 U32 const D256 = (U32)(dstSize >> 8);
@@ -973,14 +1030,18 b' U32 HUF_selectDecoder (size_t dstSize, s'
973 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1030 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
974 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1031 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
975 return DTime1 < DTime0;
1032 return DTime1 < DTime0;
976 } }
1033 }
1034 #endif
1035 }
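Worked example using the Q == 15 row above: for dstSize = 64 KB at the incompressible end (cSrcSize >= dstSize), Q = 15 and D256 = 256, so DTime0 = 722 + 128*256 = 33490 and DTime1 = 1891 + 145*256 = 39011, raised to 43887 by the one-eighth cache-eviction penalty; DTime1 < DTime0 is false, so the single-symbol (X1) decoder wins.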
977
1036
978
1037
979 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1038 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
980
1039
981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1040 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
982 {
1041 {
1042 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1043 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1044 #endif
984
1045
985 /* validation checks */
1046 /* validation checks */
986 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1047 if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -989,7 +1050,17 b' size_t HUF_decompress (void* dst, size_t'
989 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1050 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
990
1051
991 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1052 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1053 #if defined(HUF_FORCE_DECOMPRESS_X1)
1054 (void)algoNb;
1055 assert(algoNb == 0);
1056 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1057 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1058 (void)algoNb;
1059 assert(algoNb == 1);
1060 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1061 #else
992 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1062 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1063 #endif
993 }
1064 }
994 }
1065 }
995
1066
@@ -1002,8 +1073,18 b' size_t HUF_decompress4X_DCtx (HUF_DTable'
1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1073 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1003
1074
1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1075 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1076 #if defined(HUF_FORCE_DECOMPRESS_X1)
1077 (void)algoNb;
1078 assert(algoNb == 0);
1079 return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1080 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1081 (void)algoNb;
1082 assert(algoNb == 1);
1083 return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1084 #else
1005 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1085 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1086 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1087 #endif
1007 }
1088 }
1008 }
1089 }
1009
1090
@@ -1025,8 +1106,19 b' size_t HUF_decompress4X_hufOnly_wksp(HUF'
1025 if (cSrcSize == 0) return ERROR(corruption_detected);
1106 if (cSrcSize == 0) return ERROR(corruption_detected);
1026
1107
1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1108 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1028 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1109 #if defined(HUF_FORCE_DECOMPRESS_X1)
1110 (void)algoNb;
1111 assert(algoNb == 0);
1112 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1113 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1114 (void)algoNb;
1115 assert(algoNb == 1);
1116 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1117 #else
1118 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1119 cSrcSize, workSpace, wkspSize):
1029 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1120 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1121 #endif
1030 }
1122 }
1031 }
1123 }
1032
1124
@@ -1041,10 +1133,22 b' size_t HUF_decompress1X_DCtx_wksp(HUF_DT'
1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1133 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1042
1134
1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1135 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1136 #if defined(HUF_FORCE_DECOMPRESS_X1)
1137 (void)algoNb;
1138 assert(algoNb == 0);
1139 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1140 cSrcSize, workSpace, wkspSize);
1141 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1142 (void)algoNb;
1143 assert(algoNb == 1);
1144 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1145 cSrcSize, workSpace, wkspSize);
1146 #else
1044 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1147 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1045 cSrcSize, workSpace, wkspSize):
1148 cSrcSize, workSpace, wkspSize):
1046 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1149 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1047 cSrcSize, workSpace, wkspSize);
1150 cSrcSize, workSpace, wkspSize);
1151 #endif
1048 }
1152 }
1049 }
1153 }
1050
1154
@@ -1060,10 +1164,21 b' size_t HUF_decompress1X_DCtx(HUF_DTable*'
1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1164 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1061 {
1165 {
1062 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1166 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1167 #if defined(HUF_FORCE_DECOMPRESS_X1)
1168 (void)dtd;
1169 assert(dtd.tableType == 0);
1170 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1171 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1172 (void)dtd;
1173 assert(dtd.tableType == 1);
1174 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1175 #else
1063 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1176 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1177 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1178 #endif
1065 }
1179 }
1066
1180
1181 #ifndef HUF_FORCE_DECOMPRESS_X2
1067 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1182 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1068 {
1183 {
1069 const BYTE* ip = (const BYTE*) cSrc;
1184 const BYTE* ip = (const BYTE*) cSrc;
@@ -1075,12 +1190,23 b' size_t HUF_decompress1X1_DCtx_wksp_bmi2('
1075
1190
1076 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1191 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1077 }
1192 }
1193 #endif
1078
1194
1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1195 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1080 {
1196 {
1081 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1197 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1198 #if defined(HUF_FORCE_DECOMPRESS_X1)
1199 (void)dtd;
1200 assert(dtd.tableType == 0);
1201 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1202 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1203 (void)dtd;
1204 assert(dtd.tableType == 1);
1205 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1206 #else
1082 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1207 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1208 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1209 #endif
1084 }
1210 }
1085
1211
1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1212 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
@@ -1090,7 +1216,17 b' size_t HUF_decompress4X_hufOnly_wksp_bmi'
1090 if (cSrcSize == 0) return ERROR(corruption_detected);
1216 if (cSrcSize == 0) return ERROR(corruption_detected);
1091
1217
1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1218 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1219 #if defined(HUF_FORCE_DECOMPRESS_X1)
1220 (void)algoNb;
1221 assert(algoNb == 0);
1222 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1223 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1224 (void)algoNb;
1225 assert(algoNb == 1);
1226 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1227 #else
1093 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1228 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1229 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1230 #endif
1095 }
1231 }
1096 }
1232 }
@@ -37,12 +37,12 b''
37 * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
37 * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
38 */
38 */
39 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
39 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
40 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
40 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
41 #endif
41 #endif
42
42
43 /*!
43 /*!
44 * NO_FORWARD_PROGRESS_MAX :
44 * NO_FORWARD_PROGRESS_MAX :
45 * maximum allowed nb of calls to ZSTD_decompressStream() and ZSTD_decompress_generic()
45 * maximum allowed nb of calls to ZSTD_decompressStream()
46 * without any forward progress
46 * without any forward progress
47 * (defined as: no byte read from input, and no byte flushed to output)
47 * (defined as: no byte read from input, and no byte flushed to output)
48 * before triggering an error.
48 * before triggering an error.
@@ -56,128 +56,25 b''
56 * Dependencies
56 * Dependencies
57 *********************************************************/
57 *********************************************************/
58 #include <string.h> /* memcpy, memmove, memset */
58 #include <string.h> /* memcpy, memmove, memset */
59 #include "compiler.h" /* prefetch */
60 #include "cpu.h" /* bmi2 */
59 #include "cpu.h" /* bmi2 */
61 #include "mem.h" /* low level memory routines */
60 #include "mem.h" /* low level memory routines */
62 #define FSE_STATIC_LINKING_ONLY
61 #define FSE_STATIC_LINKING_ONLY
63 #include "fse.h"
62 #include "fse.h"
64 #define HUF_STATIC_LINKING_ONLY
63 #define HUF_STATIC_LINKING_ONLY
65 #include "huf.h"
64 #include "huf.h"
66 #include "zstd_internal.h"
65 #include "zstd_internal.h" /* blockProperties_t */
66 #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
67 #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
68 #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
67
69
68 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
70 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
69 # include "zstd_legacy.h"
71 # include "zstd_legacy.h"
70 #endif
72 #endif
71
73
72 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
73 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
74
75
76 /*-*************************************
77 * Errors
78 ***************************************/
79 #define ZSTD_isError ERR_isError /* for inlining */
80 #define FSE_isError ERR_isError
81 #define HUF_isError ERR_isError
82
83
84 /*_*******************************************************
85 * Memory operations
86 **********************************************************/
87 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
88
89
74
90 /*-*************************************************************
75 /*-*************************************************************
91 * Context management
76 * Context management
92 ***************************************************************/
77 ***************************************************************/
93 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
94 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
95 ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
96 ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
97
98 typedef enum { zdss_init=0, zdss_loadHeader,
99 zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
100
101
102 typedef struct {
103 U32 fastMode;
104 U32 tableLog;
105 } ZSTD_seqSymbol_header;
106
107 typedef struct {
108 U16 nextState;
109 BYTE nbAdditionalBits;
110 BYTE nbBits;
111 U32 baseValue;
112 } ZSTD_seqSymbol;
113
114 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
115
116 typedef struct {
117 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
118 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
119 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
120 HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
121 U32 rep[ZSTD_REP_NUM];
122 } ZSTD_entropyDTables_t;
123
124 struct ZSTD_DCtx_s
125 {
126 const ZSTD_seqSymbol* LLTptr;
127 const ZSTD_seqSymbol* MLTptr;
128 const ZSTD_seqSymbol* OFTptr;
129 const HUF_DTable* HUFptr;
130 ZSTD_entropyDTables_t entropy;
131 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
132 const void* previousDstEnd; /* detect continuity */
133 const void* prefixStart; /* start of current segment */
134 const void* virtualStart; /* virtual start of previous segment if it was just before current one */
135 const void* dictEnd; /* end of previous segment */
136 size_t expected;
137 ZSTD_frameHeader fParams;
138 U64 decodedSize;
139 blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
140 ZSTD_dStage stage;
141 U32 litEntropy;
142 U32 fseEntropy;
143 XXH64_state_t xxhState;
144 size_t headerSize;
145 ZSTD_format_e format;
146 const BYTE* litPtr;
147 ZSTD_customMem customMem;
148 size_t litSize;
149 size_t rleSize;
150 size_t staticSize;
151 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
152
153 /* dictionary */
154 ZSTD_DDict* ddictLocal;
155 const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
156 U32 dictID;
157 int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
158
159 /* streaming */
160 ZSTD_dStreamStage streamStage;
161 char* inBuff;
162 size_t inBuffSize;
163 size_t inPos;
164 size_t maxWindowSize;
165 char* outBuff;
166 size_t outBuffSize;
167 size_t outStart;
168 size_t outEnd;
169 size_t lhSize;
170 void* legacyContext;
171 U32 previousLegacyVersion;
172 U32 legacyVersion;
173 U32 hostageByte;
174 int noForwardProgress;
175
176 /* workspace */
177 BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
178 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
179 }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
180
181 size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
78 size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
182 {
79 {
183 if (dctx==NULL) return 0; /* support sizeof NULL */
80 if (dctx==NULL) return 0; /* support sizeof NULL */
@@ -192,8 +89,8 b' size_t ZSTD_estimateDCtxSize(void) { ret'
192 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
89 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
193 {
90 {
194 size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
91 size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
195 ZSTD_frameHeaderSize_prefix - ZSTD_FRAMEIDSIZE :
92 ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
196 ZSTD_frameHeaderSize_prefix;
93 ZSTD_FRAMEHEADERSIZE_PREFIX;
197 ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
94 ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
198 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
95 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
199 assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
96 assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
@@ -290,7 +187,7 b' unsigned ZSTD_isFrame(const void* buffer'
290 if (size < ZSTD_FRAMEIDSIZE) return 0;
187 if (size < ZSTD_FRAMEIDSIZE) return 0;
291 { U32 const magic = MEM_readLE32(buffer);
188 { U32 const magic = MEM_readLE32(buffer);
292 if (magic == ZSTD_MAGICNUMBER) return 1;
189 if (magic == ZSTD_MAGICNUMBER) return 1;
293 if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
190 if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
294 }
191 }
295 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
192 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
296 if (ZSTD_isLegacy(buffer, size)) return 1;
193 if (ZSTD_isLegacy(buffer, size)) return 1;
@@ -345,10 +242,10 b' size_t ZSTD_getFrameHeader_advanced(ZSTD'
345 242
346 243 if ( (format != ZSTD_f_zstd1_magicless)
347 244 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
348 if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
245 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
349 246 /* skippable frame */
350 if (srcSize < ZSTD_skippableHeaderSize)
247 if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
351 return ZSTD_skippableHeaderSize; /* magic number + frame length */
248 return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
352 249 memset(zfhPtr, 0, sizeof(*zfhPtr));
353 250 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
354 251 zfhPtr->frameType = ZSTD_skippableFrame;
@@ -446,6 +343,21 b' unsigned long long ZSTD_getFrameContentS'
446 343 } }
447 344 }
448 345
346 static size_t readSkippableFrameSize(void const* src, size_t srcSize)
347 {
348 size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
349 U32 sizeU32;
350
351 if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
352 return ERROR(srcSize_wrong);
353
354 sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
355 if ((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32)
356 return ERROR(frameParameter_unsupported);
357
358 return skippableHeaderSize + sizeU32;
359 }
360
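/* An illustrative, self-contained sketch (not part of the vendored sources)
 * of the arithmetic readSkippableFrameSize() performs above: a skippable
 * frame is an 8-byte header (4-byte magic + 4-byte little-endian content
 * size) followed by the content, and the addition is guarded against
 * 32-bit wrap-around. All example_* names are hypothetical. */
#include <stddef.h>
#include <stdint.h>

#define EXAMPLE_SKIPPABLE_HEADER_SIZE 8u   /* magic number + frame length */

static uint32_t example_readLE32(const unsigned char* p)
{
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8)
         | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* returns 0 on error (input too small, or size overflows 32 bits) */
static size_t example_skippableFrameSize(const unsigned char* src, size_t srcSize)
{
    uint32_t contentSize;
    if (srcSize < EXAMPLE_SKIPPABLE_HEADER_SIZE) return 0;
    contentSize = example_readLE32(src + 4);     /* size field follows the magic */
    if ((uint32_t)(contentSize + EXAMPLE_SKIPPABLE_HEADER_SIZE) < contentSize)
        return 0;                                /* same wrap-around guard as above */
    return (size_t)EXAMPLE_SKIPPABLE_HEADER_SIZE + contentSize;
}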
449 361 /** ZSTD_findDecompressedSize() :
450 362 * compatible with legacy mode
451 363 * `srcSize` must be the exact length of some number of ZSTD compressed and/or
@@ -455,15 +367,13 b' unsigned long long ZSTD_findDecompressed'
455 367 {
456 368 unsigned long long totalDstSize = 0;
457 369
458 while (srcSize >= ZSTD_frameHeaderSize_prefix) {
370 while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
459 371 U32 const magicNumber = MEM_readLE32(src);
460 372
461 if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
373 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
462 size_t skippableSize;
374 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
463 if (srcSize < ZSTD_skippableHeaderSize)
375 if (ZSTD_isError(skippableSize))
464 return ERROR(srcSize_wrong);
376 return skippableSize;
465 skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_FRAMEIDSIZE)
466 + ZSTD_skippableHeaderSize;
467 377 if (srcSize < skippableSize) {
468 378 return ZSTD_CONTENTSIZE_ERROR;
469 379 }
@@ -496,9 +406,9 b' unsigned long long ZSTD_findDecompressed'
496 406 }
497 407
498 408 /** ZSTD_getDecompressedSize() :
499 409 * compatible with legacy mode
500 410 * @return : decompressed size if known, 0 otherwise
501 411 note : 0 can mean any of the following :
502 412 - frame content is empty
503 413 - decompressed size field is not present in frame header
504 414 - frame header unknown / not supported
@@ -512,8 +422,8 b' unsigned long long ZSTD_getDecompressedS'
512 422
513 423
514 424 /** ZSTD_decodeFrameHeader() :
515 425 * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
516 426 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
517 427 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
518 428 {
519 429 size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
@@ -526,1275 +436,6 b' static size_t ZSTD_decodeFrameHeader(ZST'
526 436 }
527 437
528 438
529 /*-*************************************************************
530 * Block decoding
531 ***************************************************************/
532
533 /*! ZSTD_getcBlockSize() :
534 * Provides the size of compressed block from block header `src` */
535 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
536 blockProperties_t* bpPtr)
537 {
538 if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
539 { U32 const cBlockHeader = MEM_readLE24(src);
540 U32 const cSize = cBlockHeader >> 3;
541 bpPtr->lastBlock = cBlockHeader & 1;
542 bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
543 bpPtr->origSize = cSize; /* only useful for RLE */
544 if (bpPtr->blockType == bt_rle) return 1;
545 if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
546 return cSize;
547 }
548 }
549
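/* An illustrative sketch (not from the vendored sources) of the 3-byte
 * block header ZSTD_getcBlockSize() parses above: a little-endian 24-bit
 * value where bit 0 is the last-block flag, bits 1-2 the block type, and
 * bits 3-23 the block size (the regenerated size for RLE blocks). */
#include <stdio.h>

int main(void)
{
    const unsigned char hdr[3] = { 0x21, 0x00, 0x00 };   /* example header bytes */
    unsigned const v = (unsigned)hdr[0] | ((unsigned)hdr[1] << 8) | ((unsigned)hdr[2] << 16);
    unsigned const lastBlock = v & 1;
    unsigned const blockType = (v >> 1) & 3;   /* 0=raw, 1=rle, 2=compressed, 3=reserved */
    unsigned const blockSize = v >> 3;
    printf("last=%u type=%u size=%u\n", lastBlock, blockType, blockSize);  /* last=1 type=0 size=4 */
    return 0;
}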
550
551 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
552 const void* src, size_t srcSize)
553 {
554 if (dst==NULL) return ERROR(dstSize_tooSmall);
555 if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
556 memcpy(dst, src, srcSize);
557 return srcSize;
558 }
559
560
561 static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
562 const void* src, size_t srcSize,
563 size_t regenSize)
564 {
565 if (srcSize != 1) return ERROR(srcSize_wrong);
566 if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
567 memset(dst, *(const BYTE*)src, regenSize);
568 return regenSize;
569 }
570
571 /* Hidden declaration for fullbench */
572 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
573 const void* src, size_t srcSize);
574 /*! ZSTD_decodeLiteralsBlock() :
575 * @return : nb of bytes read from src (< srcSize )
576 * note : symbol not declared but exposed for fullbench */
577 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
578 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
579 {
580 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
581
582 { const BYTE* const istart = (const BYTE*) src;
583 symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
584
585 switch(litEncType)
586 {
587 case set_repeat:
588 if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
589 /* fall-through */
590
591 case set_compressed:
592 if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
593 { size_t lhSize, litSize, litCSize;
594 U32 singleStream=0;
595 U32 const lhlCode = (istart[0] >> 2) & 3;
596 U32 const lhc = MEM_readLE32(istart);
597 switch(lhlCode)
598 {
599 case 0: case 1: default: /* note : default is impossible, since lhlCode is in [0..3] */
600 /* 2 - 2 - 10 - 10 */
601 singleStream = !lhlCode;
602 lhSize = 3;
603 litSize = (lhc >> 4) & 0x3FF;
604 litCSize = (lhc >> 14) & 0x3FF;
605 break;
606 case 2:
607 /* 2 - 2 - 14 - 14 */
608 lhSize = 4;
609 litSize = (lhc >> 4) & 0x3FFF;
610 litCSize = lhc >> 18;
611 break;
612 case 3:
613 /* 2 - 2 - 18 - 18 */
614 lhSize = 5;
615 litSize = (lhc >> 4) & 0x3FFFF;
616 litCSize = (lhc >> 22) + (istart[4] << 10);
617 break;
618 }
619 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
620 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
621
622 /* prefetch huffman table if cold */
623 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
624 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
625 }
626
627 if (HUF_isError((litEncType==set_repeat) ?
628 ( singleStream ?
629 HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
630 HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
631 ( singleStream ?
632 HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
633 dctx->workspace, sizeof(dctx->workspace), dctx->bmi2) :
634 HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
635 dctx->workspace, sizeof(dctx->workspace), dctx->bmi2))))
636 return ERROR(corruption_detected);
637
638 dctx->litPtr = dctx->litBuffer;
639 dctx->litSize = litSize;
640 dctx->litEntropy = 1;
641 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
642 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
643 return litCSize + lhSize;
644 }
645
646 case set_basic:
647 { size_t litSize, lhSize;
648 U32 const lhlCode = ((istart[0]) >> 2) & 3;
649 switch(lhlCode)
650 {
651 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
652 lhSize = 1;
653 litSize = istart[0] >> 3;
654 break;
655 case 1:
656 lhSize = 2;
657 litSize = MEM_readLE16(istart) >> 4;
658 break;
659 case 3:
660 lhSize = 3;
661 litSize = MEM_readLE24(istart) >> 4;
662 break;
663 }
664
665 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
666 if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
667 memcpy(dctx->litBuffer, istart+lhSize, litSize);
668 dctx->litPtr = dctx->litBuffer;
669 dctx->litSize = litSize;
670 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
671 return lhSize+litSize;
672 }
673 /* direct reference into compressed stream */
674 dctx->litPtr = istart+lhSize;
675 dctx->litSize = litSize;
676 return lhSize+litSize;
677 }
678
679 case set_rle:
680 { U32 const lhlCode = ((istart[0]) >> 2) & 3;
681 size_t litSize, lhSize;
682 switch(lhlCode)
683 {
684 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
685 lhSize = 1;
686 litSize = istart[0] >> 3;
687 break;
688 case 1:
689 lhSize = 2;
690 litSize = MEM_readLE16(istart) >> 4;
691 break;
692 case 3:
693 lhSize = 3;
694 litSize = MEM_readLE24(istart) >> 4;
695 if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
696 break;
697 }
698 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
699 memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
700 dctx->litPtr = dctx->litBuffer;
701 dctx->litSize = litSize;
702 return lhSize+1;
703 }
704 default:
705 return ERROR(corruption_detected); /* impossible */
706 }
707 }
708 }
709
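/* An illustrative sketch (not from the vendored sources) of the 5-byte
 * compressed-literals header parsed in case 3 above, with its 2-2-18-18
 * bit layout: block type, size format, regenerated size, compressed size. */
#include <stdio.h>

int main(void)
{
    const unsigned char h[5] = { 0x0E, 0x40, 0x00, 0x10, 0x00 };  /* example bytes */
    unsigned const lhc = (unsigned)h[0] | ((unsigned)h[1] << 8)
                       | ((unsigned)h[2] << 16) | ((unsigned)h[3] << 24);
    unsigned const lhlCode  = (h[0] >> 2) & 3;                 /* 3 -> 5-byte header */
    unsigned const litSize  = (lhc >> 4) & 0x3FFFF;
    unsigned const litCSize = (lhc >> 22) + ((unsigned)h[4] << 10);
    printf("format=%u litSize=%u litCSize=%u\n", lhlCode, litSize, litCSize);
    /* prints: format=3 litSize=1024 litCSize=64 */
    return 0;
}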
710 /* Default FSE distribution tables.
711 * These are pre-calculated FSE decoding tables using default distributions as defined in the specification :
712 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
713 * They were generated programmatically with the following method :
714 * - start from default distributions, present in /lib/common/zstd_internal.h
715 * - generate tables normally, using ZSTD_buildFSETable()
716 * - print out the content of the tables
717 * - prettify the output (reported below), test with fuzzer to ensure it's correct */
718
719 /* Default FSE distribution table for Literal Lengths */
720 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
721 { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
722 /* nextState, nbAddBits, nbBits, baseVal */
723 { 0, 0, 4, 0}, { 16, 0, 4, 0},
724 { 32, 0, 5, 1}, { 0, 0, 5, 3},
725 { 0, 0, 5, 4}, { 0, 0, 5, 6},
726 { 0, 0, 5, 7}, { 0, 0, 5, 9},
727 { 0, 0, 5, 10}, { 0, 0, 5, 12},
728 { 0, 0, 6, 14}, { 0, 1, 5, 16},
729 { 0, 1, 5, 20}, { 0, 1, 5, 22},
730 { 0, 2, 5, 28}, { 0, 3, 5, 32},
731 { 0, 4, 5, 48}, { 32, 6, 5, 64},
732 { 0, 7, 5, 128}, { 0, 8, 6, 256},
733 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
734 { 32, 0, 4, 0}, { 0, 0, 4, 1},
735 { 0, 0, 5, 2}, { 32, 0, 5, 4},
736 { 0, 0, 5, 5}, { 32, 0, 5, 7},
737 { 0, 0, 5, 8}, { 32, 0, 5, 10},
738 { 0, 0, 5, 11}, { 0, 0, 6, 13},
739 { 32, 1, 5, 16}, { 0, 1, 5, 18},
740 { 32, 1, 5, 22}, { 0, 2, 5, 24},
741 { 32, 3, 5, 32}, { 0, 3, 5, 40},
742 { 0, 6, 4, 64}, { 16, 6, 4, 64},
743 { 32, 7, 5, 128}, { 0, 9, 6, 512},
744 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
745 { 16, 0, 4, 1}, { 32, 0, 5, 2},
746 { 32, 0, 5, 3}, { 32, 0, 5, 5},
747 { 32, 0, 5, 6}, { 32, 0, 5, 8},
748 { 32, 0, 5, 9}, { 32, 0, 5, 11},
749 { 32, 0, 5, 12}, { 0, 0, 6, 15},
750 { 32, 1, 5, 18}, { 32, 1, 5, 20},
751 { 32, 2, 5, 24}, { 32, 2, 5, 28},
752 { 32, 3, 5, 40}, { 32, 4, 5, 48},
753 { 0, 16, 6,65536}, { 0, 15, 6,32768},
754 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
755 }; /* LL_defaultDTable */
756
757 /* Default FSE distribution table for Offset Codes */
758 static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
759 { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
760 /* nextState, nbAddBits, nbBits, baseVal */
761 { 0, 0, 5, 0}, { 0, 6, 4, 61},
762 { 0, 9, 5, 509}, { 0, 15, 5,32765},
763 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
764 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
765 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
766 { 0, 5, 5, 29}, { 0, 8, 4, 253},
767 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
768 { 0, 2, 5, 1}, { 16, 7, 4, 125},
769 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
770 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
771 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
772 { 0, 19, 5,524285}, { 0, 1, 5, 1},
773 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
774 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
775 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
776 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
777 }; /* OF_defaultDTable */
778
779
780 /* Default FSE distribution table for Match Lengths */
781 static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
782 { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
783 /* nextState, nbAddBits, nbBits, baseVal */
784 { 0, 0, 6, 3}, { 0, 0, 4, 4},
785 { 32, 0, 5, 5}, { 0, 0, 5, 6},
786 { 0, 0, 5, 8}, { 0, 0, 5, 9},
787 { 0, 0, 5, 11}, { 0, 0, 6, 13},
788 { 0, 0, 6, 16}, { 0, 0, 6, 19},
789 { 0, 0, 6, 22}, { 0, 0, 6, 25},
790 { 0, 0, 6, 28}, { 0, 0, 6, 31},
791 { 0, 0, 6, 34}, { 0, 1, 6, 37},
792 { 0, 1, 6, 41}, { 0, 2, 6, 47},
793 { 0, 3, 6, 59}, { 0, 4, 6, 83},
794 { 0, 7, 6, 131}, { 0, 9, 6, 515},
795 { 16, 0, 4, 4}, { 0, 0, 4, 5},
796 { 32, 0, 5, 6}, { 0, 0, 5, 7},
797 { 32, 0, 5, 9}, { 0, 0, 5, 10},
798 { 0, 0, 6, 12}, { 0, 0, 6, 15},
799 { 0, 0, 6, 18}, { 0, 0, 6, 21},
800 { 0, 0, 6, 24}, { 0, 0, 6, 27},
801 { 0, 0, 6, 30}, { 0, 0, 6, 33},
802 { 0, 1, 6, 35}, { 0, 1, 6, 39},
803 { 0, 2, 6, 43}, { 0, 3, 6, 51},
804 { 0, 4, 6, 67}, { 0, 5, 6, 99},
805 { 0, 8, 6, 259}, { 32, 0, 4, 4},
806 { 48, 0, 4, 4}, { 16, 0, 4, 5},
807 { 32, 0, 5, 7}, { 32, 0, 5, 8},
808 { 32, 0, 5, 10}, { 32, 0, 5, 11},
809 { 0, 0, 6, 14}, { 0, 0, 6, 17},
810 { 0, 0, 6, 20}, { 0, 0, 6, 23},
811 { 0, 0, 6, 26}, { 0, 0, 6, 29},
812 { 0, 0, 6, 32}, { 0, 16, 6,65539},
813 { 0, 15, 6,32771}, { 0, 14, 6,16387},
814 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
815 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
816 }; /* ML_defaultDTable */
817
818
819 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
820 {
821 void* ptr = dt;
822 ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
823 ZSTD_seqSymbol* const cell = dt + 1;
824
825 DTableH->tableLog = 0;
826 DTableH->fastMode = 0;
827
828 cell->nbBits = 0;
829 cell->nextState = 0;
830 assert(nbAddBits < 255);
831 cell->nbAdditionalBits = (BYTE)nbAddBits;
832 cell->baseValue = baseValue;
833 }
834
835
836 /* ZSTD_buildFSETable() :
837 * generate FSE decoding table for one symbol (ll, ml or off) */
838 static void
839 ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
840 const short* normalizedCounter, unsigned maxSymbolValue,
841 const U32* baseValue, const U32* nbAdditionalBits,
842 unsigned tableLog)
843 {
844 ZSTD_seqSymbol* const tableDecode = dt+1;
845 U16 symbolNext[MaxSeq+1];
846
847 U32 const maxSV1 = maxSymbolValue + 1;
848 U32 const tableSize = 1 << tableLog;
849 U32 highThreshold = tableSize-1;
850
851 /* Sanity Checks */
852 assert(maxSymbolValue <= MaxSeq);
853 assert(tableLog <= MaxFSELog);
854
855 /* Init, lay down lowprob symbols */
856 { ZSTD_seqSymbol_header DTableH;
857 DTableH.tableLog = tableLog;
858 DTableH.fastMode = 1;
859 { S16 const largeLimit= (S16)(1 << (tableLog-1));
860 U32 s;
861 for (s=0; s<maxSV1; s++) {
862 if (normalizedCounter[s]==-1) {
863 tableDecode[highThreshold--].baseValue = s;
864 symbolNext[s] = 1;
865 } else {
866 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
867 symbolNext[s] = normalizedCounter[s];
868 } } }
869 memcpy(dt, &DTableH, sizeof(DTableH));
870 }
871
872 /* Spread symbols */
873 { U32 const tableMask = tableSize-1;
874 U32 const step = FSE_TABLESTEP(tableSize);
875 U32 s, position = 0;
876 for (s=0; s<maxSV1; s++) {
877 int i;
878 for (i=0; i<normalizedCounter[s]; i++) {
879 tableDecode[position].baseValue = s;
880 position = (position + step) & tableMask;
881 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
882 } }
883 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
884 }
885
886 /* Build Decoding table */
887 { U32 u;
888 for (u=0; u<tableSize; u++) {
889 U32 const symbol = tableDecode[u].baseValue;
890 U32 const nextState = symbolNext[symbol]++;
891 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
892 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
893 assert(nbAdditionalBits[symbol] < 255);
894 tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
895 tableDecode[u].baseValue = baseValue[symbol];
896 } }
897 }
898
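/* A toy, self-contained sketch (not from the vendored sources) of the
 * "spread symbols" step above. Low-probability (-1) symbols, which the
 * real code lays down from the table's top, are omitted; the point is
 * that the odd step is coprime with the power-of-two table size, so the
 * walk visits every cell exactly once and ends back at position 0. */
#include <stdio.h>

int main(void)
{
    short const norm[3] = { 2, 1, 1 };   /* toy normalized counts, sum = 4 */
    unsigned const tableLog = 2, tableSize = 1u << tableLog, mask = tableSize - 1;
    unsigned const step = (tableSize >> 1) + (tableSize >> 3) + 3;  /* FSE_TABLESTEP */
    unsigned cell[4], pos = 0, s;
    for (s = 0; s < 3; s++) {
        int i;
        for (i = 0; i < norm[s]; i++) {
            cell[pos] = s;
            pos = (pos + step) & mask;
        }
    }
    for (s = 0; s < tableSize; s++) printf("cell[%u] = symbol %u\n", s, cell[s]);
    return 0;   /* pos has wrapped back to 0 when the counts are consistent */
}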
899
900 /*! ZSTD_buildSeqTable() :
901 * @return : nb bytes read from src,
902 * or an error code if it fails */
903 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
904 symbolEncodingType_e type, U32 max, U32 maxLog,
905 const void* src, size_t srcSize,
906 const U32* baseValue, const U32* nbAdditionalBits,
907 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
908 int ddictIsCold, int nbSeq)
909 {
910 switch(type)
911 {
912 case set_rle :
913 if (!srcSize) return ERROR(srcSize_wrong);
914 if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
915 { U32 const symbol = *(const BYTE*)src;
916 U32 const baseline = baseValue[symbol];
917 U32 const nbBits = nbAdditionalBits[symbol];
918 ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
919 }
920 *DTablePtr = DTableSpace;
921 return 1;
922 case set_basic :
923 *DTablePtr = defaultTable;
924 return 0;
925 case set_repeat:
926 if (!flagRepeatTable) return ERROR(corruption_detected);
927 /* prefetch FSE table if used */
928 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
929 const void* const pStart = *DTablePtr;
930 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
931 PREFETCH_AREA(pStart, pSize);
932 }
933 return 0;
934 case set_compressed :
935 { U32 tableLog;
936 S16 norm[MaxSeq+1];
937 size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
938 if (FSE_isError(headerSize)) return ERROR(corruption_detected);
939 if (tableLog > maxLog) return ERROR(corruption_detected);
940 ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
941 *DTablePtr = DTableSpace;
942 return headerSize;
943 }
944 default : /* impossible */
945 assert(0);
946 return ERROR(GENERIC);
947 }
948 }
949
950 static const U32 LL_base[MaxLL+1] = {
951 0, 1, 2, 3, 4, 5, 6, 7,
952 8, 9, 10, 11, 12, 13, 14, 15,
953 16, 18, 20, 22, 24, 28, 32, 40,
954 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
955 0x2000, 0x4000, 0x8000, 0x10000 };
956
957 static const U32 OF_base[MaxOff+1] = {
958 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
959 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
960 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
961 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
962
963 static const U32 OF_bits[MaxOff+1] = {
964 0, 1, 2, 3, 4, 5, 6, 7,
965 8, 9, 10, 11, 12, 13, 14, 15,
966 16, 17, 18, 19, 20, 21, 22, 23,
967 24, 25, 26, 27, 28, 29, 30, 31 };
968
969 static const U32 ML_base[MaxML+1] = {
970 3, 4, 5, 6, 7, 8, 9, 10,
971 11, 12, 13, 14, 15, 16, 17, 18,
972 19, 20, 21, 22, 23, 24, 25, 26,
973 27, 28, 29, 30, 31, 32, 33, 34,
974 35, 37, 39, 41, 43, 47, 51, 59,
975 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
976 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
977
978 /* Hidden declaration for fullbench */
979 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
980 const void* src, size_t srcSize);
981
982 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
983 const void* src, size_t srcSize)
984 {
985 const BYTE* const istart = (const BYTE* const)src;
986 const BYTE* const iend = istart + srcSize;
987 const BYTE* ip = istart;
988 int nbSeq;
989 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
990
991 /* check */
992 if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
993
994 /* SeqHead */
995 nbSeq = *ip++;
996 if (!nbSeq) { *nbSeqPtr=0; return 1; }
997 if (nbSeq > 0x7F) {
998 if (nbSeq == 0xFF) {
999 if (ip+2 > iend) return ERROR(srcSize_wrong);
1000 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
1001 } else {
1002 if (ip >= iend) return ERROR(srcSize_wrong);
1003 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
1004 }
1005 }
1006 *nbSeqPtr = nbSeq;
1007
1008 /* FSE table descriptors */
1009 if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
1010 { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
1011 symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
1012 symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
1013 ip++;
1014
1015 /* Build DTables */
1016 { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
1017 LLtype, MaxLL, LLFSELog,
1018 ip, iend-ip,
1019 LL_base, LL_bits,
1020 LL_defaultDTable, dctx->fseEntropy,
1021 dctx->ddictIsCold, nbSeq);
1022 if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
1023 ip += llhSize;
1024 }
1025
1026 { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
1027 OFtype, MaxOff, OffFSELog,
1028 ip, iend-ip,
1029 OF_base, OF_bits,
1030 OF_defaultDTable, dctx->fseEntropy,
1031 dctx->ddictIsCold, nbSeq);
1032 if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
1033 ip += ofhSize;
1034 }
1035
1036 { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
1037 MLtype, MaxML, MLFSELog,
1038 ip, iend-ip,
1039 ML_base, ML_bits,
1040 ML_defaultDTable, dctx->fseEntropy,
1041 dctx->ddictIsCold, nbSeq);
1042 if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
1043 ip += mlhSize;
1044 }
1045 }
1046
1047 /* prefetch dictionary content */
1048 if (dctx->ddictIsCold) {
1049 size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart;
1050 size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ );
1051 size_t const pSize = MIN(psmin, 128 KB /* protection */ );
1052 const void* const pStart = (const char*)dctx->dictEnd - pSize;
1053 PREFETCH_AREA(pStart, pSize);
1054 dctx->ddictIsCold = 0;
1055 }
1056
1057 return ip-istart;
1058 }
1059
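/* An illustrative sketch (not from the vendored sources) of the variable-
 * length sequence-count header decoded at the top of this function:
 * 1 byte encodes 0-127, 2 bytes encode 128-32511, and a 0xFF prefix plus
 * a little-endian 16-bit value (offset by LONGNBSEQ = 0x7F00) covers the
 * rest. example_* names are hypothetical. */
#include <stdio.h>
#include <stddef.h>

#define EXAMPLE_LONGNBSEQ 0x7F00

/* returns the number of header bytes consumed, or -1 on truncated input */
static int example_readNbSeq(const unsigned char* ip, size_t size, int* nbSeq)
{
    if (size < 1) return -1;
    if (ip[0] <= 0x7F) { *nbSeq = ip[0]; return 1; }
    if (ip[0] == 0xFF) {
        if (size < 3) return -1;
        *nbSeq = ((int)ip[1] | ((int)ip[2] << 8)) + EXAMPLE_LONGNBSEQ;
        return 3;
    }
    if (size < 2) return -1;
    *nbSeq = ((ip[0] - 0x80) << 8) + ip[1];
    return 2;
}

int main(void)
{
    const unsigned char one[1]   = { 0x05 };
    const unsigned char two[2]   = { 0x81, 0x02 };        /* (0x81-0x80)<<8 + 2 = 258 */
    const unsigned char three[3] = { 0xFF, 0x10, 0x00 };  /* 0x0010 + 0x7F00 = 32528 */
    int n;
    example_readNbSeq(one,   sizeof one,   &n); printf("%d\n", n);  /* 5 */
    example_readNbSeq(two,   sizeof two,   &n); printf("%d\n", n);  /* 258 */
    example_readNbSeq(three, sizeof three, &n); printf("%d\n", n);  /* 32528 */
    return 0;
}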
1060
1061 typedef struct {
1062 size_t litLength;
1063 size_t matchLength;
1064 size_t offset;
1065 const BYTE* match;
1066 } seq_t;
1067
1068 typedef struct {
1069 size_t state;
1070 const ZSTD_seqSymbol* table;
1071 } ZSTD_fseState;
1072
1073 typedef struct {
1074 BIT_DStream_t DStream;
1075 ZSTD_fseState stateLL;
1076 ZSTD_fseState stateOffb;
1077 ZSTD_fseState stateML;
1078 size_t prevOffset[ZSTD_REP_NUM];
1079 const BYTE* prefixStart;
1080 const BYTE* dictEnd;
1081 size_t pos;
1082 } seqState_t;
1083
1084
1085 FORCE_NOINLINE
1086 size_t ZSTD_execSequenceLast7(BYTE* op,
1087 BYTE* const oend, seq_t sequence,
1088 const BYTE** litPtr, const BYTE* const litLimit,
1089 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
1090 {
1091 BYTE* const oLitEnd = op + sequence.litLength;
1092 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1093 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1094 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1095 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1096 const BYTE* match = oLitEnd - sequence.offset;
1097
1098 /* check */
1099 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1100 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1101 if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */
1102
1103 /* copy literals */
1104 if (op < oend_w) {
1105 ZSTD_wildcopy(op, *litPtr, oend_w - op);
1106 *litPtr += oend_w - op;
1107 op = oend_w;
1108 }
1109 while (op < oLitEnd) *op++ = *(*litPtr)++;
1110
1111 /* copy Match */
1112 if (sequence.offset > (size_t)(oLitEnd - base)) {
1113 /* offset beyond prefix */
1114 if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
1115 match = dictEnd - (base-match);
1116 if (match + sequence.matchLength <= dictEnd) {
1117 memmove(oLitEnd, match, sequence.matchLength);
1118 return sequenceLength;
1119 }
1120 /* span extDict & currentPrefixSegment */
1121 { size_t const length1 = dictEnd - match;
1122 memmove(oLitEnd, match, length1);
1123 op = oLitEnd + length1;
1124 sequence.matchLength -= length1;
1125 match = base;
1126 } }
1127 while (op < oMatchEnd) *op++ = *match++;
1128 return sequenceLength;
1129 }
1130
1131
1132 HINT_INLINE
1133 size_t ZSTD_execSequence(BYTE* op,
1134 BYTE* const oend, seq_t sequence,
1135 const BYTE** litPtr, const BYTE* const litLimit,
1136 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1137 {
1138 BYTE* const oLitEnd = op + sequence.litLength;
1139 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1140 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1141 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1142 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1143 const BYTE* match = oLitEnd - sequence.offset;
1144
1145 /* check */
1146 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1147 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1148 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1149
1150 /* copy Literals */
1151 ZSTD_copy8(op, *litPtr);
1152 if (sequence.litLength > 8)
1153 ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
1154 op = oLitEnd;
1155 *litPtr = iLitEnd; /* update for next sequence */
1156
1157 /* copy Match */
1158 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1159 /* offset beyond prefix -> go into extDict */
1160 if (sequence.offset > (size_t)(oLitEnd - virtualStart))
1161 return ERROR(corruption_detected);
1162 match = dictEnd + (match - prefixStart);
1163 if (match + sequence.matchLength <= dictEnd) {
1164 memmove(oLitEnd, match, sequence.matchLength);
1165 return sequenceLength;
1166 }
1167 /* span extDict & currentPrefixSegment */
1168 { size_t const length1 = dictEnd - match;
1169 memmove(oLitEnd, match, length1);
1170 op = oLitEnd + length1;
1171 sequence.matchLength -= length1;
1172 match = prefixStart;
1173 if (op > oend_w || sequence.matchLength < MINMATCH) {
1174 U32 i;
1175 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1176 return sequenceLength;
1177 }
1178 } }
1179 /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
1180
1181 /* match within prefix */
1182 if (sequence.offset < 8) {
1183 /* close range match, overlap */
1184 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
1185 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
1186 int const sub2 = dec64table[sequence.offset];
1187 op[0] = match[0];
1188 op[1] = match[1];
1189 op[2] = match[2];
1190 op[3] = match[3];
1191 match += dec32table[sequence.offset];
1192 ZSTD_copy4(op+4, match);
1193 match -= sub2;
1194 } else {
1195 ZSTD_copy8(op, match);
1196 }
1197 op += 8; match += 8;
1198
1199 if (oMatchEnd > oend-(16-MINMATCH)) {
1200 if (op < oend_w) {
1201 ZSTD_wildcopy(op, match, oend_w - op);
1202 match += oend_w - op;
1203 op = oend_w;
1204 }
1205 while (op < oMatchEnd) *op++ = *match++;
1206 } else {
1207 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1208 }
1209 return sequenceLength;
1210 }
1211
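/* An illustrative sketch (not from the vendored sources) of the close-range
 * overlap the dec32table/dec64table trick above deals with: a match whose
 * offset is smaller than the copy width reads bytes the copy itself has just
 * written. A byte-wise loop shows the intended semantics for offset 1. */
#include <stdio.h>

int main(void)
{
    char buf[16] = "abc";                /* decoded so far: "abc" */
    size_t const offset = 1, matchLength = 5;
    char* op = buf + 3;
    const char* match = op - offset;     /* points at the last decoded byte */
    size_t i;
    for (i = 0; i < matchLength; i++) op[i] = match[i];   /* overlap-safe order */
    buf[3 + matchLength] = '\0';
    printf("%s\n", buf);                 /* prints "abcccccc" */
    return 0;
}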
1212
1213 HINT_INLINE
1214 size_t ZSTD_execSequenceLong(BYTE* op,
1215 BYTE* const oend, seq_t sequence,
1216 const BYTE** litPtr, const BYTE* const litLimit,
1217 const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
1218 {
1219 BYTE* const oLitEnd = op + sequence.litLength;
1220 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1221 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1222 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1223 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1224 const BYTE* match = sequence.match;
1225
1226 /* check */
1227 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1228 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1229 if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
1230
1231 /* copy Literals */
1232 ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
1233 if (sequence.litLength > 8)
1234 ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
1235 op = oLitEnd;
1236 *litPtr = iLitEnd; /* update for next sequence */
1237
1238 /* copy Match */
1239 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1240 /* offset beyond prefix */
1241 if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
1242 if (match + sequence.matchLength <= dictEnd) {
1243 memmove(oLitEnd, match, sequence.matchLength);
1244 return sequenceLength;
1245 }
1246 /* span extDict & currentPrefixSegment */
1247 { size_t const length1 = dictEnd - match;
1248 memmove(oLitEnd, match, length1);
1249 op = oLitEnd + length1;
1250 sequence.matchLength -= length1;
1251 match = prefixStart;
1252 if (op > oend_w || sequence.matchLength < MINMATCH) {
1253 U32 i;
1254 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1255 return sequenceLength;
1256 }
1257 } }
1258 assert(op <= oend_w);
1259 assert(sequence.matchLength >= MINMATCH);
1260
1261 /* match within prefix */
1262 if (sequence.offset < 8) {
1263 /* close range match, overlap */
1264 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
1265 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
1266 int const sub2 = dec64table[sequence.offset];
1267 op[0] = match[0];
1268 op[1] = match[1];
1269 op[2] = match[2];
1270 op[3] = match[3];
1271 match += dec32table[sequence.offset];
1272 ZSTD_copy4(op+4, match);
1273 match -= sub2;
1274 } else {
1275 ZSTD_copy8(op, match);
1276 }
1277 op += 8; match += 8;
1278
1279 if (oMatchEnd > oend-(16-MINMATCH)) {
1280 if (op < oend_w) {
1281 ZSTD_wildcopy(op, match, oend_w - op);
1282 match += oend_w - op;
1283 op = oend_w;
1284 }
1285 while (op < oMatchEnd) *op++ = *match++;
1286 } else {
1287 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1288 }
1289 return sequenceLength;
1290 }
1291
1292 static void
1293 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1294 {
1295 const void* ptr = dt;
1296 const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1297 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1298 DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1299 (U32)DStatePtr->state, DTableH->tableLog);
1300 BIT_reloadDStream(bitD);
1301 DStatePtr->table = dt + 1;
1302 }
1303
1304 FORCE_INLINE_TEMPLATE void
1305 ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
1306 {
1307 ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
1308 U32 const nbBits = DInfo.nbBits;
1309 size_t const lowBits = BIT_readBits(bitD, nbBits);
1310 DStatePtr->state = DInfo.nextState + lowBits;
1311 }
1312
1313 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1314 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1315 * bits before reloading. This value is the maximum number of bits we read
1316 * after reloading when we are decoding long offsets.
1317 */
1318 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1319 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1320 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1321 : 0)
1322
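/* Worked numbers (assuming the usual zstd limits, ZSTD_WINDOWLOG_MAX_32 = 30
 * and STREAM_ACCUMULATOR_MIN_32 = 25): the macro above evaluates to
 * 30 - 25 = 5 extra bits, which is exactly what the
 * ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5) in
 * ZSTD_decodeSequence() below pins down. */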
1323 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1324
1325 FORCE_INLINE_TEMPLATE seq_t
1326 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1327 {
1328 seq_t seq;
1329 U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
1330 U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
1331 U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
1332 U32 const totalBits = llBits+mlBits+ofBits;
1333 U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
1334 U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
1335 U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
1336
1337 /* sequence */
1338 { size_t offset;
1339 if (!ofBits)
1340 offset = 0;
1341 else {
1342 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1343 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1344 assert(ofBits <= MaxOff);
1345 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1346 U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1347 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1348 BIT_reloadDStream(&seqState->DStream);
1349 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1350 assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
1351 } else {
1352 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1353 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1354 }
1355 }
1356
1357 if (ofBits <= 1) {
1358 offset += (llBase==0);
1359 if (offset) {
1360 size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1361 temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1362 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1363 seqState->prevOffset[1] = seqState->prevOffset[0];
1364 seqState->prevOffset[0] = offset = temp;
1365 } else { /* offset == 0 */
1366 offset = seqState->prevOffset[0];
1367 }
1368 } else {
1369 seqState->prevOffset[2] = seqState->prevOffset[1];
1370 seqState->prevOffset[1] = seqState->prevOffset[0];
1371 seqState->prevOffset[0] = offset;
1372 }
1373 seq.offset = offset;
1374 }
1375
1376 seq.matchLength = mlBase
1377 + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
1378 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1379 BIT_reloadDStream(&seqState->DStream);
1380 if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1381 BIT_reloadDStream(&seqState->DStream);
1382 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1383 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1384
1385 seq.litLength = llBase
1386 + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
1387 if (MEM_32bits())
1388 BIT_reloadDStream(&seqState->DStream);
1389
1390 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1391 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1392
1393 /* ANS state update */
1394 ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1395 ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1396 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1397 ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1398
1399 return seq;
1400 }
1401
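/* A spec-level sketch (not this file's exact control flow) of the
 * repeat-offset bookkeeping the ofBits <= 1 branch above implements:
 * offset values 1-3 select from a 3-entry history, shifted by one when
 * the literal length is 0, and larger values encode (value - 3) directly.
 * example_* names are hypothetical. */
#include <stdio.h>
#include <stddef.h>

static size_t example_applyOffsetValue(size_t rep[3], size_t offsetValue, size_t litLength)
{
    if (offsetValue > 3) {                 /* new offset: push to front of history */
        size_t const offset = offsetValue - 3;
        rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = offset;
        return offset;
    }
    {   size_t const idx = offsetValue - 1 + (litLength == 0);        /* 0..3 */
        size_t offset = (idx == 3) ? rep[0] - 1 : rep[idx];
        offset += (offset == 0);           /* 0 is invalid (corrupt input): force 1 */
        if (idx != 0) {                    /* move the selected offset to the front */
            if (idx != 1) rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = offset;
        }
        return offset;
    }
}

int main(void)
{
    size_t rep[3] = { 1, 4, 8 };
    printf("%zu\n", example_applyOffsetValue(rep, 2, 5));   /* 4: second history entry */
    printf("%zu\n", example_applyOffsetValue(rep, 10, 0));  /* 7: 10 - 3, pushed to front */
    return 0;
}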
1402 FORCE_INLINE_TEMPLATE size_t
1403 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
1404 void* dst, size_t maxDstSize,
1405 const void* seqStart, size_t seqSize, int nbSeq,
1406 const ZSTD_longOffset_e isLongOffset)
1407 {
1408 const BYTE* ip = (const BYTE*)seqStart;
1409 const BYTE* const iend = ip + seqSize;
1410 BYTE* const ostart = (BYTE* const)dst;
1411 BYTE* const oend = ostart + maxDstSize;
1412 BYTE* op = ostart;
1413 const BYTE* litPtr = dctx->litPtr;
1414 const BYTE* const litEnd = litPtr + dctx->litSize;
1415 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1416 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1417 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1418 DEBUGLOG(5, "ZSTD_decompressSequences_body");
1419
1420 /* Regen sequences */
1421 if (nbSeq) {
1422 seqState_t seqState;
1423 dctx->fseEntropy = 1;
1424 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1425 CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1426 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1427 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1428 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1429
1430 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
1431 nbSeq--;
1432 { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1433 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1434 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1435 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1436 op += oneSeqSize;
1437 } }
1438
1439 /* check if reached exact end */
1440 DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1441 if (nbSeq) return ERROR(corruption_detected);
1442 /* save reps for next block */
1443 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1444 }
1445
1446 /* last literal segment */
1447 { size_t const lastLLSize = litEnd - litPtr;
1448 if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
1449 memcpy(op, litPtr, lastLLSize);
1450 op += lastLLSize;
1451 }
1452
1453 return op-ostart;
1454 }
1455
1456 static size_t
1457 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1458 void* dst, size_t maxDstSize,
1459 const void* seqStart, size_t seqSize, int nbSeq,
1460 const ZSTD_longOffset_e isLongOffset)
1461 {
1462 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1463 }
1464
1465
1466
1467 FORCE_INLINE_TEMPLATE seq_t
1468 ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
1469 {
1470 seq_t seq;
1471 U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
1472 U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
1473 U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
1474 U32 const totalBits = llBits+mlBits+ofBits;
1475 U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
1476 U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
1477 U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
1478
1479 /* sequence */
1480 { size_t offset;
1481 if (!ofBits)
1482 offset = 0;
1483 else {
1484 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1485 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1486 assert(ofBits <= MaxOff);
1487 if (MEM_32bits() && longOffsets) {
1488 U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
1489 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1490 if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
1491 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1492 } else {
1493 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1494 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1495 }
1496 }
1497
1498 if (ofBits <= 1) {
1499 offset += (llBase==0);
1500 if (offset) {
1501 size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1502 temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1503 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1504 seqState->prevOffset[1] = seqState->prevOffset[0];
1505 seqState->prevOffset[0] = offset = temp;
1506 } else {
1507 offset = seqState->prevOffset[0];
1508 }
1509 } else {
1510 seqState->prevOffset[2] = seqState->prevOffset[1];
1511 seqState->prevOffset[1] = seqState->prevOffset[0];
1512 seqState->prevOffset[0] = offset;
1513 }
1514 seq.offset = offset;
1515 }
1516
1517 seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1518 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1519 BIT_reloadDStream(&seqState->DStream);
1520 if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1521 BIT_reloadDStream(&seqState->DStream);
1522 /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */
1523 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1524
1525 seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1526 if (MEM_32bits())
1527 BIT_reloadDStream(&seqState->DStream);
1528
1529 { size_t const pos = seqState->pos + seq.litLength;
1530 const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
1531 seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1532 * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1533 seqState->pos = pos + seq.matchLength;
1534 }
1535
1536 /* ANS state update */
1537 ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1538 ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1539 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1540 ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1541
1542 return seq;
1543 }
1544
1545 FORCE_INLINE_TEMPLATE size_t
1546 ZSTD_decompressSequencesLong_body(
1547 ZSTD_DCtx* dctx,
1548 void* dst, size_t maxDstSize,
1549 const void* seqStart, size_t seqSize, int nbSeq,
1550 const ZSTD_longOffset_e isLongOffset)
1551 {
1552 const BYTE* ip = (const BYTE*)seqStart;
1553 const BYTE* const iend = ip + seqSize;
1554 BYTE* const ostart = (BYTE* const)dst;
1555 BYTE* const oend = ostart + maxDstSize;
1556 BYTE* op = ostart;
1557 const BYTE* litPtr = dctx->litPtr;
1558 const BYTE* const litEnd = litPtr + dctx->litSize;
1559 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1560 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1561 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1562
1563 /* Regen sequences */
1564 if (nbSeq) {
1565 #define STORED_SEQS 4
1566 #define STOSEQ_MASK (STORED_SEQS-1)
1567 #define ADVANCED_SEQS 4
1568 seq_t sequences[STORED_SEQS];
1569 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1570 seqState_t seqState;
1571 int seqNb;
1572 dctx->fseEntropy = 1;
1573 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1574 seqState.prefixStart = prefixStart;
1575 seqState.pos = (size_t)(op-prefixStart);
1576 seqState.dictEnd = dictEnd;
1577 CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1578 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1579 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1580 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1581
1582 /* prepare in advance */
1583 for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1584 sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1585 }
1586 if (seqNb<seqAdvance) return ERROR(corruption_detected);
1587
1588 /* decode and decompress */
1589 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1590 seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1591 size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1592 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1593 PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1594 sequences[seqNb&STOSEQ_MASK] = sequence;
1595 op += oneSeqSize;
1596 }
1597 if (seqNb<nbSeq) return ERROR(corruption_detected);
1598
1599 /* finish queue */
1600 seqNb -= seqAdvance;
1601 for ( ; seqNb<nbSeq ; seqNb++) {
1602 size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1603 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1604 op += oneSeqSize;
1605 }
1606
1607 /* save reps for next block */
1608 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1609 #undef STORED_SEQS
1610 #undef STOSEQ_MASK
1611 #undef ADVANCED_SEQS
1612 }
1613
1614 /* last literal segment */
1615 { size_t const lastLLSize = litEnd - litPtr;
1616 if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
1617 memcpy(op, litPtr, lastLLSize);
1618 op += lastLLSize;
1619 }
1620
1621 return op-ostart;
1622 }
1623
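/* A toy sketch (not from the vendored sources) of the 4-deep decode-ahead
 * pipeline above: sequence i is decoded (and its match prefetched) while
 * sequence i-4 is executed, through a power-of-two ring buffer, then the
 * last 4 entries are drained. */
#include <stdio.h>

#define RING 4
#define MASK (RING - 1)

int main(void)
{
    int const work[10] = { 0,1,2,3,4,5,6,7,8,9 };   /* stand-ins for sequences */
    int ring[RING];
    int const n = 10;
    int i;
    for (i = 0; i < RING; i++) ring[i & MASK] = work[i];          /* prime the queue */
    for (; i < n; i++) {
        printf("execute %d while decoding %d\n", ring[(i - RING) & MASK], work[i]);
        ring[i & MASK] = work[i];
    }
    for (i -= RING; i < n; i++) printf("execute %d\n", ring[i & MASK]);  /* drain */
    return 0;
}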
1624 static size_t
1625 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1626 void* dst, size_t maxDstSize,
1627 const void* seqStart, size_t seqSize, int nbSeq,
1628 const ZSTD_longOffset_e isLongOffset)
1629 {
1630 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1631 }
1632
1633
1634
1635 #if DYNAMIC_BMI2
1636
1637 static TARGET_ATTRIBUTE("bmi2") size_t
1638 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1639 void* dst, size_t maxDstSize,
1640 const void* seqStart, size_t seqSize, int nbSeq,
1641 const ZSTD_longOffset_e isLongOffset)
1642 {
1643 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1644 }
1645
1646 static TARGET_ATTRIBUTE("bmi2") size_t
1647 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1648 void* dst, size_t maxDstSize,
1649 const void* seqStart, size_t seqSize, int nbSeq,
1650 const ZSTD_longOffset_e isLongOffset)
1651 {
1652 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1653 }
1654
1655 #endif
1656
1657 typedef size_t (*ZSTD_decompressSequences_t)(
1658 ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
1659 const void *seqStart, size_t seqSize, int nbSeq,
1660 const ZSTD_longOffset_e isLongOffset);
1661
1662 static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1663 const void* seqStart, size_t seqSize, int nbSeq,
1664 const ZSTD_longOffset_e isLongOffset)
1665 {
1666 DEBUGLOG(5, "ZSTD_decompressSequences");
1667 #if DYNAMIC_BMI2
1668 if (dctx->bmi2) {
1669 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1670 }
1671 #endif
1672 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1673 }
1674
1675 static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1676 void* dst, size_t maxDstSize,
1677 const void* seqStart, size_t seqSize, int nbSeq,
1678 const ZSTD_longOffset_e isLongOffset)
1679 {
1680 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1681 #if DYNAMIC_BMI2
1682 if (dctx->bmi2) {
1683 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1684 }
1685 #endif
1686 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1687 }
1688
1689 /* ZSTD_getLongOffsetsShare() :
1690 * condition : offTable must be valid
1691 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1692 * compared to maximum possible of (1<<OffFSELog) */
1693 static unsigned
1694 ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1695 {
1696 const void* ptr = offTable;
1697 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1698 const ZSTD_seqSymbol* table = offTable + 1;
1699 U32 const max = 1 << tableLog;
1700 U32 u, total = 0;
1701 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1702
1703 assert(max <= (1 << OffFSELog)); /* max not too large */
1704 for (u=0; u<max; u++) {
1705 if (table[u].nbAdditionalBits > 22) total += 1;
1706 }
1707
1708 assert(tableLog <= OffFSELog);
1709 total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1710
1711 return total;
1712 }
1713
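/* The share returned above is scaled to 1 << OffFSELog (256 cells when
 * OffFSELog is 8, as in mainline zstd), so the minShare thresholds of 7
 * and 20 used in ZSTD_decompressBlock_internal() below correspond to
 * 7/256 = 2.73% and 20/256 = 7.81% of offset-table cells requiring more
 * than 22 extra bits. */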
1714
1715 static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1716 void* dst, size_t dstCapacity,
1717 const void* src, size_t srcSize, const int frame)
1718 { /* blockType == blockCompressed */
1719 const BYTE* ip = (const BYTE*)src;
1720 /* isLongOffset must be true if there are long offsets.
1721 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1722 * We don't expect that to be the case in 64-bit mode.
1723 * In block mode, window size is not known, so we have to be conservative.
1724 * (note: but it could be evaluated from current-lowLimit)
1725 */
1726 ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
1727 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1728
1729 if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
1730
1731 /* Decode literals section */
1732 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
1733 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1734 if (ZSTD_isError(litCSize)) return litCSize;
1735 ip += litCSize;
1736 srcSize -= litCSize;
1737 }
1738
1739 /* Build Decoding Tables */
1740 { int nbSeq;
1741 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
1742 if (ZSTD_isError(seqHSize)) return seqHSize;
1743 ip += seqHSize;
1744 srcSize -= seqHSize;
1745
1746 if ( (!frame || dctx->fParams.windowSize > (1<<24))
1747 && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
1748 U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1749 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1750 if (shareLongOffsets >= minShare)
1751 return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1752 }
1753
1754 return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1755 }
1756 }
1757
1758
1759 static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
1760 {
1761 if (dst != dctx->previousDstEnd) { /* not contiguous */
1762 dctx->dictEnd = dctx->previousDstEnd;
1763 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1764 dctx->prefixStart = dst;
1765 dctx->previousDstEnd = dst;
1766 }
1767 }
1768
1769 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1770 void* dst, size_t dstCapacity,
1771 const void* src, size_t srcSize)
1772 {
1773 size_t dSize;
1774 ZSTD_checkContinuity(dctx, dst);
1775 dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
1776 dctx->previousDstEnd = (char*)dst + dSize;
1777 return dSize;
1778 }
1779
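/* A hedged usage sketch (not from the vendored sources) for the raw-block
 * entry point above. ZSTD_decompressBlock() belongs to zstd's experimental
 * block-level API, gated behind ZSTD_STATIC_LINKING_ONLY; the input must be
 * a single block produced by ZSTD_compressBlock(). */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* returns the regenerated size, a zstd error code (test with ZSTD_isError),
 * or (size_t)-1 if the context could not be allocated */
static size_t example_decompressOneBlock(void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t ret;
    if (dctx == NULL) return (size_t)-1;
    ret = ZSTD_decompressBegin(dctx);            /* reset decoding state */
    if (!ZSTD_isError(ret))
        ret = ZSTD_decompressBlock(dctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeDCtx(dctx);
    return ret;
}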
1780
1781 /** ZSTD_insertBlock() :
1782 insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
1783 ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
1784 {
1785 ZSTD_checkContinuity(dctx, blockStart);
1786 dctx->previousDstEnd = (const char*)blockStart + blockSize;
1787 return blockSize;
1788 }
1789
1790
1791 static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE value, size_t length)
1792 {
1793 if (length > dstCapacity) return ERROR(dstSize_tooSmall);
1794 memset(dst, value, length);
1795 return length;
1796 }
1797
1798 439 /** ZSTD_findFrameCompressedSize() :
1799 440 * compatible with legacy mode
1800 441 * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
@@ -1806,9 +447,9 b' size_t ZSTD_findFrameCompressedSize(cons'
1806 447 if (ZSTD_isLegacy(src, srcSize))
1807 448 return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
1808 449 #endif
1809 if ( (srcSize >= ZSTD_skippableHeaderSize)
450 if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
1810 && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) {
451 && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) {
1811 return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE);
452 return readSkippableFrameSize(src, srcSize);
1812 453 } else {
1813 454 const BYTE* ip = (const BYTE*)src;
1814 455 const BYTE* const ipstart = ip;
@@ -1848,8 +489,64 b' size_t ZSTD_findFrameCompressedSize(cons'
1848 489 }
1849 490 }
1850 491
492
493
494 /*-*************************************************************
495 * Frame decoding
496 ***************************************************************/
497
498
499 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
500 {
501 if (dst != dctx->previousDstEnd) { /* not contiguous */
502 dctx->dictEnd = dctx->previousDstEnd;
503 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
504 dctx->prefixStart = dst;
505 dctx->previousDstEnd = dst;
506 }
507 }
508
509 /** ZSTD_insertBlock() :
510 insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
511 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
512 {
513 ZSTD_checkContinuity(dctx, blockStart);
514 dctx->previousDstEnd = (const char*)blockStart + blockSize;
515 return blockSize;
516 }
517
518
519 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
520 const void* src, size_t srcSize)
521 {
522 DEBUGLOG(5, "ZSTD_copyRawBlock");
523 if (dst == NULL) {
524 if (srcSize == 0) return 0;
525 return ERROR(dstBuffer_null);
526 }
527 if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
528 memcpy(dst, src, srcSize);
529 return srcSize;
530 }
531
532 static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
533 BYTE b,
534 size_t regenSize)
535 {
536 if (dst == NULL) {
537 if (regenSize == 0) return 0;
538 return ERROR(dstBuffer_null);
539 }
540 if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
541 memset(dst, b, regenSize);
542 return regenSize;
543 }
544
545
1851 546 /*! ZSTD_decompressFrame() :
1852 * @dctx must be properly initialized */
547 * @dctx must be properly initialized
548 * will update *srcPtr and *srcSizePtr,
549 * to make *srcPtr progress by one frame. */
1853 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
550 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
1854 void* dst, size_t dstCapacity,
551 void* dst, size_t dstCapacity,
1855 const void** srcPtr, size_t *srcSizePtr)
552 const void** srcPtr, size_t *srcSizePtr)
@@ -1858,31 +555,33 b' static size_t ZSTD_decompressFrame(ZSTD_'
1858 BYTE* const ostart = (BYTE* const)dst;
555 BYTE* const ostart = (BYTE* const)dst;
1859 BYTE* const oend = ostart + dstCapacity;
556 BYTE* const oend = ostart + dstCapacity;
1860 BYTE* op = ostart;
557 BYTE* op = ostart;
1861 size_t remainingSize = *srcSizePtr;
558 size_t remainingSrcSize = *srcSizePtr;
559
560 DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
1862
561
1863 /* check */
562 /* check */
1864 if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize)
563 if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize)
1865 return ERROR(srcSize_wrong);
564 return ERROR(srcSize_wrong);
1866
565
1867 /* Frame Header */
566 /* Frame Header */
1868 { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
567 { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
1869 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
568 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
1870 if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize)
569 if (remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize)
1871 return ERROR(srcSize_wrong);
570 return ERROR(srcSize_wrong);
1872 CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
571 CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
1873 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
572 ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
1874 }
573 }
1875
574
1876 /* Loop on each block */
575 /* Loop on each block */
1877 while (1) {
576 while (1) {
1878 size_t decodedSize;
577 size_t decodedSize;
1879 blockProperties_t blockProperties;
578 blockProperties_t blockProperties;
1880 size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
579 size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
1881 if (ZSTD_isError(cBlockSize)) return cBlockSize;
580 if (ZSTD_isError(cBlockSize)) return cBlockSize;
1882
581
1883 ip += ZSTD_blockHeaderSize;
582 ip += ZSTD_blockHeaderSize;
1884 remainingSize -= ZSTD_blockHeaderSize;
583 remainingSrcSize -= ZSTD_blockHeaderSize;
1885 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
584 if (cBlockSize > remainingSrcSize) return ERROR(srcSize_wrong);
1886
585
1887 switch(blockProperties.blockType)
586 switch(blockProperties.blockType)
1888 {
587 {
@@ -1893,7 +592,7 b' static size_t ZSTD_decompressFrame(ZSTD_'
1893 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
592 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
1894 break;
593 break;
1895 case bt_rle :
594 case bt_rle :
1896 decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
595 decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);
1897 break;
596 break;
1898 case bt_reserved :
597 case bt_reserved :
1899 default:
598 default:
@@ -1905,7 +604,7 b' static size_t ZSTD_decompressFrame(ZSTD_'
1905 XXH64_update(&dctx->xxhState, op, decodedSize);
604 XXH64_update(&dctx->xxhState, op, decodedSize);
1906 op += decodedSize;
605 op += decodedSize;
1907 ip += cBlockSize;
606 ip += cBlockSize;
1908 remainingSize -= cBlockSize;
607 remainingSrcSize -= cBlockSize;
1909 if (blockProperties.lastBlock) break;
608 if (blockProperties.lastBlock) break;
1910 }
609 }
1911
610
@@ -1916,16 +615,16 b' static size_t ZSTD_decompressFrame(ZSTD_'
1916 if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
615 if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
1917 U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
616 U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
1918 U32 checkRead;
617 U32 checkRead;
1919 if (remainingSize<4) return ERROR(checksum_wrong);
618 if (remainingSrcSize<4) return ERROR(checksum_wrong);
1920 checkRead = MEM_readLE32(ip);
619 checkRead = MEM_readLE32(ip);
1921 if (checkRead != checkCalc) return ERROR(checksum_wrong);
620 if (checkRead != checkCalc) return ERROR(checksum_wrong);
1922 ip += 4;
621 ip += 4;
1923 remainingSize -= 4;
622 remainingSrcSize -= 4;
1924 }
623 }
1925
624
1926 /* Allow caller to get size read */
625 /* Allow caller to get size read */
1927 *srcPtr = ip;
626 *srcPtr = ip;
1928 *srcSizePtr = remainingSize;
627 *srcSizePtr = remainingSrcSize;
1929 return op-ostart;
628 return op-ostart;
1930 }
629 }
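For orientation, the loop above follows the frame layout from the zstd format specification:

    magic(4) | frame_header(2..14) | { block_header(3) + block_data } ... | content_checksum(4, optional)

The last block is flagged inside its 3-byte header (blockProperties.lastBlock), and the optional 4-byte checksum, the low 32 bits of the frame's XXH64, is read only when fParams.checksumFlag is set.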
1931
630
@@ -1942,11 +641,11 b' static size_t ZSTD_decompressMultiFrame('
1942 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
641 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
1943
642
1944 if (ddict) {
643 if (ddict) {
1945 dict = ZSTD_DDictDictContent(ddict);
644 dict = ZSTD_DDict_dictContent(ddict);
1946 dictSize = ZSTD_DDictDictSize(ddict);
645 dictSize = ZSTD_DDict_dictSize(ddict);
1947 }
646 }
1948
647
1949 while (srcSize >= ZSTD_frameHeaderSize_prefix) {
648 while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
1950
649
1951 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
650 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
1952 if (ZSTD_isLegacy(src, srcSize)) {
651 if (ZSTD_isLegacy(src, srcSize)) {
@@ -1957,7 +656,9 b' static size_t ZSTD_decompressMultiFrame('
1957 if (dctx->staticSize) return ERROR(memory_allocation);
656 if (dctx->staticSize) return ERROR(memory_allocation);
1958
657
1959 decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
658 decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
659 if (ZSTD_isError(decodedSize)) return decodedSize;
1960
660
661 assert(decodedSize <= dstCapacity);
1961 dst = (BYTE*)dst + decodedSize;
662 dst = (BYTE*)dst + decodedSize;
1962 dstCapacity -= decodedSize;
663 dstCapacity -= decodedSize;
1963
664
@@ -1970,13 +671,11 b' static size_t ZSTD_decompressMultiFrame('
1970
671
1971 { U32 const magicNumber = MEM_readLE32(src);
672 { U32 const magicNumber = MEM_readLE32(src);
1972 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
673 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
1973 (U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
674 (unsigned)magicNumber, ZSTD_MAGICNUMBER);
1974 if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
675 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
1975 size_t skippableSize;
676 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
1976 if (srcSize < ZSTD_skippableHeaderSize)
677 if (ZSTD_isError(skippableSize))
1977 return ERROR(srcSize_wrong);
678 return skippableSize;
1978 skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE)
1979 + ZSTD_skippableHeaderSize;
1980 if (srcSize < skippableSize) return ERROR(srcSize_wrong);
679 if (srcSize < skippableSize) return ERROR(srcSize_wrong);
1981
680
1982 src = (const BYTE *)src + skippableSize;
681 src = (const BYTE *)src + skippableSize;
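For reference, a skippable frame as defined by the format specification is laid out as:

    magic(4) : 0x184D2A50 + x, x in 0..15 | frameSize(4, little-endian) | user data (frameSize bytes)

which is why the size used here is evidently the 8-byte ZSTD_SKIPPABLEHEADERSIZE plus the little-endian size field, and why the whole frame is skipped without producing output.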
@@ -2010,7 +709,7 b' static size_t ZSTD_decompressMultiFrame('
2010 return ERROR(srcSize_wrong);
709 return ERROR(srcSize_wrong);
2011 }
710 }
2012 if (ZSTD_isError(res)) return res;
711 if (ZSTD_isError(res)) return res;
2013 /* no need to bound check, ZSTD_decompressFrame already has */
712 assert(res <= dstCapacity);
2014 dst = (BYTE*)dst + res;
713 dst = (BYTE*)dst + res;
2015 dstCapacity -= res;
714 dstCapacity -= res;
2016 }
715 }
@@ -2090,9 +789,10 b' static int ZSTD_isSkipFrame(ZSTD_DCtx* d'
2090 * or an error code, which can be tested using ZSTD_isError() */
789 * or an error code, which can be tested using ZSTD_isError() */
2091 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
790 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2092 {
791 {
2093 DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
792 DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
2094 /* Sanity check */
793 /* Sanity check */
2095 if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
794 if (srcSize != dctx->expected)
795 return ERROR(srcSize_wrong); /* not allowed */
2096 if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
796 if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
2097
797
2098 switch (dctx->stage)
798 switch (dctx->stage)
@@ -2101,9 +801,9 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2101 assert(src != NULL);
801 assert(src != NULL);
2102 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
802 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
2103 assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
803 assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
2104 if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
804 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2105 memcpy(dctx->headerBuffer, src, srcSize);
805 memcpy(dctx->headerBuffer, src, srcSize);
2106 dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */
806 dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */
2107 dctx->stage = ZSTDds_decodeSkippableHeader;
807 dctx->stage = ZSTDds_decodeSkippableHeader;
2108 return 0;
808 return 0;
2109 } }
809 } }
@@ -2163,19 +863,19 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2163 rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
863 rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
2164 break;
864 break;
2165 case bt_rle :
865 case bt_rle :
2166 rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize);
866 rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
2167 break;
867 break;
2168 case bt_reserved : /* should never happen */
868 case bt_reserved : /* should never happen */
2169 default:
869 default:
2170 return ERROR(corruption_detected);
870 return ERROR(corruption_detected);
2171 }
871 }
2172 if (ZSTD_isError(rSize)) return rSize;
872 if (ZSTD_isError(rSize)) return rSize;
2173 DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
873 DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
2174 dctx->decodedSize += rSize;
874 dctx->decodedSize += rSize;
2175 if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
875 if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
2176
876
2177 if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
877 if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
2178 DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
878 DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
2179 if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
879 if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
2180 if (dctx->decodedSize != dctx->fParams.frameContentSize) {
880 if (dctx->decodedSize != dctx->fParams.frameContentSize) {
2181 return ERROR(corruption_detected);
881 return ERROR(corruption_detected);
@@ -2199,7 +899,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2199 assert(srcSize == 4); /* guaranteed by dctx->expected */
899 assert(srcSize == 4); /* guaranteed by dctx->expected */
2200 { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
900 { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
2201 U32 const check32 = MEM_readLE32(src);
901 U32 const check32 = MEM_readLE32(src);
2202 DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
902 DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
2203 if (check32 != h32) return ERROR(checksum_wrong);
903 if (check32 != h32) return ERROR(checksum_wrong);
2204 dctx->expected = 0;
904 dctx->expected = 0;
2205 dctx->stage = ZSTDds_getFrameHeaderSize;
905 dctx->stage = ZSTDds_getFrameHeaderSize;
@@ -2208,8 +908,8 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2208
908
2209 case ZSTDds_decodeSkippableHeader:
909 case ZSTDds_decodeSkippableHeader:
2210 assert(src != NULL);
910 assert(src != NULL);
2211 assert(srcSize <= ZSTD_skippableHeaderSize);
911 assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
2212 memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */
912 memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
2213 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
913 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
2214 dctx->stage = ZSTDds_skipFrame;
914 dctx->stage = ZSTDds_skipFrame;
2215 return 0;
915 return 0;
@@ -2220,7 +920,8 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2220 return 0;
920 return 0;
2221
921
2222 default:
922 default:
2223 return ERROR(GENERIC); /* impossible */
923 assert(0); /* impossible */
924 return ERROR(GENERIC); /* some compilers require the default case to do something */
2224 }
925 }
2225 }
926 }
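ZSTD_decompressContinue() must be fed exactly dctx->expected bytes per call, which is what the srcSize check at the top enforces. A minimal driver sketch, where fillBuffer() is a hypothetical stand-in for any byte source:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    extern void fillBuffer(void* dst, size_t size);   /* hypothetical: reads exactly `size` bytes */

    static size_t drainFrame(ZSTD_DCtx* dctx, void* out, size_t outCap, void* inBuf)
    {
        size_t toRead = ZSTD_nextSrcSizeToDecompress(dctx);
        while (toRead != 0) {
            size_t produced;
            fillBuffer(inBuf, toRead);                /* feed exactly what was requested */
            produced = ZSTD_decompressContinue(dctx, out, outCap, inBuf, toRead);
            if (ZSTD_isError(produced)) return produced;
            /* consume `produced` bytes from `out` here */
            toRead = ZSTD_nextSrcSizeToDecompress(dctx);
        }
        return 0;                                     /* frame fully decoded */
    }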
2226
927
@@ -2234,11 +935,12 b' static size_t ZSTD_refDictContent(ZSTD_D'
2234 return 0;
935 return 0;
2235 }
936 }
2236
937
2237 /*! ZSTD_loadEntropy() :
938 /*! ZSTD_loadDEntropy() :
2238 * dict : must point at beginning of a valid zstd dictionary.
939 * dict : must point at beginning of a valid zstd dictionary.
2239 * @return : size of entropy tables read */
940 * @return : size of entropy tables read */
2240 static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy,
941 size_t
2241 const void* const dict, size_t const dictSize)
942 ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
943 const void* const dict, size_t const dictSize)
2242 {
944 {
2243 const BYTE* dictPtr = (const BYTE*)dict;
945 const BYTE* dictPtr = (const BYTE*)dict;
2244 const BYTE* const dictEnd = dictPtr + dictSize;
946 const BYTE* const dictEnd = dictPtr + dictSize;
@@ -2252,15 +954,22 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2252 ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
954 ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
2253 { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
955 { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
2254 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
956 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
957 #ifdef HUF_FORCE_DECOMPRESS_X1
958 /* in minimal huffman, we always use X1 variants */
959 size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
960 dictPtr, dictEnd - dictPtr,
961 workspace, workspaceSize);
962 #else
2255 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
963 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
2256 dictPtr, dictEnd - dictPtr,
964 dictPtr, dictEnd - dictPtr,
2257 workspace, workspaceSize);
965 workspace, workspaceSize);
966 #endif
2258 if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
967 if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
2259 dictPtr += hSize;
968 dictPtr += hSize;
2260 }
969 }
2261
970
2262 { short offcodeNCount[MaxOff+1];
971 { short offcodeNCount[MaxOff+1];
2263 U32 offcodeMaxValue = MaxOff, offcodeLog;
972 unsigned offcodeMaxValue = MaxOff, offcodeLog;
2264 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
973 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
2265 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
974 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2266 if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
975 if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
@@ -2320,7 +1029,7 b' static size_t ZSTD_decompress_insertDict'
2320 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
1029 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
2321
1030
2322 /* load entropy tables */
1031 /* load entropy tables */
2323 { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
1032 { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
2324 if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
1033 if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
2325 dict = (const char*)dict + eSize;
1034 dict = (const char*)dict + eSize;
2326 dictSize -= eSize;
1035 dictSize -= eSize;
@@ -2364,209 +1073,25 b' size_t ZSTD_decompressBegin_usingDict(ZS'
2364
1073
2365 /* ====== ZSTD_DDict ====== */
1074 /* ====== ZSTD_DDict ====== */
2366
1075
2367 struct ZSTD_DDict_s {
2368 void* dictBuffer;
2369 const void* dictContent;
2370 size_t dictSize;
2371 ZSTD_entropyDTables_t entropy;
2372 U32 dictID;
2373 U32 entropyPresent;
2374 ZSTD_customMem cMem;
2375 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
2376
2377 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
2378 {
2379 assert(ddict != NULL);
2380 return ddict->dictContent;
2381 }
2382
2383 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
2384 {
2385 assert(ddict != NULL);
2386 return ddict->dictSize;
2387 }
2388
2389 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
1076 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2390 {
1077 {
2391 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
1078 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
2392 assert(dctx != NULL);
1079 assert(dctx != NULL);
2393 if (ddict) {
1080 if (ddict) {
2394 dctx->ddictIsCold = (dctx->dictEnd != (const char*)ddict->dictContent + ddict->dictSize);
1081 const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
1082 size_t const dictSize = ZSTD_DDict_dictSize(ddict);
1083 const void* const dictEnd = dictStart + dictSize;
1084 dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
2395 DEBUGLOG(4, "DDict is %s",
1085 DEBUGLOG(4, "DDict is %s",
2396 dctx->ddictIsCold ? "~cold~" : "hot!");
1086 dctx->ddictIsCold ? "~cold~" : "hot!");
2397 }
1087 }
2398 CHECK_F( ZSTD_decompressBegin(dctx) );
1088 CHECK_F( ZSTD_decompressBegin(dctx) );
2399 if (ddict) { /* NULL ddict is equivalent to no dictionary */
1089 if (ddict) { /* NULL ddict is equivalent to no dictionary */
2400 dctx->dictID = ddict->dictID;
1090 ZSTD_copyDDictParameters(dctx, ddict);
2401 dctx->prefixStart = ddict->dictContent;
2402 dctx->virtualStart = ddict->dictContent;
2403 dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
2404 dctx->previousDstEnd = dctx->dictEnd;
2405 if (ddict->entropyPresent) {
2406 dctx->litEntropy = 1;
2407 dctx->fseEntropy = 1;
2408 dctx->LLTptr = ddict->entropy.LLTable;
2409 dctx->MLTptr = ddict->entropy.MLTable;
2410 dctx->OFTptr = ddict->entropy.OFTable;
2411 dctx->HUFptr = ddict->entropy.hufTable;
2412 dctx->entropy.rep[0] = ddict->entropy.rep[0];
2413 dctx->entropy.rep[1] = ddict->entropy.rep[1];
2414 dctx->entropy.rep[2] = ddict->entropy.rep[2];
2415 } else {
2416 dctx->litEntropy = 0;
2417 dctx->fseEntropy = 0;
2418 }
2419 }
1091 }
2420 return 0;
1092 return 0;
2421 }
1093 }
2422
1094
2423 static size_t
2424 ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict,
2425 ZSTD_dictContentType_e dictContentType)
2426 {
2427 ddict->dictID = 0;
2428 ddict->entropyPresent = 0;
2429 if (dictContentType == ZSTD_dct_rawContent) return 0;
2430
2431 if (ddict->dictSize < 8) {
2432 if (dictContentType == ZSTD_dct_fullDict)
2433 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
2434 return 0; /* pure content mode */
2435 }
2436 { U32 const magic = MEM_readLE32(ddict->dictContent);
2437 if (magic != ZSTD_MAGIC_DICTIONARY) {
2438 if (dictContentType == ZSTD_dct_fullDict)
2439 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
2440 return 0; /* pure content mode */
2441 }
2442 }
2443 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
2444
2445 /* load entropy tables */
2446 CHECK_E( ZSTD_loadEntropy(&ddict->entropy,
2447 ddict->dictContent, ddict->dictSize),
2448 dictionary_corrupted );
2449 ddict->entropyPresent = 1;
2450 return 0;
2451 }
2452
2453
2454 static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
2455 const void* dict, size_t dictSize,
2456 ZSTD_dictLoadMethod_e dictLoadMethod,
2457 ZSTD_dictContentType_e dictContentType)
2458 {
2459 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
2460 ddict->dictBuffer = NULL;
2461 ddict->dictContent = dict;
2462 if (!dict) dictSize = 0;
2463 } else {
2464 void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
2465 ddict->dictBuffer = internalBuffer;
2466 ddict->dictContent = internalBuffer;
2467 if (!internalBuffer) return ERROR(memory_allocation);
2468 memcpy(internalBuffer, dict, dictSize);
2469 }
2470 ddict->dictSize = dictSize;
2471 ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
2472
2473 /* parse dictionary content */
2474 CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) );
2475
2476 return 0;
2477 }
2478
2479 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
2480 ZSTD_dictLoadMethod_e dictLoadMethod,
2481 ZSTD_dictContentType_e dictContentType,
2482 ZSTD_customMem customMem)
2483 {
2484 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2485
2486 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
2487 if (ddict == NULL) return NULL;
2488 ddict->cMem = customMem;
2489 { size_t const initResult = ZSTD_initDDict_internal(ddict,
2490 dict, dictSize,
2491 dictLoadMethod, dictContentType);
2492 if (ZSTD_isError(initResult)) {
2493 ZSTD_freeDDict(ddict);
2494 return NULL;
2495 } }
2496 return ddict;
2497 }
2498 }
2499
2500 /*! ZSTD_createDDict() :
2501 * Create a digested dictionary, to start decompression without startup delay.
2502 * `dict` content is copied inside DDict.
2503 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
2504 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
2505 {
2506 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2507 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
2508 }
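A digested dictionary pays the entropy-table setup cost once, so it is the right tool when many frames share one dictionary. A minimal usage sketch (error handling elided; buffer names are illustrative):

    #include <zstd.h>

    size_t decompressWithDict(void* dst, size_t dstCap,
                              const void* src, size_t srcSize,
                              const void* dictBuf, size_t dictSize)
    {
        ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);  /* dictBuf is copied */
        ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
        size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCap,
                                                        src, srcSize, ddict);
        ZSTD_freeDCtx(dctx);
        ZSTD_freeDDict(ddict);
        return dSize;   /* may be an error code; test with ZSTD_isError() */
    }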
2509
2510 /*! ZSTD_createDDict_byReference() :
2511 * Create a digested dictionary, to start decompression without startup delay.
2512 * Dictionary content is simply referenced; it will be accessed during decompression.
2513 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
2514 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
2515 {
2516 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2517 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
2518 }
2519
2520
2521 const ZSTD_DDict* ZSTD_initStaticDDict(
2522 void* sBuffer, size_t sBufferSize,
2523 const void* dict, size_t dictSize,
2524 ZSTD_dictLoadMethod_e dictLoadMethod,
2525 ZSTD_dictContentType_e dictContentType)
2526 {
2527 size_t const neededSpace = sizeof(ZSTD_DDict)
2528 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
2529 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
2530 assert(sBuffer != NULL);
2531 assert(dict != NULL);
2532 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
2533 if (sBufferSize < neededSpace) return NULL;
2534 if (dictLoadMethod == ZSTD_dlm_byCopy) {
2535 memcpy(ddict+1, dict, dictSize); /* local copy */
2536 dict = ddict+1;
2537 }
2538 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
2539 dict, dictSize,
2540 ZSTD_dlm_byRef, dictContentType) ))
2541 return NULL;
2542 return ddict;
2543 }
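ZSTD_initStaticDDict() is the allocation-free counterpart: the caller hands in a single 8-byte-aligned region sized with ZSTD_estimateDDictSize(). A sketch, assuming a caller-owned `arena` (both are experimental-section APIs):

    size_t const need = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
    /* arena : caller-owned, 8-byte aligned, at least `need` bytes */
    const ZSTD_DDict* const ddict = ZSTD_initStaticDDict(arena, need,
                                                         dictBuf, dictSize,
                                                         ZSTD_dlm_byCopy, ZSTD_dct_auto);
    if (ddict == NULL) { /* arena too small or misaligned */ }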
2544
2545
2546 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
2547 {
2548 if (ddict==NULL) return 0; /* support free on NULL */
2549 { ZSTD_customMem const cMem = ddict->cMem;
2550 ZSTD_free(ddict->dictBuffer, cMem);
2551 ZSTD_free(ddict, cMem);
2552 return 0;
2553 }
2554 }
2555
2556 /*! ZSTD_estimateDDictSize() :
2557 * Estimate amount of memory that will be needed to create a dictionary for decompression.
2558 * Note : dictionaries created by reference using ZSTD_dlm_byRef are smaller */
2559 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
2560 {
2561 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
2562 }
2563
2564 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
2565 {
2566 if (ddict==NULL) return 0; /* support sizeof on NULL */
2567 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
2568 }
2569
2570 /*! ZSTD_getDictID_fromDict() :
1095 /*! ZSTD_getDictID_fromDict() :
2571 * Provides the dictID stored within dictionary.
1096 * Provides the dictID stored within dictionary.
2572 * if @return == 0, the dictionary is not conformant with Zstandard specification.
1097 * if @return == 0, the dictionary is not conformant with Zstandard specification.
@@ -2578,16 +1103,6 b' unsigned ZSTD_getDictID_fromDict(const v'
2578 return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
1103 return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
2579 }
1104 }
2580
1105
2581 /*! ZSTD_getDictID_fromDDict() :
2582 * Provides the dictID of the dictionary loaded into `ddict`.
2583 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
2584 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
2585 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
2586 {
2587 if (ddict==NULL) return 0;
2588 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
2589 }
2590
2591 /*! ZSTD_getDictID_fromFrame() :
1106 /*! ZSTD_getDictID_fromFrame() :
2592 * Provides the dictID required to decompress the frame stored within `src`.
1107 * Provides the dictID required to decompress the frame stored within `src`.
2593 * If @return == 0, the dictID could not be decoded.
1108 * If @return == 0, the dictID could not be decoded.
@@ -2695,7 +1210,7 b' size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dc'
2695
1210
2696
1211
2697 /* ZSTD_initDStream_usingDict() :
1212 /* ZSTD_initDStream_usingDict() :
2698 * return : expected size, aka ZSTD_frameHeaderSize_prefix.
1213 * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
2699 * this function cannot fail */
1214 * this function cannot fail */
2700 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
1215 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
2701 {
1216 {
@@ -2703,7 +1218,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D'
2703 zds->streamStage = zdss_init;
1218 zds->streamStage = zdss_init;
2704 zds->noForwardProgress = 0;
1219 zds->noForwardProgress = 0;
2705 CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
1220 CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
2706 return ZSTD_frameHeaderSize_prefix;
1221 return ZSTD_FRAMEHEADERSIZE_PREFIX;
2707 }
1222 }
2708
1223
2709 /* note : this variant can't fail */
1224 /* note : this variant can't fail */
@@ -2724,7 +1239,7 b' size_t ZSTD_initDStream_usingDDict(ZSTD_'
2724 }
1239 }
2725
1240
2726 /* ZSTD_resetDStream() :
1241 /* ZSTD_resetDStream() :
2727 * return : expected size, aka ZSTD_frameHeaderSize_prefix.
1242 * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
2728 * this function cannot fail */
1243 * this function cannot fail */
2729 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
1244 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
2730 {
1245 {
@@ -2733,23 +1248,9 b' size_t ZSTD_resetDStream(ZSTD_DStream* d'
2733 dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
1248 dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
2734 dctx->legacyVersion = 0;
1249 dctx->legacyVersion = 0;
2735 dctx->hostageByte = 0;
1250 dctx->hostageByte = 0;
2736 return ZSTD_frameHeaderSize_prefix;
1251 return ZSTD_FRAMEHEADERSIZE_PREFIX;
2737 }
1252 }
2738
1253
2739 size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
2740 ZSTD_DStreamParameter_e paramType, unsigned paramValue)
2741 {
2742 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2743 switch(paramType)
2744 {
2745 default : return ERROR(parameter_unsupported);
2746 case DStream_p_maxWindowSize :
2747 DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10);
2748 dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1);
2749 break;
2750 }
2751 return 0;
2752 }
2753
1254
2754 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
1255 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2755 {
1256 {
@@ -2758,18 +1259,92 b' size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dct'
2758 return 0;
1259 return 0;
2759 }
1260 }
2760
1261
1262 /* ZSTD_DCtx_setMaxWindowSize() :
1263 * note : no direct equivalence in ZSTD_DCtx_setParameter,
1264 * since this version sets windowSize, and the other sets windowLog */
2761 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
1265 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2762 {
1266 {
1267 ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
1268 size_t const min = (size_t)1 << bounds.lowerBound;
1269 size_t const max = (size_t)1 << bounds.upperBound;
2763 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
1270 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
1271 if (maxWindowSize < min) return ERROR(parameter_outOfBound);
1272 if (maxWindowSize > max) return ERROR(parameter_outOfBound);
2764 dctx->maxWindowSize = maxWindowSize;
1273 dctx->maxWindowSize = maxWindowSize;
2765 return 0;
1274 return 0;
2766 }
1275 }
2767
1276
2768 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
1277 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
2769 {
1278 {
2770 DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format);
1279 return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format);
1280 }
1281
1282 ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
1283 {
1284 ZSTD_bounds bounds = { 0, 0, 0 };
1285 switch(dParam) {
1286 case ZSTD_d_windowLogMax:
1287 bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
1288 bounds.upperBound = ZSTD_WINDOWLOG_MAX;
1289 return bounds;
1290 case ZSTD_d_format:
1291 bounds.lowerBound = (int)ZSTD_f_zstd1;
1292 bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
1293 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
1294 return bounds;
1295 default:;
1296 }
1297 bounds.error = ERROR(parameter_unsupported);
1298 return bounds;
1299 }
1300
1301 /* ZSTD_dParam_withinBounds:
1302 * @return 1 if value is within dParam bounds,
1303 * 0 otherwise */
1304 static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
1305 {
1306 ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
1307 if (ZSTD_isError(bounds.error)) return 0;
1308 if (value < bounds.lowerBound) return 0;
1309 if (value > bounds.upperBound) return 0;
1310 return 1;
1311 }
1312
1313 #define CHECK_DBOUNDS(p,v) { \
1314 if (!ZSTD_dParam_withinBounds(p, v)) \
1315 return ERROR(parameter_outOfBound); \
1316 }
1317
1318 size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
1319 {
2771 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
1320 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2772 dctx->format = format;
1321 switch(dParam) {
1322 case ZSTD_d_windowLogMax:
1323 CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
1324 dctx->maxWindowSize = ((size_t)1) << value;
1325 return 0;
1326 case ZSTD_d_format:
1327 CHECK_DBOUNDS(ZSTD_d_format, value);
1328 dctx->format = (ZSTD_format_e)value;
1329 return 0;
1330 default:;
1331 }
1332 return ERROR(parameter_unsupported);
1333 }
1334
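The bounds machinery above turns per-field setters into one range-checked interface. A configuration sketch using only the parameters defined in this patch:

    size_t err;
    /* cap the decoding window at 2^27 (128 MiB) */
    err = ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);
    if (ZSTD_isError(err)) { /* 27 outside [ZSTD_WINDOWLOG_ABSOLUTEMIN, ZSTD_WINDOWLOG_MAX] */ }
    /* accept frames whose 4-byte magic number was stripped */
    err = ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)ZSTD_f_zstd1_magicless);
    if (ZSTD_isError(err)) { /* rejected by CHECK_DBOUNDS */ }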
1335 size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
1336 {
1337 if ( (reset == ZSTD_reset_session_only)
1338 || (reset == ZSTD_reset_session_and_parameters) ) {
1339 (void)ZSTD_initDStream(dctx);
1340 }
1341 if ( (reset == ZSTD_reset_parameters)
1342 || (reset == ZSTD_reset_session_and_parameters) ) {
1343 if (dctx->streamStage != zdss_init)
1344 return ERROR(stage_wrong);
1345 dctx->format = ZSTD_f_zstd1;
1346 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
1347 }
2773 return 0;
1348 return 0;
2774 }
1349 }
2775
1350
@@ -2799,7 +1374,7 b' size_t ZSTD_estimateDStreamSize(size_t w'
2799
1374
2800 size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
1375 size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
2801 {
1376 {
2802 U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable */
1377 U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
2803 ZSTD_frameHeader zfh;
1378 ZSTD_frameHeader zfh;
2804 size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
1379 size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
2805 if (ZSTD_isError(err)) return err;
1380 if (ZSTD_isError(err)) return err;
@@ -2868,8 +1443,8 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2868 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
1443 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2869 U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
1444 U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
2870 if (legacyVersion) {
1445 if (legacyVersion) {
2871 const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
1446 const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL;
2872 size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
1447 size_t const dictSize = zds->ddict ? ZSTD_DDict_dictSize(zds->ddict) : 0;
2873 DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
1448 DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
2874 /* legacy support is incompatible with static dctx */
1449 /* legacy support is incompatible with static dctx */
2875 if (zds->staticSize) return ERROR(memory_allocation);
1450 if (zds->staticSize) return ERROR(memory_allocation);
@@ -2894,7 +1469,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2894 zds->lhSize += remainingInput;
1469 zds->lhSize += remainingInput;
2895 }
1470 }
2896 input->pos = input->size;
1471 input->pos = input->size;
2897 return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1472 return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
2898 }
1473 }
2899 assert(ip != NULL);
1474 assert(ip != NULL);
2900 memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
1475 memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@@ -2922,7 +1497,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2922 DEBUGLOG(4, "Consume header");
1497 DEBUGLOG(4, "Consume header");
2923 CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
1498 CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
2924
1499
2925 if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
1500 if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2926 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
1501 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
2927 zds->stage = ZSTDds_skipFrame;
1502 zds->stage = ZSTDds_skipFrame;
2928 } else {
1503 } else {
@@ -3038,7 +1613,9 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
3038 someMoreWork = 0;
1613 someMoreWork = 0;
3039 break;
1614 break;
3040
1615
3041 default: return ERROR(GENERIC); /* impossible */
1616 default:
1617 assert(0); /* impossible */
1618 return ERROR(GENERIC); /* some compilers require the default case to do something */
3042 } }
1619 } }
3043
1620
3044 /* result */
1621 /* result */
@@ -3080,13 +1657,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
3080 }
1657 }
3081 }
1658 }
3082
1659
3083
1660 size_t ZSTD_decompressStream_simpleArgs (
3084 size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
3085 {
3086 return ZSTD_decompressStream(dctx, output, input);
3087 }
3088
3089 size_t ZSTD_decompress_generic_simpleArgs (
3090 ZSTD_DCtx* dctx,
1661 ZSTD_DCtx* dctx,
3091 void* dst, size_t dstCapacity, size_t* dstPos,
1662 void* dst, size_t dstCapacity, size_t* dstPos,
3092 const void* src, size_t srcSize, size_t* srcPos)
1663 const void* src, size_t srcSize, size_t* srcPos)
@@ -3094,15 +1665,8 b' size_t ZSTD_decompress_generic_simpleArg'
3094 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
1665 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
3095 ZSTD_inBuffer input = { src, srcSize, *srcPos };
1666 ZSTD_inBuffer input = { src, srcSize, *srcPos };
3096 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
1667 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
3097 size_t const cErr = ZSTD_decompress_generic(dctx, &output, &input);
1668 size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
3098 *dstPos = output.pos;
1669 *dstPos = output.pos;
3099 *srcPos = input.pos;
1670 *srcPos = input.pos;
3100 return cErr;
1671 return cErr;
3101 }
1672 }
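ZSTD_decompressStream_simpleArgs() is a thin shim over ZSTD_decompressStream(); the underlying contract, sketched over in-memory buffers, is:

    ZSTD_inBuffer  input  = { src, srcSize, 0 };
    ZSTD_outBuffer output = { dst, dstCapacity, 0 };
    while (input.pos < input.size) {
        size_t const hint = ZSTD_decompressStream(dctx, &output, &input);
        if (ZSTD_isError(hint)) return hint;
        if (hint == 0) break;   /* a frame ended exactly here and is fully flushed */
    }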
3102
3103 void ZSTD_DCtx_reset(ZSTD_DCtx* dctx)
3104 {
3105 (void)ZSTD_initDStream(dctx);
3106 dctx->format = ZSTD_f_zstd1;
3107 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
3108 }
@@ -39,7 +39,7 b''
39 /*-*************************************
39 /*-*************************************
40 * Constants
40 * Constants
41 ***************************************/
41 ***************************************/
42 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
42 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
43 #define DEFAULT_SPLITPOINT 1.0
43 #define DEFAULT_SPLITPOINT 1.0
44
44
45 /*-*************************************
45 /*-*************************************
@@ -543,7 +543,7 b' static int COVER_ctx_init(COVER_ctx_t *c'
543 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
543 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
544 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
544 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
545 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
545 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
546 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
546 (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
547 return 0;
547 return 0;
548 }
548 }
549 /* Check if there are at least 5 training samples */
549 /* Check if there are at least 5 training samples */
@@ -559,9 +559,9 b' static int COVER_ctx_init(COVER_ctx_t *c'
559 /* Zero the context */
559 /* Zero the context */
560 memset(ctx, 0, sizeof(*ctx));
560 memset(ctx, 0, sizeof(*ctx));
561 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
561 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
562 (U32)trainingSamplesSize);
562 (unsigned)trainingSamplesSize);
563 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
563 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
564 (U32)testSamplesSize);
564 (unsigned)testSamplesSize);
565 ctx->samples = samples;
565 ctx->samples = samples;
566 ctx->samplesSizes = samplesSizes;
566 ctx->samplesSizes = samplesSizes;
567 ctx->nbSamples = nbSamples;
567 ctx->nbSamples = nbSamples;
@@ -639,11 +639,11 b' static size_t COVER_buildDictionary(cons'
639 /* Divide the data up into epochs of equal size.
639 /* Divide the data up into epochs of equal size.
640 * We will select at least one segment from each epoch.
640 * We will select at least one segment from each epoch.
641 */
641 */
642 const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
642 const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
643 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
643 const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
644 size_t epoch;
644 size_t epoch;
645 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
645 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
646 epochSize);
646 epochs, epochSize);
647 /* Loop through the epochs until there are no more segments or the dictionary
647 /* Loop through the epochs until there are no more segments or the dictionary
648 * is full.
648 * is full.
649 */
649 */
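Worked example of the split above, with illustrative numbers: for dictBufferCapacity = 102400 and parameters.k = 1024, epochs = MAX(1, 102400/1024/4) = 25; with ctx->suffixSize = 1000000 dmers, epochSize = 1000000/25 = 40000, so each selection round scans its own 40000-dmer window and every region of the sample set contributes candidate segments.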
@@ -670,7 +670,7 b' static size_t COVER_buildDictionary(cons'
670 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
670 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
671 DISPLAYUPDATE(
671 DISPLAYUPDATE(
672 2, "\r%u%% ",
672 2, "\r%u%% ",
673 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
673 (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
674 }
674 }
675 DISPLAYLEVEL(2, "\r%79s\r", "");
675 DISPLAYLEVEL(2, "\r%79s\r", "");
676 return tail;
676 return tail;
@@ -722,7 +722,7 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe'
722 samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
722 samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
723 if (!ZSTD_isError(dictionarySize)) {
723 if (!ZSTD_isError(dictionarySize)) {
724 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
724 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
725 (U32)dictionarySize);
725 (unsigned)dictionarySize);
726 }
726 }
727 COVER_ctx_destroy(&ctx);
727 COVER_ctx_destroy(&ctx);
728 COVER_map_destroy(&activeDmers);
728 COVER_map_destroy(&activeDmers);
@@ -868,6 +868,8 b' void COVER_best_finish(COVER_best_t *bes'
868 if (!best->dict) {
868 if (!best->dict) {
869 best->compressedSize = ERROR(GENERIC);
869 best->compressedSize = ERROR(GENERIC);
870 best->dictSize = 0;
870 best->dictSize = 0;
871 ZSTD_pthread_cond_signal(&best->cond);
872 ZSTD_pthread_mutex_unlock(&best->mutex);
871 return;
873 return;
872 }
874 }
873 }
875 }
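The two added lines close an early-return path that previously left best->mutex held and any thread blocked on best->cond waiting forever. The general shape being restored (sketch):

    ZSTD_pthread_mutex_lock(&best->mutex);
    /* ... failure detected, no result to publish ... */
    ZSTD_pthread_cond_signal(&best->cond);    /* wake waiters even on the failure path */
    ZSTD_pthread_mutex_unlock(&best->mutex);  /* never return with the lock held */
    return;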
@@ -1054,7 +1056,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF'
1054 }
1056 }
1055 /* Print status */
1057 /* Print status */
1056 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
1058 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
1057 (U32)((iteration * 100) / kIterations));
1059 (unsigned)((iteration * 100) / kIterations));
1058 ++iteration;
1060 ++iteration;
1059 }
1061 }
1060 COVER_best_wait(&best);
1062 COVER_best_wait(&best);
@@ -20,7 +20,7 b''
20 /*-*************************************
20 /*-*************************************
21 * Constants
21 * Constants
22 ***************************************/
22 ***************************************/
23 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
23 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
24 #define FASTCOVER_MAX_F 31
24 #define FASTCOVER_MAX_F 31
25 #define FASTCOVER_MAX_ACCEL 10
25 #define FASTCOVER_MAX_ACCEL 10
26 #define DEFAULT_SPLITPOINT 0.75
26 #define DEFAULT_SPLITPOINT 0.75
@@ -159,15 +159,15 b' static COVER_segment_t FASTCOVER_selectS'
159 */
159 */
160 while (activeSegment.end < end) {
160 while (activeSegment.end < end) {
161 /* Get hash value of current dmer */
161 /* Get hash value of current dmer */
162 const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
162 const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
163
163
164 /* Add frequency of this index to score if this is the first occurrence of index in active segment */
164 /* Add frequency of this index to score if this is the first occurrence of index in active segment */
165 if (segmentFreqs[index] == 0) {
165 if (segmentFreqs[idx] == 0) {
166 activeSegment.score += freqs[index];
166 activeSegment.score += freqs[idx];
167 }
167 }
168 /* Increment end of segment and segmentFreqs */
168 /* Increment end of segment and segmentFreqs */
169 activeSegment.end += 1;
169 activeSegment.end += 1;
170 segmentFreqs[index] += 1;
170 segmentFreqs[idx] += 1;
171 /* If the window is now too large, drop the first position */
171 /* If the window is now too large, drop the first position */
172 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
172 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
173 /* Get hash value of the dmer to be eliminated from active segment */
173 /* Get hash value of the dmer to be eliminated from active segment */
@@ -309,7 +309,7 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,'
309 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
309 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
310 totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
310 totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
311 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
311 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
312 (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
312 (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
313 return 0;
313 return 0;
314 }
314 }
315
315
@@ -328,9 +328,9 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,'
328 /* Zero the context */
328 /* Zero the context */
329 memset(ctx, 0, sizeof(*ctx));
329 memset(ctx, 0, sizeof(*ctx));
330 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
330 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
331 (U32)trainingSamplesSize);
331 (unsigned)trainingSamplesSize);
332 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
332 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
333 (U32)testSamplesSize);
333 (unsigned)testSamplesSize);
334
334
335 ctx->samples = samples;
335 ctx->samples = samples;
336 ctx->samplesSizes = samplesSizes;
336 ctx->samplesSizes = samplesSizes;
@@ -389,11 +389,11 b' FASTCOVER_buildDictionary(const FASTCOVE'
389 /* Divide the data up into epochs of equal size.
389 /* Divide the data up into epochs of equal size.
390 * We will select at least one segment from each epoch.
390 * We will select at least one segment from each epoch.
391 */
391 */
392 const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
392 const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
393 const U32 epochSize = (U32)(ctx->nbDmers / epochs);
393 const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
394 size_t epoch;
394 size_t epoch;
395 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
395 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
396 epochSize);
396 epochs, epochSize);
397 /* Loop through the epochs until there are no more segments or the dictionary
397 /* Loop through the epochs until there are no more segments or the dictionary
398 * is full.
398 * is full.
399 */
399 */
@@ -423,7 +423,7 b' FASTCOVER_buildDictionary(const FASTCOVE'
423 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
423 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
424 DISPLAYUPDATE(
424 DISPLAYUPDATE(
425 2, "\r%u%% ",
425 2, "\r%u%% ",
426 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
426 (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
427 }
427 }
428 DISPLAYLEVEL(2, "\r%79s\r", "");
428 DISPLAYLEVEL(2, "\r%79s\r", "");
429 return tail;
429 return tail;
@@ -577,7 +577,7 b' ZDICT_trainFromBuffer_fastCover(void* di'
577 samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
577 samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
578 if (!ZSTD_isError(dictionarySize)) {
578 if (!ZSTD_isError(dictionarySize)) {
579 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
579 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
580 (U32)dictionarySize);
580 (unsigned)dictionarySize);
581 }
581 }
582 FASTCOVER_ctx_destroy(&ctx);
582 FASTCOVER_ctx_destroy(&ctx);
583 free(segmentFreqs);
583 free(segmentFreqs);
@@ -702,7 +702,7 b' ZDICT_optimizeTrainFromBuffer_fastCover('
702 }
702 }
703 /* Print status */
703 /* Print status */
704 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
704 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
705 (U32)((iteration * 100) / kIterations));
705 (unsigned)((iteration * 100) / kIterations));
706 ++iteration;
706 ++iteration;
707 }
707 }
708 COVER_best_wait(&best);
708 COVER_best_wait(&best);
@@ -255,15 +255,15 b' static dictItem ZDICT_analyzePos('
255 }
255 }
256
256
257 { int i;
257 { int i;
258 U32 searchLength;
258 U32 mml;
259 U32 refinedStart = start;
259 U32 refinedStart = start;
260 U32 refinedEnd = end;
260 U32 refinedEnd = end;
261
261
262 DISPLAYLEVEL(4, "\n");
262 DISPLAYLEVEL(4, "\n");
263 DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
263 DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
264 DISPLAYLEVEL(4, "\n");
264 DISPLAYLEVEL(4, "\n");
265
265
266 for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
266 for (mml = MINMATCHLENGTH ; ; mml++) {
267 BYTE currentChar = 0;
267 BYTE currentChar = 0;
268 U32 currentCount = 0;
268 U32 currentCount = 0;
269 U32 currentID = refinedStart;
269 U32 currentID = refinedStart;
@@ -271,13 +271,13 b' static dictItem ZDICT_analyzePos('
271 U32 selectedCount = 0;
271 U32 selectedCount = 0;
272 U32 selectedID = currentID;
272 U32 selectedID = currentID;
273 for (id =refinedStart; id < refinedEnd; id++) {
273 for (id =refinedStart; id < refinedEnd; id++) {
274 if (b[suffix[id] + searchLength] != currentChar) {
274 if (b[suffix[id] + mml] != currentChar) {
275 if (currentCount > selectedCount) {
275 if (currentCount > selectedCount) {
276 selectedCount = currentCount;
276 selectedCount = currentCount;
277 selectedID = currentID;
277 selectedID = currentID;
278 }
278 }
279 currentID = id;
279 currentID = id;
280 currentChar = b[ suffix[id] + searchLength];
280 currentChar = b[ suffix[id] + mml];
281 currentCount = 0;
281 currentCount = 0;
282 }
282 }
283 currentCount ++;
283 currentCount ++;
@@ -342,7 +342,7 b' static dictItem ZDICT_analyzePos('
342 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
342 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
343
343
344 DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
344 DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
345 (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
345 (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
346
346
347 solution.pos = (U32)pos;
347 solution.pos = (U32)pos;
348 solution.length = (U32)maxLength;
348 solution.length = (U32)maxLength;
@@ -497,7 +497,7 b' static U32 ZDICT_dictSize(const dictItem'
497 static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
497 static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
498 const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
498 const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
499 const size_t* fileSizes, unsigned nbFiles,
499 const size_t* fileSizes, unsigned nbFiles,
500 U32 minRatio, U32 notificationLevel)
500 unsigned minRatio, U32 notificationLevel)
501 {
501 {
502 int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
502 int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
503 int* const suffix = suffix0+1;
503 int* const suffix = suffix0+1;
@@ -523,11 +523,11 b' static size_t ZDICT_trainBuffer_legacy(d'
523 memset(doneMarks, 0, bufferSize+16);
523 memset(doneMarks, 0, bufferSize+16);
524
524
525 /* limit sample set size (divsufsort limitation)*/
525 /* limit sample set size (divsufsort limitation)*/
526 if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
526 if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
527 while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
527 while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
528
528
529 /* sort */
529 /* sort */
530 DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
530 DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
531 { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
531 { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
532 if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
532 if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
533 }
533 }
@@ -589,7 +589,7 b' typedef struct'
589 #define MAXREPOFFSET 1024
589 #define MAXREPOFFSET 1024
590
590
591 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
591 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
592 U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
592 unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
593 const void* src, size_t srcSize,
593 const void* src, size_t srcSize,
594 U32 notificationLevel)
594 U32 notificationLevel)
595 {
595 {
@@ -602,7 +602,7 b' static void ZDICT_countEStats(EStats_res'
602
602
603 }
603 }
604 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
604 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
605 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
605 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
606
606
607 if (cSize) { /* if == 0; block is not compressible */
607 if (cSize) { /* if == 0; block is not compressible */
608 const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
608 const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -671,7 +671,7 b' static void ZDICT_insertSortCount(offset'
671 * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
671 * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
672 * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
672 * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
673 */
673 */
674 static void ZDICT_flatLit(U32* countLit)
674 static void ZDICT_flatLit(unsigned* countLit)
675 {
675 {
676 int u;
676 int u;
677 for (u=1; u<256; u++) countLit[u] = 2;
677 for (u=1; u<256; u++) countLit[u] = 2;
@@ -687,14 +687,14 b' static size_t ZDICT_analyzeEntropy(void*'
687 const void* dictBuffer, size_t dictBufferSize,
687 const void* dictBuffer, size_t dictBufferSize,
688 unsigned notificationLevel)
688 unsigned notificationLevel)
689 {
689 {
690 U32 countLit[256];
690 unsigned countLit[256];
691 HUF_CREATE_STATIC_CTABLE(hufTable, 255);
691 HUF_CREATE_STATIC_CTABLE(hufTable, 255);
692 U32 offcodeCount[OFFCODE_MAX+1];
692 unsigned offcodeCount[OFFCODE_MAX+1];
693 short offcodeNCount[OFFCODE_MAX+1];
693 short offcodeNCount[OFFCODE_MAX+1];
694 U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
694 U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
695 U32 matchLengthCount[MaxML+1];
695 unsigned matchLengthCount[MaxML+1];
696 short matchLengthNCount[MaxML+1];
696 short matchLengthNCount[MaxML+1];
697 U32 litLengthCount[MaxLL+1];
697 unsigned litLengthCount[MaxLL+1];
698 short litLengthNCount[MaxLL+1];
698 short litLengthNCount[MaxLL+1];
699 U32 repOffset[MAXREPOFFSET];
699 U32 repOffset[MAXREPOFFSET];
700 offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
700 offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
@@ -983,33 +983,33 b' size_t ZDICT_trainFromBuffer_unsafe_lega'
983
983
984 /* display best matches */
984 /* display best matches */
985 if (params.zParams.notificationLevel>= 3) {
985 if (params.zParams.notificationLevel>= 3) {
986 U32 const nb = MIN(25, dictList[0].pos);
986 unsigned const nb = MIN(25, dictList[0].pos);
987 U32 const dictContentSize = ZDICT_dictSize(dictList);
987 unsigned const dictContentSize = ZDICT_dictSize(dictList);
988 U32 u;
988 unsigned u;
989 DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
989 DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
990 DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
990 DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
991 for (u=1; u<nb; u++) {
991 for (u=1; u<nb; u++) {
992 U32 const pos = dictList[u].pos;
992 unsigned const pos = dictList[u].pos;
993 U32 const length = dictList[u].length;
993 unsigned const length = dictList[u].length;
994 U32 const printedLength = MIN(40, length);
994 U32 const printedLength = MIN(40, length);
995 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
995 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
996 free(dictList);
996 free(dictList);
997 return ERROR(GENERIC); /* should never happen */
997 return ERROR(GENERIC); /* should never happen */
998 }
998 }
999 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
999 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
1000 u, length, pos, dictList[u].savings);
1000 u, length, pos, (unsigned)dictList[u].savings);
1001 ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
1001 ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
1002 DISPLAYLEVEL(3, "| \n");
1002 DISPLAYLEVEL(3, "| \n");
1003 } }
1003 } }
1004
1004
1005
1005
1006 /* create dictionary */
1006 /* create dictionary */
1007 { U32 dictContentSize = ZDICT_dictSize(dictList);
1007 { unsigned dictContentSize = ZDICT_dictSize(dictList);
1008 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
1008 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
1009 if (dictContentSize < targetDictSize/4) {
1009 if (dictContentSize < targetDictSize/4) {
1010 DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
1010 DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
1011 if (samplesBuffSize < 10 * targetDictSize)
1011 if (samplesBuffSize < 10 * targetDictSize)
1012 DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
1012 DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
1013 if (minRep > MINRATIO) {
1013 if (minRep > MINRATIO) {
1014 DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
1014 DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
1015 DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
1015 DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
@@ -1017,9 +1017,9 b' size_t ZDICT_trainFromBuffer_unsafe_lega'
1017 }
1017 }
1018
1018
1019 if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
1019 if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
1020 U32 proposedSelectivity = selectivity-1;
1020 unsigned proposedSelectivity = selectivity-1;
1021 while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
1021 while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
1022 DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
1022 DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
1023 DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
1023 DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
1024 DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
1024 DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
1025 }
1025 }
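
For orientation (a sketch, not part of the vendored diff): the selectivity and sample-size warnings above are emitted during dictionary training. A minimal sketch of driving the trainer through the public zdict.h entry point; the helper name train_dict and its buffer arguments are illustrative:

    #include <stdio.h>
    #include "zdict.h"

    /* Train a dictionary from samples laid out back-to-back in `samples`;
     * sampleSizes[i] gives the size of sample i. Returns dict size, or 0 on error. */
    static size_t train_dict(void* dictBuffer, size_t dictCapacity,
                             const void* samples, const size_t* sampleSizes,
                             unsigned nbSamples)
    {
        size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, dictCapacity,
                                                      samples, sampleSizes, nbSamples);
        if (ZDICT_isError(dictSize)) {
            fprintf(stderr, "training failed : %s\n", ZDICT_getErrorName(dictSize));
            return 0;
        }
        return dictSize;
    }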
This diff has been collapsed as it changes many lines (1566 lines changed).
@@ -71,16 +71,16 b' extern "C" {'
71 /*------ Version ------*/
71 /*------ Version ------*/
72 #define ZSTD_VERSION_MAJOR 1
72 #define ZSTD_VERSION_MAJOR 1
73 #define ZSTD_VERSION_MINOR 3
73 #define ZSTD_VERSION_MINOR 3
74 #define ZSTD_VERSION_RELEASE 6
74 #define ZSTD_VERSION_RELEASE 8
75
75
76 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
76 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
77 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
77 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
78
78
79 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
79 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
80 #define ZSTD_QUOTE(str) #str
80 #define ZSTD_QUOTE(str) #str
81 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
81 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
82 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
82 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
83 ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0+ */
83 ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
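
As a quick illustration (not from the diff): ZSTD_VERSION_NUMBER is the compile-time version of the headers, while ZSTD_versionNumber() reports the library actually linked at runtime, so the two can disagree when using a dynamic library:

    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        unsigned const runtime = ZSTD_versionNumber();   /* version of the linked library */
        if (runtime != ZSTD_VERSION_NUMBER)              /* version of the headers */
            printf("header %u vs runtime %u mismatch\n", ZSTD_VERSION_NUMBER, runtime);
        printf("zstd %s\n", ZSTD_versionString());       /* requires v1.3.0+ */
        return 0;
    }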
84
84
85 /***************************************
85 /***************************************
86 * Default constant
86 * Default constant
@@ -110,7 +110,7 b' ZSTDLIB_API size_t ZSTD_compress( void* '
110 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
110 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
111 const void* src, size_t compressedSize);
111 const void* src, size_t compressedSize);
112
112
113 /*! ZSTD_getFrameContentSize() : added in v1.3.0
113 /*! ZSTD_getFrameContentSize() : requires v1.3.0+
114 * `src` should point to the start of a ZSTD encoded frame.
114 * `src` should point to the start of a ZSTD encoded frame.
115 * `srcSize` must be at least as large as the frame header.
115 * `srcSize` must be at least as large as the frame header.
116 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
116 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
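
A minimal sketch (illustrative, not from the vendored sources) of using the returned size to allocate before decompressing, handling the two sentinel values ZSTD_CONTENTSIZE_UNKNOWN and ZSTD_CONTENTSIZE_ERROR:

    #include <stdlib.h>
    #include <zstd.h>

    /* Returns a malloc'd buffer holding the decompressed frame, or NULL. */
    static void* decompress_whole_frame(const void* src, size_t srcSize, size_t* outSize)
    {
        unsigned long long const contentSize = ZSTD_getFrameContentSize(src, srcSize);
        if (contentSize == ZSTD_CONTENTSIZE_ERROR) return NULL;    /* not a zstd frame */
        if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) return NULL;  /* size not recorded : stream instead */
        void* const dst = malloc((size_t)contentSize);
        if (dst == NULL) return NULL;
        size_t const dSize = ZSTD_decompress(dst, (size_t)contentSize, src, srcSize);
        if (ZSTD_isError(dSize)) { free(dst); return NULL; }
        *outSize = dSize;
        return dst;
    }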
@@ -167,8 +167,10 b' ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(v'
167 ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
167 ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
168
168
169 /*! ZSTD_compressCCtx() :
169 /*! ZSTD_compressCCtx() :
170 * Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */
170 * Same as ZSTD_compress(), using an explicit ZSTD_CCtx
171 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx,
171 * The function will compress at requested compression level,
172 * ignoring any other parameter */
173 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
172 void* dst, size_t dstCapacity,
174 void* dst, size_t dstCapacity,
173 const void* src, size_t srcSize,
175 const void* src, size_t srcSize,
174 int compressionLevel);
176 int compressionLevel);
@@ -184,8 +186,11 b' ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(v'
184 ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
186 ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
185
187
186 /*! ZSTD_decompressDCtx() :
188 /*! ZSTD_decompressDCtx() :
187 * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
189 * Same as ZSTD_decompress(),
188 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx,
190 * requires an allocated ZSTD_DCtx.
191 * Compatible with sticky parameters.
192 */
193 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
189 void* dst, size_t dstCapacity,
194 void* dst, size_t dstCapacity,
190 const void* src, size_t srcSize);
195 const void* src, size_t srcSize);
191
196
@@ -194,9 +199,12 b' ZSTDLIB_API size_t ZSTD_decompressDCtx(Z'
194 * Simple dictionary API
199 * Simple dictionary API
195 ***************************/
200 ***************************/
196 /*! ZSTD_compress_usingDict() :
201 /*! ZSTD_compress_usingDict() :
197 * Compression using a predefined Dictionary (see dictBuilder/zdict.h).
202 * Compression at an explicit compression level using a Dictionary.
203 * A dictionary can be any arbitrary data segment (also called a prefix),
204 * or a buffer with specified information (see dictBuilder/zdict.h).
198 * Note : This function loads the dictionary, resulting in significant startup delay.
205 * Note : This function loads the dictionary, resulting in significant startup delay.
199 * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
206 * It's intended for a dictionary used only once.
207 * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
200 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
208 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
201 void* dst, size_t dstCapacity,
209 void* dst, size_t dstCapacity,
202 const void* src, size_t srcSize,
210 const void* src, size_t srcSize,
@@ -204,9 +212,10 b' ZSTDLIB_API size_t ZSTD_compress_usingDi'
204 int compressionLevel);
212 int compressionLevel);
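
A small sketch of the one-shot dictionary path (hypothetical wrapper name); since the dictionary is re-loaded on every call, the CDict API further down is preferred for repeated use:

    #include <zstd.h>

    static size_t compress_with_dict(ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dict, size_t dictSize)
    {
        /* dictionary is hashed on each call : significant startup delay */
        size_t const cSize = ZSTD_compress_usingDict(cctx, dst, dstCapacity,
                                                     src, srcSize,
                                                     dict, dictSize, 3 /* level */);
        return cSize;   /* test with ZSTD_isError() */
    }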
205
213
206 /*! ZSTD_decompress_usingDict() :
214 /*! ZSTD_decompress_usingDict() :
207 * Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
215 * Decompression using a known Dictionary.
208 * Dictionary must be identical to the one used during compression.
216 * Dictionary must be identical to the one used during compression.
209 * Note : This function loads the dictionary, resulting in significant startup delay.
217 * Note : This function loads the dictionary, resulting in significant startup delay.
218 * It's intended for a dictionary used only once.
210 * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
219 * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
211 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
220 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
212 void* dst, size_t dstCapacity,
221 void* dst, size_t dstCapacity,
@@ -214,17 +223,18 b' ZSTDLIB_API size_t ZSTD_decompress_using'
214 const void* dict,size_t dictSize);
223 const void* dict,size_t dictSize);
215
224
216
225
217 /**********************************
226 /***********************************
218 * Bulk processing dictionary API
227 * Bulk processing dictionary API
219 *********************************/
228 **********************************/
220 typedef struct ZSTD_CDict_s ZSTD_CDict;
229 typedef struct ZSTD_CDict_s ZSTD_CDict;
221
230
222 /*! ZSTD_createCDict() :
231 /*! ZSTD_createCDict() :
223 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
232 * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
224 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
233 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
225 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
234 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
226 * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict
235 * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
227 * Note : A ZSTD_CDict can be created with an empty dictionary, but it is inefficient for small data. */
236 * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
237 * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
228 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
238 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
229 int compressionLevel);
239 int compressionLevel);
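
A sketch of the digested-dictionary round trip described above (hypothetical function name, allocation checks omitted for brevity); in real use the CCtx and CDict would be kept alive across many messages:

    #include <zstd.h>

    static size_t compress_with_cdict(const void* dict, size_t dictSize,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3); /* level fixed at creation */
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                                      src, srcSize, cdict);
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);
        return cSize;   /* test with ZSTD_isError() */
    }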
230
240
@@ -234,11 +244,9 b' ZSTDLIB_API size_t ZSTD_freeCDict(Z'
234
244
235 /*! ZSTD_compress_usingCDict() :
245 /*! ZSTD_compress_usingCDict() :
236 * Compression using a digested Dictionary.
246 * Compression using a digested Dictionary.
237 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
247 * Recommended when same dictionary is used multiple times.
238 * Note that compression level is decided during dictionary creation.
248 * Note : compression level is _decided at dictionary creation time_,
239 * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)
249 * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
240 * Note : ZSTD_compress_usingCDict() can be used with a ZSTD_CDict created from an empty dictionary.
241 * But it is inefficient for small data, and it is recommended to use ZSTD_compressCCtx(). */
242 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
250 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
243 void* dst, size_t dstCapacity,
251 void* dst, size_t dstCapacity,
244 const void* src, size_t srcSize,
252 const void* src, size_t srcSize,
@@ -249,7 +257,7 b' typedef struct ZSTD_DDict_s ZSTD_DDict;'
249
257
250 /*! ZSTD_createDDict() :
258 /*! ZSTD_createDDict() :
251 * Create a digested dictionary, ready to start decompression operation without startup delay.
259 * Create a digested dictionary, ready to start decompression operation without startup delay.
252 * dictBuffer can be released after DDict creation, as its content is copied inside DDict */
260 * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
253 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
261 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
254
262
255 /*! ZSTD_freeDDict() :
263 /*! ZSTD_freeDDict() :
@@ -258,7 +266,7 b' ZSTDLIB_API size_t ZSTD_freeDDict(Z'
258
266
259 /*! ZSTD_decompress_usingDDict() :
267 /*! ZSTD_decompress_usingDDict() :
260 * Decompression using a digested Dictionary.
268 * Decompression using a digested Dictionary.
261 * Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */
269 * Recommended when same dictionary is used multiple times. */
262 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
270 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
263 void* dst, size_t dstCapacity,
271 void* dst, size_t dstCapacity,
264 const void* src, size_t srcSize,
272 const void* src, size_t srcSize,
@@ -289,13 +297,17 b' typedef struct ZSTD_outBuffer_s {'
289 * A ZSTD_CStream object is required to track streaming operation.
297 * A ZSTD_CStream object is required to track streaming operation.
290 * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
298 * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
291 * ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
299 * ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
292 * It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively,
300 * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
293 * since it will play nicer with system's memory, by re-using already allocated memory.
301 *
294 * Use one separate ZSTD_CStream per thread for parallel execution.
302 * For parallel execution, use one separate ZSTD_CStream per thread.
303 *
304 * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
295 *
305 *
296 * Start a new compression by initializing ZSTD_CStream context.
306 * Parameters are sticky : when starting a new compression on the same context,
297 * Use ZSTD_initCStream() to start a new compression operation.
307 * it will re-use the same sticky parameters as previous compression session.
298 * Use variants ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for streaming with dictionary (experimental section)
308 * When in doubt, it's recommended to fully initialize the context before usage.
309 * Use ZSTD_initCStream() to set the parameter to a selected compression level.
310 * Use advanced API (ZSTD_CCtx_setParameter(), etc.) to set more specific parameters.
299 *
311 *
300 * Use ZSTD_compressStream() as many times as necessary to consume input stream.
312 * Use ZSTD_compressStream() as many times as necessary to consume input stream.
301 * The function will automatically update both `pos` fields within `input` and `output`.
313 * The function will automatically update both `pos` fields within `input` and `output`.
@@ -304,12 +316,11 b' typedef struct ZSTD_outBuffer_s {'
304 * in which case `input.pos < input.size`.
316 * in which case `input.pos < input.size`.
305 * The caller must check if input has been entirely consumed.
317 * The caller must check if input has been entirely consumed.
306 * If not, the caller must make some room to receive more compressed data,
318 * If not, the caller must make some room to receive more compressed data,
307 * typically by emptying output buffer, or allocating a new output buffer,
308 * and then present again remaining input data.
319 * and then present again remaining input data.
309 * @return : a size hint, preferred nb of bytes to use as input for next function call
320 * @return : a size hint, preferred nb of bytes to use as input for next function call
310 * or an error code, which can be tested using ZSTD_isError().
321 * or an error code, which can be tested using ZSTD_isError().
311 * Note 1 : it's just a hint, to help latency a little, any other value will work fine.
322 * Note 1 : it's just a hint, to help latency a little, any value will work fine.
312 * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
323 * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
313 *
324 *
314 * At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
325 * At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
315 * using ZSTD_flushStream(). `output->pos` will be updated.
326 * using ZSTD_flushStream(). `output->pos` will be updated.
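
To make the loop described above concrete, a sketch of file-to-file streaming compression (illustrative helper, minimal error handling), using the recommended buffer sizes:

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    static int stream_compress(FILE* fin, FILE* fout, int level)
    {
        size_t const inCap  = ZSTD_CStreamInSize();   /* recommended chunk sizes */
        size_t const outCap = ZSTD_CStreamOutSize();
        void* const inBuf   = malloc(inCap);
        void* const outBuf  = malloc(outCap);
        ZSTD_CStream* const zcs = ZSTD_createCStream();
        int ok = inBuf && outBuf && zcs
              && !ZSTD_isError(ZSTD_initCStream(zcs, level));
        while (ok) {
            size_t const readSize = fread(inBuf, 1, inCap, fin);
            ZSTD_inBuffer input = { inBuf, readSize, 0 };
            while (ok && input.pos < input.size) {    /* consume the whole chunk */
                ZSTD_outBuffer output = { outBuf, outCap, 0 };
                if (ZSTD_isError(ZSTD_compressStream(zcs, &output, &input))) ok = 0;
                else fwrite(outBuf, 1, output.pos, fout);
            }
            if (readSize < inCap) break;              /* end of input file */
        }
        while (ok) {                                  /* flush epilogue and close frame */
            ZSTD_outBuffer output = { outBuf, outCap, 0 };
            size_t const remaining = ZSTD_endStream(zcs, &output);
            if (ZSTD_isError(remaining)) ok = 0;
            else fwrite(outBuf, 1, output.pos, fout);
            if (remaining == 0) break;                /* frame fully flushed */
        }
        ZSTD_freeCStream(zcs); free(inBuf); free(outBuf);
        return ok ? 0 : -1;
    }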
@@ -353,23 +364,28 b' ZSTDLIB_API size_t ZSTD_CStreamOutSize(v'
353 * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
364 * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
354 * ZSTD_DStream objects can be re-used multiple times.
365 * ZSTD_DStream objects can be re-used multiple times.
355 *
366 *
356 * Use ZSTD_initDStream() to start a new decompression operation,
367 * Use ZSTD_initDStream() to start a new decompression operation.
357 * or ZSTD_initDStream_usingDict() if decompression requires a dictionary.
368 * @return : recommended first input size
358 * @return : recommended first input size
369 * Alternatively, use advanced API to set specific properties.
359 *
370 *
360 * Use ZSTD_decompressStream() repetitively to consume your input.
371 * Use ZSTD_decompressStream() repetitively to consume your input.
361 * The function will update both `pos` fields.
372 * The function will update both `pos` fields.
362 * If `input.pos < input.size`, some input has not been consumed.
373 * If `input.pos < input.size`, some input has not been consumed.
363 * It's up to the caller to present again remaining data.
374 * It's up to the caller to present again remaining data.
375 * The function tries to flush all data decoded immediately, respecting output buffer size.
364 * If `output.pos < output.size`, decoder has flushed everything it could.
376 * If `output.pos < output.size`, decoder has flushed everything it could.
365 * @return : 0 when a frame is completely decoded and fully flushed,
377 * But if `output.pos == output.size`, there might be some data left within internal buffers.
366 * an error code, which can be tested using ZSTD_isError(),
378 * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
367 * any other value > 0, which means there is still some decoding to do to complete current frame.
379 * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
368 * The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame.
380 * @return : 0 when a frame is completely decoded and fully flushed,
381 * or an error code, which can be tested using ZSTD_isError(),
382 * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
383 * the return value is a suggested next input size (just a hint for better latency)
384 * that will never request more than the remaining frame size.
369 * *******************************************************************************/
385 * *******************************************************************************/
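
And the matching decompression loop as a sketch (illustrative helper; a return of 0 from ZSTD_decompressStream() marks the end of a frame):

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    static int stream_decompress(FILE* fin, FILE* fout)
    {
        size_t const inCap  = ZSTD_DStreamInSize();
        size_t const outCap = ZSTD_DStreamOutSize();
        void* const inBuf   = malloc(inCap);
        void* const outBuf  = malloc(outCap);
        ZSTD_DStream* const zds = ZSTD_createDStream();
        int ok = inBuf && outBuf && zds;
        size_t ret = ok ? ZSTD_initDStream(zds) : 0;  /* @return : recommended first input size */
        if (ZSTD_isError(ret)) ok = 0;
        size_t readSize;
        while (ok && (readSize = fread(inBuf, 1, inCap, fin)) > 0) {
            ZSTD_inBuffer input = { inBuf, readSize, 0 };
            while (ok && input.pos < input.size) {
                ZSTD_outBuffer output = { outBuf, outCap, 0 };
                ret = ZSTD_decompressStream(zds, &output, &input);  /* 0 : frame done */
                if (ZSTD_isError(ret)) ok = 0;
                else fwrite(outBuf, 1, output.pos, fout);
            }
        }
        ZSTD_freeDStream(zds); free(inBuf); free(outBuf);
        return (ok && ret == 0) ? 0 : -1;   /* ret != 0 : error or truncated frame */
    }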
370
386
371 typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
387 typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
372 /* For compatibility with versions <= v1.2.0, continue to consider them separated. */
388 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
373 /*===== ZSTD_DStream management functions =====*/
389 /*===== ZSTD_DStream management functions =====*/
374 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
390 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
375 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
391 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
@@ -386,77 +402,602 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
386
402
387
403
388
404
389 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
390 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
391
392 /****************************************************************************************
405 /****************************************************************************************
393 * ADVANCED AND EXPERIMENTAL FUNCTIONS
406 * ADVANCED AND EXPERIMENTAL FUNCTIONS
394 ****************************************************************************************
407 ****************************************************************************************
395 * The definitions in this section are considered experimental.
408 * The definitions in the following section are considered experimental.
409 * They are provided for advanced scenarios.
396 * They should never be used with a dynamic library, as prototypes may change in the future.
410 * They should never be used with a dynamic library, as prototypes may change in the future.
397 * They are provided for advanced scenarios.
398 * Use them only in association with static linking.
411 * Use them only in association with static linking.
399 * ***************************************************************************************/
412 * ***************************************************************************************/
400
413
414 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
415 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
416
417
418 /****************************************************************************************
419 * Candidate API for promotion to stable status
420 ****************************************************************************************
421 * The following symbols and constants form the "staging area" :
422 * they are considered to join "stable API" by v1.4.0.
423 * The proposal is written so that it can be made stable "as is",
424 * though it's still possible to suggest improvements.
425 * Staging is in fact last chance for changes,
426 * the API is locked once reaching "stable" status.
427 * ***************************************************************************************/
428
429
430 /* === Constants === */
431
432 /* all magic numbers are supposed read/written to/from files/memory using little-endian convention */
433 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
434 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */
435 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
436 #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0
437
438 #define ZSTD_BLOCKSIZELOG_MAX 17
439 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX)
440
441
442 /* === query limits === */
443
401 ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */
444 ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */
402
445
403 /* --- Constants ---*/
446
404 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8+ */
447 /* === frame size === */
405 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */
448
406 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
449 /*! ZSTD_findFrameCompressedSize() :
450 * `src` should point to the start of a ZSTD frame or skippable frame.
451 * `srcSize` must be >= first frame size
452 * @return : the compressed size of the first frame starting at `src`,
453 * suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
454 * or an error code if input is invalid */
455 ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
456
457
458 /* === Memory management === */
459
460 /*! ZSTD_sizeof_*() :
461 * These functions give the _current_ memory usage of selected object.
462 * Note that object memory usage can evolve (increase or decrease) over time. */
463 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
464 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
465 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
466 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
467 ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
468 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
469
470
471 /***************************************
472 * Advanced compression API
473 ***************************************/
474
475 /* API design :
476 * Parameters are pushed one by one into an existing context,
477 * using ZSTD_CCtx_set*() functions.
478 * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
479 * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
480 * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()
481 *
482 * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
483 *
484 * This API supersedes all other "advanced" API entry points in the experimental section.
485 * In the future, we expect to remove experimental API entry points which are redundant with this API.
486 */
487
488
489 /* Compression strategies, listed from fastest to strongest */
490 typedef enum { ZSTD_fast=1,
491 ZSTD_dfast=2,
492 ZSTD_greedy=3,
493 ZSTD_lazy=4,
494 ZSTD_lazy2=5,
495 ZSTD_btlazy2=6,
496 ZSTD_btopt=7,
497 ZSTD_btultra=8,
498 ZSTD_btultra2=9
499 /* note : new strategies _might_ be added in the future.
500 Only the order (from fast to strong) is guaranteed */
501 } ZSTD_strategy;
502
503
504 typedef enum {
407
505
408 #define ZSTD_BLOCKSIZELOG_MAX 17
506 /* compression parameters */
409 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */
507 ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
508 * Default level is ZSTD_CLEVEL_DEFAULT==3.
509 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
510 * Note 1 : it's possible to pass a negative compression level.
511 * Note 2 : setting a level sets all default values of other compression parameters */
512 ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
513 * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
514 * Special: value 0 means "use default windowLog".
515 * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
516 * requires explicitly allowing such window size at decompression stage if using streaming. */
517 ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
518 * Resulting memory usage is (1 << (hashLog+2)).
519 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
520 * Larger tables improve compression ratio of strategies <= dFast,
521 * and improve speed of strategies > dFast.
522 * Special: value 0 means "use default hashLog". */
523 ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2.
524 * Resulting memory usage is (1 << (chainLog+2)).
525 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
526 * Larger tables result in better and slower compression.
527 * This parameter is useless when using "fast" strategy.
528 * It's still useful when using "dfast" strategy,
529 * in which case it defines a secondary probe table.
530 * Special: value 0 means "use default chainLog". */
531 ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
532 * More attempts result in better and slower compression.
533 * This parameter is useless when using "fast" and "dFast" strategies.
534 * Special: value 0 means "use default searchLog". */
535 ZSTD_c_minMatch=105, /* Minimum size of searched matches.
536 * Note that Zstandard can still find matches of smaller size,
537 * it just tweaks its search algorithm to look for this size and larger.
538 * Larger values increase compression and decompression speed, but decrease ratio.
539 * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
540 * Note that currently, for all strategies < btopt, the effective minimum is 4,
541 * and for all strategies > fast, the effective maximum is 6.
542 * Special: value 0 means "use default minMatchLength". */
543 ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
544 * For strategies btopt, btultra & btultra2:
545 * Length of Match considered "good enough" to stop search.
546 * Larger values make compression stronger, and slower.
547 * For strategy fast:
548 * Distance between match sampling.
549 * Larger values make compression faster, and weaker.
550 * Special: value 0 means "use default targetLength". */
551 ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition.
552 * The higher the value of selected strategy, the more complex it is,
553 * resulting in stronger and slower compression.
554 * Special: value 0 means "use default strategy". */
555
556 /* LDM mode parameters */
557 ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
558 * This parameter is designed to improve compression ratio
559 * for large inputs, by finding large matches at long distance.
560 * It increases memory usage and window size.
561 * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
562 * except when expressly set to a different value. */
563 ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2.
564 * Larger values increase memory usage and compression ratio,
565 * but decrease compression speed.
566 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
567 * default: windowlog - 7.
568 * Special: value 0 means "automatically determine hashlog". */
569 ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher.
570 * Larger/too small values usually decrease compression ratio.
571 * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
572 * Special: value 0 means "use default value" (default: 64). */
573 ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
574 * Larger values improve collision resolution but decrease compression speed.
575 * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
576 * Special: value 0 means "use default value" (default: 3). */
577 ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
578 * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
579 * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
580 * Larger values improve compression speed.
581 * Deviating far from default value will likely result in a compression ratio decrease.
582 * Special: value 0 means "automatically determine hashRateLog". */
583
584 /* frame parameters */
585 ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
586 * Content size must be known at the beginning of compression.
587 * This is automatically the case when using ZSTD_compress2(),
588 * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
589 ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
590 ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
410
591
411 #define ZSTD_WINDOWLOG_MAX_32 30
592 /* multi-threading parameters */
412 #define ZSTD_WINDOWLOG_MAX_64 31
593 /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
413 #define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
594 * They return an error otherwise. */
414 #define ZSTD_WINDOWLOG_MIN 10
595 ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
415 #define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
596 * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() :
416 #define ZSTD_HASHLOG_MIN 6
597 * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
417 #define ZSTD_CHAINLOG_MAX_32 29
598 * while compression work is performed in parallel, within worker threads.
418 #define ZSTD_CHAINLOG_MAX_64 30
599 * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
419 #define ZSTD_CHAINLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
600 * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
420 #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
601 * More workers improve speed, but also increase memory usage.
421 #define ZSTD_HASHLOG3_MAX 17
602 * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
422 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
603 ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
423 #define ZSTD_SEARCHLOG_MIN 1
604 * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
424 #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
605 * 0 means default, which is dynamically determined based on compression parameters.
425 #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */
606 * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
426 #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
607 * The minimum size is automatically and transparently enforced */
427 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
608 ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
428 #define ZSTD_LDM_MINMATCH_MAX 4096
609 * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
429 #define ZSTD_LDM_MINMATCH_MIN 4
610 * It helps preserve compression ratio, while each job is compressed in parallel.
430 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
611 * This value is enforced only when nbWorkers >= 1.
612 * Larger values increase compression ratio, but decrease speed.
613 * Possible values range from 0 to 9 :
614 * - 0 means "default" : value will be determined by the library, depending on strategy
615 * - 1 means "no overlap"
616 * - 9 means "full overlap", using a full window size.
617 * Each intermediate rank increases/decreases load size by a factor 2 :
618 * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
619 * default value varies between 6 and 9, depending on strategy */
620
621 /* note : additional experimental parameters are also available
622 * within the experimental section of the API.
623 * At the time of this writing, they include :
624 * ZSTD_c_rsyncable
625 * ZSTD_c_format
626 * ZSTD_c_forceMaxWindow
627 * ZSTD_c_forceAttachDict
628 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
629 * note : never ever use experimentalParam? names directly;
630 * also, the enums values themselves are unstable and can still change.
631 */
632 ZSTD_c_experimentalParam1=500,
633 ZSTD_c_experimentalParam2=10,
634 ZSTD_c_experimentalParam3=1000,
635 ZSTD_c_experimentalParam4=1001
636 } ZSTD_cParameter;
637
638
639 typedef struct {
640 size_t error;
641 int lowerBound;
642 int upperBound;
643 } ZSTD_bounds;
644
645 /*! ZSTD_cParam_getBounds() :
646 * All parameters must belong to an interval with lower and upper bounds,
647 * otherwise they will either trigger an error or be automatically clamped.
648 * @return : a structure, ZSTD_bounds, which contains
649 * - an error status field, which must be tested using ZSTD_isError()
650 * - lower and upper bounds, both inclusive
651 */
652 ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
653
654 /*! ZSTD_CCtx_setParameter() :
655 * Set one compression parameter, selected by enum ZSTD_cParameter.
656 * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
657 * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
658 * Setting a parameter is generally only possible during frame initialization (before starting compression).
659 * Exception : when using multi-threading mode (nbWorkers >= 1),
660 * the following parameters can be updated _during_ compression (within same frame):
661 * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
662 * new parameters will be active for next job only (after a flush()).
663 * @return : an error code (which can be tested using ZSTD_isError()).
664 */
665 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
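
A short sketch combining the two calls above (helper name is illustrative); at this vendored version the staging symbols still sit behind ZSTD_STATIC_LINKING_ONLY, hence the define:

    #define ZSTD_STATIC_LINKING_ONLY   /* staging API at v1.3.8 */
    #include <zstd.h>

    static size_t set_level_checked(ZSTD_CCtx* cctx, int level)
    {
        ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
        if (ZSTD_isError(b.error)) return b.error;
        if (level < b.lowerBound) level = b.lowerBound;   /* clamp into advertised range */
        if (level > b.upperBound) level = b.upperBound;
        return ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
    }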
431
666
432 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size to know frame header size */
667 /*! ZSTD_CCtx_setPledgedSrcSize() :
433 #define ZSTD_FRAMEHEADERSIZE_MIN 6
668 * Total input data size to be compressed as a single frame.
434 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
669 * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag.
435 static const size_t ZSTD_frameHeaderSize_prefix = ZSTD_FRAMEHEADERSIZE_PREFIX;
670 * This value will also be controlled at end of frame, and trigger an error if not respected.
436 static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
671 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
437 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
672 * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
438 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
673 * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
674 * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
675 * Note 2 : pledgedSrcSize is only valid once, for the next frame.
676 * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
677 * Note 3 : Whenever all input data is provided and consumed in a single round,
678 * for example with ZSTD_compress2(),
679 * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
680 * this value is automatically overridden by srcSize instead.
681 */
682 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
683
684 /*! ZSTD_CCtx_loadDictionary() :
685 * Create an internal CDict from `dict` buffer.
686 * Decompression will have to use same dictionary.
687 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
688 * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
689 * meaning "return to no-dictionary mode".
690 * Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
691 * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
692 * Note 2 : Loading a dictionary involves building tables.
693 * It's also a CPU consuming operation, with non-negligible impact on latency.
694 * Tables are dependent on compression parameters, and for this reason,
695 * compression parameters can no longer be changed after loading a dictionary.
696 * Note 3 :`dict` content will be copied internally.
697 * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
698 * In such a case, dictionary buffer must outlive its users.
699 * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
700 * to precisely select how dictionary content must be interpreted. */
701 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
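
A sketch of the sticky-dictionary behaviour described in Note 1 (illustrative helper; the decompression side must load the same dictionary, see ZSTD_DCtx_loadDictionary further below):

    #define ZSTD_STATIC_LINKING_ONLY   /* staging API at v1.3.8 */
    #include <zstd.h>

    static size_t use_dict_then_clear(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
    {
        size_t const r = ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);
        if (ZSTD_isError(r)) return r;
        /* ... compress any number of frames here : the dictionary stays active ... */
        return ZSTD_CCtx_loadDictionary(cctx, NULL, 0);  /* back to no-dictionary mode */
    }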
702
703 /*! ZSTD_CCtx_refCDict() :
704 * Reference a prepared dictionary, to be used for all next compressed frames.
705 * Note that compression parameters are enforced from within CDict,
706 * and supersede any compression parameter previously set within CCtx.
707 * The dictionary will remain valid for future compressed frames using same CCtx.
708 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
709 * Special : Referencing a NULL CDict means "return to no-dictionary mode".
710 * Note 1 : Currently, only one dictionary can be managed.
711 * Referencing a new dictionary effectively "discards" any previous one.
712 * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
713 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
714
715 /*! ZSTD_CCtx_refPrefix() :
716 * Reference a prefix (single-usage dictionary) for next compressed frame.
717 * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
718 * Decompression will need same prefix to properly regenerate data.
719 * Compressing with a prefix is similar in outcome as performing a diff and compressing it,
720 * but performs much faster, especially during decompression (compression speed is tunable with compression level).
721 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
722 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
723 * Note 1 : Prefix buffer is referenced. It **must** outlive compression.
724 * Its content must remain unmodified during compression.
725 * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
726 * ensure that the window size is large enough to contain the entire source.
727 * See ZSTD_c_windowLog.
728 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
729 * It's a CPU consuming operation, with non-negligible impact on latency.
730 * If there is a need to use the same prefix multiple times, consider loadDictionary instead.
731 * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
732 * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
733 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
734 const void* prefix, size_t prefixSize);
735
736
737 typedef enum {
738 ZSTD_reset_session_only = 1,
739 ZSTD_reset_parameters = 2,
740 ZSTD_reset_session_and_parameters = 3
741 } ZSTD_ResetDirective;
742
743 /*! ZSTD_CCtx_reset() :
744 * There are 2 different things that can be reset, independently or jointly :
745 * - The session : will stop compressing current frame, and make CCtx ready to start a new one.
746 * Useful after an error, or to interrupt any ongoing compression.
747 * Any internal data not yet flushed is cancelled.
748 * Compression parameters and dictionary remain unchanged.
749 * They will be used to compress next frame.
750 * Resetting session never fails.
751 * - The parameters : changes all parameters back to "default".
752 * This removes any reference to any dictionary too.
753 * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
754 * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
755 * - Both : similar to resetting the session, followed by resetting parameters.
756 */
757 ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
439
758
440
759
441
760
761 /*! ZSTD_compress2() :
762 * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
763 * ZSTD_compress2() always starts a new frame.
764 * Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
765 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
766 * - The function is always blocking, returns when compression is completed.
767 * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
768 * @return : compressed size written into `dst` (<= `dstCapacity`),
769 * or an error code if it fails (which can be tested using ZSTD_isError()).
770 */
771 ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
772 void* dst, size_t dstCapacity,
773 const void* src, size_t srcSize);
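
A sketch of the intended call sequence (illustrative wrapper): push sticky parameters, then compress in one shot:

    #define ZSTD_STATIC_LINKING_ONLY   /* staging API at v1.3.8 */
    #include <zstd.h>

    static size_t compress_advanced(ZSTD_CCtx* cctx,
                                    void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);   /* append 32-bit checksum */
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);  /* test with ZSTD_isError() */
    }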
774
775 typedef enum {
776 ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
777 ZSTD_e_flush=1, /* flush any data provided so far,
778 * it creates (at least) one new block, that can be decoded immediately on reception;
779 * frame will continue: any future data can still reference previously compressed data, improving compression. */
780 ZSTD_e_end=2 /* flush any remaining data _and_ close current frame.
781 * note that frame is only closed after compressed data is fully flushed (return value == 0).
782 * After that point, any additional data starts a new frame.
783 * note : each frame is independent (does not reference any content from previous frame). */
784 } ZSTD_EndDirective;
785
786 /*! ZSTD_compressStream2() :
787 * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
788 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
789 * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
790 - output->pos must be <= dstCapacity, input->pos must be <= srcSize
791 - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
792 * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
793 * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
794 * and then immediately returns, just indicating that there is some data remaining to be flushed.
795 * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least one byte.
796 * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
797 * - @return provides a minimum amount of data remaining to be flushed from internal buffers
798 * or an error code, which can be tested using ZSTD_isError().
799 * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
800 * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
801 * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
802 * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
803 * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
804 * Before starting a new compression job, or changing compression parameters,
805 * it is required to fully flush internal buffers.
806 */
807 ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
808 ZSTD_outBuffer* output,
809 ZSTD_inBuffer* input,
810 ZSTD_EndDirective endOp);
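
A sketch of compressing a whole in-memory buffer with a single ZSTD_e_end directive, looping until the frame is fully flushed; it assumes dstCapacity >= ZSTD_compressBound(srcSize) so the loop terminates:

    #define ZSTD_STATIC_LINKING_ONLY   /* staging API at v1.3.8 */
    #include <zstd.h>

    static size_t one_frame(ZSTD_CCtx* cctx,
                            void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize)
    {
        ZSTD_inBuffer  input  = { src, srcSize, 0 };
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        size_t remaining;
        do {
            remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
            if (ZSTD_isError(remaining)) return remaining;
        } while (remaining != 0);   /* 0 : frame fully flushed into dst */
        return output.pos;          /* compressed size */
    }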
811
812
813
814 /* ============================== */
815 /* Advanced decompression API */
816 /* ============================== */
817
818 /* The advanced API pushes parameters one by one into an existing DCtx context.
819 * Parameters are sticky, and remain valid for all following frames
820 * using the same DCtx context.
821 * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
822 * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
823 * Therefore, no new decompression function is necessary.
824 */
825
826
827 typedef enum {
828
829 ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
830 * the streaming API will refuse to allocate memory buffer
831 * in order to protect the host from unreasonable memory requirements.
832 * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
833 * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) */
834
835 /* note : additional experimental parameters are also available
836 * within the experimental section of the API.
837 * At the time of this writing, they include :
838 * ZSTD_c_format
839 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
840 * note : never ever use experimentalParam? names directly
841 */
842 ZSTD_d_experimentalParam1=1000
843
844 } ZSTD_dParameter;
845
846
847 /*! ZSTD_dParam_getBounds() :
848 * All parameters must belong to an interval with lower and upper bounds,
849 * otherwise they will either trigger an error or be automatically clamped.
850 * @return : a structure, ZSTD_bounds, which contains
851 * - an error status field, which must be tested using ZSTD_isError()
852 * - both lower and upper bounds, inclusive
853 */
854 ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
855
856 /*! ZSTD_DCtx_setParameter() :
857 * Set one compression parameter, selected by enum ZSTD_dParameter.
858 * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
859 * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
860 * Setting a parameter is only possible during frame initialization (before starting decompression).
861 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
862 */
863 ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
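
For instance, a sketch raising the streaming window limit beyond ZSTD_WINDOWLOG_LIMIT_DEFAULT, for frames produced with a large ZSTD_c_windowLog:

    #define ZSTD_STATIC_LINKING_ONLY   /* staging API at v1.3.8 */
    #include <zstd.h>

    static size_t allow_large_windows(ZSTD_DCtx* dctx)
    {
        /* accept frames requiring up to a 1 GB (1<<30) window when streaming */
        return ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 30);
    }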
864
865
866 /*! ZSTD_DCtx_loadDictionary() :
867 * Create an internal DDict from dict buffer,
868 * to be used to decompress next frames.
869 * The dictionary remains valid for all future frames, until explicitly invalidated.
870 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
871 * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
872 * meaning "return to no-dictionary mode".
873 * Note 1 : Loading a dictionary involves building tables,
874 * which has a non-negligible impact on CPU usage and latency.
875 * It's recommended to "load once, use many times", to amortize the cost
876 * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
877 * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
878 * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
879 * how dictionary content is loaded and interpreted.
880 */
881 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
882
883 /*! ZSTD_DCtx_refDDict() :
884 * Reference a prepared dictionary, to be used to decompress next frames.
885 * The dictionary remains active for decompression of future frames using same DCtx.
886 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
887 * Note 1 : Currently, only one dictionary can be managed.
888 * Referencing a new dictionary effectively "discards" any previous one.
889 * Special: referencing a NULL DDict means "return to no-dictionary mode".
890 * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
891 */
892 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
893
894 /*! ZSTD_DCtx_refPrefix() :
895 * Reference a prefix (single-usage dictionary) to decompress next frame.
896 * This is the reverse operation of ZSTD_CCtx_refPrefix(),
897 * and must use the same prefix as the one used during compression.
898 * Prefix is **only used once**. Reference is discarded at end of frame.
899 * End of frame is reached when ZSTD_decompressStream() returns 0.
900 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
901 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
902 * Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
903 * Prefix buffer must remain unmodified up to the end of frame,
904 * reached when ZSTD_decompressStream() returns 0.
905 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
906 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
907 * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
908 * A full dictionary is more costly, as it requires building tables.
909 */
910 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
911 const void* prefix, size_t prefixSize);
912
913 /*! ZSTD_DCtx_reset() :
914 * Return a DCtx to clean state.
915 * Session and parameters can be reset jointly or separately.
916 * Parameters can only be reset when no active frame is being decompressed.
917 * @return : 0, or an error code, which can be tested with ZSTD_isError()
918 */
919 ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
920
921
922
923 /****************************************************************************************
924 * experimental API (static linking only)
925 ****************************************************************************************
926 * The following symbols and constants
927 * are not planned to join "stable API" status in the near future.
928 * They can still change in future versions.
929 * Some of them are planned to remain in the static_only section indefinitely.
930 * Some of them might be removed in the future (especially when redundant with existing stable functions)
931 * ***************************************************************************************/
932
933 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */
934 #define ZSTD_FRAMEHEADERSIZE_MIN 6
935 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
936 #define ZSTD_SKIPPABLEHEADERSIZE 8
937
938 /* compression parameter bounds */
939 #define ZSTD_WINDOWLOG_MAX_32 30
940 #define ZSTD_WINDOWLOG_MAX_64 31
941 #define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
942 #define ZSTD_WINDOWLOG_MIN 10
943 #define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
944 #define ZSTD_HASHLOG_MIN 6
945 #define ZSTD_CHAINLOG_MAX_32 29
946 #define ZSTD_CHAINLOG_MAX_64 30
947 #define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
948 #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
949 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
950 #define ZSTD_SEARCHLOG_MIN 1
951 #define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
952 #define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
953 #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
954 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
955 #define ZSTD_STRATEGY_MIN ZSTD_fast
956 #define ZSTD_STRATEGY_MAX ZSTD_btultra2
957
958
959 #define ZSTD_OVERLAPLOG_MIN 0
960 #define ZSTD_OVERLAPLOG_MAX 9
961
962 #define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
963 * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
964 * to preserve host's memory from unreasonable requirements.
965 * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
966 * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
967
968
969 /* LDM parameter bounds */
970 #define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
971 #define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
972 #define ZSTD_LDM_MINMATCH_MIN 4
973 #define ZSTD_LDM_MINMATCH_MAX 4096
974 #define ZSTD_LDM_BUCKETSIZELOG_MIN 1
975 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
976 #define ZSTD_LDM_HASHRATELOG_MIN 0
977 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
978
979 /* internal */
980 #define ZSTD_HASHLOG3_MAX 17
981
982
442 /* --- Advanced types --- */
983 /* --- Advanced types --- */
443 typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
984
444 ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; /* from faster to stronger */
985 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
445
986
446 typedef struct {
987 typedef struct {
447 unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
988 unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
448 unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
989 unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
449 unsigned hashLog; /**< dispatch table : larger == faster, more memory */
990 unsigned hashLog; /**< dispatch table : larger == faster, more memory */
450 unsigned searchLog; /**< nb of searches : larger == more compression, slower */
991 unsigned searchLog; /**< nb of searches : larger == more compression, slower */
451 unsigned searchLength; /**< match length searched : larger == faster decompression, sometimes less compression */
992 unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
452 unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
993 unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
453 ZSTD_strategy strategy;
994 ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
454 } ZSTD_compressionParameters;
995 } ZSTD_compressionParameters;
455
996
456 typedef struct {
997 typedef struct {
457 unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
998 int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
458 unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
999 int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
459 unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
1000 int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
460 } ZSTD_frameParameters;
1001 } ZSTD_frameParameters;
461
1002
462 typedef struct {
1003 typedef struct {
@@ -464,33 +1005,70 b' typedef struct {'
464 ZSTD_frameParameters fParams;
1005 ZSTD_frameParameters fParams;
465 } ZSTD_parameters;
1006 } ZSTD_parameters;
466
1007
467 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
468
469 typedef enum {
1008 typedef enum {
470 ZSTD_dct_auto=0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
1009 ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
471 ZSTD_dct_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
1010 ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
472 ZSTD_dct_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */
1011 ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
473 } ZSTD_dictContentType_e;
1012 } ZSTD_dictContentType_e;
474
1013
475 typedef enum {
1014 typedef enum {
476 ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
1015 ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
477 ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
1016 ZSTD_dlm_byRef = 1, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
478 } ZSTD_dictLoadMethod_e;
1017 } ZSTD_dictLoadMethod_e;
479
1018
1019 typedef enum {
1020 /* Opened question : should we have a format ZSTD_f_auto ?
1021 * Today, it would mean exactly the same as ZSTD_f_zstd1.
1022 * But, in the future, should several formats become supported,
1023 * on the compression side, it would mean "default format".
1024 * On the decompression side, it would mean "automatic format detection",
1025 * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
1026 * Since meaning is a little different, another option could be to define different enums for compression and decompression.
1027 * This question could be kept for later, when there are actually multiple formats to support,
1028 * but there is also the question of pinning enum values, and pinning value `0` is especially important */
1029 ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
1030 ZSTD_f_zstd1_magicless = 1, /* Variant of zstd frame format, without initial 4-bytes magic number.
1031 * Useful to save 4 bytes per generated frame.
1032 * Decoder cannot automatically recognise this format, so this instruction is required. */
1033 } ZSTD_format_e;
1034
1035 typedef enum {
1036 /* Note: this enum and the behavior it controls are effectively internal
1037 * implementation details of the compressor. They are expected to continue
1038 * to evolve and should be considered only in the context of extremely
1039 * advanced performance tuning.
1040 *
1041 * Zstd currently supports the use of a CDict in two ways:
1042 *
1043 * - The contents of the CDict can be copied into the working context. This
1044 * means that the compression can search both the dictionary and input
1045 * while operating on a single set of internal tables. This makes
1046 * the compression faster per-byte of input. However, the initial copy of
1047 * the CDict's tables incurs a fixed cost at the beginning of the
1048 * compression. For small compressions (< 8 KB), that copy can dominate
1049 * the cost of the compression.
1050 *
1051 * - The CDict's tables can be used in-place. In this model, compression is
1052 * slower per input byte, because the compressor has to search two sets of
1053 * tables. However, this model incurs no start-up cost (as long as the
1054 * working context's tables can be reused). For small inputs, this can be
1055 * faster than copying the CDict's tables.
1056 *
1057 * Zstd has a simple internal heuristic that selects which strategy to use
1058 * at the beginning of a compression. However, if experimentation shows that
1059 * Zstd is making poor choices, it is possible to override that choice with
1060 * this enum.
1061 */
1062 ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
1063 ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
1064 ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
1065 } ZSTD_dictAttachPref_e;
480
1066
481
1067
482 /***************************************
1068 /***************************************
483 * Frame size functions
1069 * Frame size functions
484 ***************************************/
1070 ***************************************/
485
1071
486 /*! ZSTD_findFrameCompressedSize() :
487 * `src` should point to the start of a ZSTD encoded frame or skippable frame
488 * `srcSize` must be >= first frame size
489 * @return : the compressed size of the first frame starting at `src`,
490 * suitable to pass to `ZSTD_decompress` or similar,
491 * or an error code if input is invalid */
492 ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
493
494 /*! ZSTD_findDecompressedSize() :
1072 /*! ZSTD_findDecompressedSize() :
495 * `src` should point the start of a series of ZSTD encoded and/or skippable frames
1073 * `src` should point the start of a series of ZSTD encoded and/or skippable frames
496 * `srcSize` must be the _exact_ size of this series
1074 * `srcSize` must be the _exact_ size of this series
@@ -515,7 +1093,7 b' ZSTDLIB_API size_t ZSTD_findFrameCompres'
515 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
1093 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
516
1094
517 /*! ZSTD_frameHeaderSize() :
1095 /*! ZSTD_frameHeaderSize() :
518 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
1096 * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
519 * @return : size of the Frame Header,
1097 * @return : size of the Frame Header,
520 * or an error code (if srcSize is too small) */
1098 * or an error code (if srcSize is too small) */
521 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
1099 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
@@ -525,16 +1103,6 b' ZSTDLIB_API size_t ZSTD_frameHeaderSize('
525 * Memory management
1103 * Memory management
526 ***************************************/
1104 ***************************************/
527
1105
528 /*! ZSTD_sizeof_*() :
529 * These functions give the current memory usage of selected object.
530 * Object memory usage can evolve when re-used. */
531 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
532 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
533 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
534 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
535 ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
536 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
537
538 /*! ZSTD_estimate*() :
1106 /*! ZSTD_estimate*() :
539 * These functions make it possible to estimate memory usage
1107 * These functions make it possible to estimate memory usage
540 * of a future {D,C}Ctx, before its creation.
1108 * of a future {D,C}Ctx, before its creation.
@@ -542,7 +1110,7 b' ZSTDLIB_API size_t ZSTD_sizeof_DDict(con'
542 * It will also consider src size to be arbitrarily "large", which is worst case.
1110 * It will also consider src size to be arbitrarily "large", which is worst case.
543 * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
1111 * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
544 * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
1112 * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
545 * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbWorkers is >= 1.
1113 * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
546 * Note : CCtx size estimation is only correct for single-threaded compression. */
1114 * Note : CCtx size estimation is only correct for single-threaded compression. */
547 ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
1115 ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
548 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
1116 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
@@ -554,7 +1122,7 b' ZSTDLIB_API size_t ZSTD_estimateDCtxSize'
554 * It will also consider src size to be arbitrarily "large", which is worst case.
1122 * It will also consider src size to be arbitrarily "large", which is worst case.
555 * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
1123 * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
556 * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
1124 * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
557 * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbWorkers is >= 1.
1125 * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
558 * Note : CStream size estimation is only correct for single-threaded compression.
1126 * Note : CStream size estimation is only correct for single-threaded compression.
559 * ZSTD_DStream memory budget depends on window Size.
1127 * ZSTD_DStream memory budget depends on window Size.
560 * This information can be passed manually, using ZSTD_estimateDStreamSize,
1128 * This information can be passed manually, using ZSTD_estimateDStreamSize,
@@ -617,6 +1185,7 b' ZSTDLIB_API const ZSTD_DDict* ZSTD_initS'
617 ZSTD_dictLoadMethod_e dictLoadMethod,
1185 ZSTD_dictLoadMethod_e dictLoadMethod,
618 ZSTD_dictContentType_e dictContentType);
1186 ZSTD_dictContentType_e dictContentType);
619
1187
1188
620 /*! Custom memory allocation :
1189 /*! Custom memory allocation :
621 * These prototypes make it possible to pass your own allocation/free functions.
1190 * These prototypes make it possible to pass your own allocation/free functions.
622 * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
1191 * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
@@ -651,8 +1220,9 b' ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict'
651
1220
652 /*! ZSTD_createCDict_byReference() :
1221 /*! ZSTD_createCDict_byReference() :
653 * Create a digested dictionary for compression
1222 * Create a digested dictionary for compression
654 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
1223 * Dictionary content is just referenced, not duplicated.
655 * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
1224 * As a consequence, `dictBuffer` **must** outlive CDict,
1225 * and its content must remain unmodified throughout the lifetime of CDict. */
656 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
1226 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
657
1227
658 /*! ZSTD_getCParams() :
1228 /*! ZSTD_getCParams() :
@@ -675,22 +1245,161 b' ZSTDLIB_API size_t ZSTD_checkCParams(ZST'
675 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
1245 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
676
1246
677 /*! ZSTD_compress_advanced() :
1247 /*! ZSTD_compress_advanced() :
678 * Same as ZSTD_compress_usingDict(), with fine-tune control over each compression parameter */
1248 * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */
679 ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
1249 ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
680 void* dst, size_t dstCapacity,
1250 void* dst, size_t dstCapacity,
681 const void* src, size_t srcSize,
1251 const void* src, size_t srcSize,
682 const void* dict,size_t dictSize,
1252 const void* dict,size_t dictSize,
683 ZSTD_parameters params);
1253 ZSTD_parameters params);
684
1254
685 /*! ZSTD_compress_usingCDict_advanced() :
1255 /*! ZSTD_compress_usingCDict_advanced() :
686 * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
1256 * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
687 ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
1257 ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
688 void* dst, size_t dstCapacity,
1258 void* dst, size_t dstCapacity,
689 const void* src, size_t srcSize,
1259 const void* src, size_t srcSize,
690 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams);
1260 const ZSTD_CDict* cdict,
1261 ZSTD_frameParameters fParams);
1262
1263
1264 /*! ZSTD_CCtx_loadDictionary_byReference() :
1265 * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
1266 * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
1267 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
1268
1269 /*! ZSTD_CCtx_loadDictionary_advanced() :
1270 * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
1271 * how to load the dictionary (by copy ? by reference ?)
1272 * and how to interpret it (automatic ? force raw mode ? full mode only ?) */
1273 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
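For instance, a hypothetical `rawDict` buffer could be referenced without copying and forced to raw-content interpretation (a sketch, not the only valid combination):

size_t const err = ZSTD_CCtx_loadDictionary_advanced(cctx, rawDict, rawDictSize,
                        ZSTD_dlm_byRef,        /* no internal copy : rawDict must outlive its usage within cctx */
                        ZSTD_dct_rawContent);  /* never parsed as a structured dictionary */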
1274
1275 /*! ZSTD_CCtx_refPrefix_advanced() :
1276 * Same as ZSTD_CCtx_refPrefix(), but gives finer control over
1277 * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
1278 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
1279
1280 /* === experimental parameters === */
1281 /* these parameters can be used with ZSTD_setParameter()
1282 * they are not guaranteed to remain supported in the future */
1283
1284 /* Enables rsyncable mode,
1285 * which makes compressed files more rsync friendly
1286 * by adding periodic synchronization points to the compressed data.
1287 * The target average block size is ZSTD_c_jobSize / 2.
1288 * It's possible to modify the job size to increase or decrease
1289 * the granularity of the synchronization point.
1290 * If the jobSize is smaller than the window size,
1291 * compression ratio will degrade.
1292 * NOTE 1: rsyncable mode only works when multithreading is enabled.
1293 * NOTE 2: rsyncable performs poorly in combination with long range mode,
1294 * since it will decrease the effectiveness of synchronization points,
1295 * though mileage may vary.
1296 * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
1297 * If the selected compression level is already running significantly slower,
1298 * the overall speed won't be significantly impacted.
1299 */
1300 #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
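A short sketch, assuming a build with multithreading support, since NOTE 1 makes rsyncable dependent on nbWorkers >= 1:

static ZSTD_CCtx* make_rsyncable_cctx(void)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2);   /* rsyncable requires multithreading */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1);
    return cctx;   /* then stream data through ZSTD_compressStream2() as usual */
}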
1301
1302 /* Select a compression format.
1303 * The value must be of type ZSTD_format_e.
1304 * See ZSTD_format_e enum definition for details */
1305 #define ZSTD_c_format ZSTD_c_experimentalParam2
1306
1307 /* Force back-reference distances to remain < windowSize,
1308 * even when referencing into Dictionary content (default:0) */
1309 #define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
1310
1311 /* Controls whether the contents of a CDict
1312 * are used in place, or copied into the working context.
1313 * Accepts values from the ZSTD_dictAttachPref_e enum.
1314 * See the comments on that enum for an explanation of the feature. */
1315 #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
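A fragment sketching how the heuristic could be overridden for many small inputs, where attaching usually wins; `cctx` and `cdict` (a prepared ZSTD_CDict) are hypothetical:

ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);  /* never copy the tables */
ZSTD_CCtx_refCDict(cctx, cdict);   /* CDict tables will be used in place */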
1316
1317 /*! ZSTD_CCtx_getParameter() :
1318 * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
1319 * and store it into int* value.
1320 * @return : 0, or an error code (which can be tested with ZSTD_isError()).
1321 */
1322 ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
691
1323
692
1324
693 /*--- Advanced decompression functions ---*/
1325 /*! ZSTD_CCtx_params :
1326 * Quick howto :
1327 * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
1328 * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into
1329 * an existing ZSTD_CCtx_params structure.
1330 * This is similar to
1331 * ZSTD_CCtx_setParameter().
1332 * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
1333 * an existing CCtx.
1334 * These parameters will be applied to
1335 * all subsequent frames.
1336 * - ZSTD_compressStream2() : Do compression using the CCtx.
1337 * - ZSTD_freeCCtxParams() : Free the memory.
1338 *
1339 * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
1340 * for static allocation of CCtx for single-threaded compression.
1341 */
1342 ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
1343 ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
1344
1345 /*! ZSTD_CCtxParams_reset() :
1346 * Reset params to default values.
1347 */
1348 ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
1349
1350 /*! ZSTD_CCtxParams_init() :
1351 * Initializes the compression parameters of cctxParams according to
1352 * compression level. All other parameters are reset to their default values.
1353 */
1354 ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
1355
1356 /*! ZSTD_CCtxParams_init_advanced() :
1357 * Initializes the compression and frame parameters of cctxParams according to
1358 * params. All other parameters are reset to their default values.
1359 */
1360 ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
1361
1362 /*! ZSTD_CCtxParam_setParameter() :
1363 * Similar to ZSTD_CCtx_setParameter.
1364 * Set one compression parameter, selected by enum ZSTD_cParameter.
1365 * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
1366 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1367 */
1368 ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
1369
1370 /*! ZSTD_CCtxParam_getParameter() :
1371 * Similar to ZSTD_CCtx_getParameter.
1372 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1373 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1374 */
1375 ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
1376
1377 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
1378 * Apply a set of ZSTD_CCtx_params to the compression context.
1379 * This can be done even after compression is started;
1380 * if nbWorkers==0, this will have no impact until a new compression is started.
1381 * if nbWorkers>=1, new parameters will be picked up at next job,
1382 * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
1383 */
1384 ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
1385 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
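A minimal sketch of the quick howto above. The parameter values are illustrative, `cctx` is an existing ZSTD_CCtx, and it assumes the context keeps its own copy of the applied parameter set, so the params object can be freed immediately:

ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
ZSTD_CCtxParam_setParameter(params, ZSTD_c_compressionLevel, 19);
ZSTD_CCtxParam_setParameter(params, ZSTD_c_checksumFlag, 1);
ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);   /* applies to all subsequent frames */
ZSTD_freeCCtxParams(params);
/* ... then compress with ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end) ... */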
1386
1387 /*! ZSTD_compressStream2_simpleArgs() :
1388 * Same as ZSTD_compressStream2(),
1389 * but using only integral types as arguments.
1390 * This variant might be helpful for binders from dynamic languages
1391 * which have trouble handling structures containing memory pointers.
1392 */
1393 ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
1394 ZSTD_CCtx* cctx,
1395 void* dst, size_t dstCapacity, size_t* dstPos,
1396 const void* src, size_t srcSize, size_t* srcPos,
1397 ZSTD_EndDirective endOp);
1398
1399
1400 /***************************************
1401 * Advanced decompression functions
1402 ***************************************/
694
1403
695 /*! ZSTD_isFrame() :
1404 /*! ZSTD_isFrame() :
696 * Tells if the content of `buffer` starts with a valid Frame Identifier.
1405 * Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -731,9 +1440,64 b' ZSTDLIB_API unsigned ZSTD_getDictID_from'
731 * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
1440 * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
732 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
1441 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
733
1442
1443 /*! ZSTD_DCtx_loadDictionary_byReference() :
1444 * Same as ZSTD_DCtx_loadDictionary(),
1445 * but references `dict` content instead of copying it into `dctx`.
1446 * This saves memory if `dict` remains around.
1447 * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
1448 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
1449
1450 /*! ZSTD_DCtx_loadDictionary_advanced() :
1451 * Same as ZSTD_DCtx_loadDictionary(),
1452 * but gives direct control over
1453 * how to load the dictionary (by copy ? by reference ?)
1454 * and how to interpret it (automatic ? force raw mode ? full mode only ?). */
1455 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
1456
1457 /*! ZSTD_DCtx_refPrefix_advanced() :
1458 * Same as ZSTD_DCtx_refPrefix(), but gives finer control over
1459 * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
1460 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
1461
1462 /*! ZSTD_DCtx_setMaxWindowSize() :
1463 * Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
1464 * This protects a decoder context from reserving too much memory for itself (potential attack scenario).
1465 * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
1466 * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
1467 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
1468 */
1469 ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
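For example (the limit value is illustrative), the following two calls are roughly equivalent ways to cap the accepted window at 2^26 bytes:

ZSTD_DCtx_setMaxWindowSize(dctx, (size_t)1 << 26);    /* refuse frames requiring > 64 MB windows */
/* or, via the parameter referenced by ZSTD_WINDOWLOG_LIMIT_DEFAULT above : */
ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 26);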
1470
1471 /* ZSTD_d_format
1472 * experimental parameter,
1473 * allowing selection between ZSTD_format_e input compression formats
1474 */
1475 #define ZSTD_d_format ZSTD_d_experimentalParam1
1476
1477 /*! ZSTD_DCtx_setFormat() :
1478 * Instruct the decoder context about what kind of data to decode next.
1479 * This instruction is mandatory to decode data without a fully-formed header,
1480 * such as ZSTD_f_zstd1_magicless for example.
1481 * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
1482 ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
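A hedged round-trip sketch with hypothetical buffer names; it assumes the one-shot helpers ZSTD_compress2() and ZSTD_decompressDCtx() honor the sticky format settings:

/* write a frame without the 4-byte magic number */
ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);

/* the decoder cannot detect this format : it must be told explicitly */
ZSTD_DCtx_setFormat(dctx, ZSTD_f_zstd1_magicless);
size_t const dSize = ZSTD_decompressDCtx(dctx, out, outCapacity, dst, cSize);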
1483
1484 /*! ZSTD_decompressStream_simpleArgs() :
1485 * Same as ZSTD_decompressStream(),
1486 * but using only integral types as arguments.
1487 * This can be helpful for binders from dynamic languages
1488 * which have trouble handling structures containing memory pointers.
1489 */
1490 ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
1491 ZSTD_DCtx* dctx,
1492 void* dst, size_t dstCapacity, size_t* dstPos,
1493 const void* src, size_t srcSize, size_t* srcPos);
1494
734
1495
735 /********************************************************************
1496 /********************************************************************
736 * Advanced streaming functions
1497 * Advanced streaming functions
1498 * Warning : most of these functions are now redundant with the Advanced API.
1499 * Once Advanced API reaches "stable" status,
1500 * redundant functions will be deprecated, and then at some point removed.
737 ********************************************************************/
1501 ********************************************************************/
738
1502
739 /*===== Advanced Streaming compression functions =====*/
1503 /*===== Advanced Streaming compression functions =====*/
@@ -745,7 +1509,7 b' ZSTDLIB_API size_t ZSTD_initCStream_usin'
745 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */
1509 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */
746
1510
747 /*! ZSTD_resetCStream() :
1511 /*! ZSTD_resetCStream() :
748 * start a new compression job, using same parameters from previous job.
1512 * start a new frame, using same parameters from previous frame.
749 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
1513 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
750 * Note that zcs must be initialized at least once before using ZSTD_resetCStream().
1514 * Note that zcs must be initialized at least once before using ZSTD_resetCStream().
751 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
1515 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
@@ -784,16 +1548,13 b' ZSTDLIB_API ZSTD_frameProgression ZSTD_g'
784 * + there is no active job (could be checked with ZSTD_frameProgression()), or
1548 * + there is no active job (could be checked with ZSTD_frameProgression()), or
785 * + oldest job is still actively compressing data,
1549 * + oldest job is still actively compressing data,
786 * but everything it has produced has also been flushed so far,
1550 * but everything it has produced has also been flushed so far,
787 * therefore flushing speed is currently limited by production speed of oldest job
1551 * therefore flush speed is limited by production speed of oldest job
788 * irrespective of the speed of concurrent newer jobs.
1552 * irrespective of the speed of concurrent (and newer) jobs.
789 */
1553 */
790 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
1554 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
791
1555
792
1556
793
794 /*===== Advanced Streaming decompression functions =====*/
1557 /*===== Advanced Streaming decompression functions =====*/
795 typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
796 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); /* obsolete : this API will be removed in a future version */
797 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */
1558 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */
798 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */
1559 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */
799 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
1560 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -934,12 +1695,17 b' typedef struct {'
934 unsigned dictID;
1695 unsigned dictID;
935 unsigned checksumFlag;
1696 unsigned checksumFlag;
936 } ZSTD_frameHeader;
1697 } ZSTD_frameHeader;
1698
937 /** ZSTD_getFrameHeader() :
1699 /** ZSTD_getFrameHeader() :
938 * decode Frame Header, or requires larger `srcSize`.
1700 * decode Frame Header, or requires larger `srcSize`.
939 * @return : 0, `zfhPtr` is correctly filled,
1701 * @return : 0, `zfhPtr` is correctly filled,
940 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
1702 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
941 * or an error code, which can be tested using ZSTD_isError() */
1703 * or an error code, which can be tested using ZSTD_isError() */
942 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
1704 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
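A small sketch of peeking at a header before allocating output; it assumes the usual `windowSize`/`frameContentSize` fields of the ZSTD_frameHeader structure declared above:

ZSTD_frameHeader zfh;
size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);   /* does not consume input */
if (ZSTD_isError(ret)) {
    /* invalid frame header */
} else if (ret > 0) {
    /* need at least `ret` bytes of input to decode the header */
} else {
    /* ret == 0 : header decoded; zfh.windowSize, zfh.frameContentSize, zfh.dictID are usable */
}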
1705 /*! ZSTD_getFrameHeader_advanced() :
1706 * same as ZSTD_getFrameHeader(),
1707 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
1708 ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
943 ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
1709 ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
944
1710
945 ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
1711 ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
@@ -956,522 +1722,6 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne'
956
1722
957
1723
958
1724
959 /* ============================================ */
960 /** New advanced API (experimental) */
961 /* ============================================ */
962
963 /* API design :
964 * In this advanced API, parameters are pushed one by one into an existing context,
965 * using ZSTD_CCtx_set*() functions.
966 * Pushed parameters are sticky : they are applied to next job, and any subsequent job.
967 * It's possible to reset parameters to "default" using ZSTD_CCtx_reset().
968 * Important : "sticky" parameters only work with `ZSTD_compress_generic()` !
969 * For any other entry point, "sticky" parameters are ignored !
970 *
971 * This API is intended to replace all others advanced / experimental API entry points.
972 */
973
974 /* note on enum design :
975 * All enum will be pinned to explicit values before reaching "stable API" status */
976
977 typedef enum {
978 /* Open question : should we have a format ZSTD_f_auto ?
979 * Today, it would mean exactly the same as ZSTD_f_zstd1.
980 * But, in the future, should several formats become supported,
981 * on the compression side, it would mean "default format".
982 * On the decompression side, it would mean "automatic format detection",
983 * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
984 * Since meaning is a little different, another option could be to define different enums for compression and decompression.
985 * This question could be kept for later, when there are actually multiple formats to support,
986 * but there is also the question of pinning enum values, and pinning value `0` is especially important */
987 ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
988 ZSTD_f_zstd1_magicless, /* Variant of zstd frame format, without initial 4-bytes magic number.
989 * Useful to save 4 bytes per generated frame.
990 * Decoder cannot automatically recognise this format, requiring instructions. */
991 } ZSTD_format_e;
992
993 typedef enum {
994 /* compression format */
995 ZSTD_p_format = 10, /* See ZSTD_format_e enum definition.
996 * Cast selected format as unsigned for ZSTD_CCtx_setParameter() compatibility. */
997
998 /* compression parameters */
999 ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
1000 * Default level is ZSTD_CLEVEL_DEFAULT==3.
1001 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
1002 * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
1003 * Note 2 : setting a level sets all default values of other compression parameters.
1004 * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
1005 ZSTD_p_windowLog, /* Maximum allowed back-reference distance, expressed as power of 2.
1006 * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
1007 * Special: value 0 means "use default windowLog".
1008 * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
1009 * requires explicitly allowing such window size during decompression stage. */
1010 ZSTD_p_hashLog, /* Size of the initial probe table, as a power of 2.
1011 * Resulting table size is (1 << (hashLog+2)).
1012 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
1013 * Larger tables improve compression ratio of strategies <= dFast,
1014 * and improve speed of strategies > dFast.
1015 * Special: value 0 means "use default hashLog". */
1016 ZSTD_p_chainLog, /* Size of the multi-probe search table, as a power of 2.
1017 * Resulting table size is (1 << (chainLog+2)).
1018 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
1019 * Larger tables result in better and slower compression.
1020 * This parameter is useless when using "fast" strategy.
1021 * Note it's still useful when using "dfast" strategy,
1022 * in which case it defines a secondary probe table.
1023 * Special: value 0 means "use default chainLog". */
1024 ZSTD_p_searchLog, /* Number of search attempts, as a power of 2.
1025 * More attempts result in better and slower compression.
1026 * This parameter is useless when using "fast" and "dFast" strategies.
1027 * Special: value 0 means "use default searchLog". */
1028 ZSTD_p_minMatch, /* Minimum size of searched matches (note : repCode matches can be smaller).
1029 * Larger values make faster compression and decompression, but decrease ratio.
1030 * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
1031 * Note that currently, for all strategies < btopt, effective minimum is 4,
1032 * and for all strategies > fast, effective maximum is 6.
1033 * Special: value 0 means "use default minMatchLength". */
1034 ZSTD_p_targetLength, /* Impact of this field depends on strategy.
1035 * For strategies btopt & btultra:
1036 * Length of Match considered "good enough" to stop search.
1037 * Larger values make compression stronger, and slower.
1038 * For strategy fast:
1039 * Distance between match sampling.
1040 * Larger values make compression faster, and weaker.
1041 * Special: value 0 means "use default targetLength". */
1042 ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
1043 * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
1044 * The higher the value of selected strategy, the more complex it is,
1045 * resulting in stronger and slower compression.
1046 * Special: value 0 means "use default strategy". */
1047
1048 ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching.
1049 * This parameter is designed to improve compression ratio
1050 * for large inputs, by finding large matches at long distance.
1051 * It increases memory usage and window size.
1052 * Note: enabling this parameter increases ZSTD_p_windowLog to 128 MB
1053 * except when expressly set to a different value. */
1054 ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2.
1055 * Larger values increase memory usage and compression ratio,
1056 * but decrease compression speed.
1057 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
1058 * default: windowlog - 7.
1059 * Special: value 0 means "automatically determine hashlog". */
1060 ZSTD_p_ldmMinMatch, /* Minimum match size for long distance matcher.
1061 * Values that are too large or too small usually decrease compression ratio.
1062 * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
1063 * Special: value 0 means "use default value" (default: 64). */
1064 ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
1065 * Larger values improve collision resolution but decrease compression speed.
1066 * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX .
1067 * Special: value 0 means "use default value" (default: 3). */
1068 ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table.
1069 * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
1070 * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
1071 * Larger values improve compression speed.
1072 * Deviating far from default value will likely result in a compression ratio decrease.
1073 * Special: value 0 means "automatically determine hashEveryLog". */
1074
1075 /* frame parameters */
1076 ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
1077 * Content size must be known at the beginning of compression,
1078 * it is provided using ZSTD_CCtx_setPledgedSrcSize() */
1079 ZSTD_p_checksumFlag, /* A 32-bits checksum of content is written at end of frame (default:0) */
1080 ZSTD_p_dictIDFlag, /* When applicable, dictionary's ID is written into frame header (default:1) */
1081
1082 /* multi-threading parameters */
1083 /* These parameters are only useful if multi-threading is enabled (ZSTD_MULTITHREAD).
1084 * They return an error otherwise. */
1085 ZSTD_p_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
1086 * When nbWorkers >= 1, triggers asynchronous mode :
1087 * ZSTD_compress_generic() consumes some input, flushes some output if possible, and immediately gives back control to caller,
1088 * while compression work is performed in parallel, within worker threads.
1089 * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
1090 * More workers improve speed, but also increase memory usage.
1091 * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
1092 ZSTD_p_jobSize, /* Size of a compression job. This value is enforced only in non-blocking mode.
1093 * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads.
1094 * 0 means default, which is dynamically determined based on compression parameters.
1095 * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
1096 * The minimum size is automatically and transparently enforced */
1097 ZSTD_p_overlapSizeLog, /* Size of previous input reloaded at the beginning of each job.
1098 * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
1099
1100 /* =================================================================== */
1101 /* experimental parameters - no stability guaranteed */
1102 /* =================================================================== */
1103
1104 ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
1105 * even when referencing into Dictionary content (default:0) */
1106 ZSTD_p_forceAttachDict, /* ZSTD supports usage of a CDict in-place
1107 * (avoiding having to copy the compression tables
1108 * from the CDict into the working context). Using
1109 * a CDict in this way saves an initial setup step,
1110 * but comes at the cost of more work per byte of
1111 * input. ZSTD has a simple internal heuristic that
1112 * guesses which strategy will be faster. You can
1113 * use this flag to override that guess.
1114 *
1115 * Note that the by-reference, in-place strategy is
1116 * only used when reusing a compression context
1117 * with compatible compression parameters. (If
1118 * incompatible / uninitialized, the working
1119 * context needs to be cleared anyway, which is
1120 * about as expensive as overwriting it with the
1121 * dictionary context, so there's no savings in
1122 * using the CDict by-ref.)
1123 *
1124 * Values greater than 0 force attaching the dict.
1125 * Values less than 0 force copying the dict.
1126 * 0 selects the default heuristic-guided behavior.
1127 */
1128
1129 } ZSTD_cParameter;
1130
1131
1132 /*! ZSTD_CCtx_setParameter() :
1133 * Set one compression parameter, selected by enum ZSTD_cParameter.
1134 * Setting a parameter is generally only possible during frame initialization (before starting compression).
1135 * Exception : when using multi-threading mode (nbThreads >= 1),
1136 * following parameters can be updated _during_ compression (within same frame):
1137 * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
1138 * new parameters will be active on next job, or after a flush().
1139 * Note : when `value` type is not unsigned (int, or enum), cast it to unsigned for proper type checking.
1140 * @result : informational value (typically, value being set, correctly clamped),
1141 * or an error code (which can be tested with ZSTD_isError()). */
1142 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
1143
1144 /*! ZSTD_CCtx_getParameter() :
1145 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1146 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1147 */
1148 ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value);
1149
1150 /*! ZSTD_CCtx_setPledgedSrcSize() :
1151 * Total input data size to be compressed as a single frame.
1152 * This value will be controlled at the end, and result in error if not respected.
1153 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1154 * Note 1 : 0 means zero, empty.
1155 * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
1156 * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new compression job.
1157 * Note 2 : If all data is provided and consumed in a single round,
1158 * this value is overriden by srcSize instead. */
1159 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
1160
1161 /*! ZSTD_CCtx_loadDictionary() :
1162 * Create an internal CDict from `dict` buffer.
1163 * Decompression will have to use same dictionary.
1164 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1165 * Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary,
1166 * meaning "return to no-dictionary mode".
1167 * Note 1 : Dictionary will be used for all future compression jobs.
1168 * To return to "no-dictionary" situation, load a NULL dictionary
1169 * Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
1170 * For this reason, compression parameters cannot be changed anymore after loading a dictionary.
1171 * It's also a CPU consuming operation, with non-negligible impact on latency.
1172 * Note 3 :`dict` content will be copied internally.
1173 * Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead.
1174 * In such a case, dictionary buffer must outlive its users.
1175 * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
1176 * to precisely select how dictionary content must be interpreted. */
1177 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
1178 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
1179 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
1180
1181
1182 /*! ZSTD_CCtx_refCDict() :
1183 * Reference a prepared dictionary, to be used for all next compression jobs.
1184 * Note that compression parameters are enforced from within CDict,
1185 * and supersede any compression parameter previously set within CCtx.
1186 * The dictionary will remain valid for future compression jobs using same CCtx.
1187 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1188 * Special : adding a NULL CDict means "return to no-dictionary mode".
1189 * Note 1 : Currently, only one dictionary can be managed.
1190 * Adding a new dictionary effectively "discards" any previous one.
1191 * Note 2 : CDict is just referenced, its lifetime must outlive CCtx. */
1192 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
1193
1194 /*! ZSTD_CCtx_refPrefix() :
1195 * Reference a prefix (single-usage dictionary) for next compression job.
1196 * Decompression will need same prefix to properly regenerate data.
1197 * Compressing with a prefix is similar in outcome to performing a diff and compressing it,
1198 * but performs much faster, especially during decompression (compression speed is tunable with compression level).
1199 * Note that prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end).
1200 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1201 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
1202 * Note 1 : Prefix buffer is referenced. It **must** outlive compression job.
1203 * Its content must remain unmodified up to end of compression (ZSTD_e_end).
1204 * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
1205 * ensure that the window size is large enough to contain the entire source.
1206 * See ZSTD_p_windowLog.
1207 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
1208 * It's a CPU consuming operation, with non-negligible impact on latency.
1209 * If there is a need to use same prefix multiple times, consider loadDictionary instead.
1210 * Note 4 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1211 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
1212 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
1213 const void* prefix, size_t prefixSize);
1214 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx,
1215 const void* prefix, size_t prefixSize,
1216 ZSTD_dictContentType_e dictContentType);
1217
1218 /*! ZSTD_CCtx_reset() :
1219 * Return a CCtx to clean state.
1220 * Useful after an error, or to interrupt an ongoing compression job and start a new one.
1221 * Any internal data not yet flushed is cancelled.
1222 * The parameters and dictionary are kept unchanged, to reset them use ZSTD_CCtx_resetParameters().
1223 */
1224 ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
1225
1226 /*! ZSTD_CCtx_resetParameters() :
1227 * All parameters are back to default values (compression level is ZSTD_CLEVEL_DEFAULT).
1228 * Dictionary (if any) is dropped.
1229 * Resetting parameters is only possible during frame initialization (before starting compression).
1230 * To reset the context use ZSTD_CCtx_reset().
1231 * @return 0 or an error code (which can be checked with ZSTD_isError()).
1232 */
1233 ZSTDLIB_API size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx);
1234
1235
1236
1237 typedef enum {
1238 ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal conditions */
1239 ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
1240 ZSTD_e_end /* flush any remaining data and close current frame. Any additional data starts a new frame. */
1241 } ZSTD_EndDirective;
1242
1243 /*! ZSTD_compress_generic() :
1244 * Behave about the same as ZSTD_compressStream. To note :
1245 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
1246 * - Compression parameters cannot be changed once compression is started.
1247 * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
1248 * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
1249 * - In single-thread mode (default), function is blocking : it completes its job before returning to caller.
1250 * - In multi-thread mode, function is non-blocking : it just acquires a copy of input, and distributes the job to internal worker threads,
1251 * and then immediately returns, just indicating that there is some data remaining to be flushed.
1252 * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least one byte.
1253 * - Exception : in multi-threading mode, if the first call requests a ZSTD_e_end directive, it is blocking : it will complete compression before giving back control to caller.
1254 * - @return provides a minimum amount of data remaining to be flushed from internal buffers
1255 * or an error code, which can be tested using ZSTD_isError().
1256 * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
1257 * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
1258 * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
1259 * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
1260 * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
1261 * Before starting a new compression job, or changing compression parameters,
1262 * it is required to fully flush internal buffers.
1263 */
1264 ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
1265 ZSTD_outBuffer* output,
1266 ZSTD_inBuffer* input,
1267 ZSTD_EndDirective endOp);
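
/* Editor's sketch of the intended calling pattern: push input with ZSTD_e_continue,
 * then finish the frame with ZSTD_e_end until it returns 0. read_chunk() and
 * write_out() are caller-provided I/O helpers (assumptions, not library functions);
 * error handling is reduced to an early return. */

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

extern size_t read_chunk(void* buf, size_t cap);        /* returns 0 at EOF */
extern void   write_out(const void* buf, size_t size);

static void stream_compress(ZSTD_CCtx* cctx)
{
    char inBuf[1 << 16], outBuf[1 << 16];
    size_t n;
    while ((n = read_chunk(inBuf, sizeof inBuf)) != 0) {
        ZSTD_inBuffer in = { inBuf, n, 0 };
        while (in.pos < in.size) {   /* consume the whole chunk */
            ZSTD_outBuffer out = { outBuf, sizeof outBuf, 0 };
            size_t const r = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_continue);
            if (ZSTD_isError(r)) return;
            write_out(outBuf, out.pos);
        }
    }
    for (;;) {   /* flush remaining data and close the frame */
        ZSTD_inBuffer in = { NULL, 0, 0 };
        ZSTD_outBuffer out = { outBuf, sizeof outBuf, 0 };
        size_t const remaining = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(remaining)) return;
        write_out(outBuf, out.pos);
        if (remaining == 0) break;   /* internal buffers fully flushed */
    }
}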
1268
1269
1270 /*! ZSTD_compress_generic_simpleArgs() :
1271 * Same as ZSTD_compress_generic(),
1272 * but using only integral types as arguments.
1273 * Argument list is larger than ZSTD_{in,out}Buffer,
1274 * but can be helpful for binders from dynamic languages
1275 * which have trouble handling structures containing memory pointers.
1276 */
1277 ZSTDLIB_API size_t ZSTD_compress_generic_simpleArgs (
1278 ZSTD_CCtx* cctx,
1279 void* dst, size_t dstCapacity, size_t* dstPos,
1280 const void* src, size_t srcSize, size_t* srcPos,
1281 ZSTD_EndDirective endOp);
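
/* Editor's sketch: the same one-shot call through the integral-types variant;
 * `cctx`, `dst`, `src` and their sizes are assumed to be in scope. */
size_t dstPos = 0, srcPos = 0;
size_t const remaining = ZSTD_compress_generic_simpleArgs(cctx,
                              dst, dstCapacity, &dstPos,
                              src, srcSize, &srcPos, ZSTD_e_end);
/* on success (remaining == 0) : dstPos bytes were written, srcPos bytes consumed */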
1282
1283
1284 /*! ZSTD_CCtx_params :
1285 * Quick howto :
1286 * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
1287 * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into
1288 * an existing ZSTD_CCtx_params structure.
1289 * This is similar to
1290 * ZSTD_CCtx_setParameter().
1291 * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
1292 * an existing CCtx.
1293 * These parameters will be applied to
1294 * all subsequent compression jobs.
1295 * - ZSTD_compress_generic() : Do compression using the CCtx.
1296 * - ZSTD_freeCCtxParams() : Free the memory.
1297 *
1298 * This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
1299 * for static allocation for single-threaded compression.
1300 */
1301 ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
1302 ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
1303
1304
1305 /*! ZSTD_CCtxParams_reset() :
1306 * Reset params to default values.
1307 */
1308 ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
1309
1310 /*! ZSTD_CCtxParams_init() :
1311 * Initializes the compression parameters of cctxParams according to
1312 * compression level. All other parameters are reset to their default values.
1313 */
1314 ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
1315
1316 /*! ZSTD_CCtxParams_init_advanced() :
1317 * Initializes the compression and frame parameters of cctxParams according to
1318 * params. All other parameters are reset to their default values.
1319 */
1320 ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
1321
1322
1323 /*! ZSTD_CCtxParam_setParameter() :
1324 * Similar to ZSTD_CCtx_setParameter.
1325 * Set one compression parameter, selected by enum ZSTD_cParameter.
1326 * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
1327 * Note : when `value` is an enum, cast it to unsigned for proper type checking.
1328 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1329 */
1330 ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
1331
1332 /*! ZSTD_CCtxParam_getParameter() :
1333 * Similar to ZSTD_CCtx_getParameter.
1334 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1335 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1336 */
1337 ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned* value);
1338
1339 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
1340 * Apply a set of ZSTD_CCtx_params to the compression context.
1341 * This can be done even after compression is started.
1342 * If nbWorkers==0, this will have no impact until a new compression is started.
1343 * If nbWorkers>=1, new parameters will be picked up at the next job,
1344 * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
1345 */
1346 ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
1347 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
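
/* Editor's sketch of the "quick howto" above (error checks omitted for brevity;
 * the ZSTD_p_* names are assumed from this generation of the API). */
ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
ZSTD_CCtxParams_init(params, 3);                           /* level-3 defaults */
ZSTD_CCtxParam_setParameter(params, ZSTD_p_checksumFlag, 1);
ZSTD_CCtxParam_setParameter(params, ZSTD_p_nbWorkers, 2);
ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);      /* applies to subsequent jobs */
ZSTD_freeCCtxParams(params);                               /* cctx keeps its own copy */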
1348
1349
1350 /* ==================================== */
1351 /*=== Advanced decompression API ===*/
1352 /* ==================================== */
1353
1354 /* The following API works the same way as the advanced compression API :
1355 * a context is created, parameters are pushed into it one by one,
1356 * then the context can be used to decompress data using an interface similar to the streaming API.
1357 */
1358
1359 /*! ZSTD_DCtx_loadDictionary() :
1360 * Create an internal DDict from dict buffer,
1361 * to be used to decompress next frames.
1362 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1363 * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
1364 * meaning "return to no-dictionary mode".
1365 * Note 1 : `dict` content will be copied internally.
1366 * Use ZSTD_DCtx_loadDictionary_byReference()
1367 * to reference dictionary content instead.
1368 * In which case, the dictionary buffer must outlive its users.
1369 * Note 2 : Loading a dictionary involves building tables,
1370 * which has a non-negligible impact on CPU usage and latency.
1371 * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select
1372 * how dictionary content will be interpreted and loaded.
1373 */
1374 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
1375 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
1376 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
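
/* Editor's sketch: load a dictionary for upcoming frames. `dictBuf` and `dictSize`
 * are assumed inputs. With the plain variant the content is copied, so the buffer
 * may be freed right after the call; with _byReference it must outlive every
 * decompression that uses it. */
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
size_t const r = ZSTD_DCtx_loadDictionary(dctx, dictBuf, dictSize);  /* content copied */
if (ZSTD_isError(r)) { /* report and bail out */ }
/* ... decompress frames produced with the same dictionary ... */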
1377
1378
1379 /*! ZSTD_DCtx_refDDict() :
1380 * Reference a prepared dictionary, to be used to decompress next frames.
1381 * The dictionary remains active for decompression of future frames using the same DCtx.
1382 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1383 * Note 1 : Currently, only one dictionary can be managed.
1384 * Referencing a new dictionary effectively "discards" any previous one.
1385 * Special : adding a NULL DDict means "return to no-dictionary mode".
1386 * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
1387 */
1388 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
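
/* Editor's sketch: share one prebuilt DDict across many frames, avoiding repeated
 * table construction. The DDict must outlive every DCtx that references it. */
ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
ZSTD_DCtx_refDDict(dctx, ddict);   /* referenced, not copied */
/* ... decompress any number of frames ... */
ZSTD_DCtx_refDDict(dctx, NULL);    /* return to no-dictionary mode */
ZSTD_freeDDict(ddict);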
1389
1390
1391 /*! ZSTD_DCtx_refPrefix() :
1392 * Reference a prefix (single-usage dictionary) for the next decompression job.
1393 * This is the reverse operation of ZSTD_CCtx_refPrefix(),
1394 * and must use the same prefix as the one used during compression.
1395 * Prefix is **only used once**. Reference is discarded at end of frame.
1396 * End of frame is reached when ZSTD_decompress_generic() returns 0.
1397 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1398 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary.
1399 * Note 2 : Prefix buffer is referenced. It **must** outlive the decompression job.
1400 * Prefix buffer must remain unmodified up to the end of frame,
1401 * reached when ZSTD_decompress_generic() returns 0.
1402 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
1403 * Use ZSTD_DCtx_refPrefix_advanced() to alter dictContentType.
1404 * Note 4 : Referencing a raw content prefix has almost no CPU nor memory cost.
1405 * A fulldict prefix is more costly though.
1406 */
1407 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
1408 const void* prefix, size_t prefixSize);
1409 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
1410 const void* prefix, size_t prefixSize,
1411 ZSTD_dictContentType_e dictContentType);
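
/* Editor's sketch: the decompression-side mirror of ZSTD_CCtx_refPrefix(). Since
 * the reference is dropped at end of frame, it must be set again before each frame
 * that was compressed with the prefix; `prefixBuf`/`prefixSize` are assumed inputs. */
ZSTD_DCtx_refPrefix(dctx, prefixBuf, prefixSize);  /* same bytes as at compression time */
/* ... run ZSTD_decompress_generic() until it returns 0 (end of frame) ... */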
1412
1413
1414 /*! ZSTD_DCtx_setMaxWindowSize() :
1415 * Refuse to allocate internal buffers for frames requiring a window size larger than the provided limit.
1416 * This is useful to prevent a decoder context from reserving too much memory for itself (potential attack scenario).
1417 * This parameter is only useful in streaming mode, since no internal buffer is allocated in direct mode.
1418 * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_MAX).
1419 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
1420 */
1421 ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
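
/* Editor's sketch: a defensive limit when decoding untrusted input, here refusing
 * frames that require more than a 128 MB window. */
size_t const r = ZSTD_DCtx_setMaxWindowSize(dctx, (size_t)1 << 27);
if (ZSTD_isError(r)) { /* report and bail out */ }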
1422
1423
1424 /*! ZSTD_DCtx_setFormat() :
1425 * Instruct the decoder context about what kind of data to decode next.
1426 * This instruction is mandatory to decode data without a fully-formed header,
1427 * such as ZSTD_f_zstd1_magicless.
1428 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
1429 */
1430 ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
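
/* Editor's sketch: announce that upcoming frames carry no 4-byte magic number,
 * then decompress as usual. */
ZSTD_DCtx_setFormat(dctx, ZSTD_f_zstd1_magicless);
/* ... feed the magicless frame through ZSTD_decompress_generic() ... */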
1431
1432
1433 /*! ZSTD_getFrameHeader_advanced() :
1434 * same as ZSTD_getFrameHeader(),
1435 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
1436 ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
1437 const void* src, size_t srcSize, ZSTD_format_e format);
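
/* Editor's sketch: probe a magicless frame header. A return of 0 means the header
 * was fully decoded into zfh; a value > 0 is the number of additional input bytes
 * needed; errors are reported through ZSTD_isError(). The field names are assumed
 * from the ZSTD_frameHeader definition in the static-linking section. */
ZSTD_frameHeader zfh;
size_t const r = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, ZSTD_f_zstd1_magicless);
if (r == 0) {
    /* zfh.frameContentSize, zfh.windowSize and zfh.dictID are now usable */
}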
1438
1439
1440 /*! ZSTD_decompress_generic() :
1441 * Behaves the same as ZSTD_decompressStream().
1442 * Decompression parameters cannot be changed once decompression is started.
1443 * @return : an error code, which can be tested using ZSTD_isError();
1444 * if >0, a hint : the number of expected input bytes for the next invocation;
1445 * `0` means a frame has just been fully decoded and flushed.
1446 */
1447 ZSTDLIB_API size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx,
1448 ZSTD_outBuffer* output,
1449 ZSTD_inBuffer* input);
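
/* Editor's sketch of the matching read loop: a return of 0 marks a fully decoded
 * and flushed frame; a nonzero return doubles as a size hint for the next input.
 * read_chunk()/write_out() are the same assumed helpers as in the compression
 * sketch above. */
static void stream_decompress(ZSTD_DCtx* dctx)
{
    char inBuf[1 << 16], outBuf[1 << 16];
    size_t n;
    while ((n = read_chunk(inBuf, sizeof inBuf)) != 0) {
        ZSTD_inBuffer in = { inBuf, n, 0 };
        while (in.pos < in.size) {
            ZSTD_outBuffer out = { outBuf, sizeof outBuf, 0 };
            size_t const hint = ZSTD_decompress_generic(dctx, &out, &in);
            if (ZSTD_isError(hint)) return;
            write_out(outBuf, out.pos);
            /* hint == 0 : a frame just completed; any remaining input
             * belongs to a following frame */
        }
    }
}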
1450
1451
1452 /*! ZSTD_decompress_generic_simpleArgs() :
1453 * Same as ZSTD_decompress_generic(),
1454 * but using only integral types as arguments.
1455 * Argument list is larger than ZSTD_{in,out}Buffer,
1456 * but can be helpful for binders from dynamic languages
1457 * which have trouble handling structures containing memory pointers.
1458 */
1459 ZSTDLIB_API size_t ZSTD_decompress_generic_simpleArgs (
1460 ZSTD_DCtx* dctx,
1461 void* dst, size_t dstCapacity, size_t* dstPos,
1462 const void* src, size_t srcSize, size_t* srcPos);
1463
1464
1465 /*! ZSTD_DCtx_reset() :
1466 * Return a DCtx to clean state.
1467 * If a decompression was ongoing, any internal data not yet flushed is cancelled.
1468 * All parameters are back to default values, including sticky ones.
1469 * Dictionary (if any) is dropped.
1470 * Parameters can be modified again after a reset.
1471 */
1472 ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);
1473
1474
1725
1726 /* ============================ */
1727 /** Block level API */
@@ -1491,10 +1741,10 b' ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DC'
1741 + copyCCtx() and copyDCtx() can be used too
1742 - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
1743 + If input is larger than a block size, it's necessary to split input data into multiple blocks
1494 + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead.
1744 + For inputs larger than a single block, really consider using regular ZSTD_compress() instead.
1745 Frame metadata is not that costly, and quickly becomes negligible as source size grows larger.
1746 - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
1497 In which case, nothing is produced into `dst`.
1747 In which case, nothing is produced into `dst` !
1748 + User must test for such outcome and deal directly with uncompressed data
1749 + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
1750 + In case of multiple successive blocks, should some of them be uncompressed,
@@ -36,14 +36,6 b''
36 > -X mercurial/thirdparty \
37 > | sed 's|\\|/|g' | xargs "$PYTHON" contrib/check-py3-compat.py \
38 > | sed 's/[0-9][0-9]*)$/*)/'
39 contrib/python-zstandard/tests/test_compressor.py:324: SyntaxWarning: invalid escape sequence \( (py38 !)
40 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'): (py38 !)
41 contrib/python-zstandard/tests/test_compressor.py:1329: SyntaxWarning: invalid escape sequence \( (py38 !)
42 'cannot call compress\(\) after compression finished'): (py38 !)
43 contrib/python-zstandard/tests/test_compressor.py:1341: SyntaxWarning: invalid escape sequence \( (py38 !)
44 'cannot call flush\(\) after compression finished'): (py38 !)
45 contrib/python-zstandard/tests/test_compressor.py:1353: SyntaxWarning: invalid escape sequence \( (py38 !)
46 'cannot call finish\(\) after compression finished'): (py38 !)
39 hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob) (?)
40 hgext/infinitepush/sqlindexapi.py: error importing: <*Error> No module named 'mysql' (error at sqlindexapi.py:*) (glob) (?)
41 mercurial/scmwindows.py: error importing: <ValueError> _type_ 'v' not supported (error at win32.py:*) (no-windows !)
@@ -729,7 +729,7 b' Defining an invalid content encoding res'
729 s> \r\n
730 s> 25\r\n
731 s> \x1d\x00\x00\x01\x00\x02\x042
732 s> (\xb5/\xfd\x00P\xa4\x00\x00p\xa1FstatusBok\x81T\x00\x01\x00\tP\x02
732 s> (\xb5/\xfd\x00X\xa4\x00\x00p\xa1FstatusBok\x81T\x00\x01\x00\tP\x02
733 s> \r\n
734 s> 0\r\n
735 s> \r\n
@@ -96,7 +96,7 b' zstd is used if available'
96
97 $ get-with-headers.py --hgproto '0.2 comp=zstd' $LOCALIP:$HGPORT '?cmd=getbundle&heads=e93700bd72895c5addab234c56d4024b487a362f&common=0000000000000000000000000000000000000000' > resp
98 $ f --size --hexdump --bytes 36 --sha1 resp
99 resp: size=248, sha1=4d8d8f87fb82bd542ce52881fdc94f850748
99 resp: size=248, sha1=f11b5c098c638068b3d5fe2f9e6241bf5228
100 0000: 32 30 30 20 53 63 72 69 70 74 20 6f 75 74 70 75 |200 Script outpu|
101 0010: 74 20 66 6f 6c 6c 6f 77 73 0a 0a 04 7a 73 74 64 |t follows...zstd|
102 0020: 28 b5 2f fd |(./.|