Show More
This diff has been collapsed as it changes many lines, (535 lines changed) Show them Hide them | |||
@@ -0,0 +1,535 | |||
|
1 | /* | |
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This source code is licensed under both the BSD-style license (found in the | |
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
|
7 | * in the COPYING file in the root directory of this source tree). | |
|
8 | * You may select, at your option, one of the above-listed licenses. | |
|
9 | */ | |
|
10 | ||
|
11 | #ifndef ZSTD_CWKSP_H | |
|
12 | #define ZSTD_CWKSP_H | |
|
13 | ||
|
14 | /*-************************************* | |
|
15 | * Dependencies | |
|
16 | ***************************************/ | |
|
17 | #include "zstd_internal.h" | |
|
18 | ||
|
19 | #if defined (__cplusplus) | |
|
20 | extern "C" { | |
|
21 | #endif | |
|
22 | ||
|
23 | /*-************************************* | |
|
24 | * Constants | |
|
25 | ***************************************/ | |
|
26 | ||
|
27 | /* define "workspace is too large" as this number of times larger than needed */ | |
|
28 | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 | |
|
29 | ||
|
30 | /* when workspace is continuously too large | |
|
31 | * during at least this number of times, | |
|
32 | * context's memory usage is considered wasteful, | |
|
33 | * because it's sized to handle a worst case scenario which rarely happens. | |
|
34 | * In which case, resize it down to free some memory */ | |
|
35 | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 | |
|
36 | ||
|
37 | /* Since the workspace is effectively its own little malloc implementation / | |
|
38 | * arena, when we run under ASAN, we should similarly insert redzones between | |
|
39 | * each internal element of the workspace, so ASAN will catch overruns that | |
|
40 | * reach outside an object but that stay inside the workspace. | |
|
41 | * | |
|
42 | * This defines the size of that redzone. | |
|
43 | */ | |
|
44 | #ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE | |
|
45 | #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 | |
|
46 | #endif | |
|
47 | ||
|
48 | /*-************************************* | |
|
49 | * Structures | |
|
50 | ***************************************/ | |
|
/* Allocation phases. The workspace hands out memory in this fixed order;
 * the phase only ever advances (see ZSTD_cwksp_internal_advance_phase). */
typedef enum {
    ZSTD_cwksp_alloc_objects,  /* whole-lifetime objects, packed from the bottom */
    ZSTD_cwksp_alloc_buffers,  /* unaligned buffers, packed down from the top */
    ZSTD_cwksp_alloc_aligned   /* U32-aligned buffers and tables */
} ZSTD_cwksp_alloc_phase_e;
|
56 | ||
|
57 | /** | |
|
58 | * Zstd fits all its internal datastructures into a single continuous buffer, | |
|
59 | * so that it only needs to perform a single OS allocation (or so that a buffer | |
|
60 | * can be provided to it and it can perform no allocations at all). This buffer | |
|
61 | * is called the workspace. | |
|
62 | * | |
|
63 | * Several optimizations complicate that process of allocating memory ranges | |
|
64 | * from this workspace for each internal datastructure: | |
|
65 | * | |
|
66 | * - These different internal datastructures have different setup requirements: | |
|
67 | * | |
|
68 | * - The static objects need to be cleared once and can then be trivially | |
|
69 | * reused for each compression. | |
|
70 | * | |
|
71 | * - Various buffers don't need to be initialized at all--they are always | |
|
72 | * written into before they're read. | |
|
73 | * | |
|
74 | * - The matchstate tables have a unique requirement that they don't need | |
|
75 | * their memory to be totally cleared, but they do need the memory to have | |
|
76 | * some bound, i.e., a guarantee that all values in the memory they've been | |
|
77 | * allocated is less than some maximum value (which is the starting value | |
|
78 | * for the indices that they will then use for compression). When this | |
|
79 | * guarantee is provided to them, they can use the memory without any setup | |
|
80 | * work. When it can't, they have to clear the area. | |
|
81 | * | |
|
82 | * - These buffers also have different alignment requirements. | |
|
83 | * | |
|
84 | * - We would like to reuse the objects in the workspace for multiple | |
|
85 | * compressions without having to perform any expensive reallocation or | |
|
86 | * reinitialization work. | |
|
87 | * | |
|
88 | * - We would like to be able to efficiently reuse the workspace across | |
|
89 | * multiple compressions **even when the compression parameters change** and | |
|
90 | * we need to resize some of the objects (where possible). | |
|
91 | * | |
|
92 | * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp | |
|
93 | * abstraction was created. It works as follows: | |
|
94 | * | |
|
95 | * Workspace Layout: | |
|
96 | * | |
|
97 | * [ ... workspace ... ] | |
|
98 | * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] | |
|
99 | * | |
|
100 | * The various objects that live in the workspace are divided into the | |
|
101 | * following categories, and are allocated separately: | |
|
102 | * | |
|
103 | * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, | |
|
104 | * so that literally everything fits in a single buffer. Note: if present, | |
|
105 | * this must be the first object in the workspace, since ZSTD_free{CCtx, | |
|
106 | * CDict}() rely on a pointer comparison to see whether one or two frees are | |
|
107 | * required. | |
|
108 | * | |
|
109 | * - Fixed size objects: these are fixed-size, fixed-count objects that are | |
|
110 | * nonetheless "dynamically" allocated in the workspace so that we can | |
|
111 | * control how they're initialized separately from the broader ZSTD_CCtx. | |
|
112 | * Examples: | |
|
113 | * - Entropy Workspace | |
|
114 | * - 2 x ZSTD_compressedBlockState_t | |
|
115 | * - CDict dictionary contents | |
|
116 | * | |
|
117 | * - Tables: these are any of several different datastructures (hash tables, | |
|
118 | * chain tables, binary trees) that all respect a common format: they are | |
|
119 | * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). | |
|
120 | * Their sizes depend on the cparams. | |
|
121 | * | |
|
122 | * - Aligned: these buffers are used for various purposes that require 4 byte | |
|
123 | * alignment, but don't require any initialization before they're used. | |
|
124 | * | |
|
125 | * - Buffers: these buffers are used for various purposes that don't require | |
|
126 | * any alignment or initialization before they're used. This means they can | |
|
127 | * be moved around at no cost for a new compression. | |
|
128 | * | |
|
129 | * Allocating Memory: | |
|
130 | * | |
|
131 | * The various types of objects must be allocated in order, so they can be | |
|
132 | * correctly packed into the workspace buffer. That order is: | |
|
133 | * | |
|
134 | * 1. Objects | |
|
135 | * 2. Buffers | |
|
136 | * 3. Aligned | |
|
137 | * 4. Tables | |
|
138 | * | |
|
139 | * Attempts to reserve objects of different types out of order will fail. | |
|
140 | */ | |
|
typedef struct {
    void* workspace;        /* start of the managed buffer */
    void* workspaceEnd;     /* one past the end of the managed buffer */

    void* objectEnd;        /* end of the bottom-allocated objects region */
    void* tableEnd;         /* end of the table region (grows up from objectEnd) */
    void* tableValidEnd;    /* high-water mark of table space whose contents are known bounded */
    void* allocStart;       /* bottom of the top-down (buffer/aligned) allocations */

    int allocFailed;                 /* sticky flag: set when any reservation fails */
    int workspaceOversizedDuration;  /* consecutive uses during which the ws was judged too large */
    ZSTD_cwksp_alloc_phase_e phase;  /* current allocation phase (advances monotonically) */
} ZSTD_cwksp;
|
154 | ||
|
155 | /*-************************************* | |
|
156 | * Functions | |
|
157 | ***************************************/ | |
|
158 | ||
|
159 | MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); | |
|
160 | ||
|
/**
 * Asserts the ordering invariants between the workspace's internal pointers:
 *   workspace <= objectEnd <= {tableEnd, tableValidEnd} <= allocStart <= workspaceEnd
 * Compiles to nothing when asserts are disabled; (void)ws avoids an
 * unused-parameter warning in that configuration.
 */
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
    (void)ws;
    assert(ws->workspace <= ws->objectEnd);
    assert(ws->objectEnd <= ws->tableEnd);
    assert(ws->objectEnd <= ws->tableValidEnd);
    assert(ws->tableEnd <= ws->allocStart);
    assert(ws->tableValidEnd <= ws->allocStart);
    assert(ws->allocStart <= ws->workspaceEnd);
}
|
170 | ||
|
171 | /** | |
|
172 | * Align must be a power of 2. | |
|
173 | */ | |
|
174 | MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { | |
|
175 | size_t const mask = align - 1; | |
|
176 | assert((align & mask) == 0); | |
|
177 | return (size + mask) & ~mask; | |
|
178 | } | |
|
179 | ||
|
/**
 * Use this to determine how much space in the workspace we will consume to
 * allocate this object. (Normally it should be exactly the size of the object,
 * but under special conditions, like ASAN, where we pad each object, it might
 * be larger.)
 *
 * Since tables aren't currently redzoned, you don't need to call through this
 * to figure out how much space you need for the matchState tables. Everything
 * else is though.
 */
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* one redzone before the object and one after it */
    return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
    return size;
#endif
}
|
197 | ||
|
/**
 * Moves the workspace into the given allocation phase, performing the
 * one-time bookkeeping each transition requires. Phases only advance
 * (objects -> buffers -> aligned); requesting an earlier phase is a bug.
 */
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
    assert(phase >= ws->phase);
    if (phase > ws->phase) {
        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
                phase >= ZSTD_cwksp_alloc_buffers) {
            /* entering the buffers phase: no table contents are valid yet */
            ws->tableValidEnd = ws->objectEnd;
        }
        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
                phase >= ZSTD_cwksp_alloc_aligned) {
            /* If unaligned allocations down from a too-large top have left us
             * unaligned, we need to realign our alloc ptr. Technically, this
             * can consume space that is unaccounted for in the neededSpace
             * calculation. However, I believe this can only happen when the
             * workspace is too large, and specifically when it is too large
             * by a larger margin than the space that will be consumed. */
            /* TODO: cleaner, compiler warning friendly way to do this??? */
            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
            if (ws->allocStart < ws->tableValidEnd) {
                /* realignment ate into previously-valid table space */
                ws->tableValidEnd = ws->allocStart;
            }
        }
        ws->phase = phase;
    }
}
|
223 | ||
|
224 | /** | |
|
225 | * Returns whether this object/buffer/etc was allocated in this workspace. | |
|
226 | */ | |
|
227 | MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { | |
|
228 | return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); | |
|
229 | } | |
|
230 | ||
|
/**
 * Internal function. Do not use directly.
 *
 * Reserves `bytes` from the top-down allocation area after advancing to the
 * requested phase. Returns NULL (and sets ws->allocFailed) when the new
 * allocation would collide with the upward-growing table region. Under ASAN,
 * the reservation is padded with a redzone on each side and only the interior
 * `bytes` are unpoisoned.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
    void* alloc;
    void* bottom = ws->tableEnd;
    ZSTD_cwksp_internal_advance_phase(ws, phase);
    alloc = (BYTE *)ws->allocStart - bytes;

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* over-reserve space */
    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif

    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
    ZSTD_cwksp_assert_internal_consistency(ws);
    assert(alloc >= bottom);
    if (alloc < bottom) {
        DEBUGLOG(4, "cwksp: alloc failed!");
        ws->allocFailed = 1;
        return NULL;
    }
    /* table space overlapped by the new allocation is no longer valid */
    if (alloc < ws->tableValidEnd) {
        ws->tableValidEnd = alloc;
    }
    ws->allocStart = alloc;

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
     * either side. */
    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
    __asan_unpoison_memory_region(alloc, bytes);
#endif

    return alloc;
}
|
269 | ||
|
270 | /** | |
|
271 | * Reserves and returns unaligned memory. | |
|
272 | */ | |
|
273 | MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { | |
|
274 | return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); | |
|
275 | } | |
|
276 | ||
|
277 | /** | |
|
278 | * Reserves and returns memory sized on and aligned on sizeof(unsigned). | |
|
279 | */ | |
|
280 | MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { | |
|
281 | assert((bytes & (sizeof(U32)-1)) == 0); | |
|
282 | return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); | |
|
283 | } | |
|
284 | ||
|
/**
 * Aligned on sizeof(unsigned). These buffers have the special property that
 * their values remain constrained, allowing us to re-use them without
 * memset()-ing them.
 *
 * Tables grow upward from tableEnd; the reservation fails (NULL, allocFailed
 * set) when it would collide with the downward-growing allocation area.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
    void* alloc = ws->tableEnd;
    void* end = (BYTE *)alloc + bytes;
    void* top = ws->allocStart;

    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
    assert((bytes & (sizeof(U32)-1)) == 0);  /* caller must pass a U32 multiple */
    ZSTD_cwksp_internal_advance_phase(ws, phase);
    ZSTD_cwksp_assert_internal_consistency(ws);
    assert(end <= top);
    if (end > top) {
        DEBUGLOG(4, "cwksp: table alloc failed!");
        ws->allocFailed = 1;
        return NULL;
    }
    ws->tableEnd = end;

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* tables aren't redzoned; just make the reserved range addressable */
    __asan_unpoison_memory_region(alloc, bytes);
#endif

    return alloc;
}
|
315 | ||
|
/**
 * Aligned on sizeof(void*).
 *
 * Objects can only be reserved while still in the first (objects) phase;
 * once any buffer/aligned/table reservation has happened, this fails.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
    void* alloc = ws->objectEnd;
    void* end = (BYTE*)alloc + roundedBytes;

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* over-reserve space */
    end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif

    DEBUGLOG(5,
        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
    assert((bytes & (sizeof(void*)-1)) == 0);
    ZSTD_cwksp_assert_internal_consistency(ws);
    /* we must be in the first phase, no advance is possible */
    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
        DEBUGLOG(4, "cwksp: object alloc failed!");
        ws->allocFailed = 1;
        return NULL;
    }
    /* tables and buffers start where the objects end */
    ws->objectEnd = end;
    ws->tableEnd = end;
    ws->tableValidEnd = end;

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
     * either side. */
    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
    __asan_unpoison_memory_region(alloc, bytes);
#endif

    return alloc;
}
|
354 | ||
|
/**
 * Marks all table space as dirty: its contents can no longer be assumed
 * bounded, so it must be cleaned (zeroed) before the bound guarantee is
 * relied upon again.
 */
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");

#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the table re-use logic is sound, and that we don't
     * access table space that we haven't cleaned, we re-"poison" the table
     * space every time we mark it dirty. */
    {
        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
        /* -1 from __msan_test_shadow means the whole range is readable */
        assert(__msan_test_shadow(ws->objectEnd, size) == -1);
        __msan_poison(ws->objectEnd, size);
    }
#endif

    assert(ws->tableValidEnd >= ws->objectEnd);
    assert(ws->tableValidEnd <= ws->allocStart);
    ws->tableValidEnd = ws->objectEnd;
    ZSTD_cwksp_assert_internal_consistency(ws);
}
|
374 | ||
|
/**
 * Marks all currently-reserved table space as clean: tableValidEnd is raised
 * to cover tableEnd, so subsequent users may rely on the bound guarantee
 * without re-zeroing.
 */
MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
    assert(ws->tableValidEnd >= ws->objectEnd);
    assert(ws->tableValidEnd <= ws->allocStart);
    if (ws->tableValidEnd < ws->tableEnd) {
        ws->tableValidEnd = ws->tableEnd;
    }
    ZSTD_cwksp_assert_internal_consistency(ws);
}
|
384 | ||
|
385 | /** | |
|
386 | * Zero the part of the allocated tables not already marked clean. | |
|
387 | */ | |
|
388 | MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { | |
|
389 | DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); | |
|
390 | assert(ws->tableValidEnd >= ws->objectEnd); | |
|
391 | assert(ws->tableValidEnd <= ws->allocStart); | |
|
392 | if (ws->tableValidEnd < ws->tableEnd) { | |
|
393 | memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); | |
|
394 | } | |
|
395 | ZSTD_cwksp_mark_tables_clean(ws); | |
|
396 | } | |
|
397 | ||
|
/**
 * Invalidates table allocations.
 * All other allocations remain valid.
 */
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: clearing tables!");

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* make the freed table space trip ASAN if touched before re-reservation */
    {
        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
        __asan_poison_memory_region(ws->objectEnd, size);
    }
#endif

    /* tables region collapses back to the end of the objects */
    ws->tableEnd = ws->objectEnd;
    ZSTD_cwksp_assert_internal_consistency(ws);
}
|
415 | ||
|
/**
 * Invalidates all buffer, aligned, and table allocations.
 * Object allocations remain valid.
 */
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: clearing!");

#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the context re-use logic is sound, and that we don't
     * access stuff that this compression hasn't initialized, we re-"poison"
     * the workspace (or at least the non-static, non-table parts of it)
     * every time we start a new compression. */
    {
        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
        __msan_poison(ws->tableValidEnd, size);
    }
#endif

#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* everything past the objects becomes off-limits until re-reserved */
    {
        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
        __asan_poison_memory_region(ws->objectEnd, size);
    }
#endif

    ws->tableEnd = ws->objectEnd;
    ws->allocStart = ws->workspaceEnd;
    ws->allocFailed = 0;
    /* fall back to the buffers phase, but never behind the objects phase */
    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
        ws->phase = ZSTD_cwksp_alloc_buffers;
    }
    ZSTD_cwksp_assert_internal_consistency(ws);
}
|
449 | ||
|
/**
 * The provided workspace takes ownership of the buffer [start, start+size).
 * Any existing values in the workspace are ignored (the previously managed
 * buffer, if present, must be separately freed).
 */
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
    ws->workspace = start;
    ws->workspaceEnd = (BYTE*)start + size;
    ws->objectEnd = ws->workspace;
    ws->tableValidEnd = ws->objectEnd;
    ws->phase = ZSTD_cwksp_alloc_objects;
    /* clear() initializes tableEnd, allocStart, and allocFailed */
    ZSTD_cwksp_clear(ws);
    ws->workspaceOversizedDuration = 0;
    ZSTD_cwksp_assert_internal_consistency(ws);
}
|
467 | ||
|
468 | MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { | |
|
469 | void* workspace = ZSTD_malloc(size, customMem); | |
|
470 | DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); | |
|
471 | RETURN_ERROR_IF(workspace == NULL, memory_allocation); | |
|
472 | ZSTD_cwksp_init(ws, workspace, size); | |
|
473 | return 0; | |
|
474 | } | |
|
475 | ||
|
476 | MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { | |
|
477 | void *ptr = ws->workspace; | |
|
478 | DEBUGLOG(4, "cwksp: freeing workspace"); | |
|
479 | memset(ws, 0, sizeof(ZSTD_cwksp)); | |
|
480 | ZSTD_free(ptr, customMem); | |
|
481 | } | |
|
482 | ||
|
483 | /** | |
|
484 | * Moves the management of a workspace from one cwksp to another. The src cwksp | |
|
485 | * is left in an invalid state (src must be re-init()'ed before its used again). | |
|
486 | */ | |
|
487 | MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { | |
|
488 | *dst = *src; | |
|
489 | memset(src, 0, sizeof(ZSTD_cwksp)); | |
|
490 | } | |
|
491 | ||
|
492 | MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { | |
|
493 | return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); | |
|
494 | } | |
|
495 | ||
|
496 | MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { | |
|
497 | return ws->allocFailed; | |
|
498 | } | |
|
499 | ||
|
500 | /*-************************************* | |
|
501 | * Functions Checking Free Space | |
|
502 | ***************************************/ | |
|
503 | ||
|
504 | MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { | |
|
505 | return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); | |
|
506 | } | |
|
507 | ||
|
508 | MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { | |
|
509 | return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; | |
|
510 | } | |
|
511 | ||
|
512 | MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { | |
|
513 | return ZSTD_cwksp_check_available( | |
|
514 | ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); | |
|
515 | } | |
|
516 | ||
|
517 | MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { | |
|
518 | return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) | |
|
519 | && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; | |
|
520 | } | |
|
521 | ||
|
522 | MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( | |
|
523 | ZSTD_cwksp* ws, size_t additionalNeededSpace) { | |
|
524 | if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { | |
|
525 | ws->workspaceOversizedDuration++; | |
|
526 | } else { | |
|
527 | ws->workspaceOversizedDuration = 0; | |
|
528 | } | |
|
529 | } | |
|
530 | ||
|
531 | #if defined (__cplusplus) | |
|
532 | } | |
|
533 | #endif | |
|
534 | ||
|
535 | #endif /* ZSTD_CWKSP_H */ |
@@ -52,6 +52,7 contrib/python-zstandard/zstd/compress/z | |||
|
52 | 52 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.h |
|
53 | 53 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c |
|
54 | 54 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h |
|
55 | contrib/python-zstandard/zstd/compress/zstd_cwksp.h | |
|
55 | 56 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c |
|
56 | 57 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h |
|
57 | 58 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
@@ -43,13 +43,18 Actions Blocking Release | |||
|
43 | 43 | * Support modifying compression parameters mid operation when supported by |
|
44 | 44 | zstd API. |
|
45 | 45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. |
|
46 | * Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants. | |
|
46 | 47 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. |
|
47 |
* Support ``ZSTD_ |
|
|
48 | * Support ``ZSTD_dictForceLoad`` dictionary compression parameter. | |
|
49 | * Support ``ZSTD_c_targetCBlockSize`` compression parameter. | |
|
50 | * Support ``ZSTD_c_literalCompressionMode`` compression parameter. | |
|
51 | * Support ``ZSTD_c_srcSizeHint`` compression parameter. | |
|
48 | 52 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving |
|
49 | 53 | compression parameters. |
|
50 | 54 | * Consider exposing ``ZSTDMT_toFlushNow()``. |
|
51 | 55 | * Expose ``ZDICT_trainFromBuffer_fastCover()``, |
|
52 | 56 | ``ZDICT_optimizeTrainFromBuffer_fastCover``. |
|
57 | * Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API. | |
|
53 | 58 | * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level. |
|
54 | 59 | * Consider a ``chunker()`` API for decompression. |
|
55 | 60 | * Consider stats for ``chunker()`` API, including finding the last consumed |
@@ -67,6 +72,20 Other Actions Not Blocking Release | |||
|
67 | 72 | * API for ensuring max memory ceiling isn't exceeded. |
|
68 | 73 | * Move off nose for testing. |
|
69 | 74 | |
|
75 | 0.13.0 (released 2019-12-28) | |
|
76 | ============================ | |
|
77 | ||
|
78 | Changes | |
|
79 | ------- | |
|
80 | ||
|
81 | * ``pytest-xdist`` ``pytest`` extension is now installed so tests can be | |
|
82 | run in parallel. | |
|
83 | * CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels | |
|
84 | instead of a mix of ``manylinux2010`` and ``manylinux1``. | |
|
85 | * Official support for Python 3.8 has been added. | |
|
86 | * Bundled zstandard library upgraded from 1.4.3 to 1.4.4. | |
|
87 | * Python code has been reformatted with black. | |
|
88 | ||
|
70 | 89 | 0.12.0 (released 2019-09-15) |
|
71 | 90 | ============================ |
|
72 | 91 |
@@ -20,7 +20,7 https://github.com/indygreg/python-zstan | |||
|
20 | 20 | Requirements |
|
21 | 21 | ============ |
|
22 | 22 | |
|
23 |
This extension is designed to run with Python 2.7, 3. |
|
|
23 | This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8 | |
|
24 | 24 | on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above. |
|
25 | 25 | x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS. |
|
26 | 26 |
@@ -16,7 +16,7 | |||
|
16 | 16 | #include <zdict.h> |
|
17 | 17 | |
|
18 | 18 | /* Remember to change the string in zstandard/__init__ as well */ |
|
19 |
#define PYTHON_ZSTANDARD_VERSION "0.1 |
|
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.13.0" | |
|
20 | 20 | |
|
21 | 21 | typedef enum { |
|
22 | 22 | compressorobj_flush_finish, |
@@ -16,80 +16,82 import tempfile | |||
|
16 | 16 | |
|
17 | 17 | HERE = os.path.abspath(os.path.dirname(__file__)) |
|
18 | 18 | |
|
19 | SOURCES = ['zstd/%s' % p for p in ( | |
|
20 | 'common/debug.c', | |
|
21 | 'common/entropy_common.c', | |
|
22 | 'common/error_private.c', | |
|
23 | 'common/fse_decompress.c', | |
|
24 | 'common/pool.c', | |
|
25 | 'common/threading.c', | |
|
26 |
|
|
|
27 |
|
|
|
28 | 'compress/fse_compress.c', | |
|
29 | 'compress/hist.c', | |
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
|
40 | 'decompress/huf_decompress.c', | |
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
|
44 | 'dictBuilder/cover.c', | |
|
45 | 'dictBuilder/fastcover.c', | |
|
46 |
|
|
|
47 |
|
|
|
48 | )] | |
|
19 | SOURCES = [ | |
|
20 | "zstd/%s" % p | |
|
21 | for p in ( | |
|
22 | "common/debug.c", | |
|
23 | "common/entropy_common.c", | |
|
24 | "common/error_private.c", | |
|
25 | "common/fse_decompress.c", | |
|
26 | "common/pool.c", | |
|
27 | "common/threading.c", | |
|
28 | "common/xxhash.c", | |
|
29 | "common/zstd_common.c", | |
|
30 | "compress/fse_compress.c", | |
|
31 | "compress/hist.c", | |
|
32 | "compress/huf_compress.c", | |
|
33 | "compress/zstd_compress.c", | |
|
34 | "compress/zstd_compress_literals.c", | |
|
35 | "compress/zstd_compress_sequences.c", | |
|
36 | "compress/zstd_double_fast.c", | |
|
37 | "compress/zstd_fast.c", | |
|
38 | "compress/zstd_lazy.c", | |
|
39 | "compress/zstd_ldm.c", | |
|
40 | "compress/zstd_opt.c", | |
|
41 | "compress/zstdmt_compress.c", | |
|
42 | "decompress/huf_decompress.c", | |
|
43 | "decompress/zstd_ddict.c", | |
|
44 | "decompress/zstd_decompress.c", | |
|
45 | "decompress/zstd_decompress_block.c", | |
|
46 | "dictBuilder/cover.c", | |
|
47 | "dictBuilder/fastcover.c", | |
|
48 | "dictBuilder/divsufsort.c", | |
|
49 | "dictBuilder/zdict.c", | |
|
50 | ) | |
|
51 | ] | |
|
49 | 52 | |
|
50 | 53 | # Headers whose preprocessed output will be fed into cdef(). |
|
51 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( | |
|
52 | ('zstd.h',), | |
|
53 | ('dictBuilder', 'zdict.h'), | |
|
54 | )] | |
|
54 | HEADERS = [ | |
|
55 | os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),) | |
|
56 | ] | |
|
55 | 57 | |
|
56 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( | |
|
57 | 'zstd', | |
|
58 | 'zstd/common', | |
|
59 | 'zstd/compress', | |
|
60 | 'zstd/decompress', | |
|
61 | 'zstd/dictBuilder', | |
|
62 | )] | |
|
58 | INCLUDE_DIRS = [ | |
|
59 | os.path.join(HERE, d) | |
|
60 | for d in ( | |
|
61 | "zstd", | |
|
62 | "zstd/common", | |
|
63 | "zstd/compress", | |
|
64 | "zstd/decompress", | |
|
65 | "zstd/dictBuilder", | |
|
66 | ) | |
|
67 | ] | |
|
63 | 68 | |
|
64 | 69 | # cffi can't parse some of the primitives in zstd.h. So we invoke the |
|
65 | 70 | # preprocessor and feed its output into cffi. |
|
66 | 71 | compiler = distutils.ccompiler.new_compiler() |
|
67 | 72 | |
|
68 | 73 | # Needed for MSVC. |
|
69 |
if hasattr(compiler, |
|
|
74 | if hasattr(compiler, "initialize"): | |
|
70 | 75 | compiler.initialize() |
|
71 | 76 | |
|
72 | 77 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor |
|
73 | 78 | # manually. |
|
74 |
if compiler.compiler_type == |
|
|
75 |
args = list(compiler.executables[ |
|
|
76 |
args.extend( |
|
|
77 | '-E', | |
|
78 | '-DZSTD_STATIC_LINKING_ONLY', | |
|
79 | '-DZDICT_STATIC_LINKING_ONLY', | |
|
80 | ]) | |
|
81 | elif compiler.compiler_type == 'msvc': | |
|
79 | if compiler.compiler_type == "unix": | |
|
80 | args = list(compiler.executables["compiler"]) | |
|
81 | args.extend( | |
|
82 | ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",] | |
|
83 | ) | |
|
84 | elif compiler.compiler_type == "msvc": | |
|
82 | 85 | args = [compiler.cc] |
|
83 |
args.extend( |
|
|
84 | '/EP', | |
|
85 | '/DZSTD_STATIC_LINKING_ONLY', | |
|
86 | '/DZDICT_STATIC_LINKING_ONLY', | |
|
87 | ]) | |
|
86 | args.extend( | |
|
87 | ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",] | |
|
88 | ) | |
|
88 | 89 | else: |
|
89 |
raise Exception( |
|
|
90 | raise Exception("unsupported compiler type: %s" % compiler.compiler_type) | |
|
91 | ||
|
90 | 92 | |
|
91 | 93 | def preprocess(path): |
|
92 |
with open(path, |
|
|
94 | with open(path, "rb") as fh: | |
|
93 | 95 | lines = [] |
|
94 | 96 | it = iter(fh) |
|
95 | 97 | |
@@ -104,32 +106,44 def preprocess(path): | |||
|
104 | 106 | # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline |
|
105 | 107 | # #define in zstdmt_compress.h and results in a compiler warning. So drop |
|
106 | 108 | # the inline #define. |
|
107 |
if l.startswith( |
|
|
108 | b'#include "zstd.h"', | |
|
109 | b'#define ZSTD_STATIC_LINKING_ONLY')): | |
|
109 | if l.startswith( | |
|
110 | ( | |
|
111 | b"#include <stddef.h>", | |
|
112 | b'#include "zstd.h"', | |
|
113 | b"#define ZSTD_STATIC_LINKING_ONLY", | |
|
114 | ) | |
|
115 | ): | |
|
110 | 116 | continue |
|
111 | 117 | |
|
118 | # The preprocessor environment on Windows doesn't define include | |
|
119 | # paths, so the #include of limits.h fails. We work around this | |
|
120 | # by removing that import and defining INT_MAX ourselves. This is | |
|
121 | # a bit hacky. But it gets the job done. | |
|
122 | # TODO make limits.h work on Windows so we ensure INT_MAX is | |
|
123 | # correct. | |
|
124 | if l.startswith(b"#include <limits.h>"): | |
|
125 | l = b"#define INT_MAX 2147483647\n" | |
|
126 | ||
|
112 | 127 | # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't |
|
113 | 128 | # important so just filter it out. |
|
114 |
if l.startswith(b |
|
|
115 |
l = l[len(b |
|
|
129 | if l.startswith(b"ZSTDLIB_API"): | |
|
130 | l = l[len(b"ZSTDLIB_API ") :] | |
|
116 | 131 | |
|
117 | 132 | lines.append(l) |
|
118 | 133 | |
|
119 |
fd, input_file = tempfile.mkstemp(suffix= |
|
|
120 |
os.write(fd, b |
|
|
134 | fd, input_file = tempfile.mkstemp(suffix=".h") | |
|
135 | os.write(fd, b"".join(lines)) | |
|
121 | 136 | os.close(fd) |
|
122 | 137 | |
|
123 | 138 | try: |
|
124 | 139 | env = dict(os.environ) |
|
125 |
if getattr(compiler, |
|
|
126 |
env[ |
|
|
127 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, | |
|
128 | env=env) | |
|
140 | if getattr(compiler, "_paths", None): | |
|
141 | env["PATH"] = compiler._paths | |
|
142 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env) | |
|
129 | 143 | output = process.communicate()[0] |
|
130 | 144 | ret = process.poll() |
|
131 | 145 | if ret: |
|
132 |
raise Exception( |
|
|
146 | raise Exception("preprocessor exited with error") | |
|
133 | 147 | |
|
134 | 148 | return output |
|
135 | 149 | finally: |
@@ -141,16 +155,16 def normalize_output(output): | |||
|
141 | 155 | for line in output.splitlines(): |
|
142 | 156 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. |
|
143 | 157 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): |
|
144 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] | |
|
158 | line = line[len(b'__attribute__ ((visibility ("default"))) ') :] | |
|
145 | 159 | |
|
146 |
if line.startswith(b |
|
|
160 | if line.startswith(b"__attribute__((deprecated("): | |
|
147 | 161 | continue |
|
148 |
elif b |
|
|
162 | elif b"__declspec(deprecated(" in line: | |
|
149 | 163 | continue |
|
150 | 164 | |
|
151 | 165 | lines.append(line) |
|
152 | 166 | |
|
153 |
return b |
|
|
167 | return b"\n".join(lines) | |
|
154 | 168 | |
|
155 | 169 | |
|
156 | 170 | ffi = cffi.FFI() |
@@ -159,18 +173,22 ffi = cffi.FFI() | |||
|
159 | 173 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning |
|
160 | 174 | # when cffi uses the function. Since we statically link against zstd, even |
|
161 | 175 | # if we use the deprecated functions it shouldn't be a huge problem. |
|
162 |
ffi.set_source( |
|
|
176 | ffi.set_source( | |
|
177 | "_zstd_cffi", | |
|
178 | """ | |
|
163 | 179 |
|
|
164 | 180 |
|
|
165 | 181 |
|
|
166 | 182 |
|
|
167 | 183 |
|
|
168 | 184 |
|
|
169 | ''', sources=SOURCES, | |
|
170 | include_dirs=INCLUDE_DIRS, | |
|
171 | extra_compile_args=['-DZSTD_MULTITHREAD']) | |
|
185 | """, | |
|
186 | sources=SOURCES, | |
|
187 | include_dirs=INCLUDE_DIRS, | |
|
188 | extra_compile_args=["-DZSTD_MULTITHREAD"], | |
|
189 | ) | |
|
172 | 190 | |
|
173 |
DEFINE = re.compile(b |
|
|
191 | DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ") | |
|
174 | 192 | |
|
175 | 193 | sources = [] |
|
176 | 194 | |
@@ -181,27 +199,27 for header in HEADERS: | |||
|
181 | 199 | |
|
182 | 200 | # #define's are effectively erased as part of going through preprocessor. |
|
183 | 201 | # So perform a manual pass to re-add those to the cdef source. |
|
184 |
with open(header, |
|
|
202 | with open(header, "rb") as fh: | |
|
185 | 203 | for line in fh: |
|
186 | 204 | line = line.strip() |
|
187 | 205 | m = DEFINE.match(line) |
|
188 | 206 | if not m: |
|
189 | 207 | continue |
|
190 | 208 | |
|
191 |
if m.group(1) == b |
|
|
209 | if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY": | |
|
192 | 210 | continue |
|
193 | 211 | |
|
194 | 212 | # The parser doesn't like some constants with complex values. |
|
195 |
if m.group(1) in (b |
|
|
213 | if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"): | |
|
196 | 214 | continue |
|
197 | 215 | |
|
198 | 216 | # The ... is magic syntax by the cdef parser to resolve the |
|
199 | 217 | # value at compile time. |
|
200 |
sources.append(m.group(0) + b |
|
|
218 | sources.append(m.group(0) + b" ...") | |
|
201 | 219 | |
|
202 |
cdeflines = b |
|
|
220 | cdeflines = b"\n".join(sources).splitlines() | |
|
203 | 221 | cdeflines = [l for l in cdeflines if l.strip()] |
|
204 |
ffi.cdef(b |
|
|
222 | ffi.cdef(b"\n".join(cdeflines).decode("latin1")) | |
|
205 | 223 | |
|
206 |
if __name__ == |
|
|
224 | if __name__ == "__main__": | |
|
207 | 225 | ffi.compile() |
@@ -16,7 +16,7 from setuptools import setup | |||
|
16 | 16 | # (like memoryview). |
|
17 | 17 | # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid |
|
18 | 18 | # garbage collection pitfalls. |
|
19 |
MINIMUM_CFFI_VERSION = |
|
|
19 | MINIMUM_CFFI_VERSION = "1.11" | |
|
20 | 20 | |
|
21 | 21 | try: |
|
22 | 22 | import cffi |
@@ -26,9 +26,11 try: | |||
|
26 | 26 | # out the CFFI version here and reject CFFI if it is too old. |
|
27 | 27 | cffi_version = LooseVersion(cffi.__version__) |
|
28 | 28 | if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION): |
|
29 | print('CFFI 1.11 or newer required (%s found); ' | |
|
30 | 'not building CFFI backend' % cffi_version, | |
|
31 | file=sys.stderr) | |
|
29 | print( | |
|
30 | "CFFI 1.11 or newer required (%s found); " | |
|
31 | "not building CFFI backend" % cffi_version, | |
|
32 | file=sys.stderr, | |
|
33 | ) | |
|
32 | 34 | cffi = None |
|
33 | 35 | |
|
34 | 36 | except ImportError: |
@@ -40,73 +42,77 SUPPORT_LEGACY = False | |||
|
40 | 42 | SYSTEM_ZSTD = False |
|
41 | 43 | WARNINGS_AS_ERRORS = False |
|
42 | 44 | |
|
43 |
if os.environ.get( |
|
|
45 | if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""): | |
|
44 | 46 | WARNINGS_AS_ERRORS = True |
|
45 | 47 | |
|
46 |
if |
|
|
48 | if "--legacy" in sys.argv: | |
|
47 | 49 | SUPPORT_LEGACY = True |
|
48 |
sys.argv.remove( |
|
|
50 | sys.argv.remove("--legacy") | |
|
49 | 51 | |
|
50 |
if |
|
|
52 | if "--system-zstd" in sys.argv: | |
|
51 | 53 | SYSTEM_ZSTD = True |
|
52 |
sys.argv.remove( |
|
|
54 | sys.argv.remove("--system-zstd") | |
|
53 | 55 | |
|
54 |
if |
|
|
56 | if "--warnings-as-errors" in sys.argv: | |
|
55 | 57 | WARNINGS_AS_ERRORS = True |
|
56 |
sys.argv.remove( |
|
|
58 | sys.argv.remove("--warning-as-errors") | |
|
57 | 59 | |
|
58 | 60 | # Code for obtaining the Extension instance is in its own module to |
|
59 | 61 | # facilitate reuse in other projects. |
|
60 | 62 | extensions = [ |
|
61 |
setup_zstd.get_c_extension( |
|
|
62 | support_legacy=SUPPORT_LEGACY, | |
|
63 | system_zstd=SYSTEM_ZSTD, | |
|
64 | warnings_as_errors=WARNINGS_AS_ERRORS), | |
|
63 | setup_zstd.get_c_extension( | |
|
64 | name="zstd", | |
|
65 | support_legacy=SUPPORT_LEGACY, | |
|
66 | system_zstd=SYSTEM_ZSTD, | |
|
67 | warnings_as_errors=WARNINGS_AS_ERRORS, | |
|
68 | ), | |
|
65 | 69 | ] |
|
66 | 70 | |
|
67 | 71 | install_requires = [] |
|
68 | 72 | |
|
69 | 73 | if cffi: |
|
70 | 74 | import make_cffi |
|
75 | ||
|
71 | 76 | extensions.append(make_cffi.ffi.distutils_extension()) |
|
72 |
install_requires.append( |
|
|
77 | install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION) | |
|
73 | 78 | |
|
74 | 79 | version = None |
|
75 | 80 | |
|
76 |
with open( |
|
|
81 | with open("c-ext/python-zstandard.h", "r") as fh: | |
|
77 | 82 | for line in fh: |
|
78 |
if not line.startswith( |
|
|
83 | if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"): | |
|
79 | 84 | continue |
|
80 | 85 | |
|
81 | 86 | version = line.split()[2][1:-1] |
|
82 | 87 | break |
|
83 | 88 | |
|
84 | 89 | if not version: |
|
85 |
raise Exception( |
|
|
86 | 'this should never happen') | |
|
90 | raise Exception("could not resolve package version; " "this should never happen") | |
|
87 | 91 | |
|
88 | 92 | setup( |
|
89 |
name= |
|
|
93 | name="zstandard", | |
|
90 | 94 | version=version, |
|
91 |
description= |
|
|
92 |
long_description=open( |
|
|
93 |
url= |
|
|
94 |
author= |
|
|
95 |
author_email= |
|
|
96 |
license= |
|
|
95 | description="Zstandard bindings for Python", | |
|
96 | long_description=open("README.rst", "r").read(), | |
|
97 | url="https://github.com/indygreg/python-zstandard", | |
|
98 | author="Gregory Szorc", | |
|
99 | author_email="gregory.szorc@gmail.com", | |
|
100 | license="BSD", | |
|
97 | 101 | classifiers=[ |
|
98 |
|
|
|
99 |
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
|
|
|
103 |
|
|
|
104 |
|
|
|
105 |
|
|
|
102 | "Development Status :: 4 - Beta", | |
|
103 | "Intended Audience :: Developers", | |
|
104 | "License :: OSI Approved :: BSD License", | |
|
105 | "Programming Language :: C", | |
|
106 | "Programming Language :: Python :: 2.7", | |
|
107 | "Programming Language :: Python :: 3.5", | |
|
108 | "Programming Language :: Python :: 3.6", | |
|
109 | "Programming Language :: Python :: 3.7", | |
|
110 | "Programming Language :: Python :: 3.8", | |
|
106 | 111 | ], |
|
107 |
keywords= |
|
|
108 |
packages=[ |
|
|
112 | keywords="zstandard zstd compression", | |
|
113 | packages=["zstandard"], | |
|
109 | 114 | ext_modules=extensions, |
|
110 |
test_suite= |
|
|
115 | test_suite="tests", | |
|
111 | 116 | install_requires=install_requires, |
|
117 | tests_require=["hypothesis"], | |
|
112 | 118 | ) |
@@ -10,97 +10,110 import os | |||
|
10 | 10 | from distutils.extension import Extension |
|
11 | 11 | |
|
12 | 12 | |
|
13 | zstd_sources = ['zstd/%s' % p for p in ( | |
|
14 | 'common/debug.c', | |
|
15 | 'common/entropy_common.c', | |
|
16 | 'common/error_private.c', | |
|
17 | 'common/fse_decompress.c', | |
|
18 | 'common/pool.c', | |
|
19 | 'common/threading.c', | |
|
20 |
|
|
|
21 |
|
|
|
22 | 'compress/fse_compress.c', | |
|
23 | 'compress/hist.c', | |
|
24 |
|
|
|
25 | 'compress/zstd_compress_literals.c', | |
|
26 |
|
|
|
27 |
|
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 | 'decompress/huf_decompress.c', | |
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 | 'dictBuilder/cover.c', | |
|
39 | 'dictBuilder/divsufsort.c', | |
|
40 |
|
|
|
41 |
|
|
|
42 | )] | |
|
13 | zstd_sources = [ | |
|
14 | "zstd/%s" % p | |
|
15 | for p in ( | |
|
16 | "common/debug.c", | |
|
17 | "common/entropy_common.c", | |
|
18 | "common/error_private.c", | |
|
19 | "common/fse_decompress.c", | |
|
20 | "common/pool.c", | |
|
21 | "common/threading.c", | |
|
22 | "common/xxhash.c", | |
|
23 | "common/zstd_common.c", | |
|
24 | "compress/fse_compress.c", | |
|
25 | "compress/hist.c", | |
|
26 | "compress/huf_compress.c", | |
|
27 | "compress/zstd_compress_literals.c", | |
|
28 | "compress/zstd_compress_sequences.c", | |
|
29 | "compress/zstd_compress.c", | |
|
30 | "compress/zstd_double_fast.c", | |
|
31 | "compress/zstd_fast.c", | |
|
32 | "compress/zstd_lazy.c", | |
|
33 | "compress/zstd_ldm.c", | |
|
34 | "compress/zstd_opt.c", | |
|
35 | "compress/zstdmt_compress.c", | |
|
36 | "decompress/huf_decompress.c", | |
|
37 | "decompress/zstd_ddict.c", | |
|
38 | "decompress/zstd_decompress.c", | |
|
39 | "decompress/zstd_decompress_block.c", | |
|
40 | "dictBuilder/cover.c", | |
|
41 | "dictBuilder/divsufsort.c", | |
|
42 | "dictBuilder/fastcover.c", | |
|
43 | "dictBuilder/zdict.c", | |
|
44 | ) | |
|
45 | ] | |
|
43 | 46 | |
|
44 |
zstd_sources_legacy = [ |
|
|
45 | 'deprecated/zbuff_common.c', | |
|
46 | 'deprecated/zbuff_compress.c', | |
|
47 |
|
|
|
48 | 'legacy/zstd_v01.c', | |
|
49 | 'legacy/zstd_v02.c', | |
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 | )] | |
|
47 | zstd_sources_legacy = [ | |
|
48 | "zstd/%s" % p | |
|
49 | for p in ( | |
|
50 | "deprecated/zbuff_common.c", | |
|
51 | "deprecated/zbuff_compress.c", | |
|
52 | "deprecated/zbuff_decompress.c", | |
|
53 | "legacy/zstd_v01.c", | |
|
54 | "legacy/zstd_v02.c", | |
|
55 | "legacy/zstd_v03.c", | |
|
56 | "legacy/zstd_v04.c", | |
|
57 | "legacy/zstd_v05.c", | |
|
58 | "legacy/zstd_v06.c", | |
|
59 | "legacy/zstd_v07.c", | |
|
60 | ) | |
|
61 | ] | |
|
56 | 62 | |
|
57 | 63 | zstd_includes = [ |
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
64 | "zstd", | |
|
65 | "zstd/common", | |
|
66 | "zstd/compress", | |
|
67 | "zstd/decompress", | |
|
68 | "zstd/dictBuilder", | |
|
63 | 69 | ] |
|
64 | 70 | |
|
65 | 71 | zstd_includes_legacy = [ |
|
66 |
|
|
|
67 |
|
|
|
72 | "zstd/deprecated", | |
|
73 | "zstd/legacy", | |
|
68 | 74 | ] |
|
69 | 75 | |
|
70 | 76 | ext_includes = [ |
|
71 |
|
|
|
72 |
|
|
|
77 | "c-ext", | |
|
78 | "zstd/common", | |
|
73 | 79 | ] |
|
74 | 80 | |
|
75 | 81 | ext_sources = [ |
|
76 |
|
|
|
77 |
|
|
|
78 | 'zstd.c', | |
|
79 | 'c-ext/bufferutil.c', | |
|
80 | 'c-ext/compressiondict.c', | |
|
81 | 'c-ext/compressobj.c', | |
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
|
|
|
86 |
|
|
|
87 |
|
|
|
88 |
|
|
|
89 |
|
|
|
90 |
|
|
|
91 |
|
|
|
92 |
|
|
|
93 |
|
|
|
94 | 'c-ext/frameparams.c', | |
|
82 | "zstd/common/error_private.c", | |
|
83 | "zstd/common/pool.c", | |
|
84 | "zstd/common/threading.c", | |
|
85 | "zstd/common/zstd_common.c", | |
|
86 | "zstd.c", | |
|
87 | "c-ext/bufferutil.c", | |
|
88 | "c-ext/compressiondict.c", | |
|
89 | "c-ext/compressobj.c", | |
|
90 | "c-ext/compressor.c", | |
|
91 | "c-ext/compressoriterator.c", | |
|
92 | "c-ext/compressionchunker.c", | |
|
93 | "c-ext/compressionparams.c", | |
|
94 | "c-ext/compressionreader.c", | |
|
95 | "c-ext/compressionwriter.c", | |
|
96 | "c-ext/constants.c", | |
|
97 | "c-ext/decompressobj.c", | |
|
98 | "c-ext/decompressor.c", | |
|
99 | "c-ext/decompressoriterator.c", | |
|
100 | "c-ext/decompressionreader.c", | |
|
101 | "c-ext/decompressionwriter.c", | |
|
102 | "c-ext/frameparams.c", | |
|
95 | 103 | ] |
|
96 | 104 | |
|
97 | 105 | zstd_depends = [ |
|
98 |
|
|
|
106 | "c-ext/python-zstandard.h", | |
|
99 | 107 | ] |
|
100 | 108 | |
|
101 | 109 | |
|
102 | def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', | |
|
103 | warnings_as_errors=False, root=None): | |
|
110 | def get_c_extension( | |
|
111 | support_legacy=False, | |
|
112 | system_zstd=False, | |
|
113 | name="zstd", | |
|
114 | warnings_as_errors=False, | |
|
115 | root=None, | |
|
116 | ): | |
|
104 | 117 | """Obtain a distutils.extension.Extension for the C extension. |
|
105 | 118 | |
|
106 | 119 | ``support_legacy`` controls whether to compile in legacy zstd format support. |
@@ -125,17 +138,16 def get_c_extension(support_legacy=False | |||
|
125 | 138 | if not system_zstd: |
|
126 | 139 | sources.update([os.path.join(actual_root, p) for p in zstd_sources]) |
|
127 | 140 | if support_legacy: |
|
128 | sources.update([os.path.join(actual_root, p) | |
|
129 | for p in zstd_sources_legacy]) | |
|
141 | sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy]) | |
|
130 | 142 | sources = list(sources) |
|
131 | 143 | |
|
132 | 144 | include_dirs = set([os.path.join(actual_root, d) for d in ext_includes]) |
|
133 | 145 | if not system_zstd: |
|
134 | include_dirs.update([os.path.join(actual_root, d) | |
|
135 | for d in zstd_includes]) | |
|
146 | include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes]) | |
|
136 | 147 | if support_legacy: |
|
137 |
include_dirs.update( |
|
|
138 |
|
|
|
148 | include_dirs.update( | |
|
149 | [os.path.join(actual_root, d) for d in zstd_includes_legacy] | |
|
150 | ) | |
|
139 | 151 | include_dirs = list(include_dirs) |
|
140 | 152 | |
|
141 | 153 | depends = [os.path.join(actual_root, p) for p in zstd_depends] |
@@ -143,41 +155,40 def get_c_extension(support_legacy=False | |||
|
143 | 155 | compiler = distutils.ccompiler.new_compiler() |
|
144 | 156 | |
|
145 | 157 | # Needed for MSVC. |
|
146 |
if hasattr(compiler, |
|
|
158 | if hasattr(compiler, "initialize"): | |
|
147 | 159 | compiler.initialize() |
|
148 | 160 | |
|
149 |
if compiler.compiler_type == |
|
|
150 |
compiler_type = |
|
|
151 |
elif compiler.compiler_type == |
|
|
152 |
compiler_type = |
|
|
153 |
elif compiler.compiler_type == |
|
|
154 |
compiler_type = |
|
|
161 | if compiler.compiler_type == "unix": | |
|
162 | compiler_type = "unix" | |
|
163 | elif compiler.compiler_type == "msvc": | |
|
164 | compiler_type = "msvc" | |
|
165 | elif compiler.compiler_type == "mingw32": | |
|
166 | compiler_type = "mingw32" | |
|
155 | 167 | else: |
|
156 |
raise Exception( |
|
|
157 | compiler.compiler_type) | |
|
168 | raise Exception("unhandled compiler type: %s" % compiler.compiler_type) | |
|
158 | 169 | |
|
159 |
extra_args = [ |
|
|
170 | extra_args = ["-DZSTD_MULTITHREAD"] | |
|
160 | 171 | |
|
161 | 172 | if not system_zstd: |
|
162 |
extra_args.append( |
|
|
163 |
extra_args.append( |
|
|
164 |
extra_args.append( |
|
|
173 | extra_args.append("-DZSTDLIB_VISIBILITY=") | |
|
174 | extra_args.append("-DZDICTLIB_VISIBILITY=") | |
|
175 | extra_args.append("-DZSTDERRORLIB_VISIBILITY=") | |
|
165 | 176 | |
|
166 |
if compiler_type == |
|
|
167 |
extra_args.append( |
|
|
177 | if compiler_type == "unix": | |
|
178 | extra_args.append("-fvisibility=hidden") | |
|
168 | 179 | |
|
169 | 180 | if not system_zstd and support_legacy: |
|
170 |
extra_args.append( |
|
|
181 | extra_args.append("-DZSTD_LEGACY_SUPPORT=1") | |
|
171 | 182 | |
|
172 | 183 | if warnings_as_errors: |
|
173 |
if compiler_type in ( |
|
|
174 |
extra_args.append( |
|
|
175 |
elif compiler_type == |
|
|
176 |
extra_args.append( |
|
|
184 | if compiler_type in ("unix", "mingw32"): | |
|
185 | extra_args.append("-Werror") | |
|
186 | elif compiler_type == "msvc": | |
|
187 | extra_args.append("/WX") | |
|
177 | 188 | else: |
|
178 | 189 | assert False |
|
179 | 190 | |
|
180 |
libraries = [ |
|
|
191 | libraries = ["zstd"] if system_zstd else [] | |
|
181 | 192 | |
|
182 | 193 | # Python 3.7 doesn't like absolute paths. So normalize to relative. |
|
183 | 194 | sources = [os.path.relpath(p, root) for p in sources] |
@@ -185,8 +196,11 def get_c_extension(support_legacy=False | |||
|
185 | 196 | depends = [os.path.relpath(p, root) for p in depends] |
|
186 | 197 | |
|
187 | 198 | # TODO compile with optimizations. |
|
188 |
return Extension( |
|
|
189 | include_dirs=include_dirs, | |
|
190 | depends=depends, | |
|
191 | extra_compile_args=extra_args, | |
|
192 | libraries=libraries) | |
|
199 | return Extension( | |
|
200 | name, | |
|
201 | sources, | |
|
202 | include_dirs=include_dirs, | |
|
203 | depends=depends, | |
|
204 | extra_compile_args=extra_args, | |
|
205 | libraries=libraries, | |
|
206 | ) |
@@ -3,6 +3,7 import inspect | |||
|
3 | 3 | import io |
|
4 | 4 | import os |
|
5 | 5 | import types |
|
6 | import unittest | |
|
6 | 7 | |
|
7 | 8 | try: |
|
8 | 9 | import hypothesis |
@@ -10,39 +11,46 except ImportError: | |||
|
10 | 11 | hypothesis = None |
|
11 | 12 | |
|
12 | 13 | |
|
14 | class TestCase(unittest.TestCase): | |
|
15 | if not getattr(unittest.TestCase, "assertRaisesRegex", False): | |
|
16 | assertRaisesRegex = unittest.TestCase.assertRaisesRegexp | |
|
17 | ||
|
18 | ||
|
13 | 19 | def make_cffi(cls): |
|
14 | 20 | """Decorator to add CFFI versions of each test method.""" |
|
15 | 21 | |
|
16 | 22 | # The module containing this class definition should |
|
17 | 23 | # `import zstandard as zstd`. Otherwise things may blow up. |
|
18 | 24 | mod = inspect.getmodule(cls) |
|
19 |
if not hasattr(mod, |
|
|
25 | if not hasattr(mod, "zstd"): | |
|
20 | 26 | raise Exception('test module does not contain "zstd" symbol') |
|
21 | 27 | |
|
22 |
if not hasattr(mod.zstd, |
|
|
23 | raise Exception('zstd symbol does not have "backend" attribute; did ' | |
|
24 | 'you `import zstandard as zstd`?') | |
|
28 | if not hasattr(mod.zstd, "backend"): | |
|
29 | raise Exception( | |
|
30 | 'zstd symbol does not have "backend" attribute; did ' | |
|
31 | "you `import zstandard as zstd`?" | |
|
32 | ) | |
|
25 | 33 | |
|
26 | 34 | # If `import zstandard` already chose the cffi backend, there is nothing |
|
27 | 35 | # for us to do: we only add the cffi variation if the default backend |
|
28 | 36 | # is the C extension. |
|
29 |
if mod.zstd.backend == |
|
|
37 | if mod.zstd.backend == "cffi": | |
|
30 | 38 | return cls |
|
31 | 39 | |
|
32 | 40 | old_env = dict(os.environ) |
|
33 |
os.environ[ |
|
|
41 | os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi" | |
|
34 | 42 | try: |
|
35 | 43 | try: |
|
36 |
mod_info = imp.find_module( |
|
|
37 |
mod = imp.load_module( |
|
|
44 | mod_info = imp.find_module("zstandard") | |
|
45 | mod = imp.load_module("zstandard_cffi", *mod_info) | |
|
38 | 46 | except ImportError: |
|
39 | 47 | return cls |
|
40 | 48 | finally: |
|
41 | 49 | os.environ.clear() |
|
42 | 50 | os.environ.update(old_env) |
|
43 | 51 | |
|
44 |
if mod.backend != |
|
|
45 |
raise Exception( |
|
|
52 | if mod.backend != "cffi": | |
|
53 | raise Exception("got the zstandard %s backend instead of cffi" % mod.backend) | |
|
46 | 54 | |
|
47 | 55 | # If CFFI version is available, dynamically construct test methods |
|
48 | 56 | # that use it. |
@@ -52,27 +60,31 def make_cffi(cls): | |||
|
52 | 60 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): |
|
53 | 61 | continue |
|
54 | 62 | |
|
55 |
if not fn.__name__.startswith( |
|
|
63 | if not fn.__name__.startswith("test_"): | |
|
56 | 64 | continue |
|
57 | 65 | |
|
58 |
name = |
|
|
66 | name = "%s_cffi" % fn.__name__ | |
|
59 | 67 | |
|
60 | 68 | # Replace the "zstd" symbol with the CFFI module instance. Then copy |
|
61 | 69 | # the function object and install it in a new attribute. |
|
62 | 70 | if isinstance(fn, types.FunctionType): |
|
63 | 71 | globs = dict(fn.__globals__) |
|
64 |
globs[ |
|
|
65 |
new_fn = types.FunctionType( |
|
|
66 |
|
|
|
72 | globs["zstd"] = mod | |
|
73 | new_fn = types.FunctionType( | |
|
74 | fn.__code__, globs, name, fn.__defaults__, fn.__closure__ | |
|
75 | ) | |
|
67 | 76 | new_method = new_fn |
|
68 | 77 | else: |
|
69 | 78 | globs = dict(fn.__func__.func_globals) |
|
70 |
globs[ |
|
|
71 |
new_fn = types.FunctionType( |
|
|
72 |
|
|
|
73 | fn.__func__.func_closure) | |
|
74 | new_method = types.UnboundMethodType(new_fn, fn.im_self, | |
|
75 | fn.im_class) | |
|
79 | globs["zstd"] = mod | |
|
80 | new_fn = types.FunctionType( | |
|
81 | fn.__func__.func_code, | |
|
82 | globs, | |
|
83 | name, | |
|
84 | fn.__func__.func_defaults, | |
|
85 | fn.__func__.func_closure, | |
|
86 | ) | |
|
87 | new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class) | |
|
76 | 88 | |
|
77 | 89 | setattr(cls, name, new_method) |
|
78 | 90 | |
@@ -84,6 +96,7 class NonClosingBytesIO(io.BytesIO): | |||
|
84 | 96 | |
|
85 | 97 | This allows us to access written data after close(). |
|
86 | 98 | """ |
|
99 | ||
|
87 | 100 | def __init__(self, *args, **kwargs): |
|
88 | 101 | super(NonClosingBytesIO, self).__init__(*args, **kwargs) |
|
89 | 102 | self._saved_buffer = None |
@@ -135,7 +148,7 def random_input_data(): | |||
|
135 | 148 | dirs[:] = list(sorted(dirs)) |
|
136 | 149 | for f in sorted(files): |
|
137 | 150 | try: |
|
138 |
with open(os.path.join(root, f), |
|
|
151 | with open(os.path.join(root, f), "rb") as fh: | |
|
139 | 152 | data = fh.read() |
|
140 | 153 | if data: |
|
141 | 154 | _source_files.append(data) |
@@ -154,11 +167,11 def random_input_data(): | |||
|
154 | 167 | |
|
155 | 168 | def generate_samples(): |
|
156 | 169 | inputs = [ |
|
157 |
b |
|
|
158 |
b |
|
|
159 |
b |
|
|
160 |
b |
|
|
161 |
b |
|
|
170 | b"foo", | |
|
171 | b"bar", | |
|
172 | b"abcdef", | |
|
173 | b"sometext", | |
|
174 | b"baz", | |
|
162 | 175 | ] |
|
163 | 176 | |
|
164 | 177 | samples = [] |
@@ -173,13 +186,12 def generate_samples(): | |||
|
173 | 186 | |
|
174 | 187 | if hypothesis: |
|
175 | 188 | default_settings = hypothesis.settings(deadline=10000) |
|
176 |
hypothesis.settings.register_profile( |
|
|
189 | hypothesis.settings.register_profile("default", default_settings) | |
|
177 | 190 | |
|
178 | 191 | ci_settings = hypothesis.settings(deadline=20000, max_examples=1000) |
|
179 |
hypothesis.settings.register_profile( |
|
|
192 | hypothesis.settings.register_profile("ci", ci_settings) | |
|
180 | 193 | |
|
181 | 194 | expensive_settings = hypothesis.settings(deadline=None, max_examples=10000) |
|
182 |
hypothesis.settings.register_profile( |
|
|
195 | hypothesis.settings.register_profile("expensive", expensive_settings) | |
|
183 | 196 | |
|
184 | hypothesis.settings.load_profile( | |
|
185 | os.environ.get('HYPOTHESIS_PROFILE', 'default')) | |
|
197 | hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default")) |
@@ -3,104 +3,114 import unittest | |||
|
3 | 3 | |
|
4 | 4 | import zstandard as zstd |
|
5 | 5 | |
|
6 | ss = struct.Struct('=QQ') | |
|
6 | from .common import TestCase | |
|
7 | ||
|
8 | ss = struct.Struct("=QQ") | |
|
7 | 9 | |
|
8 | 10 | |
|
9 |
class TestBufferWithSegments( |
|
|
11 | class TestBufferWithSegments(TestCase): | |
|
10 | 12 | def test_arguments(self): |
|
11 |
if not hasattr(zstd, |
|
|
12 |
self.skipTest( |
|
|
13 | if not hasattr(zstd, "BufferWithSegments"): | |
|
14 | self.skipTest("BufferWithSegments not available") | |
|
13 | 15 | |
|
14 | 16 | with self.assertRaises(TypeError): |
|
15 | 17 | zstd.BufferWithSegments() |
|
16 | 18 | |
|
17 | 19 | with self.assertRaises(TypeError): |
|
18 |
zstd.BufferWithSegments(b |
|
|
20 | zstd.BufferWithSegments(b"foo") | |
|
19 | 21 | |
|
20 | 22 | # Segments data should be a multiple of 16. |
|
21 | with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'): | |
|
22 | zstd.BufferWithSegments(b'foo', b'\x00\x00') | |
|
23 | with self.assertRaisesRegex( | |
|
24 | ValueError, "segments array size is not a multiple of 16" | |
|
25 | ): | |
|
26 | zstd.BufferWithSegments(b"foo", b"\x00\x00") | |
|
23 | 27 | |
|
24 | 28 | def test_invalid_offset(self): |
|
25 |
if not hasattr(zstd, |
|
|
26 |
self.skipTest( |
|
|
29 | if not hasattr(zstd, "BufferWithSegments"): | |
|
30 | self.skipTest("BufferWithSegments not available") | |
|
27 | 31 | |
|
28 | with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'): | |
|
29 | zstd.BufferWithSegments(b'foo', ss.pack(0, 4)) | |
|
32 | with self.assertRaisesRegex( | |
|
33 | ValueError, "offset within segments array references memory" | |
|
34 | ): | |
|
35 | zstd.BufferWithSegments(b"foo", ss.pack(0, 4)) | |
|
30 | 36 | |
|
31 | 37 | def test_invalid_getitem(self): |
|
32 |
if not hasattr(zstd, |
|
|
33 |
self.skipTest( |
|
|
38 | if not hasattr(zstd, "BufferWithSegments"): | |
|
39 | self.skipTest("BufferWithSegments not available") | |
|
34 | 40 | |
|
35 |
b = zstd.BufferWithSegments(b |
|
|
41 | b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
36 | 42 | |
|
37 |
with self.assertRaisesRegex |
|
|
43 | with self.assertRaisesRegex(IndexError, "offset must be non-negative"): | |
|
38 | 44 | test = b[-10] |
|
39 | 45 | |
|
40 |
with self.assertRaisesRegex |
|
|
46 | with self.assertRaisesRegex(IndexError, "offset must be less than 1"): | |
|
41 | 47 | test = b[1] |
|
42 | 48 | |
|
43 |
with self.assertRaisesRegex |
|
|
49 | with self.assertRaisesRegex(IndexError, "offset must be less than 1"): | |
|
44 | 50 | test = b[2] |
|
45 | 51 | |
|
46 | 52 | def test_single(self): |
|
47 |
if not hasattr(zstd, |
|
|
48 |
self.skipTest( |
|
|
53 | if not hasattr(zstd, "BufferWithSegments"): | |
|
54 | self.skipTest("BufferWithSegments not available") | |
|
49 | 55 | |
|
50 |
b = zstd.BufferWithSegments(b |
|
|
56 | b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
51 | 57 | self.assertEqual(len(b), 1) |
|
52 | 58 | self.assertEqual(b.size, 3) |
|
53 |
self.assertEqual(b.tobytes(), b |
|
|
59 | self.assertEqual(b.tobytes(), b"foo") | |
|
54 | 60 | |
|
55 | 61 | self.assertEqual(len(b[0]), 3) |
|
56 | 62 | self.assertEqual(b[0].offset, 0) |
|
57 |
self.assertEqual(b[0].tobytes(), b |
|
|
63 | self.assertEqual(b[0].tobytes(), b"foo") | |
|
58 | 64 | |
|
59 | 65 | def test_multiple(self): |
|
60 |
if not hasattr(zstd, |
|
|
61 |
self.skipTest( |
|
|
66 | if not hasattr(zstd, "BufferWithSegments"): | |
|
67 | self.skipTest("BufferWithSegments not available") | |
|
62 | 68 | |
|
63 |
b = zstd.BufferWithSegments( |
|
|
64 | ss.pack(3, 4), | |
|
65 | ss.pack(7, 5)])) | |
|
69 | b = zstd.BufferWithSegments( | |
|
70 | b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)]) | |
|
71 | ) | |
|
66 | 72 | self.assertEqual(len(b), 3) |
|
67 | 73 | self.assertEqual(b.size, 12) |
|
68 |
self.assertEqual(b.tobytes(), b |
|
|
74 | self.assertEqual(b.tobytes(), b"foofooxfooxy") | |
|
69 | 75 | |
|
70 |
self.assertEqual(b[0].tobytes(), b |
|
|
71 |
self.assertEqual(b[1].tobytes(), b |
|
|
72 |
self.assertEqual(b[2].tobytes(), b |
|
|
76 | self.assertEqual(b[0].tobytes(), b"foo") | |
|
77 | self.assertEqual(b[1].tobytes(), b"foox") | |
|
78 | self.assertEqual(b[2].tobytes(), b"fooxy") | |
|
73 | 79 | |
|
74 | 80 | |
|
75 |
class TestBufferWithSegmentsCollection( |
|
|
81 | class TestBufferWithSegmentsCollection(TestCase): | |
|
76 | 82 | def test_empty_constructor(self): |
|
77 |
if not hasattr(zstd, |
|
|
78 |
self.skipTest( |
|
|
83 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
84 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
79 | 85 | |
|
80 |
with self.assertRaisesRegex |
|
|
86 | with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"): | |
|
81 | 87 | zstd.BufferWithSegmentsCollection() |
|
82 | 88 | |
|
83 | 89 | def test_argument_validation(self): |
|
84 |
if not hasattr(zstd, |
|
|
85 |
self.skipTest( |
|
|
90 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
91 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
86 | 92 | |
|
87 |
with self.assertRaisesRegex |
|
|
93 | with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): | |
|
88 | 94 | zstd.BufferWithSegmentsCollection(None) |
|
89 | 95 | |
|
90 |
with self.assertRaisesRegex |
|
|
91 |
zstd.BufferWithSegmentsCollection( |
|
|
92 | None) | |
|
96 | with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): | |
|
97 | zstd.BufferWithSegmentsCollection( | |
|
98 | zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None | |
|
99 | ) | |
|
93 | 100 | |
|
94 | with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'): | |
|
95 | zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b'')) | |
|
101 | with self.assertRaisesRegex( | |
|
102 | ValueError, "ZstdBufferWithSegments cannot be empty" | |
|
103 | ): | |
|
104 | zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b"")) | |
|
96 | 105 | |
|
97 | 106 | def test_length(self): |
|
98 |
if not hasattr(zstd, |
|
|
99 |
self.skipTest( |
|
|
107 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
108 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
100 | 109 | |
|
101 |
b1 = zstd.BufferWithSegments(b |
|
|
102 |
b2 = zstd.BufferWithSegments( |
|
|
103 | ss.pack(3, 3)])) | |
|
110 | b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
111 | b2 = zstd.BufferWithSegments( | |
|
112 | b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) | |
|
113 | ) | |
|
104 | 114 | |
|
105 | 115 | c = zstd.BufferWithSegmentsCollection(b1) |
|
106 | 116 | self.assertEqual(len(c), 1) |
@@ -115,21 +125,22 class TestBufferWithSegmentsCollection(u | |||
|
115 | 125 | self.assertEqual(c.size(), 9) |
|
116 | 126 | |
|
117 | 127 | def test_getitem(self): |
|
118 |
if not hasattr(zstd, |
|
|
119 |
self.skipTest( |
|
|
128 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
129 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
120 | 130 | |
|
121 |
b1 = zstd.BufferWithSegments(b |
|
|
122 |
b2 = zstd.BufferWithSegments( |
|
|
123 | ss.pack(3, 3)])) | |
|
131 | b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
132 | b2 = zstd.BufferWithSegments( | |
|
133 | b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) | |
|
134 | ) | |
|
124 | 135 | |
|
125 | 136 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
126 | 137 | |
|
127 |
with self.assertRaisesRegex |
|
|
138 | with self.assertRaisesRegex(IndexError, "offset must be less than 3"): | |
|
128 | 139 | c[3] |
|
129 | 140 | |
|
130 |
with self.assertRaisesRegex |
|
|
141 | with self.assertRaisesRegex(IndexError, "offset must be less than 3"): | |
|
131 | 142 | c[4] |
|
132 | 143 | |
|
133 |
self.assertEqual(c[0].tobytes(), b |
|
|
134 |
self.assertEqual(c[1].tobytes(), b |
|
|
135 |
self.assertEqual(c[2].tobytes(), b |
|
|
144 | self.assertEqual(c[0].tobytes(), b"foo") | |
|
145 | self.assertEqual(c[1].tobytes(), b"bar") | |
|
146 | self.assertEqual(c[2].tobytes(), b"baz") |
This diff has been collapsed as it changes many lines, (811 lines changed) Show them Hide them | |||
@@ -13,6 +13,7 from .common import ( | |||
|
13 | 13 | make_cffi, |
|
14 | 14 | NonClosingBytesIO, |
|
15 | 15 | OpCountingBytesIO, |
|
16 | TestCase, | |
|
16 | 17 | ) |
|
17 | 18 | |
|
18 | 19 | |
@@ -23,14 +24,13 else: | |||
|
23 | 24 | |
|
24 | 25 | |
|
25 | 26 | def multithreaded_chunk_size(level, source_size=0): |
|
26 | params = zstd.ZstdCompressionParameters.from_level(level, | |
|
27 | source_size=source_size) | |
|
27 | params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size) | |
|
28 | 28 | |
|
29 | 29 | return 1 << (params.window_log + 2) |
|
30 | 30 | |
|
31 | 31 | |
|
32 | 32 | @make_cffi |
|
33 |
class TestCompressor( |
|
|
33 | class TestCompressor(TestCase): | |
|
34 | 34 | def test_level_bounds(self): |
|
35 | 35 | with self.assertRaises(ValueError): |
|
36 | 36 | zstd.ZstdCompressor(level=23) |
@@ -41,11 +41,11 class TestCompressor(unittest.TestCase): | |||
|
41 | 41 | |
|
42 | 42 | |
|
43 | 43 | @make_cffi |
|
44 |
class TestCompressor_compress( |
|
|
44 | class TestCompressor_compress(TestCase): | |
|
45 | 45 | def test_compress_empty(self): |
|
46 | 46 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
47 |
result = cctx.compress(b |
|
|
48 |
self.assertEqual(result, b |
|
|
47 | result = cctx.compress(b"") | |
|
48 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
49 | 49 | params = zstd.get_frame_parameters(result) |
|
50 | 50 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
51 | 51 | self.assertEqual(params.window_size, 524288) |
@@ -53,21 +53,21 class TestCompressor_compress(unittest.T | |||
|
53 | 53 | self.assertFalse(params.has_checksum, 0) |
|
54 | 54 | |
|
55 | 55 | cctx = zstd.ZstdCompressor() |
|
56 |
result = cctx.compress(b |
|
|
57 |
self.assertEqual(result, b |
|
|
56 | result = cctx.compress(b"") | |
|
57 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00") | |
|
58 | 58 | params = zstd.get_frame_parameters(result) |
|
59 | 59 | self.assertEqual(params.content_size, 0) |
|
60 | 60 | |
|
61 | 61 | def test_input_types(self): |
|
62 | 62 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
63 |
expected = b |
|
|
63 | expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f" | |
|
64 | 64 | |
|
65 | 65 | mutable_array = bytearray(3) |
|
66 |
mutable_array[:] = b |
|
|
66 | mutable_array[:] = b"foo" | |
|
67 | 67 | |
|
68 | 68 | sources = [ |
|
69 |
memoryview(b |
|
|
70 |
bytearray(b |
|
|
69 | memoryview(b"foo"), | |
|
70 | bytearray(b"foo"), | |
|
71 | 71 | mutable_array, |
|
72 | 72 | ] |
|
73 | 73 | |
@@ -77,43 +77,46 class TestCompressor_compress(unittest.T | |||
|
77 | 77 | def test_compress_large(self): |
|
78 | 78 | chunks = [] |
|
79 | 79 | for i in range(255): |
|
80 |
chunks.append(struct.Struct( |
|
|
80 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
|
81 | 81 | |
|
82 | 82 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) |
|
83 |
result = cctx.compress(b |
|
|
83 | result = cctx.compress(b"".join(chunks)) | |
|
84 | 84 | self.assertEqual(len(result), 999) |
|
85 |
self.assertEqual(result[0:4], b |
|
|
85 | self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") | |
|
86 | 86 | |
|
87 | 87 | # This matches the test for read_to_iter() below. |
|
88 | 88 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
89 |
result = cctx.compress(b |
|
|
90 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' | |
|
91 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' | |
|
92 | b'\x02\x09\x00\x00\x6f') | |
|
89 | result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o") | |
|
90 | self.assertEqual( | |
|
91 | result, | |
|
92 | b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00" | |
|
93 | b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0" | |
|
94 | b"\x02\x09\x00\x00\x6f", | |
|
95 | ) | |
|
93 | 96 | |
|
94 | 97 | def test_negative_level(self): |
|
95 | 98 | cctx = zstd.ZstdCompressor(level=-4) |
|
96 |
result = cctx.compress(b |
|
|
99 | result = cctx.compress(b"foo" * 256) | |
|
97 | 100 | |
|
98 | 101 | def test_no_magic(self): |
|
99 | params = zstd.ZstdCompressionParameters.from_level( | |
|
100 | 1, format=zstd.FORMAT_ZSTD1) | |
|
102 | params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1) | |
|
101 | 103 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
102 |
magic = cctx.compress(b |
|
|
104 | magic = cctx.compress(b"foobar") | |
|
103 | 105 | |
|
104 | 106 | params = zstd.ZstdCompressionParameters.from_level( |
|
105 |
1, format=zstd.FORMAT_ZSTD1_MAGICLESS |
|
|
107 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS | |
|
108 | ) | |
|
106 | 109 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
107 |
no_magic = cctx.compress(b |
|
|
110 | no_magic = cctx.compress(b"foobar") | |
|
108 | 111 | |
|
109 |
self.assertEqual(magic[0:4], b |
|
|
112 | self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd") | |
|
110 | 113 | self.assertEqual(magic[4:], no_magic) |
|
111 | 114 | |
|
112 | 115 | def test_write_checksum(self): |
|
113 | 116 | cctx = zstd.ZstdCompressor(level=1) |
|
114 |
no_checksum = cctx.compress(b |
|
|
117 | no_checksum = cctx.compress(b"foobar") | |
|
115 | 118 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
116 |
with_checksum = cctx.compress(b |
|
|
119 | with_checksum = cctx.compress(b"foobar") | |
|
117 | 120 | |
|
118 | 121 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
119 | 122 | |
@@ -125,9 +128,9 class TestCompressor_compress(unittest.T | |||
|
125 | 128 | |
|
126 | 129 | def test_write_content_size(self): |
|
127 | 130 | cctx = zstd.ZstdCompressor(level=1) |
|
128 |
with_size = cctx.compress(b |
|
|
131 | with_size = cctx.compress(b"foobar" * 256) | |
|
129 | 132 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
130 |
no_size = cctx.compress(b |
|
|
133 | no_size = cctx.compress(b"foobar" * 256) | |
|
131 | 134 | |
|
132 | 135 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
133 | 136 | |
@@ -139,17 +142,17 class TestCompressor_compress(unittest.T | |||
|
139 | 142 | def test_no_dict_id(self): |
|
140 | 143 | samples = [] |
|
141 | 144 | for i in range(128): |
|
142 |
samples.append(b |
|
|
143 |
samples.append(b |
|
|
144 |
samples.append(b |
|
|
145 | samples.append(b"foo" * 64) | |
|
146 | samples.append(b"bar" * 64) | |
|
147 | samples.append(b"foobar" * 64) | |
|
145 | 148 | |
|
146 | 149 | d = zstd.train_dictionary(1024, samples) |
|
147 | 150 | |
|
148 | 151 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
149 |
with_dict_id = cctx.compress(b |
|
|
152 | with_dict_id = cctx.compress(b"foobarfoobar") | |
|
150 | 153 | |
|
151 | 154 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
152 |
no_dict_id = cctx.compress(b |
|
|
155 | no_dict_id = cctx.compress(b"foobarfoobar") | |
|
153 | 156 | |
|
154 | 157 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
|
155 | 158 | |
@@ -161,23 +164,23 class TestCompressor_compress(unittest.T | |||
|
161 | 164 | def test_compress_dict_multiple(self): |
|
162 | 165 | samples = [] |
|
163 | 166 | for i in range(128): |
|
164 |
samples.append(b |
|
|
165 |
samples.append(b |
|
|
166 |
samples.append(b |
|
|
167 | samples.append(b"foo" * 64) | |
|
168 | samples.append(b"bar" * 64) | |
|
169 | samples.append(b"foobar" * 64) | |
|
167 | 170 | |
|
168 | 171 | d = zstd.train_dictionary(8192, samples) |
|
169 | 172 | |
|
170 | 173 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
171 | 174 | |
|
172 | 175 | for i in range(32): |
|
173 |
cctx.compress(b |
|
|
176 | cctx.compress(b"foo bar foobar foo bar foobar") | |
|
174 | 177 | |
|
175 | 178 | def test_dict_precompute(self): |
|
176 | 179 | samples = [] |
|
177 | 180 | for i in range(128): |
|
178 |
samples.append(b |
|
|
179 |
samples.append(b |
|
|
180 |
samples.append(b |
|
|
181 | samples.append(b"foo" * 64) | |
|
182 | samples.append(b"bar" * 64) | |
|
183 | samples.append(b"foobar" * 64) | |
|
181 | 184 | |
|
182 | 185 | d = zstd.train_dictionary(8192, samples) |
|
183 | 186 | d.precompute_compress(level=1) |
@@ -185,11 +188,11 class TestCompressor_compress(unittest.T | |||
|
185 | 188 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
186 | 189 | |
|
187 | 190 | for i in range(32): |
|
188 |
cctx.compress(b |
|
|
191 | cctx.compress(b"foo bar foobar foo bar foobar") | |
|
189 | 192 | |
|
190 | 193 | def test_multithreaded(self): |
|
191 | 194 | chunk_size = multithreaded_chunk_size(1) |
|
192 |
source = b |
|
|
195 | source = b"".join([b"x" * chunk_size, b"y" * chunk_size]) | |
|
193 | 196 | |
|
194 | 197 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
195 | 198 | compressed = cctx.compress(source) |
@@ -205,73 +208,72 class TestCompressor_compress(unittest.T | |||
|
205 | 208 | def test_multithreaded_dict(self): |
|
206 | 209 | samples = [] |
|
207 | 210 | for i in range(128): |
|
208 |
samples.append(b |
|
|
209 |
samples.append(b |
|
|
210 |
samples.append(b |
|
|
211 | samples.append(b"foo" * 64) | |
|
212 | samples.append(b"bar" * 64) | |
|
213 | samples.append(b"foobar" * 64) | |
|
211 | 214 | |
|
212 | 215 | d = zstd.train_dictionary(1024, samples) |
|
213 | 216 | |
|
214 | 217 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) |
|
215 | 218 | |
|
216 |
result = cctx.compress(b |
|
|
217 |
params = zstd.get_frame_parameters(result) |
|
|
218 |
self.assertEqual(params.content_size, 3) |
|
|
219 | result = cctx.compress(b"foo") | |
|
220 | params = zstd.get_frame_parameters(result) | |
|
221 | self.assertEqual(params.content_size, 3) | |
|
219 | 222 | self.assertEqual(params.dict_id, d.dict_id()) |
|
220 | 223 | |
|
221 |
self.assertEqual( |
|
|
222 | b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00' | |
|
223 | b'\x66\x6f\x6f') | |
|
224 | self.assertEqual( | |
|
225 | result, | |
|
226 | b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f", | |
|
227 | ) | |
|
224 | 228 | |
|
225 | 229 | def test_multithreaded_compression_params(self): |
|
226 | 230 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) |
|
227 | 231 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
228 | 232 | |
|
229 |
result = cctx.compress(b |
|
|
230 |
params = zstd.get_frame_parameters(result) |
|
|
231 |
self.assertEqual(params.content_size, 3) |
|
|
233 | result = cctx.compress(b"foo") | |
|
234 | params = zstd.get_frame_parameters(result) | |
|
235 | self.assertEqual(params.content_size, 3) | |
|
232 | 236 | |
|
233 | self.assertEqual(result, | |
|
234 | b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') | |
|
237 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f") | |
|
235 | 238 | |
|
236 | 239 | |
|
237 | 240 | @make_cffi |
|
238 |
class TestCompressor_compressobj( |
|
|
241 | class TestCompressor_compressobj(TestCase): | |
|
239 | 242 | def test_compressobj_empty(self): |
|
240 | 243 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
241 | 244 | cobj = cctx.compressobj() |
|
242 |
self.assertEqual(cobj.compress(b |
|
|
243 | self.assertEqual(cobj.flush(), | |
|
244 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
|
245 | self.assertEqual(cobj.compress(b""), b"") | |
|
246 | self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
245 | 247 | |
|
246 | 248 | def test_input_types(self): |
|
247 |
expected = b |
|
|
249 | expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" | |
|
248 | 250 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
249 | 251 | |
|
250 | 252 | mutable_array = bytearray(3) |
|
251 |
mutable_array[:] = b |
|
|
253 | mutable_array[:] = b"foo" | |
|
252 | 254 | |
|
253 | 255 | sources = [ |
|
254 |
memoryview(b |
|
|
255 |
bytearray(b |
|
|
256 | memoryview(b"foo"), | |
|
257 | bytearray(b"foo"), | |
|
256 | 258 | mutable_array, |
|
257 | 259 | ] |
|
258 | 260 | |
|
259 | 261 | for source in sources: |
|
260 | 262 | cobj = cctx.compressobj() |
|
261 |
self.assertEqual(cobj.compress(source), b |
|
|
263 | self.assertEqual(cobj.compress(source), b"") | |
|
262 | 264 | self.assertEqual(cobj.flush(), expected) |
|
263 | 265 | |
|
264 | 266 | def test_compressobj_large(self): |
|
265 | 267 | chunks = [] |
|
266 | 268 | for i in range(255): |
|
267 |
chunks.append(struct.Struct( |
|
|
269 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
|
268 | 270 | |
|
269 | 271 | cctx = zstd.ZstdCompressor(level=3) |
|
270 | 272 | cobj = cctx.compressobj() |
|
271 | 273 | |
|
272 |
result = cobj.compress(b |
|
|
274 | result = cobj.compress(b"".join(chunks)) + cobj.flush() | |
|
273 | 275 | self.assertEqual(len(result), 999) |
|
274 |
self.assertEqual(result[0:4], b |
|
|
276 | self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") | |
|
275 | 277 | |
|
276 | 278 | params = zstd.get_frame_parameters(result) |
|
277 | 279 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
@@ -282,10 +284,10 class TestCompressor_compressobj(unittes | |||
|
282 | 284 | def test_write_checksum(self): |
|
283 | 285 | cctx = zstd.ZstdCompressor(level=1) |
|
284 | 286 | cobj = cctx.compressobj() |
|
285 |
no_checksum = cobj.compress(b |
|
|
287 | no_checksum = cobj.compress(b"foobar") + cobj.flush() | |
|
286 | 288 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
287 | 289 | cobj = cctx.compressobj() |
|
288 |
with_checksum = cobj.compress(b |
|
|
290 | with_checksum = cobj.compress(b"foobar") + cobj.flush() | |
|
289 | 291 | |
|
290 | 292 | no_params = zstd.get_frame_parameters(no_checksum) |
|
291 | 293 | with_params = zstd.get_frame_parameters(with_checksum) |
@@ -300,11 +302,11 class TestCompressor_compressobj(unittes | |||
|
300 | 302 | |
|
301 | 303 | def test_write_content_size(self): |
|
302 | 304 | cctx = zstd.ZstdCompressor(level=1) |
|
303 |
cobj = cctx.compressobj(size=len(b |
|
|
304 |
with_size = cobj.compress(b |
|
|
305 | cobj = cctx.compressobj(size=len(b"foobar" * 256)) | |
|
306 | with_size = cobj.compress(b"foobar" * 256) + cobj.flush() | |
|
305 | 307 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
306 |
cobj = cctx.compressobj(size=len(b |
|
|
307 |
no_size = cobj.compress(b |
|
|
308 | cobj = cctx.compressobj(size=len(b"foobar" * 256)) | |
|
309 | no_size = cobj.compress(b"foobar" * 256) + cobj.flush() | |
|
308 | 310 | |
|
309 | 311 | no_params = zstd.get_frame_parameters(no_size) |
|
310 | 312 | with_params = zstd.get_frame_parameters(with_size) |
@@ -321,48 +323,53 class TestCompressor_compressobj(unittes | |||
|
321 | 323 | cctx = zstd.ZstdCompressor() |
|
322 | 324 | cobj = cctx.compressobj() |
|
323 | 325 | |
|
324 |
cobj.compress(b |
|
|
326 | cobj.compress(b"foo") | |
|
325 | 327 | cobj.flush() |
|
326 | 328 | |
|
327 | with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'): | |
|
328 | cobj.compress(b'foo') | |
|
329 | with self.assertRaisesRegex( | |
|
330 | zstd.ZstdError, r"cannot call compress\(\) after compressor" | |
|
331 | ): | |
|
332 | cobj.compress(b"foo") | |
|
329 | 333 | |
|
330 | with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): | |
|
334 | with self.assertRaisesRegex( | |
|
335 | zstd.ZstdError, "compressor object already finished" | |
|
336 | ): | |
|
331 | 337 | cobj.flush() |
|
332 | 338 | |
|
333 | 339 | def test_flush_block_repeated(self): |
|
334 | 340 | cctx = zstd.ZstdCompressor(level=1) |
|
335 | 341 | cobj = cctx.compressobj() |
|
336 | 342 | |
|
337 |
self.assertEqual(cobj.compress(b |
|
|
338 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
339 | b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') | |
|
340 | self.assertEqual(cobj.compress(b'bar'), b'') | |
|
343 | self.assertEqual(cobj.compress(b"foo"), b"") | |
|
344 | self.assertEqual( | |
|
345 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
346 | b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo", | |
|
347 | ) | |
|
348 | self.assertEqual(cobj.compress(b"bar"), b"") | |
|
341 | 349 | # 3 byte header plus content. |
|
342 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
343 | b'\x18\x00\x00bar') | |
|
344 | self.assertEqual(cobj.flush(), b'\x01\x00\x00') | |
|
350 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar") | |
|
351 | self.assertEqual(cobj.flush(), b"\x01\x00\x00") | |
|
345 | 352 | |
|
346 | 353 | def test_flush_empty_block(self): |
|
347 | 354 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
348 | 355 | cobj = cctx.compressobj() |
|
349 | 356 | |
|
350 |
cobj.compress(b |
|
|
357 | cobj.compress(b"foobar") | |
|
351 | 358 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
352 | 359 | # No-op if no block is active (this is internal to zstd). |
|
353 |
self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b |
|
|
360 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"") | |
|
354 | 361 | |
|
355 | 362 | trailing = cobj.flush() |
|
356 | 363 | # 3 bytes block header + 4 bytes frame checksum |
|
357 | 364 | self.assertEqual(len(trailing), 7) |
|
358 | 365 | header = trailing[0:3] |
|
359 |
self.assertEqual(header, b |
|
|
366 | self.assertEqual(header, b"\x01\x00\x00") | |
|
360 | 367 | |
|
361 | 368 | def test_multithreaded(self): |
|
362 | 369 | source = io.BytesIO() |
|
363 |
source.write(b |
|
|
364 |
source.write(b |
|
|
365 |
source.write(b |
|
|
370 | source.write(b"a" * 1048576) | |
|
371 | source.write(b"b" * 1048576) | |
|
372 | source.write(b"c" * 1048576) | |
|
366 | 373 | source.seek(0) |
|
367 | 374 | |
|
368 | 375 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
@@ -378,9 +385,9 class TestCompressor_compressobj(unittes | |||
|
378 | 385 | |
|
379 | 386 | chunks.append(cobj.flush()) |
|
380 | 387 | |
|
381 |
compressed = b |
|
|
388 | compressed = b"".join(chunks) | |
|
382 | 389 | |
|
383 |
self.assertEqual(len(compressed), |
|
|
390 | self.assertEqual(len(compressed), 119) | |
|
384 | 391 | |
|
385 | 392 | def test_frame_progression(self): |
|
386 | 393 | cctx = zstd.ZstdCompressor() |
@@ -389,7 +396,7 class TestCompressor_compressobj(unittes | |||
|
389 | 396 | |
|
390 | 397 | cobj = cctx.compressobj() |
|
391 | 398 | |
|
392 |
cobj.compress(b |
|
|
399 | cobj.compress(b"foobar") | |
|
393 | 400 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) |
|
394 | 401 | |
|
395 | 402 | cobj.flush() |
@@ -399,20 +406,20 class TestCompressor_compressobj(unittes | |||
|
399 | 406 | cctx = zstd.ZstdCompressor() |
|
400 | 407 | |
|
401 | 408 | cobj = cctx.compressobj(size=2) |
|
402 |
with self.assertRaisesRegex |
|
|
403 |
cobj.compress(b |
|
|
409 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
410 | cobj.compress(b"foo") | |
|
404 | 411 | |
|
405 | 412 | # Try another operation on this instance. |
|
406 |
with self.assertRaisesRegex |
|
|
407 |
cobj.compress(b |
|
|
413 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
414 | cobj.compress(b"aa") | |
|
408 | 415 | |
|
409 | 416 | # Try another operation on the compressor. |
|
410 | 417 | cctx.compressobj(size=4) |
|
411 |
cctx.compress(b |
|
|
418 | cctx.compress(b"foobar") | |
|
412 | 419 | |
|
413 | 420 | |
|
414 | 421 | @make_cffi |
|
415 |
class TestCompressor_copy_stream( |
|
|
422 | class TestCompressor_copy_stream(TestCase): | |
|
416 | 423 | def test_no_read(self): |
|
417 | 424 | source = object() |
|
418 | 425 | dest = io.BytesIO() |
@@ -438,13 +445,12 class TestCompressor_copy_stream(unittes | |||
|
438 | 445 | self.assertEqual(int(r), 0) |
|
439 | 446 | self.assertEqual(w, 9) |
|
440 | 447 | |
|
441 | self.assertEqual(dest.getvalue(), | |
|
442 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
|
448 | self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
443 | 449 | |
|
444 | 450 | def test_large_data(self): |
|
445 | 451 | source = io.BytesIO() |
|
446 | 452 | for i in range(255): |
|
447 |
source.write(struct.Struct( |
|
|
453 | source.write(struct.Struct(">B").pack(i) * 16384) | |
|
448 | 454 | source.seek(0) |
|
449 | 455 | |
|
450 | 456 | dest = io.BytesIO() |
@@ -461,7 +467,7 class TestCompressor_copy_stream(unittes | |||
|
461 | 467 | self.assertFalse(params.has_checksum) |
|
462 | 468 | |
|
463 | 469 | def test_write_checksum(self): |
|
464 |
source = io.BytesIO(b |
|
|
470 | source = io.BytesIO(b"foobar") | |
|
465 | 471 | no_checksum = io.BytesIO() |
|
466 | 472 | |
|
467 | 473 | cctx = zstd.ZstdCompressor(level=1) |
@@ -472,8 +478,7 class TestCompressor_copy_stream(unittes | |||
|
472 | 478 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
473 | 479 | cctx.copy_stream(source, with_checksum) |
|
474 | 480 | |
|
475 | self.assertEqual(len(with_checksum.getvalue()), | |
|
476 | len(no_checksum.getvalue()) + 4) | |
|
481 | self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) | |
|
477 | 482 | |
|
478 | 483 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
479 | 484 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
@@ -485,7 +490,7 class TestCompressor_copy_stream(unittes | |||
|
485 | 490 | self.assertTrue(with_params.has_checksum) |
|
486 | 491 | |
|
487 | 492 | def test_write_content_size(self): |
|
488 |
source = io.BytesIO(b |
|
|
493 | source = io.BytesIO(b"foobar" * 256) | |
|
489 | 494 | no_size = io.BytesIO() |
|
490 | 495 | |
|
491 | 496 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
@@ -497,16 +502,14 class TestCompressor_copy_stream(unittes | |||
|
497 | 502 | cctx.copy_stream(source, with_size) |
|
498 | 503 | |
|
499 | 504 | # Source content size is unknown, so no content size written. |
|
500 | self.assertEqual(len(with_size.getvalue()), | |
|
501 | len(no_size.getvalue())) | |
|
505 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) | |
|
502 | 506 | |
|
503 | 507 | source.seek(0) |
|
504 | 508 | with_size = io.BytesIO() |
|
505 | 509 | cctx.copy_stream(source, with_size, size=len(source.getvalue())) |
|
506 | 510 | |
|
507 | 511 | # We specified source size, so content size header is present. |
|
508 | self.assertEqual(len(with_size.getvalue()), | |
|
509 | len(no_size.getvalue()) + 1) | |
|
512 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) | |
|
510 | 513 | |
|
511 | 514 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
512 | 515 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
@@ -518,7 +521,7 class TestCompressor_copy_stream(unittes | |||
|
518 | 521 | self.assertFalse(with_params.has_checksum) |
|
519 | 522 | |
|
520 | 523 | def test_read_write_size(self): |
|
521 |
source = OpCountingBytesIO(b |
|
|
524 | source = OpCountingBytesIO(b"foobarfoobar") | |
|
522 | 525 | dest = OpCountingBytesIO() |
|
523 | 526 | cctx = zstd.ZstdCompressor() |
|
524 | 527 | r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1) |
@@ -530,16 +533,16 class TestCompressor_copy_stream(unittes | |||
|
530 | 533 | |
|
531 | 534 | def test_multithreaded(self): |
|
532 | 535 | source = io.BytesIO() |
|
533 |
source.write(b |
|
|
534 |
source.write(b |
|
|
535 |
source.write(b |
|
|
536 | source.write(b"a" * 1048576) | |
|
537 | source.write(b"b" * 1048576) | |
|
538 | source.write(b"c" * 1048576) | |
|
536 | 539 | source.seek(0) |
|
537 | 540 | |
|
538 | 541 | dest = io.BytesIO() |
|
539 | 542 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) |
|
540 | 543 | r, w = cctx.copy_stream(source, dest) |
|
541 | 544 | self.assertEqual(r, 3145728) |
|
542 |
self.assertEqual(w, |
|
|
545 | self.assertEqual(w, 111) | |
|
543 | 546 | |
|
544 | 547 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
545 | 548 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
@@ -559,15 +562,15 class TestCompressor_copy_stream(unittes | |||
|
559 | 562 | |
|
560 | 563 | def test_bad_size(self): |
|
561 | 564 | source = io.BytesIO() |
|
562 |
source.write(b |
|
|
563 |
source.write(b |
|
|
565 | source.write(b"a" * 32768) | |
|
566 | source.write(b"b" * 32768) | |
|
564 | 567 | source.seek(0) |
|
565 | 568 | |
|
566 | 569 | dest = io.BytesIO() |
|
567 | 570 | |
|
568 | 571 | cctx = zstd.ZstdCompressor() |
|
569 | 572 | |
|
570 |
with self.assertRaisesRegex |
|
|
573 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
571 | 574 | cctx.copy_stream(source, dest, size=42) |
|
572 | 575 | |
|
573 | 576 | # Try another operation on this compressor. |
@@ -577,31 +580,31 class TestCompressor_copy_stream(unittes | |||
|
577 | 580 | |
|
578 | 581 | |
|
579 | 582 | @make_cffi |
|
580 |
class TestCompressor_stream_reader( |
|
|
583 | class TestCompressor_stream_reader(TestCase): | |
|
581 | 584 | def test_context_manager(self): |
|
582 | 585 | cctx = zstd.ZstdCompressor() |
|
583 | 586 | |
|
584 |
with cctx.stream_reader(b |
|
|
585 |
with self.assertRaisesRegex |
|
|
587 | with cctx.stream_reader(b"foo") as reader: | |
|
588 | with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): | |
|
586 | 589 | with reader as reader2: |
|
587 | 590 | pass |
|
588 | 591 | |
|
589 | 592 | def test_no_context_manager(self): |
|
590 | 593 | cctx = zstd.ZstdCompressor() |
|
591 | 594 | |
|
592 |
reader = cctx.stream_reader(b |
|
|
595 | reader = cctx.stream_reader(b"foo") | |
|
593 | 596 | reader.read(4) |
|
594 | 597 | self.assertFalse(reader.closed) |
|
595 | 598 | |
|
596 | 599 | reader.close() |
|
597 | 600 | self.assertTrue(reader.closed) |
|
598 |
with self.assertRaisesRegex |
|
|
601 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
599 | 602 | reader.read(1) |
|
600 | 603 | |
|
601 | 604 | def test_not_implemented(self): |
|
602 | 605 | cctx = zstd.ZstdCompressor() |
|
603 | 606 | |
|
604 |
with cctx.stream_reader(b |
|
|
607 | with cctx.stream_reader(b"foo" * 60) as reader: | |
|
605 | 608 | with self.assertRaises(io.UnsupportedOperation): |
|
606 | 609 | reader.readline() |
|
607 | 610 | |
@@ -618,12 +621,12 class TestCompressor_stream_reader(unitt | |||
|
618 | 621 | reader.writelines([]) |
|
619 | 622 | |
|
620 | 623 | with self.assertRaises(OSError): |
|
621 |
reader.write(b |
|
|
624 | reader.write(b"foo") | |
|
622 | 625 | |
|
623 | 626 | def test_constant_methods(self): |
|
624 | 627 | cctx = zstd.ZstdCompressor() |
|
625 | 628 | |
|
626 |
with cctx.stream_reader(b |
|
|
629 | with cctx.stream_reader(b"boo") as reader: | |
|
627 | 630 | self.assertTrue(reader.readable()) |
|
628 | 631 | self.assertFalse(reader.writable()) |
|
629 | 632 | self.assertFalse(reader.seekable()) |
@@ -637,27 +640,29 class TestCompressor_stream_reader(unitt | |||
|
637 | 640 | def test_read_closed(self): |
|
638 | 641 | cctx = zstd.ZstdCompressor() |
|
639 | 642 | |
|
640 |
with cctx.stream_reader(b |
|
|
643 | with cctx.stream_reader(b"foo" * 60) as reader: | |
|
641 | 644 | reader.close() |
|
642 | 645 | self.assertTrue(reader.closed) |
|
643 |
with self.assertRaisesRegex |
|
|
646 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
644 | 647 | reader.read(10) |
|
645 | 648 | |
|
646 | 649 | def test_read_sizes(self): |
|
647 | 650 | cctx = zstd.ZstdCompressor() |
|
648 |
foo = cctx.compress(b |
|
|
651 | foo = cctx.compress(b"foo") | |
|
649 | 652 | |
|
650 |
with cctx.stream_reader(b |
|
|
651 | with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): | |
|
653 | with cctx.stream_reader(b"foo") as reader: | |
|
654 | with self.assertRaisesRegex( | |
|
655 | ValueError, "cannot read negative amounts less than -1" | |
|
656 | ): | |
|
652 | 657 | reader.read(-2) |
|
653 | 658 | |
|
654 |
self.assertEqual(reader.read(0), b |
|
|
659 | self.assertEqual(reader.read(0), b"") | |
|
655 | 660 | self.assertEqual(reader.read(), foo) |
|
656 | 661 | |
|
657 | 662 | def test_read_buffer(self): |
|
658 | 663 | cctx = zstd.ZstdCompressor() |
|
659 | 664 | |
|
660 |
source = b |
|
|
665 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
661 | 666 | frame = cctx.compress(source) |
|
662 | 667 | |
|
663 | 668 | with cctx.stream_reader(source) as reader: |
@@ -667,13 +672,13 class TestCompressor_stream_reader(unitt | |||
|
667 | 672 | result = reader.read(8192) |
|
668 | 673 | self.assertEqual(result, frame) |
|
669 | 674 | self.assertEqual(reader.tell(), len(result)) |
|
670 |
self.assertEqual(reader.read(), b |
|
|
675 | self.assertEqual(reader.read(), b"") | |
|
671 | 676 | self.assertEqual(reader.tell(), len(result)) |
|
672 | 677 | |
|
673 | 678 | def test_read_buffer_small_chunks(self): |
|
674 | 679 | cctx = zstd.ZstdCompressor() |
|
675 | 680 | |
|
676 |
source = b |
|
|
681 | source = b"foo" * 60 | |
|
677 | 682 | chunks = [] |
|
678 | 683 | |
|
679 | 684 | with cctx.stream_reader(source) as reader: |
@@ -687,12 +692,12 class TestCompressor_stream_reader(unitt | |||
|
687 | 692 | chunks.append(chunk) |
|
688 | 693 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
689 | 694 | |
|
690 |
self.assertEqual(b |
|
|
695 | self.assertEqual(b"".join(chunks), cctx.compress(source)) | |
|
691 | 696 | |
|
692 | 697 | def test_read_stream(self): |
|
693 | 698 | cctx = zstd.ZstdCompressor() |
|
694 | 699 | |
|
695 |
source = b |
|
|
700 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
696 | 701 | frame = cctx.compress(source) |
|
697 | 702 | |
|
698 | 703 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
@@ -701,13 +706,13 class TestCompressor_stream_reader(unitt | |||
|
701 | 706 | chunk = reader.read(8192) |
|
702 | 707 | self.assertEqual(chunk, frame) |
|
703 | 708 | self.assertEqual(reader.tell(), len(chunk)) |
|
704 |
self.assertEqual(reader.read(), b |
|
|
709 | self.assertEqual(reader.read(), b"") | |
|
705 | 710 | self.assertEqual(reader.tell(), len(chunk)) |
|
706 | 711 | |
|
707 | 712 | def test_read_stream_small_chunks(self): |
|
708 | 713 | cctx = zstd.ZstdCompressor() |
|
709 | 714 | |
|
710 |
source = b |
|
|
715 | source = b"foo" * 60 | |
|
711 | 716 | chunks = [] |
|
712 | 717 | |
|
713 | 718 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
@@ -721,25 +726,25 class TestCompressor_stream_reader(unitt | |||
|
721 | 726 | chunks.append(chunk) |
|
722 | 727 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
723 | 728 | |
|
724 |
self.assertEqual(b |
|
|
729 | self.assertEqual(b"".join(chunks), cctx.compress(source)) | |
|
725 | 730 | |
|
726 | 731 | def test_read_after_exit(self): |
|
727 | 732 | cctx = zstd.ZstdCompressor() |
|
728 | 733 | |
|
729 |
with cctx.stream_reader(b |
|
|
734 | with cctx.stream_reader(b"foo" * 60) as reader: | |
|
730 | 735 | while reader.read(8192): |
|
731 | 736 | pass |
|
732 | 737 | |
|
733 |
with self.assertRaisesRegex |
|
|
738 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
734 | 739 | reader.read(10) |
|
735 | 740 | |
|
736 | 741 | def test_bad_size(self): |
|
737 | 742 | cctx = zstd.ZstdCompressor() |
|
738 | 743 | |
|
739 |
source = io.BytesIO(b |
|
|
744 | source = io.BytesIO(b"foobar") | |
|
740 | 745 | |
|
741 | 746 | with cctx.stream_reader(source, size=2) as reader: |
|
742 |
with self.assertRaisesRegex |
|
|
747 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
743 | 748 | reader.read(10) |
|
744 | 749 | |
|
745 | 750 | # Try another compression operation. |
@@ -748,36 +753,36 class TestCompressor_stream_reader(unitt | |||
|
748 | 753 | |
|
749 | 754 | def test_readall(self): |
|
750 | 755 | cctx = zstd.ZstdCompressor() |
|
751 |
frame = cctx.compress(b |
|
|
756 | frame = cctx.compress(b"foo" * 1024) | |
|
752 | 757 | |
|
753 |
reader = cctx.stream_reader(b |
|
|
758 | reader = cctx.stream_reader(b"foo" * 1024) | |
|
754 | 759 | self.assertEqual(reader.readall(), frame) |
|
755 | 760 | |
|
756 | 761 | def test_readinto(self): |
|
757 | 762 | cctx = zstd.ZstdCompressor() |
|
758 |
foo = cctx.compress(b |
|
|
763 | foo = cctx.compress(b"foo") | |
|
759 | 764 | |
|
760 |
reader = cctx.stream_reader(b |
|
|
765 | reader = cctx.stream_reader(b"foo") | |
|
761 | 766 | with self.assertRaises(Exception): |
|
762 |
reader.readinto(b |
|
|
767 | reader.readinto(b"foobar") | |
|
763 | 768 | |
|
764 | 769 | # readinto() with sufficiently large destination. |
|
765 | 770 | b = bytearray(1024) |
|
766 |
reader = cctx.stream_reader(b |
|
|
771 | reader = cctx.stream_reader(b"foo") | |
|
767 | 772 | self.assertEqual(reader.readinto(b), len(foo)) |
|
768 | self.assertEqual(b[0:len(foo)], foo) | |
|
773 | self.assertEqual(b[0 : len(foo)], foo) | |
|
769 | 774 | self.assertEqual(reader.readinto(b), 0) |
|
770 | self.assertEqual(b[0:len(foo)], foo) | |
|
775 | self.assertEqual(b[0 : len(foo)], foo) | |
|
771 | 776 | |
|
772 | 777 | # readinto() with small reads. |
|
773 | 778 | b = bytearray(1024) |
|
774 |
reader = cctx.stream_reader(b |
|
|
779 | reader = cctx.stream_reader(b"foo", read_size=1) | |
|
775 | 780 | self.assertEqual(reader.readinto(b), len(foo)) |
|
776 | self.assertEqual(b[0:len(foo)], foo) | |
|
781 | self.assertEqual(b[0 : len(foo)], foo) | |
|
777 | 782 | |
|
778 | 783 | # Too small destination buffer. |
|
779 | 784 | b = bytearray(2) |
|
780 |
reader = cctx.stream_reader(b |
|
|
785 | reader = cctx.stream_reader(b"foo") | |
|
781 | 786 | self.assertEqual(reader.readinto(b), 2) |
|
782 | 787 | self.assertEqual(b[:], foo[0:2]) |
|
783 | 788 | self.assertEqual(reader.readinto(b), 2) |
@@ -787,41 +792,41 class TestCompressor_stream_reader(unitt | |||
|
787 | 792 | |
|
788 | 793 | def test_readinto1(self): |
|
789 | 794 | cctx = zstd.ZstdCompressor() |
|
790 |
foo = b |
|
|
795 | foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) | |
|
791 | 796 | |
|
792 |
reader = cctx.stream_reader(b |
|
|
797 | reader = cctx.stream_reader(b"foo") | |
|
793 | 798 | with self.assertRaises(Exception): |
|
794 |
reader.readinto1(b |
|
|
799 | reader.readinto1(b"foobar") | |
|
795 | 800 | |
|
796 | 801 | b = bytearray(1024) |
|
797 |
source = OpCountingBytesIO(b |
|
|
802 | source = OpCountingBytesIO(b"foo") | |
|
798 | 803 | reader = cctx.stream_reader(source) |
|
799 | 804 | self.assertEqual(reader.readinto1(b), len(foo)) |
|
800 | self.assertEqual(b[0:len(foo)], foo) | |
|
805 | self.assertEqual(b[0 : len(foo)], foo) | |
|
801 | 806 | self.assertEqual(source._read_count, 2) |
|
802 | 807 | |
|
803 | 808 | # readinto1() with small reads. |
|
804 | 809 | b = bytearray(1024) |
|
805 |
source = OpCountingBytesIO(b |
|
|
810 | source = OpCountingBytesIO(b"foo") | |
|
806 | 811 | reader = cctx.stream_reader(source, read_size=1) |
|
807 | 812 | self.assertEqual(reader.readinto1(b), len(foo)) |
|
808 | self.assertEqual(b[0:len(foo)], foo) | |
|
813 | self.assertEqual(b[0 : len(foo)], foo) | |
|
809 | 814 | self.assertEqual(source._read_count, 4) |
|
810 | 815 | |
|
811 | 816 | def test_read1(self): |
|
812 | 817 | cctx = zstd.ZstdCompressor() |
|
813 |
foo = b |
|
|
818 | foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) | |
|
814 | 819 | |
|
815 |
b = OpCountingBytesIO(b |
|
|
820 | b = OpCountingBytesIO(b"foo") | |
|
816 | 821 | reader = cctx.stream_reader(b) |
|
817 | 822 | |
|
818 | 823 | self.assertEqual(reader.read1(), foo) |
|
819 | 824 | self.assertEqual(b._read_count, 2) |
|
820 | 825 | |
|
821 |
b = OpCountingBytesIO(b |
|
|
826 | b = OpCountingBytesIO(b"foo") | |
|
822 | 827 | reader = cctx.stream_reader(b) |
|
823 | 828 | |
|
824 |
self.assertEqual(reader.read1(0), b |
|
|
829 | self.assertEqual(reader.read1(0), b"") | |
|
825 | 830 | self.assertEqual(reader.read1(2), foo[0:2]) |
|
826 | 831 | self.assertEqual(b._read_count, 2) |
|
827 | 832 | self.assertEqual(reader.read1(2), foo[2:4]) |
@@ -829,7 +834,7 class TestCompressor_stream_reader(unitt | |||
|
829 | 834 | |
|
830 | 835 | |
|
831 | 836 | @make_cffi |
|
832 |
class TestCompressor_stream_writer( |
|
|
837 | class TestCompressor_stream_writer(TestCase): | |
|
833 | 838 | def test_io_api(self): |
|
834 | 839 | buffer = io.BytesIO() |
|
835 | 840 | cctx = zstd.ZstdCompressor() |
@@ -899,7 +904,7 class TestCompressor_stream_writer(unitt | |||
|
899 | 904 | self.assertFalse(writer.closed) |
|
900 | 905 | |
|
901 | 906 | def test_fileno_file(self): |
|
902 |
with tempfile.TemporaryFile( |
|
|
907 | with tempfile.TemporaryFile("wb") as tf: | |
|
903 | 908 | cctx = zstd.ZstdCompressor() |
|
904 | 909 | writer = cctx.stream_writer(tf) |
|
905 | 910 | |
@@ -910,33 +915,35 class TestCompressor_stream_writer(unitt | |||
|
910 | 915 | cctx = zstd.ZstdCompressor(level=1) |
|
911 | 916 | writer = cctx.stream_writer(buffer) |
|
912 | 917 | |
|
913 |
writer.write(b |
|
|
918 | writer.write(b"foo" * 1024) | |
|
914 | 919 | self.assertFalse(writer.closed) |
|
915 | 920 | self.assertFalse(buffer.closed) |
|
916 | 921 | writer.close() |
|
917 | 922 | self.assertTrue(writer.closed) |
|
918 | 923 | self.assertTrue(buffer.closed) |
|
919 | 924 | |
|
920 |
with self.assertRaisesRegex |
|
|
921 |
writer.write(b |
|
|
925 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
926 | writer.write(b"foo") | |
|
922 | 927 | |
|
923 |
with self.assertRaisesRegex |
|
|
928 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
924 | 929 | writer.flush() |
|
925 | 930 | |
|
926 |
with self.assertRaisesRegex |
|
|
931 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
927 | 932 | with writer: |
|
928 | 933 | pass |
|
929 | 934 | |
|
930 |
self.assertEqual( |
|
|
931 | b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f' | |
|
932 | b'\x6f\x01\x00\xfa\xd3\x77\x43') | |
|
935 | self.assertEqual( | |
|
936 | buffer.getvalue(), | |
|
937 | b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f" | |
|
938 | b"\x6f\x01\x00\xfa\xd3\x77\x43", | |
|
939 | ) | |
|
933 | 940 | |
|
934 | 941 | # Context manager exit should close stream. |
|
935 | 942 | buffer = io.BytesIO() |
|
936 | 943 | writer = cctx.stream_writer(buffer) |
|
937 | 944 | |
|
938 | 945 | with writer: |
|
939 |
writer.write(b |
|
|
946 | writer.write(b"foo") | |
|
940 | 947 | |
|
941 | 948 | self.assertTrue(writer.closed) |
|
942 | 949 | |
@@ -944,10 +951,10 class TestCompressor_stream_writer(unitt | |||
|
944 | 951 | buffer = NonClosingBytesIO() |
|
945 | 952 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
946 | 953 | with cctx.stream_writer(buffer) as compressor: |
|
947 |
compressor.write(b |
|
|
954 | compressor.write(b"") | |
|
948 | 955 | |
|
949 | 956 | result = buffer.getvalue() |
|
950 |
self.assertEqual(result, b |
|
|
957 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
951 | 958 | |
|
952 | 959 | params = zstd.get_frame_parameters(result) |
|
953 | 960 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
@@ -958,11 +965,11 class TestCompressor_stream_writer(unitt | |||
|
958 | 965 | # Test without context manager. |
|
959 | 966 | buffer = io.BytesIO() |
|
960 | 967 | compressor = cctx.stream_writer(buffer) |
|
961 |
self.assertEqual(compressor.write(b |
|
|
962 |
self.assertEqual(buffer.getvalue(), b |
|
|
968 | self.assertEqual(compressor.write(b""), 0) | |
|
969 | self.assertEqual(buffer.getvalue(), b"") | |
|
963 | 970 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9) |
|
964 | 971 | result = buffer.getvalue() |
|
965 |
self.assertEqual(result, b |
|
|
972 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
966 | 973 | |
|
967 | 974 | params = zstd.get_frame_parameters(result) |
|
968 | 975 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
@@ -972,18 +979,18 class TestCompressor_stream_writer(unitt | |||
|
972 | 979 | |
|
973 | 980 | # Test write_return_read=True |
|
974 | 981 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
975 |
self.assertEqual(compressor.write(b |
|
|
982 | self.assertEqual(compressor.write(b""), 0) | |
|
976 | 983 | |
|
977 | 984 | def test_input_types(self): |
|
978 |
expected = b |
|
|
985 | expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" | |
|
979 | 986 | cctx = zstd.ZstdCompressor(level=1) |
|
980 | 987 | |
|
981 | 988 | mutable_array = bytearray(3) |
|
982 |
mutable_array[:] = b |
|
|
989 | mutable_array[:] = b"foo" | |
|
983 | 990 | |
|
984 | 991 | sources = [ |
|
985 |
memoryview(b |
|
|
986 |
bytearray(b |
|
|
992 | memoryview(b"foo"), | |
|
993 | bytearray(b"foo"), | |
|
987 | 994 | mutable_array, |
|
988 | 995 | ] |
|
989 | 996 | |
@@ -1001,51 +1008,55 class TestCompressor_stream_writer(unitt | |||
|
1001 | 1008 | buffer = NonClosingBytesIO() |
|
1002 | 1009 | cctx = zstd.ZstdCompressor(level=5) |
|
1003 | 1010 | with cctx.stream_writer(buffer) as compressor: |
|
1004 |
self.assertEqual(compressor.write(b |
|
|
1005 |
self.assertEqual(compressor.write(b |
|
|
1006 |
self.assertEqual(compressor.write(b |
|
|
1011 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1012 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1013 | self.assertEqual(compressor.write(b"x" * 8192), 0) | |
|
1007 | 1014 | |
|
1008 | 1015 | result = buffer.getvalue() |
|
1009 |
self.assertEqual( |
|
|
1010 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' | |
|
1011 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') | |
|
1016 | self.assertEqual( | |
|
1017 | result, | |
|
1018 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" | |
|
1019 | b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", | |
|
1020 | ) | |
|
1012 | 1021 | |
|
1013 | 1022 | # Test without context manager. |
|
1014 | 1023 | buffer = io.BytesIO() |
|
1015 | 1024 | compressor = cctx.stream_writer(buffer) |
|
1016 |
self.assertEqual(compressor.write(b |
|
|
1017 |
self.assertEqual(compressor.write(b |
|
|
1018 |
self.assertEqual(compressor.write(b |
|
|
1025 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1026 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1027 | self.assertEqual(compressor.write(b"x" * 8192), 0) | |
|
1019 | 1028 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) |
|
1020 | 1029 | result = buffer.getvalue() |
|
1021 |
self.assertEqual( |
|
|
1022 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' | |
|
1023 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') | |
|
1030 | self.assertEqual( | |
|
1031 | result, | |
|
1032 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" | |
|
1033 | b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", | |
|
1034 | ) | |
|
1024 | 1035 | |
|
1025 | 1036 | # Test with write_return_read=True. |
|
1026 | 1037 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
1027 |
self.assertEqual(compressor.write(b |
|
|
1028 |
self.assertEqual(compressor.write(b |
|
|
1029 |
self.assertEqual(compressor.write(b |
|
|
1038 | self.assertEqual(compressor.write(b"foo"), 3) | |
|
1039 | self.assertEqual(compressor.write(b"barbiz"), 6) | |
|
1040 | self.assertEqual(compressor.write(b"x" * 8192), 8192) | |
|
1030 | 1041 | |
|
1031 | 1042 | def test_dictionary(self): |
|
1032 | 1043 | samples = [] |
|
1033 | 1044 | for i in range(128): |
|
1034 |
samples.append(b |
|
|
1035 |
samples.append(b |
|
|
1036 |
samples.append(b |
|
|
1045 | samples.append(b"foo" * 64) | |
|
1046 | samples.append(b"bar" * 64) | |
|
1047 | samples.append(b"foobar" * 64) | |
|
1037 | 1048 | |
|
1038 | 1049 | d = zstd.train_dictionary(8192, samples) |
|
1039 | 1050 | |
|
1040 | 1051 | h = hashlib.sha1(d.as_bytes()).hexdigest() |
|
1041 |
self.assertEqual(h, |
|
|
1052 | self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e") | |
|
1042 | 1053 | |
|
1043 | 1054 | buffer = NonClosingBytesIO() |
|
1044 | 1055 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
1045 | 1056 | with cctx.stream_writer(buffer) as compressor: |
|
1046 |
self.assertEqual(compressor.write(b |
|
|
1047 |
self.assertEqual(compressor.write(b |
|
|
1048 |
self.assertEqual(compressor.write(b |
|
|
1057 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1058 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1059 | self.assertEqual(compressor.write(b"foo" * 16384), 0) | |
|
1049 | 1060 | |
|
1050 | 1061 | compressed = buffer.getvalue() |
|
1051 | 1062 | |
@@ -1056,14 +1067,15 class TestCompressor_stream_writer(unitt | |||
|
1056 | 1067 | self.assertFalse(params.has_checksum) |
|
1057 | 1068 | |
|
1058 | 1069 | h = hashlib.sha1(compressed).hexdigest() |
|
1059 |
self.assertEqual(h, |
|
|
1070 | self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06") | |
|
1060 | 1071 | |
|
1061 |
source = b |
|
|
1072 | source = b"foo" + b"bar" + (b"foo" * 16384) | |
|
1062 | 1073 | |
|
1063 | 1074 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1064 | 1075 | |
|
1065 | self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)), | |
|
1066 | source) | |
|
1076 | self.assertEqual( | |
|
1077 | dctx.decompress(compressed, max_output_size=len(source)), source | |
|
1078 | ) | |
|
1067 | 1079 | |
|
1068 | 1080 | def test_compression_params(self): |
|
1069 | 1081 | params = zstd.ZstdCompressionParameters( |
@@ -1073,14 +1085,15 class TestCompressor_stream_writer(unitt | |||
|
1073 | 1085 | min_match=5, |
|
1074 | 1086 | search_log=4, |
|
1075 | 1087 | target_length=10, |
|
1076 |
strategy=zstd.STRATEGY_FAST |
|
|
1088 | strategy=zstd.STRATEGY_FAST, | |
|
1089 | ) | |
|
1077 | 1090 | |
|
1078 | 1091 | buffer = NonClosingBytesIO() |
|
1079 | 1092 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1080 | 1093 | with cctx.stream_writer(buffer) as compressor: |
|
1081 |
self.assertEqual(compressor.write(b |
|
|
1082 |
self.assertEqual(compressor.write(b |
|
|
1083 |
self.assertEqual(compressor.write(b |
|
|
1094 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1095 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1096 | self.assertEqual(compressor.write(b"foobar" * 16384), 0) | |
|
1084 | 1097 | |
|
1085 | 1098 | compressed = buffer.getvalue() |
|
1086 | 1099 | |
@@ -1091,18 +1104,18 class TestCompressor_stream_writer(unitt | |||
|
1091 | 1104 | self.assertFalse(params.has_checksum) |
|
1092 | 1105 | |
|
1093 | 1106 | h = hashlib.sha1(compressed).hexdigest() |
|
1094 |
self.assertEqual(h, |
|
|
1107 | self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b") | |
|
1095 | 1108 | |
|
1096 | 1109 | def test_write_checksum(self): |
|
1097 | 1110 | no_checksum = NonClosingBytesIO() |
|
1098 | 1111 | cctx = zstd.ZstdCompressor(level=1) |
|
1099 | 1112 | with cctx.stream_writer(no_checksum) as compressor: |
|
1100 |
self.assertEqual(compressor.write(b |
|
|
1113 | self.assertEqual(compressor.write(b"foobar"), 0) | |
|
1101 | 1114 | |
|
1102 | 1115 | with_checksum = NonClosingBytesIO() |
|
1103 | 1116 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
1104 | 1117 | with cctx.stream_writer(with_checksum) as compressor: |
|
1105 |
self.assertEqual(compressor.write(b |
|
|
1118 | self.assertEqual(compressor.write(b"foobar"), 0) | |
|
1106 | 1119 | |
|
1107 | 1120 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
1108 | 1121 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
@@ -1113,29 +1126,27 class TestCompressor_stream_writer(unitt | |||
|
1113 | 1126 | self.assertFalse(no_params.has_checksum) |
|
1114 | 1127 | self.assertTrue(with_params.has_checksum) |
|
1115 | 1128 | |
|
1116 | self.assertEqual(len(with_checksum.getvalue()), | |
|
1117 | len(no_checksum.getvalue()) + 4) | |
|
1129 | self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) | |
|
1118 | 1130 | |
|
1119 | 1131 | def test_write_content_size(self): |
|
1120 | 1132 | no_size = NonClosingBytesIO() |
|
1121 | 1133 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1122 | 1134 | with cctx.stream_writer(no_size) as compressor: |
|
1123 |
self.assertEqual(compressor.write(b |
|
|
1135 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
|
1124 | 1136 | |
|
1125 | 1137 | with_size = NonClosingBytesIO() |
|
1126 | 1138 | cctx = zstd.ZstdCompressor(level=1) |
|
1127 | 1139 | with cctx.stream_writer(with_size) as compressor: |
|
1128 |
self.assertEqual(compressor.write(b |
|
|
1140 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
|
1129 | 1141 | |
|
1130 | 1142 | # Source size is not known in streaming mode, so header not |
|
1131 | 1143 | # written. |
|
1132 | self.assertEqual(len(with_size.getvalue()), | |
|
1133 | len(no_size.getvalue())) | |
|
1144 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) | |
|
1134 | 1145 | |
|
1135 | 1146 | # Declaring size will write the header. |
|
1136 | 1147 | with_size = NonClosingBytesIO() |
|
1137 |
with cctx.stream_writer(with_size, size=len(b |
|
|
1138 |
self.assertEqual(compressor.write(b |
|
|
1148 | with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor: | |
|
1149 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
|
1139 | 1150 | |
|
1140 | 1151 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
1141 | 1152 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
@@ -1146,31 +1157,30 class TestCompressor_stream_writer(unitt | |||
|
1146 | 1157 | self.assertFalse(no_params.has_checksum) |
|
1147 | 1158 | self.assertFalse(with_params.has_checksum) |
|
1148 | 1159 | |
|
1149 | self.assertEqual(len(with_size.getvalue()), | |
|
1150 | len(no_size.getvalue()) + 1) | |
|
1160 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) | |
|
1151 | 1161 | |
|
1152 | 1162 | def test_no_dict_id(self): |
|
1153 | 1163 | samples = [] |
|
1154 | 1164 | for i in range(128): |
|
1155 |
samples.append(b |
|
|
1156 |
samples.append(b |
|
|
1157 |
samples.append(b |
|
|
1165 | samples.append(b"foo" * 64) | |
|
1166 | samples.append(b"bar" * 64) | |
|
1167 | samples.append(b"foobar" * 64) | |
|
1158 | 1168 | |
|
1159 | 1169 | d = zstd.train_dictionary(1024, samples) |
|
1160 | 1170 | |
|
1161 | 1171 | with_dict_id = NonClosingBytesIO() |
|
1162 | 1172 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
1163 | 1173 | with cctx.stream_writer(with_dict_id) as compressor: |
|
1164 |
self.assertEqual(compressor.write(b |
|
|
1174 | self.assertEqual(compressor.write(b"foobarfoobar"), 0) | |
|
1165 | 1175 | |
|
1166 |
self.assertEqual(with_dict_id.getvalue()[4:5], b |
|
|
1176 | self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03") | |
|
1167 | 1177 | |
|
1168 | 1178 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
1169 | 1179 | no_dict_id = NonClosingBytesIO() |
|
1170 | 1180 | with cctx.stream_writer(no_dict_id) as compressor: |
|
1171 |
self.assertEqual(compressor.write(b |
|
|
1181 | self.assertEqual(compressor.write(b"foobarfoobar"), 0) | |
|
1172 | 1182 | |
|
1173 |
self.assertEqual(no_dict_id.getvalue()[4:5], b |
|
|
1183 | self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00") | |
|
1174 | 1184 | |
|
1175 | 1185 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
1176 | 1186 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
@@ -1181,14 +1191,13 class TestCompressor_stream_writer(unitt | |||
|
1181 | 1191 | self.assertFalse(no_params.has_checksum) |
|
1182 | 1192 | self.assertFalse(with_params.has_checksum) |
|
1183 | 1193 | |
|
1184 | self.assertEqual(len(with_dict_id.getvalue()), | |
|
1185 | len(no_dict_id.getvalue()) + 4) | |
|
1194 | self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4) | |
|
1186 | 1195 | |
|
1187 | 1196 | def test_memory_size(self): |
|
1188 | 1197 | cctx = zstd.ZstdCompressor(level=3) |
|
1189 | 1198 | buffer = io.BytesIO() |
|
1190 | 1199 | with cctx.stream_writer(buffer) as compressor: |
|
1191 |
compressor.write(b |
|
|
1200 | compressor.write(b"foo") | |
|
1192 | 1201 | size = compressor.memory_size() |
|
1193 | 1202 | |
|
1194 | 1203 | self.assertGreater(size, 100000) |
@@ -1197,9 +1206,9 class TestCompressor_stream_writer(unitt | |||
|
1197 | 1206 | cctx = zstd.ZstdCompressor(level=3) |
|
1198 | 1207 | dest = OpCountingBytesIO() |
|
1199 | 1208 | with cctx.stream_writer(dest, write_size=1) as compressor: |
|
1200 |
self.assertEqual(compressor.write(b |
|
|
1201 |
self.assertEqual(compressor.write(b |
|
|
1202 |
self.assertEqual(compressor.write(b |
|
|
1209 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1210 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1211 | self.assertEqual(compressor.write(b"foobar"), 0) | |
|
1203 | 1212 | |
|
1204 | 1213 | self.assertEqual(len(dest.getvalue()), dest._write_count) |
|
1205 | 1214 | |
@@ -1207,15 +1216,15 class TestCompressor_stream_writer(unitt | |||
|
1207 | 1216 | cctx = zstd.ZstdCompressor(level=3) |
|
1208 | 1217 | dest = OpCountingBytesIO() |
|
1209 | 1218 | with cctx.stream_writer(dest) as compressor: |
|
1210 |
self.assertEqual(compressor.write(b |
|
|
1219 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1211 | 1220 | self.assertEqual(dest._write_count, 0) |
|
1212 | 1221 | self.assertEqual(compressor.flush(), 12) |
|
1213 | 1222 | self.assertEqual(dest._write_count, 1) |
|
1214 |
self.assertEqual(compressor.write(b |
|
|
1223 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1215 | 1224 | self.assertEqual(dest._write_count, 1) |
|
1216 | 1225 | self.assertEqual(compressor.flush(), 6) |
|
1217 | 1226 | self.assertEqual(dest._write_count, 2) |
|
1218 |
self.assertEqual(compressor.write(b |
|
|
1227 | self.assertEqual(compressor.write(b"baz"), 0) | |
|
1219 | 1228 | |
|
1220 | 1229 | self.assertEqual(dest._write_count, 3) |
|
1221 | 1230 | |
@@ -1223,7 +1232,7 class TestCompressor_stream_writer(unitt | |||
|
1223 | 1232 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
1224 | 1233 | dest = OpCountingBytesIO() |
|
1225 | 1234 | with cctx.stream_writer(dest) as compressor: |
|
1226 |
self.assertEqual(compressor.write(b |
|
|
1235 | self.assertEqual(compressor.write(b"foobar" * 8192), 0) | |
|
1227 | 1236 | count = dest._write_count |
|
1228 | 1237 | offset = dest.tell() |
|
1229 | 1238 | self.assertEqual(compressor.flush(), 23) |
@@ -1238,41 +1247,43 class TestCompressor_stream_writer(unitt | |||
|
1238 | 1247 | self.assertEqual(len(trailing), 7) |
|
1239 | 1248 | |
|
1240 | 1249 | header = trailing[0:3] |
|
1241 |
self.assertEqual(header, b |
|
|
1250 | self.assertEqual(header, b"\x01\x00\x00") | |
|
1242 | 1251 | |
|
1243 | 1252 | def test_flush_frame(self): |
|
1244 | 1253 | cctx = zstd.ZstdCompressor(level=3) |
|
1245 | 1254 | dest = OpCountingBytesIO() |
|
1246 | 1255 | |
|
1247 | 1256 | with cctx.stream_writer(dest) as compressor: |
|
1248 |
self.assertEqual(compressor.write(b |
|
|
1257 | self.assertEqual(compressor.write(b"foobar" * 8192), 0) | |
|
1249 | 1258 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) |
|
1250 |
compressor.write(b |
|
|
1259 | compressor.write(b"biz" * 16384) | |
|
1251 | 1260 | |
|
1252 |
self.assertEqual( |
|
|
1253 | # Frame 1. | |
|
1254 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f' | |
|
1255 | b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08' | |
|
1256 | # Frame 2. | |
|
1257 | b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a' | |
|
1258 | b'\x01\x00\xfa\x3f\x75\x37\x04') | |
|
1261 | self.assertEqual( | |
|
1262 | dest.getvalue(), | |
|
1263 | # Frame 1. | |
|
1264 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f" | |
|
1265 | b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08" | |
|
1266 | # Frame 2. | |
|
1267 | b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a" | |
|
1268 | b"\x01\x00\xfa\x3f\x75\x37\x04", | |
|
1269 | ) | |
|
1259 | 1270 | |
|
1260 | 1271 | def test_bad_flush_mode(self): |
|
1261 | 1272 | cctx = zstd.ZstdCompressor() |
|
1262 | 1273 | dest = io.BytesIO() |
|
1263 | 1274 | with cctx.stream_writer(dest) as compressor: |
|
1264 |
with self.assertRaisesRegex |
|
|
1275 | with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"): | |
|
1265 | 1276 | compressor.flush(flush_mode=42) |
|
1266 | 1277 | |
|
1267 | 1278 | def test_multithreaded(self): |
|
1268 | 1279 | dest = NonClosingBytesIO() |
|
1269 | 1280 | cctx = zstd.ZstdCompressor(threads=2) |
|
1270 | 1281 | with cctx.stream_writer(dest) as compressor: |
|
1271 |
compressor.write(b |
|
|
1272 |
compressor.write(b |
|
|
1273 |
compressor.write(b |
|
|
1282 | compressor.write(b"a" * 1048576) | |
|
1283 | compressor.write(b"b" * 1048576) | |
|
1284 | compressor.write(b"c" * 1048576) | |
|
1274 | 1285 | |
|
1275 |
self.assertEqual(len(dest.getvalue()), |
|
|
1286 | self.assertEqual(len(dest.getvalue()), 111) | |
|
1276 | 1287 | |
|
1277 | 1288 | def test_tell(self): |
|
1278 | 1289 | dest = io.BytesIO() |
@@ -1281,7 +1292,7 class TestCompressor_stream_writer(unitt | |||
|
1281 | 1292 | self.assertEqual(compressor.tell(), 0) |
|
1282 | 1293 | |
|
1283 | 1294 | for i in range(256): |
|
1284 |
compressor.write(b |
|
|
1295 | compressor.write(b"foo" * (i + 1)) | |
|
1285 | 1296 | self.assertEqual(compressor.tell(), dest.tell()) |
|
1286 | 1297 | |
|
1287 | 1298 | def test_bad_size(self): |
@@ -1289,9 +1300,9 class TestCompressor_stream_writer(unitt | |||
|
1289 | 1300 | |
|
1290 | 1301 | dest = io.BytesIO() |
|
1291 | 1302 | |
|
1292 |
with self.assertRaisesRegex |
|
|
1303 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
1293 | 1304 | with cctx.stream_writer(dest, size=2) as compressor: |
|
1294 |
compressor.write(b |
|
|
1305 | compressor.write(b"foo") | |
|
1295 | 1306 | |
|
1296 | 1307 | # Test another operation. |
|
1297 | 1308 | with cctx.stream_writer(dest, size=42): |
@@ -1301,20 +1312,20 class TestCompressor_stream_writer(unitt | |||
|
1301 | 1312 | dest = NonClosingBytesIO() |
|
1302 | 1313 | cctx = zstd.ZstdCompressor() |
|
1303 | 1314 | with cctx.stream_writer(dest) as compressor: |
|
1304 |
with tarfile.open( |
|
|
1305 |
tf.add(__file__, |
|
|
1315 | with tarfile.open("tf", mode="w|", fileobj=compressor) as tf: | |
|
1316 | tf.add(__file__, "test_compressor.py") | |
|
1306 | 1317 | |
|
1307 | 1318 | dest = io.BytesIO(dest.getvalue()) |
|
1308 | 1319 | |
|
1309 | 1320 | dctx = zstd.ZstdDecompressor() |
|
1310 | 1321 | with dctx.stream_reader(dest) as reader: |
|
1311 |
with tarfile.open(mode= |
|
|
1322 | with tarfile.open(mode="r|", fileobj=reader) as tf: | |
|
1312 | 1323 | for member in tf: |
|
1313 |
self.assertEqual(member.name, |
|
|
1324 | self.assertEqual(member.name, "test_compressor.py") | |
|
1314 | 1325 | |
|
1315 | 1326 | |
|
1316 | 1327 | @make_cffi |
|
1317 |
class TestCompressor_read_to_iter( |
|
|
1328 | class TestCompressor_read_to_iter(TestCase): | |
|
1318 | 1329 | def test_type_validation(self): |
|
1319 | 1330 | cctx = zstd.ZstdCompressor() |
|
1320 | 1331 | |
@@ -1323,10 +1334,10 class TestCompressor_read_to_iter(unitte | |||
|
1323 | 1334 | pass |
|
1324 | 1335 | |
|
1325 | 1336 | # Buffer protocol works. |
|
1326 |
for chunk in cctx.read_to_iter(b |
|
|
1337 | for chunk in cctx.read_to_iter(b"foobar"): | |
|
1327 | 1338 | pass |
|
1328 | 1339 | |
|
1329 |
with self.assertRaisesRegex |
|
|
1340 | with self.assertRaisesRegex(ValueError, "must pass an object with a read"): | |
|
1330 | 1341 | for chunk in cctx.read_to_iter(True): |
|
1331 | 1342 | pass |
|
1332 | 1343 | |
@@ -1337,22 +1348,22 class TestCompressor_read_to_iter(unitte | |||
|
1337 | 1348 | it = cctx.read_to_iter(source) |
|
1338 | 1349 | chunks = list(it) |
|
1339 | 1350 | self.assertEqual(len(chunks), 1) |
|
1340 |
compressed = b |
|
|
1341 |
self.assertEqual(compressed, b |
|
|
1351 | compressed = b"".join(chunks) | |
|
1352 | self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
1342 | 1353 | |
|
1343 | 1354 | # And again with the buffer protocol. |
|
1344 |
it = cctx.read_to_iter(b |
|
|
1355 | it = cctx.read_to_iter(b"") | |
|
1345 | 1356 | chunks = list(it) |
|
1346 | 1357 | self.assertEqual(len(chunks), 1) |
|
1347 |
compressed2 = b |
|
|
1358 | compressed2 = b"".join(chunks) | |
|
1348 | 1359 | self.assertEqual(compressed2, compressed) |
|
1349 | 1360 | |
|
1350 | 1361 | def test_read_large(self): |
|
1351 | 1362 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1352 | 1363 | |
|
1353 | 1364 | source = io.BytesIO() |
|
1354 |
source.write(b |
|
|
1355 |
source.write(b |
|
|
1365 | source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
|
1366 | source.write(b"o") | |
|
1356 | 1367 | source.seek(0) |
|
1357 | 1368 | |
|
1358 | 1369 | # Creating an iterator should not perform any compression until |
@@ -1380,9 +1391,9 class TestCompressor_read_to_iter(unitte | |||
|
1380 | 1391 | next(it) |
|
1381 | 1392 | |
|
1382 | 1393 | # We should get the same output as the one-shot compression mechanism. |
|
1383 |
self.assertEqual(b |
|
|
1394 | self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) | |
|
1384 | 1395 | |
|
1385 |
params = zstd.get_frame_parameters(b |
|
|
1396 | params = zstd.get_frame_parameters(b"".join(chunks)) | |
|
1386 | 1397 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1387 | 1398 | self.assertEqual(params.window_size, 262144) |
|
1388 | 1399 | self.assertEqual(params.dict_id, 0) |
@@ -1393,16 +1404,16 class TestCompressor_read_to_iter(unitte | |||
|
1393 | 1404 | chunks = list(it) |
|
1394 | 1405 | self.assertEqual(len(chunks), 2) |
|
1395 | 1406 | |
|
1396 |
params = zstd.get_frame_parameters(b |
|
|
1407 | params = zstd.get_frame_parameters(b"".join(chunks)) | |
|
1397 | 1408 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1398 | #self.assertEqual(params.window_size, 262144) | |
|
1409 | # self.assertEqual(params.window_size, 262144) | |
|
1399 | 1410 | self.assertEqual(params.dict_id, 0) |
|
1400 | 1411 | self.assertFalse(params.has_checksum) |
|
1401 | 1412 | |
|
1402 |
self.assertEqual(b |
|
|
1413 | self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) | |
|
1403 | 1414 | |
|
1404 | 1415 | def test_read_write_size(self): |
|
1405 |
source = OpCountingBytesIO(b |
|
|
1416 | source = OpCountingBytesIO(b"foobarfoobar") | |
|
1406 | 1417 | cctx = zstd.ZstdCompressor(level=3) |
|
1407 | 1418 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): |
|
1408 | 1419 | self.assertEqual(len(chunk), 1) |
@@ -1411,42 +1422,42 class TestCompressor_read_to_iter(unitte | |||
|
1411 | 1422 | |
|
1412 | 1423 | def test_multithreaded(self): |
|
1413 | 1424 | source = io.BytesIO() |
|
1414 |
source.write(b |
|
|
1415 |
source.write(b |
|
|
1416 |
source.write(b |
|
|
1425 | source.write(b"a" * 1048576) | |
|
1426 | source.write(b"b" * 1048576) | |
|
1427 | source.write(b"c" * 1048576) | |
|
1417 | 1428 | source.seek(0) |
|
1418 | 1429 | |
|
1419 | 1430 | cctx = zstd.ZstdCompressor(threads=2) |
|
1420 | 1431 | |
|
1421 |
compressed = b |
|
|
1422 |
self.assertEqual(len(compressed), |
|
|
1432 | compressed = b"".join(cctx.read_to_iter(source)) | |
|
1433 | self.assertEqual(len(compressed), 111) | |
|
1423 | 1434 | |
|
1424 | 1435 | def test_bad_size(self): |
|
1425 | 1436 | cctx = zstd.ZstdCompressor() |
|
1426 | 1437 | |
|
1427 |
source = io.BytesIO(b |
|
|
1438 | source = io.BytesIO(b"a" * 42) | |
|
1428 | 1439 | |
|
1429 |
with self.assertRaisesRegex |
|
|
1430 |
b |
|
|
1440 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
1441 | b"".join(cctx.read_to_iter(source, size=2)) | |
|
1431 | 1442 | |
|
1432 | 1443 | # Test another operation on errored compressor. |
|
1433 |
b |
|
|
1444 | b"".join(cctx.read_to_iter(source)) | |
|
1434 | 1445 | |
|
1435 | 1446 | |
|
1436 | 1447 | @make_cffi |
|
1437 |
class TestCompressor_chunker( |
|
|
1448 | class TestCompressor_chunker(TestCase): | |
|
1438 | 1449 | def test_empty(self): |
|
1439 | 1450 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1440 | 1451 | chunker = cctx.chunker() |
|
1441 | 1452 | |
|
1442 |
it = chunker.compress(b |
|
|
1453 | it = chunker.compress(b"") | |
|
1443 | 1454 | |
|
1444 | 1455 | with self.assertRaises(StopIteration): |
|
1445 | 1456 | next(it) |
|
1446 | 1457 | |
|
1447 | 1458 | it = chunker.finish() |
|
1448 | 1459 | |
|
1449 |
self.assertEqual(next(it), b |
|
|
1460 | self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00") | |
|
1450 | 1461 | |
|
1451 | 1462 | with self.assertRaises(StopIteration): |
|
1452 | 1463 | next(it) |
@@ -1455,21 +1466,23 class TestCompressor_chunker(unittest.Te | |||
|
1455 | 1466 | cctx = zstd.ZstdCompressor() |
|
1456 | 1467 | chunker = cctx.chunker() |
|
1457 | 1468 | |
|
1458 |
it = chunker.compress(b |
|
|
1469 | it = chunker.compress(b"foobar") | |
|
1459 | 1470 | |
|
1460 | 1471 | with self.assertRaises(StopIteration): |
|
1461 | 1472 | next(it) |
|
1462 | 1473 | |
|
1463 |
it = chunker.compress(b |
|
|
1474 | it = chunker.compress(b"baz" * 30) | |
|
1464 | 1475 | |
|
1465 | 1476 | with self.assertRaises(StopIteration): |
|
1466 | 1477 | next(it) |
|
1467 | 1478 | |
|
1468 | 1479 | it = chunker.finish() |
|
1469 | 1480 | |
|
1470 |
self.assertEqual( |
|
|
1471 | b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f' | |
|
1472 | b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e') | |
|
1481 | self.assertEqual( | |
|
1482 | next(it), | |
|
1483 | b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f" | |
|
1484 | b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e", | |
|
1485 | ) | |
|
1473 | 1486 | |
|
1474 | 1487 | with self.assertRaises(StopIteration): |
|
1475 | 1488 | next(it) |
@@ -1478,57 +1491,60 class TestCompressor_chunker(unittest.Te | |||
|
1478 | 1491 | cctx = zstd.ZstdCompressor() |
|
1479 | 1492 | chunker = cctx.chunker(size=1024) |
|
1480 | 1493 | |
|
1481 |
it = chunker.compress(b |
|
|
1494 | it = chunker.compress(b"x" * 1000) | |
|
1482 | 1495 | |
|
1483 | 1496 | with self.assertRaises(StopIteration): |
|
1484 | 1497 | next(it) |
|
1485 | 1498 | |
|
1486 |
it = chunker.compress(b |
|
|
1499 | it = chunker.compress(b"y" * 24) | |
|
1487 | 1500 | |
|
1488 | 1501 | with self.assertRaises(StopIteration): |
|
1489 | 1502 | next(it) |
|
1490 | 1503 | |
|
1491 | 1504 | chunks = list(chunker.finish()) |
|
1492 | 1505 | |
|
1493 |
self.assertEqual( |
|
|
1494 | b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00' | |
|
1495 | b'\xa0\x16\xe3\x2b\x80\x05' | |
|
1496 | ]) | |
|
1506 | self.assertEqual( | |
|
1507 | chunks, | |
|
1508 | [ | |
|
1509 | b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00" | |
|
1510 | b"\xa0\x16\xe3\x2b\x80\x05" | |
|
1511 | ], | |
|
1512 | ) | |
|
1497 | 1513 | |
|
1498 | 1514 | dctx = zstd.ZstdDecompressor() |
|
1499 | 1515 | |
|
1500 |
self.assertEqual(dctx.decompress(b |
|
|
1501 | (b'x' * 1000) + (b'y' * 24)) | |
|
1516 | self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24)) | |
|
1502 | 1517 | |
|
1503 | 1518 | def test_small_chunk_size(self): |
|
1504 | 1519 | cctx = zstd.ZstdCompressor() |
|
1505 | 1520 | chunker = cctx.chunker(chunk_size=1) |
|
1506 | 1521 | |
|
1507 |
chunks = list(chunker.compress(b |
|
|
1522 | chunks = list(chunker.compress(b"foo" * 1024)) | |
|
1508 | 1523 | self.assertEqual(chunks, []) |
|
1509 | 1524 | |
|
1510 | 1525 | chunks = list(chunker.finish()) |
|
1511 | 1526 | self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) |
|
1512 | 1527 | |
|
1513 | 1528 | self.assertEqual( |
|
1514 |
b |
|
|
1515 |
b |
|
|
1516 |
b |
|
|
1529 | b"".join(chunks), | |
|
1530 | b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00" | |
|
1531 | b"\xfa\xd3\x77\x43", | |
|
1532 | ) | |
|
1517 | 1533 | |
|
1518 | 1534 | dctx = zstd.ZstdDecompressor() |
|
1519 |
self.assertEqual( |
|
|
1520 | max_output_size=10000), | |
|
1521 | b'foo' * 1024) | |
|
1535 | self.assertEqual( | |
|
1536 | dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024 | |
|
1537 | ) | |
|
1522 | 1538 | |
|
1523 | 1539 | def test_input_types(self): |
|
1524 | 1540 | cctx = zstd.ZstdCompressor() |
|
1525 | 1541 | |
|
1526 | 1542 | mutable_array = bytearray(3) |
|
1527 |
mutable_array[:] = b |
|
|
1543 | mutable_array[:] = b"foo" | |
|
1528 | 1544 | |
|
1529 | 1545 | sources = [ |
|
1530 |
memoryview(b |
|
|
1531 |
bytearray(b |
|
|
1546 | memoryview(b"foo"), | |
|
1547 | bytearray(b"foo"), | |
|
1532 | 1548 | mutable_array, |
|
1533 | 1549 | ] |
|
1534 | 1550 | |
@@ -1536,28 +1552,32 class TestCompressor_chunker(unittest.Te | |||
|
1536 | 1552 | chunker = cctx.chunker() |
|
1537 | 1553 | |
|
1538 | 1554 | self.assertEqual(list(chunker.compress(source)), []) |
|
1539 |
self.assertEqual( |
|
|
1540 | b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f' | |
|
1541 | ]) | |
|
1555 | self.assertEqual( | |
|
1556 | list(chunker.finish()), | |
|
1557 | [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"], | |
|
1558 | ) | |
|
1542 | 1559 | |
|
1543 | 1560 | def test_flush(self): |
|
1544 | 1561 | cctx = zstd.ZstdCompressor() |
|
1545 | 1562 | chunker = cctx.chunker() |
|
1546 | 1563 | |
|
1547 |
self.assertEqual(list(chunker.compress(b |
|
|
1548 |
self.assertEqual(list(chunker.compress(b |
|
|
1564 | self.assertEqual(list(chunker.compress(b"foo" * 1024)), []) | |
|
1565 | self.assertEqual(list(chunker.compress(b"bar" * 1024)), []) | |
|
1549 | 1566 | |
|
1550 | 1567 | chunks1 = list(chunker.flush()) |
|
1551 | 1568 | |
|
1552 |
self.assertEqual( |
|
|
1553 | b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72' | |
|
1554 | b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02' | |
|
1555 | ]) | |
|
1569 | self.assertEqual( | |
|
1570 | chunks1, | |
|
1571 | [ | |
|
1572 | b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72" | |
|
1573 | b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02" | |
|
1574 | ], | |
|
1575 | ) | |
|
1556 | 1576 | |
|
1557 | 1577 | self.assertEqual(list(chunker.flush()), []) |
|
1558 | 1578 | self.assertEqual(list(chunker.flush()), []) |
|
1559 | 1579 | |
|
1560 |
self.assertEqual(list(chunker.compress(b |
|
|
1580 | self.assertEqual(list(chunker.compress(b"baz" * 1024)), []) | |
|
1561 | 1581 | |
|
1562 | 1582 | chunks2 = list(chunker.flush()) |
|
1563 | 1583 | self.assertEqual(len(chunks2), 1) |
@@ -1567,53 +1587,56 class TestCompressor_chunker(unittest.Te | |||
|
1567 | 1587 | |
|
1568 | 1588 | dctx = zstd.ZstdDecompressor() |
|
1569 | 1589 | |
|
1570 | self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3), | |
|
1571 | max_output_size=10000), | |
|
1572 | (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)) | |
|
1590 | self.assertEqual( | |
|
1591 | dctx.decompress( | |
|
1592 | b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000 | |
|
1593 | ), | |
|
1594 | (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024), | |
|
1595 | ) | |
|
1573 | 1596 | |
|
1574 | 1597 | def test_compress_after_finish(self): |
|
1575 | 1598 | cctx = zstd.ZstdCompressor() |
|
1576 | 1599 | chunker = cctx.chunker() |
|
1577 | 1600 | |
|
1578 |
list(chunker.compress(b |
|
|
1601 | list(chunker.compress(b"foo")) | |
|
1579 | 1602 | list(chunker.finish()) |
|
1580 | 1603 | |
|
1581 |
with self.assertRaisesRegex |
|
|
1582 | zstd.ZstdError, | |
|
1583 | r'cannot call compress\(\) after compression finished'): | |
|
1584 |
list(chunker.compress(b |
|
|
1604 | with self.assertRaisesRegex( | |
|
1605 | zstd.ZstdError, r"cannot call compress\(\) after compression finished" | |
|
1606 | ): | |
|
1607 | list(chunker.compress(b"foo")) | |
|
1585 | 1608 | |
|
1586 | 1609 | def test_flush_after_finish(self): |
|
1587 | 1610 | cctx = zstd.ZstdCompressor() |
|
1588 | 1611 | chunker = cctx.chunker() |
|
1589 | 1612 | |
|
1590 |
list(chunker.compress(b |
|
|
1613 | list(chunker.compress(b"foo")) | |
|
1591 | 1614 | list(chunker.finish()) |
|
1592 | 1615 | |
|
1593 |
with self.assertRaisesRegex |
|
|
1594 | zstd.ZstdError, | |
|
1595 | r'cannot call flush\(\) after compression finished'): | |
|
1616 | with self.assertRaisesRegex( | |
|
1617 | zstd.ZstdError, r"cannot call flush\(\) after compression finished" | |
|
1618 | ): | |
|
1596 | 1619 | list(chunker.flush()) |
|
1597 | 1620 | |
|
1598 | 1621 | def test_finish_after_finish(self): |
|
1599 | 1622 | cctx = zstd.ZstdCompressor() |
|
1600 | 1623 | chunker = cctx.chunker() |
|
1601 | 1624 | |
|
1602 |
list(chunker.compress(b |
|
|
1625 | list(chunker.compress(b"foo")) | |
|
1603 | 1626 | list(chunker.finish()) |
|
1604 | 1627 | |
|
1605 |
with self.assertRaisesRegex |
|
|
1606 | zstd.ZstdError, | |
|
1607 | r'cannot call finish\(\) after compression finished'): | |
|
1628 | with self.assertRaisesRegex( | |
|
1629 | zstd.ZstdError, r"cannot call finish\(\) after compression finished" | |
|
1630 | ): | |
|
1608 | 1631 | list(chunker.finish()) |
|
1609 | 1632 | |
|
1610 | 1633 | |
|
1611 |
class TestCompressor_multi_compress_to_buffer( |
|
|
1634 | class TestCompressor_multi_compress_to_buffer(TestCase): | |
|
1612 | 1635 | def test_invalid_inputs(self): |
|
1613 | 1636 | cctx = zstd.ZstdCompressor() |
|
1614 | 1637 | |
|
1615 |
if not hasattr(cctx, |
|
|
1616 |
self.skipTest( |
|
|
1638 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1639 | self.skipTest("multi_compress_to_buffer not available") | |
|
1617 | 1640 | |
|
1618 | 1641 | with self.assertRaises(TypeError): |
|
1619 | 1642 | cctx.multi_compress_to_buffer(True) |
@@ -1621,28 +1644,28 class TestCompressor_multi_compress_to_b | |||
|
1621 | 1644 | with self.assertRaises(TypeError): |
|
1622 | 1645 | cctx.multi_compress_to_buffer((1, 2)) |
|
1623 | 1646 | |
|
1624 |
with self.assertRaisesRegex |
|
|
1625 |
cctx.multi_compress_to_buffer([u |
|
|
1647 | with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): | |
|
1648 | cctx.multi_compress_to_buffer([u"foo"]) | |
|
1626 | 1649 | |
|
1627 | 1650 | def test_empty_input(self): |
|
1628 | 1651 | cctx = zstd.ZstdCompressor() |
|
1629 | 1652 | |
|
1630 |
if not hasattr(cctx, |
|
|
1631 |
self.skipTest( |
|
|
1653 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1654 | self.skipTest("multi_compress_to_buffer not available") | |
|
1632 | 1655 | |
|
1633 |
with self.assertRaisesRegex |
|
|
1656 | with self.assertRaisesRegex(ValueError, "no source elements found"): | |
|
1634 | 1657 | cctx.multi_compress_to_buffer([]) |
|
1635 | 1658 | |
|
1636 |
with self.assertRaisesRegex |
|
|
1637 |
cctx.multi_compress_to_buffer([b |
|
|
1659 | with self.assertRaisesRegex(ValueError, "source elements are empty"): | |
|
1660 | cctx.multi_compress_to_buffer([b"", b"", b""]) | |
|
1638 | 1661 | |
|
1639 | 1662 | def test_list_input(self): |
|
1640 | 1663 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1641 | 1664 | |
|
1642 |
if not hasattr(cctx, |
|
|
1643 |
self.skipTest( |
|
|
1665 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1666 | self.skipTest("multi_compress_to_buffer not available") | |
|
1644 | 1667 | |
|
1645 |
original = [b |
|
|
1668 | original = [b"foo" * 12, b"bar" * 6] | |
|
1646 | 1669 | frames = [cctx.compress(c) for c in original] |
|
1647 | 1670 | b = cctx.multi_compress_to_buffer(original) |
|
1648 | 1671 | |
@@ -1657,15 +1680,16 class TestCompressor_multi_compress_to_b | |||
|
1657 | 1680 | def test_buffer_with_segments_input(self): |
|
1658 | 1681 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1659 | 1682 | |
|
1660 |
if not hasattr(cctx, |
|
|
1661 |
self.skipTest( |
|
|
1683 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1684 | self.skipTest("multi_compress_to_buffer not available") | |
|
1662 | 1685 | |
|
1663 |
original = [b |
|
|
1686 | original = [b"foo" * 4, b"bar" * 6] | |
|
1664 | 1687 | frames = [cctx.compress(c) for c in original] |
|
1665 | 1688 | |
|
1666 |
offsets = struct.pack( |
|
|
1667 |
|
|
|
1668 | segments = zstd.BufferWithSegments(b''.join(original), offsets) | |
|
1689 | offsets = struct.pack( | |
|
1690 | "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) | |
|
1691 | ) | |
|
1692 | segments = zstd.BufferWithSegments(b"".join(original), offsets) | |
|
1669 | 1693 | |
|
1670 | 1694 | result = cctx.multi_compress_to_buffer(segments) |
|
1671 | 1695 | |
@@ -1678,28 +1702,39 class TestCompressor_multi_compress_to_b | |||
|
1678 | 1702 | def test_buffer_with_segments_collection_input(self): |
|
1679 | 1703 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1680 | 1704 | |
|
1681 |
if not hasattr(cctx, |
|
|
1682 |
self.skipTest( |
|
|
1705 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1706 | self.skipTest("multi_compress_to_buffer not available") | |
|
1683 | 1707 | |
|
1684 | 1708 | original = [ |
|
1685 |
b |
|
|
1686 |
b |
|
|
1687 |
b |
|
|
1688 |
b |
|
|
1689 |
b |
|
|
1709 | b"foo1", | |
|
1710 | b"foo2" * 2, | |
|
1711 | b"foo3" * 3, | |
|
1712 | b"foo4" * 4, | |
|
1713 | b"foo5" * 5, | |
|
1690 | 1714 | ] |
|
1691 | 1715 | |
|
1692 | 1716 | frames = [cctx.compress(c) for c in original] |
|
1693 | 1717 | |
|
1694 |
b = b |
|
|
1695 |
b1 = zstd.BufferWithSegments( |
|
|
1696 | 0, len(original[0]), | |
|
1697 | len(original[0]), len(original[1]))) | |
|
1698 |
|
|
|
1699 | b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', | |
|
1700 | 0, len(original[2]), | |
|
1701 | len(original[2]), len(original[3]), | |
|
1702 | len(original[2]) + len(original[3]), len(original[4]))) | |
|
1718 | b = b"".join([original[0], original[1]]) | |
|
1719 | b1 = zstd.BufferWithSegments( | |
|
1720 | b, | |
|
1721 | struct.pack( | |
|
1722 | "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) | |
|
1723 | ), | |
|
1724 | ) | |
|
1725 | b = b"".join([original[2], original[3], original[4]]) | |
|
1726 | b2 = zstd.BufferWithSegments( | |
|
1727 | b, | |
|
1728 | struct.pack( | |
|
1729 | "=QQQQQQ", | |
|
1730 | 0, | |
|
1731 | len(original[2]), | |
|
1732 | len(original[2]), | |
|
1733 | len(original[3]), | |
|
1734 | len(original[2]) + len(original[3]), | |
|
1735 | len(original[4]), | |
|
1736 | ), | |
|
1737 | ) | |
|
1703 | 1738 | |
|
1704 | 1739 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1705 | 1740 | |
@@ -1714,16 +1749,16 class TestCompressor_multi_compress_to_b | |||
|
1714 | 1749 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will |
|
1715 | 1750 | # make output different. |
|
1716 | 1751 | refcctx = zstd.ZstdCompressor(write_checksum=True) |
|
1717 |
reference = [refcctx.compress(b |
|
|
1752 | reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)] | |
|
1718 | 1753 | |
|
1719 | 1754 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1720 | 1755 | |
|
1721 |
if not hasattr(cctx, |
|
|
1722 |
self.skipTest( |
|
|
1756 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1757 | self.skipTest("multi_compress_to_buffer not available") | |
|
1723 | 1758 | |
|
1724 | 1759 | frames = [] |
|
1725 |
frames.extend(b |
|
|
1726 |
frames.extend(b |
|
|
1760 | frames.extend(b"x" * 64 for i in range(256)) | |
|
1761 | frames.extend(b"y" * 64 for i in range(256)) | |
|
1727 | 1762 | |
|
1728 | 1763 | result = cctx.multi_compress_to_buffer(frames, threads=-1) |
|
1729 | 1764 |
This diff has been collapsed as it changes many lines, (631 lines changed) Show them Hide them | |||
@@ -6,28 +6,31 try: | |||
|
6 | 6 | import hypothesis |
|
7 | 7 | import hypothesis.strategies as strategies |
|
8 | 8 | except ImportError: |
|
9 |
raise unittest.SkipTest( |
|
|
9 | raise unittest.SkipTest("hypothesis not available") | |
|
10 | 10 | |
|
11 | 11 | import zstandard as zstd |
|
12 | 12 | |
|
13 |
from . |
|
|
13 | from .common import ( | |
|
14 | 14 | make_cffi, |
|
15 | 15 | NonClosingBytesIO, |
|
16 | 16 | random_input_data, |
|
17 | TestCase, | |
|
17 | 18 | ) |
|
18 | 19 | |
|
19 | 20 | |
|
20 |
@unittest.skipUnless( |
|
|
21 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
21 | 22 | @make_cffi |
|
22 |
class TestCompressor_stream_reader_fuzzing( |
|
|
23 | class TestCompressor_stream_reader_fuzzing(TestCase): | |
|
23 | 24 | @hypothesis.settings( |
|
24 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
25 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
26 | level=strategies.integers(min_value=1, max_value=5), | |
|
27 | source_read_size=strategies.integers(1, 16384), | |
|
28 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
29 | def test_stream_source_read(self, original, level, source_read_size, | |
|
30 | read_size): | |
|
25 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
26 | ) | |
|
27 | @hypothesis.given( | |
|
28 | original=strategies.sampled_from(random_input_data()), | |
|
29 | level=strategies.integers(min_value=1, max_value=5), | |
|
30 | source_read_size=strategies.integers(1, 16384), | |
|
31 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
32 | ) | |
|
33 | def test_stream_source_read(self, original, level, source_read_size, read_size): | |
|
31 | 34 | if read_size == 0: |
|
32 | 35 | read_size = -1 |
|
33 | 36 | |
@@ -35,8 +38,9 class TestCompressor_stream_reader_fuzzi | |||
|
35 | 38 | ref_frame = refctx.compress(original) |
|
36 | 39 | |
|
37 | 40 | cctx = zstd.ZstdCompressor(level=level) |
|
38 |
with cctx.stream_reader( |
|
|
39 | read_size=source_read_size) as reader: | |
|
41 | with cctx.stream_reader( | |
|
42 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
43 | ) as reader: | |
|
40 | 44 | chunks = [] |
|
41 | 45 | while True: |
|
42 | 46 | chunk = reader.read(read_size) |
@@ -45,16 +49,18 class TestCompressor_stream_reader_fuzzi | |||
|
45 | 49 | |
|
46 | 50 | chunks.append(chunk) |
|
47 | 51 | |
|
48 |
self.assertEqual(b |
|
|
52 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
49 | 53 | |
|
50 | 54 | @hypothesis.settings( |
|
51 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
52 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
53 | level=strategies.integers(min_value=1, max_value=5), | |
|
54 | source_read_size=strategies.integers(1, 16384), | |
|
55 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
56 | def test_buffer_source_read(self, original, level, source_read_size, | |
|
57 | read_size): | |
|
55 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
56 | ) | |
|
57 | @hypothesis.given( | |
|
58 | original=strategies.sampled_from(random_input_data()), | |
|
59 | level=strategies.integers(min_value=1, max_value=5), | |
|
60 | source_read_size=strategies.integers(1, 16384), | |
|
61 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
62 | ) | |
|
63 | def test_buffer_source_read(self, original, level, source_read_size, read_size): | |
|
58 | 64 | if read_size == 0: |
|
59 | 65 | read_size = -1 |
|
60 | 66 | |
@@ -62,8 +68,9 class TestCompressor_stream_reader_fuzzi | |||
|
62 | 68 | ref_frame = refctx.compress(original) |
|
63 | 69 | |
|
64 | 70 | cctx = zstd.ZstdCompressor(level=level) |
|
65 |
with cctx.stream_reader( |
|
|
66 | read_size=source_read_size) as reader: | |
|
71 | with cctx.stream_reader( | |
|
72 | original, size=len(original), read_size=source_read_size | |
|
73 | ) as reader: | |
|
67 | 74 | chunks = [] |
|
68 | 75 | while True: |
|
69 | 76 | chunk = reader.read(read_size) |
@@ -72,22 +79,30 class TestCompressor_stream_reader_fuzzi | |||
|
72 | 79 | |
|
73 | 80 | chunks.append(chunk) |
|
74 | 81 | |
|
75 |
self.assertEqual(b |
|
|
82 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
76 | 83 | |
|
77 | 84 | @hypothesis.settings( |
|
78 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
79 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
80 | level=strategies.integers(min_value=1, max_value=5), | |
|
81 | source_read_size=strategies.integers(1, 16384), | |
|
82 | read_sizes=strategies.data()) | |
|
83 | def test_stream_source_read_variance(self, original, level, source_read_size, | |
|
84 | read_sizes): | |
|
85 | suppress_health_check=[ | |
|
86 | hypothesis.HealthCheck.large_base_example, | |
|
87 | hypothesis.HealthCheck.too_slow, | |
|
88 | ] | |
|
89 | ) | |
|
90 | @hypothesis.given( | |
|
91 | original=strategies.sampled_from(random_input_data()), | |
|
92 | level=strategies.integers(min_value=1, max_value=5), | |
|
93 | source_read_size=strategies.integers(1, 16384), | |
|
94 | read_sizes=strategies.data(), | |
|
95 | ) | |
|
96 | def test_stream_source_read_variance( | |
|
97 | self, original, level, source_read_size, read_sizes | |
|
98 | ): | |
|
85 | 99 | refctx = zstd.ZstdCompressor(level=level) |
|
86 | 100 | ref_frame = refctx.compress(original) |
|
87 | 101 | |
|
88 | 102 | cctx = zstd.ZstdCompressor(level=level) |
|
89 |
with cctx.stream_reader( |
|
|
90 | read_size=source_read_size) as reader: | |
|
103 | with cctx.stream_reader( | |
|
104 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
105 | ) as reader: | |
|
91 | 106 | chunks = [] |
|
92 | 107 | while True: |
|
93 | 108 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
@@ -97,23 +112,31 class TestCompressor_stream_reader_fuzzi | |||
|
97 | 112 | |
|
98 | 113 | chunks.append(chunk) |
|
99 | 114 | |
|
100 |
self.assertEqual(b |
|
|
115 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
101 | 116 | |
|
102 | 117 | @hypothesis.settings( |
|
103 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
104 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
105 | level=strategies.integers(min_value=1, max_value=5), | |
|
106 | source_read_size=strategies.integers(1, 16384), | |
|
107 | read_sizes=strategies.data()) | |
|
108 | def test_buffer_source_read_variance(self, original, level, source_read_size, | |
|
109 | read_sizes): | |
|
118 | suppress_health_check=[ | |
|
119 | hypothesis.HealthCheck.large_base_example, | |
|
120 | hypothesis.HealthCheck.too_slow, | |
|
121 | ] | |
|
122 | ) | |
|
123 | @hypothesis.given( | |
|
124 | original=strategies.sampled_from(random_input_data()), | |
|
125 | level=strategies.integers(min_value=1, max_value=5), | |
|
126 | source_read_size=strategies.integers(1, 16384), | |
|
127 | read_sizes=strategies.data(), | |
|
128 | ) | |
|
129 | def test_buffer_source_read_variance( | |
|
130 | self, original, level, source_read_size, read_sizes | |
|
131 | ): | |
|
110 | 132 | |
|
111 | 133 | refctx = zstd.ZstdCompressor(level=level) |
|
112 | 134 | ref_frame = refctx.compress(original) |
|
113 | 135 | |
|
114 | 136 | cctx = zstd.ZstdCompressor(level=level) |
|
115 |
with cctx.stream_reader( |
|
|
116 | read_size=source_read_size) as reader: | |
|
137 | with cctx.stream_reader( | |
|
138 | original, size=len(original), read_size=source_read_size | |
|
139 | ) as reader: | |
|
117 | 140 | chunks = [] |
|
118 | 141 | while True: |
|
119 | 142 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
@@ -123,22 +146,25 class TestCompressor_stream_reader_fuzzi | |||
|
123 | 146 | |
|
124 | 147 | chunks.append(chunk) |
|
125 | 148 | |
|
126 |
self.assertEqual(b |
|
|
149 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
127 | 150 | |
|
128 | 151 | @hypothesis.settings( |
|
129 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
130 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
131 | level=strategies.integers(min_value=1, max_value=5), | |
|
132 | source_read_size=strategies.integers(1, 16384), | |
|
133 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
134 | def test_stream_source_readinto(self, original, level, | |
|
135 | source_read_size, read_size): | |
|
152 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
153 | ) | |
|
154 | @hypothesis.given( | |
|
155 | original=strategies.sampled_from(random_input_data()), | |
|
156 | level=strategies.integers(min_value=1, max_value=5), | |
|
157 | source_read_size=strategies.integers(1, 16384), | |
|
158 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
159 | ) | |
|
160 | def test_stream_source_readinto(self, original, level, source_read_size, read_size): | |
|
136 | 161 | refctx = zstd.ZstdCompressor(level=level) |
|
137 | 162 | ref_frame = refctx.compress(original) |
|
138 | 163 | |
|
139 | 164 | cctx = zstd.ZstdCompressor(level=level) |
|
140 |
with cctx.stream_reader( |
|
|
141 | read_size=source_read_size) as reader: | |
|
165 | with cctx.stream_reader( | |
|
166 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
167 | ) as reader: | |
|
142 | 168 | chunks = [] |
|
143 | 169 | while True: |
|
144 | 170 | b = bytearray(read_size) |
@@ -149,23 +175,26 class TestCompressor_stream_reader_fuzzi | |||
|
149 | 175 | |
|
150 | 176 | chunks.append(bytes(b[0:count])) |
|
151 | 177 | |
|
152 |
self.assertEqual(b |
|
|
178 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
153 | 179 | |
|
154 | 180 | @hypothesis.settings( |
|
155 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
156 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
157 | level=strategies.integers(min_value=1, max_value=5), | |
|
158 | source_read_size=strategies.integers(1, 16384), | |
|
159 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
160 | def test_buffer_source_readinto(self, original, level, | |
|
161 | source_read_size, read_size): | |
|
181 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
182 | ) | |
|
183 | @hypothesis.given( | |
|
184 | original=strategies.sampled_from(random_input_data()), | |
|
185 | level=strategies.integers(min_value=1, max_value=5), | |
|
186 | source_read_size=strategies.integers(1, 16384), | |
|
187 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
188 | ) | |
|
189 | def test_buffer_source_readinto(self, original, level, source_read_size, read_size): | |
|
162 | 190 | |
|
163 | 191 | refctx = zstd.ZstdCompressor(level=level) |
|
164 | 192 | ref_frame = refctx.compress(original) |
|
165 | 193 | |
|
166 | 194 | cctx = zstd.ZstdCompressor(level=level) |
|
167 |
with cctx.stream_reader( |
|
|
168 | read_size=source_read_size) as reader: | |
|
195 | with cctx.stream_reader( | |
|
196 | original, size=len(original), read_size=source_read_size | |
|
197 | ) as reader: | |
|
169 | 198 | chunks = [] |
|
170 | 199 | while True: |
|
171 | 200 | b = bytearray(read_size) |
@@ -176,22 +205,30 class TestCompressor_stream_reader_fuzzi | |||
|
176 | 205 | |
|
177 | 206 | chunks.append(bytes(b[0:count])) |
|
178 | 207 | |
|
179 |
self.assertEqual(b |
|
|
208 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
180 | 209 | |
|
181 | 210 | @hypothesis.settings( |
|
182 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
183 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
184 | level=strategies.integers(min_value=1, max_value=5), | |
|
185 | source_read_size=strategies.integers(1, 16384), | |
|
186 | read_sizes=strategies.data()) | |
|
187 | def test_stream_source_readinto_variance(self, original, level, | |
|
188 | source_read_size, read_sizes): | |
|
211 | suppress_health_check=[ | |
|
212 | hypothesis.HealthCheck.large_base_example, | |
|
213 | hypothesis.HealthCheck.too_slow, | |
|
214 | ] | |
|
215 | ) | |
|
216 | @hypothesis.given( | |
|
217 | original=strategies.sampled_from(random_input_data()), | |
|
218 | level=strategies.integers(min_value=1, max_value=5), | |
|
219 | source_read_size=strategies.integers(1, 16384), | |
|
220 | read_sizes=strategies.data(), | |
|
221 | ) | |
|
222 | def test_stream_source_readinto_variance( | |
|
223 | self, original, level, source_read_size, read_sizes | |
|
224 | ): | |
|
189 | 225 | refctx = zstd.ZstdCompressor(level=level) |
|
190 | 226 | ref_frame = refctx.compress(original) |
|
191 | 227 | |
|
192 | 228 | cctx = zstd.ZstdCompressor(level=level) |
|
193 |
with cctx.stream_reader( |
|
|
194 | read_size=source_read_size) as reader: | |
|
229 | with cctx.stream_reader( | |
|
230 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
231 | ) as reader: | |
|
195 | 232 | chunks = [] |
|
196 | 233 | while True: |
|
197 | 234 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
@@ -203,23 +240,31 class TestCompressor_stream_reader_fuzzi | |||
|
203 | 240 | |
|
204 | 241 | chunks.append(bytes(b[0:count])) |
|
205 | 242 | |
|
206 |
self.assertEqual(b |
|
|
243 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
207 | 244 | |
|
208 | 245 | @hypothesis.settings( |
|
209 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
210 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
211 | level=strategies.integers(min_value=1, max_value=5), | |
|
212 | source_read_size=strategies.integers(1, 16384), | |
|
213 | read_sizes=strategies.data()) | |
|
214 | def test_buffer_source_readinto_variance(self, original, level, | |
|
215 | source_read_size, read_sizes): | |
|
246 | suppress_health_check=[ | |
|
247 | hypothesis.HealthCheck.large_base_example, | |
|
248 | hypothesis.HealthCheck.too_slow, | |
|
249 | ] | |
|
250 | ) | |
|
251 | @hypothesis.given( | |
|
252 | original=strategies.sampled_from(random_input_data()), | |
|
253 | level=strategies.integers(min_value=1, max_value=5), | |
|
254 | source_read_size=strategies.integers(1, 16384), | |
|
255 | read_sizes=strategies.data(), | |
|
256 | ) | |
|
257 | def test_buffer_source_readinto_variance( | |
|
258 | self, original, level, source_read_size, read_sizes | |
|
259 | ): | |
|
216 | 260 | |
|
217 | 261 | refctx = zstd.ZstdCompressor(level=level) |
|
218 | 262 | ref_frame = refctx.compress(original) |
|
219 | 263 | |
|
220 | 264 | cctx = zstd.ZstdCompressor(level=level) |
|
221 |
with cctx.stream_reader( |
|
|
222 | read_size=source_read_size) as reader: | |
|
265 | with cctx.stream_reader( | |
|
266 | original, size=len(original), read_size=source_read_size | |
|
267 | ) as reader: | |
|
223 | 268 | chunks = [] |
|
224 | 269 | while True: |
|
225 | 270 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
@@ -231,16 +276,18 class TestCompressor_stream_reader_fuzzi | |||
|
231 | 276 | |
|
232 | 277 | chunks.append(bytes(b[0:count])) |
|
233 | 278 | |
|
234 |
self.assertEqual(b |
|
|
279 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
235 | 280 | |
|
236 | 281 | @hypothesis.settings( |
|
237 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
238 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
239 | level=strategies.integers(min_value=1, max_value=5), | |
|
240 | source_read_size=strategies.integers(1, 16384), | |
|
241 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
242 | def test_stream_source_read1(self, original, level, source_read_size, | |
|
243 | read_size): | |
|
282 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
283 | ) | |
|
284 | @hypothesis.given( | |
|
285 | original=strategies.sampled_from(random_input_data()), | |
|
286 | level=strategies.integers(min_value=1, max_value=5), | |
|
287 | source_read_size=strategies.integers(1, 16384), | |
|
288 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
289 | ) | |
|
290 | def test_stream_source_read1(self, original, level, source_read_size, read_size): | |
|
244 | 291 | if read_size == 0: |
|
245 | 292 | read_size = -1 |
|
246 | 293 | |
@@ -248,8 +295,9 class TestCompressor_stream_reader_fuzzi | |||
|
248 | 295 | ref_frame = refctx.compress(original) |
|
249 | 296 | |
|
250 | 297 | cctx = zstd.ZstdCompressor(level=level) |
|
251 |
with cctx.stream_reader( |
|
|
252 | read_size=source_read_size) as reader: | |
|
298 | with cctx.stream_reader( | |
|
299 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
300 | ) as reader: | |
|
253 | 301 | chunks = [] |
|
254 | 302 | while True: |
|
255 | 303 | chunk = reader.read1(read_size) |
@@ -258,16 +306,18 class TestCompressor_stream_reader_fuzzi | |||
|
258 | 306 | |
|
259 | 307 | chunks.append(chunk) |
|
260 | 308 | |
|
261 |
self.assertEqual(b |
|
|
309 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
262 | 310 | |
|
263 | 311 | @hypothesis.settings( |
|
264 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
265 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
266 | level=strategies.integers(min_value=1, max_value=5), | |
|
267 | source_read_size=strategies.integers(1, 16384), | |
|
268 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
269 | def test_buffer_source_read1(self, original, level, source_read_size, | |
|
270 | read_size): | |
|
312 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
313 | ) | |
|
314 | @hypothesis.given( | |
|
315 | original=strategies.sampled_from(random_input_data()), | |
|
316 | level=strategies.integers(min_value=1, max_value=5), | |
|
317 | source_read_size=strategies.integers(1, 16384), | |
|
318 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
319 | ) | |
|
320 | def test_buffer_source_read1(self, original, level, source_read_size, read_size): | |
|
271 | 321 | if read_size == 0: |
|
272 | 322 | read_size = -1 |
|
273 | 323 | |
@@ -275,8 +325,9 class TestCompressor_stream_reader_fuzzi | |||
|
275 | 325 | ref_frame = refctx.compress(original) |
|
276 | 326 | |
|
277 | 327 | cctx = zstd.ZstdCompressor(level=level) |
|
278 |
with cctx.stream_reader( |
|
|
279 | read_size=source_read_size) as reader: | |
|
328 | with cctx.stream_reader( | |
|
329 | original, size=len(original), read_size=source_read_size | |
|
330 | ) as reader: | |
|
280 | 331 | chunks = [] |
|
281 | 332 | while True: |
|
282 | 333 | chunk = reader.read1(read_size) |
@@ -285,22 +336,30 class TestCompressor_stream_reader_fuzzi | |||
|
285 | 336 | |
|
286 | 337 | chunks.append(chunk) |
|
287 | 338 | |
|
288 |
self.assertEqual(b |
|
|
339 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
289 | 340 | |
|
290 | 341 | @hypothesis.settings( |
|
291 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
292 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
293 | level=strategies.integers(min_value=1, max_value=5), | |
|
294 | source_read_size=strategies.integers(1, 16384), | |
|
295 | read_sizes=strategies.data()) | |
|
296 | def test_stream_source_read1_variance(self, original, level, source_read_size, | |
|
297 | read_sizes): | |
|
342 | suppress_health_check=[ | |
|
343 | hypothesis.HealthCheck.large_base_example, | |
|
344 | hypothesis.HealthCheck.too_slow, | |
|
345 | ] | |
|
346 | ) | |
|
347 | @hypothesis.given( | |
|
348 | original=strategies.sampled_from(random_input_data()), | |
|
349 | level=strategies.integers(min_value=1, max_value=5), | |
|
350 | source_read_size=strategies.integers(1, 16384), | |
|
351 | read_sizes=strategies.data(), | |
|
352 | ) | |
|
353 | def test_stream_source_read1_variance( | |
|
354 | self, original, level, source_read_size, read_sizes | |
|
355 | ): | |
|
298 | 356 | refctx = zstd.ZstdCompressor(level=level) |
|
299 | 357 | ref_frame = refctx.compress(original) |
|
300 | 358 | |
|
301 | 359 | cctx = zstd.ZstdCompressor(level=level) |
|
302 |
with cctx.stream_reader( |
|
|
303 | read_size=source_read_size) as reader: | |
|
360 | with cctx.stream_reader( | |
|
361 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
362 | ) as reader: | |
|
304 | 363 | chunks = [] |
|
305 | 364 | while True: |
|
306 | 365 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
@@ -310,23 +369,31 class TestCompressor_stream_reader_fuzzi | |||
|
310 | 369 | |
|
311 | 370 | chunks.append(chunk) |
|
312 | 371 | |
|
313 |
self.assertEqual(b |
|
|
372 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
314 | 373 | |
|
315 | 374 | @hypothesis.settings( |
|
316 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
317 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
318 | level=strategies.integers(min_value=1, max_value=5), | |
|
319 | source_read_size=strategies.integers(1, 16384), | |
|
320 | read_sizes=strategies.data()) | |
|
321 | def test_buffer_source_read1_variance(self, original, level, source_read_size, | |
|
322 | read_sizes): | |
|
375 | suppress_health_check=[ | |
|
376 | hypothesis.HealthCheck.large_base_example, | |
|
377 | hypothesis.HealthCheck.too_slow, | |
|
378 | ] | |
|
379 | ) | |
|
380 | @hypothesis.given( | |
|
381 | original=strategies.sampled_from(random_input_data()), | |
|
382 | level=strategies.integers(min_value=1, max_value=5), | |
|
383 | source_read_size=strategies.integers(1, 16384), | |
|
384 | read_sizes=strategies.data(), | |
|
385 | ) | |
|
386 | def test_buffer_source_read1_variance( | |
|
387 | self, original, level, source_read_size, read_sizes | |
|
388 | ): | |
|
323 | 389 | |
|
324 | 390 | refctx = zstd.ZstdCompressor(level=level) |
|
325 | 391 | ref_frame = refctx.compress(original) |
|
326 | 392 | |
|
327 | 393 | cctx = zstd.ZstdCompressor(level=level) |
|
328 |
with cctx.stream_reader( |
|
|
329 | read_size=source_read_size) as reader: | |
|
394 | with cctx.stream_reader( | |
|
395 | original, size=len(original), read_size=source_read_size | |
|
396 | ) as reader: | |
|
330 | 397 | chunks = [] |
|
331 | 398 | while True: |
|
332 | 399 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
@@ -336,17 +403,20 class TestCompressor_stream_reader_fuzzi | |||
|
336 | 403 | |
|
337 | 404 | chunks.append(chunk) |
|
338 | 405 | |
|
339 |
self.assertEqual(b |
|
|
340 | ||
|
406 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
341 | 407 | |
|
342 | 408 | @hypothesis.settings( |
|
343 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
344 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
345 | level=strategies.integers(min_value=1, max_value=5), | |
|
346 | source_read_size=strategies.integers(1, 16384), | |
|
347 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
348 | def test_stream_source_readinto1(self, original, level, source_read_size, | |
|
349 | read_size): | |
|
409 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
410 | ) | |
|
411 | @hypothesis.given( | |
|
412 | original=strategies.sampled_from(random_input_data()), | |
|
413 | level=strategies.integers(min_value=1, max_value=5), | |
|
414 | source_read_size=strategies.integers(1, 16384), | |
|
415 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
416 | ) | |
|
417 | def test_stream_source_readinto1( | |
|
418 | self, original, level, source_read_size, read_size | |
|
419 | ): | |
|
350 | 420 | if read_size == 0: |
|
351 | 421 | read_size = -1 |
|
352 | 422 | |
@@ -354,8 +424,9 class TestCompressor_stream_reader_fuzzi | |||
|
354 | 424 | ref_frame = refctx.compress(original) |
|
355 | 425 | |
|
356 | 426 | cctx = zstd.ZstdCompressor(level=level) |
|
357 |
with cctx.stream_reader( |
|
|
358 | read_size=source_read_size) as reader: | |
|
427 | with cctx.stream_reader( | |
|
428 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
429 | ) as reader: | |
|
359 | 430 | chunks = [] |
|
360 | 431 | while True: |
|
361 | 432 | b = bytearray(read_size) |
@@ -366,16 +437,20 class TestCompressor_stream_reader_fuzzi | |||
|
366 | 437 | |
|
367 | 438 | chunks.append(bytes(b[0:count])) |
|
368 | 439 | |
|
369 |
self.assertEqual(b |
|
|
440 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
370 | 441 | |
|
371 | 442 | @hypothesis.settings( |
|
372 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
373 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
374 | level=strategies.integers(min_value=1, max_value=5), | |
|
375 | source_read_size=strategies.integers(1, 16384), | |
|
376 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
377 | def test_buffer_source_readinto1(self, original, level, source_read_size, | |
|
378 | read_size): | |
|
443 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
444 | ) | |
|
445 | @hypothesis.given( | |
|
446 | original=strategies.sampled_from(random_input_data()), | |
|
447 | level=strategies.integers(min_value=1, max_value=5), | |
|
448 | source_read_size=strategies.integers(1, 16384), | |
|
449 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
450 | ) | |
|
451 | def test_buffer_source_readinto1( | |
|
452 | self, original, level, source_read_size, read_size | |
|
453 | ): | |
|
379 | 454 | if read_size == 0: |
|
380 | 455 | read_size = -1 |
|
381 | 456 | |
@@ -383,8 +458,9 class TestCompressor_stream_reader_fuzzi | |||
|
383 | 458 | ref_frame = refctx.compress(original) |
|
384 | 459 | |
|
385 | 460 | cctx = zstd.ZstdCompressor(level=level) |
|
386 |
with cctx.stream_reader( |
|
|
387 | read_size=source_read_size) as reader: | |
|
461 | with cctx.stream_reader( | |
|
462 | original, size=len(original), read_size=source_read_size | |
|
463 | ) as reader: | |
|
388 | 464 | chunks = [] |
|
389 | 465 | while True: |
|
390 | 466 | b = bytearray(read_size) |
@@ -395,22 +471,30 class TestCompressor_stream_reader_fuzzi | |||
|
395 | 471 | |
|
396 | 472 | chunks.append(bytes(b[0:count])) |
|
397 | 473 | |
|
398 |
self.assertEqual(b |
|
|
474 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
399 | 475 | |
|
400 | 476 | @hypothesis.settings( |
|
401 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
402 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
403 | level=strategies.integers(min_value=1, max_value=5), | |
|
404 | source_read_size=strategies.integers(1, 16384), | |
|
405 | read_sizes=strategies.data()) | |
|
406 | def test_stream_source_readinto1_variance(self, original, level, source_read_size, | |
|
407 | read_sizes): | |
|
477 | suppress_health_check=[ | |
|
478 | hypothesis.HealthCheck.large_base_example, | |
|
479 | hypothesis.HealthCheck.too_slow, | |
|
480 | ] | |
|
481 | ) | |
|
482 | @hypothesis.given( | |
|
483 | original=strategies.sampled_from(random_input_data()), | |
|
484 | level=strategies.integers(min_value=1, max_value=5), | |
|
485 | source_read_size=strategies.integers(1, 16384), | |
|
486 | read_sizes=strategies.data(), | |
|
487 | ) | |
|
488 | def test_stream_source_readinto1_variance( | |
|
489 | self, original, level, source_read_size, read_sizes | |
|
490 | ): | |
|
408 | 491 | refctx = zstd.ZstdCompressor(level=level) |
|
409 | 492 | ref_frame = refctx.compress(original) |
|
410 | 493 | |
|
411 | 494 | cctx = zstd.ZstdCompressor(level=level) |
|
412 |
with cctx.stream_reader( |
|
|
413 | read_size=source_read_size) as reader: | |
|
495 | with cctx.stream_reader( | |
|
496 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
497 | ) as reader: | |
|
414 | 498 | chunks = [] |
|
415 | 499 | while True: |
|
416 | 500 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
@@ -422,23 +506,31 class TestCompressor_stream_reader_fuzzi | |||
|
422 | 506 | |
|
423 | 507 | chunks.append(bytes(b[0:count])) |
|
424 | 508 | |
|
425 |
self.assertEqual(b |
|
|
509 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
426 | 510 | |
|
427 | 511 | @hypothesis.settings( |
|
428 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
429 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
430 | level=strategies.integers(min_value=1, max_value=5), | |
|
431 | source_read_size=strategies.integers(1, 16384), | |
|
432 | read_sizes=strategies.data()) | |
|
433 | def test_buffer_source_readinto1_variance(self, original, level, source_read_size, | |
|
434 | read_sizes): | |
|
512 | suppress_health_check=[ | |
|
513 | hypothesis.HealthCheck.large_base_example, | |
|
514 | hypothesis.HealthCheck.too_slow, | |
|
515 | ] | |
|
516 | ) | |
|
517 | @hypothesis.given( | |
|
518 | original=strategies.sampled_from(random_input_data()), | |
|
519 | level=strategies.integers(min_value=1, max_value=5), | |
|
520 | source_read_size=strategies.integers(1, 16384), | |
|
521 | read_sizes=strategies.data(), | |
|
522 | ) | |
|
523 | def test_buffer_source_readinto1_variance( | |
|
524 | self, original, level, source_read_size, read_sizes | |
|
525 | ): | |
|
435 | 526 | |
|
436 | 527 | refctx = zstd.ZstdCompressor(level=level) |
|
437 | 528 | ref_frame = refctx.compress(original) |
|
438 | 529 | |
|
439 | 530 | cctx = zstd.ZstdCompressor(level=level) |
|
440 |
with cctx.stream_reader( |
|
|
441 | read_size=source_read_size) as reader: | |
|
531 | with cctx.stream_reader( | |
|
532 | original, size=len(original), read_size=source_read_size | |
|
533 | ) as reader: | |
|
442 | 534 | chunks = [] |
|
443 | 535 | while True: |
|
444 | 536 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
@@ -450,35 +542,40 class TestCompressor_stream_reader_fuzzi | |||
|
450 | 542 | |
|
451 | 543 | chunks.append(bytes(b[0:count])) |
|
452 | 544 | |
|
453 |
self.assertEqual(b |
|
|
454 | ||
|
545 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
455 | 546 | |
|
456 | 547 | |
|
457 |
@unittest.skipUnless( |
|
|
548 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
458 | 549 | @make_cffi |
|
459 |
class TestCompressor_stream_writer_fuzzing( |
|
|
460 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
461 | level=strategies.integers(min_value=1, max_value=5), | |
|
462 |
|
|
|
550 | class TestCompressor_stream_writer_fuzzing(TestCase): | |
|
551 | @hypothesis.given( | |
|
552 | original=strategies.sampled_from(random_input_data()), | |
|
553 | level=strategies.integers(min_value=1, max_value=5), | |
|
554 | write_size=strategies.integers(min_value=1, max_value=1048576), | |
|
555 | ) | |
|
463 | 556 | def test_write_size_variance(self, original, level, write_size): |
|
464 | 557 | refctx = zstd.ZstdCompressor(level=level) |
|
465 | 558 | ref_frame = refctx.compress(original) |
|
466 | 559 | |
|
467 | 560 | cctx = zstd.ZstdCompressor(level=level) |
|
468 | 561 | b = NonClosingBytesIO() |
|
469 | with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: | |
|
562 | with cctx.stream_writer( | |
|
563 | b, size=len(original), write_size=write_size | |
|
564 | ) as compressor: | |
|
470 | 565 | compressor.write(original) |
|
471 | 566 | |
|
472 | 567 | self.assertEqual(b.getvalue(), ref_frame) |
|
473 | 568 | |
|
474 | 569 | |
|
475 |
@unittest.skipUnless( |
|
|
570 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
476 | 571 | @make_cffi |
|
477 |
class TestCompressor_copy_stream_fuzzing( |
|
|
478 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
479 | level=strategies.integers(min_value=1, max_value=5), | |
|
480 |
|
|
|
481 |
|
|
|
572 | class TestCompressor_copy_stream_fuzzing(TestCase): | |
|
573 | @hypothesis.given( | |
|
574 | original=strategies.sampled_from(random_input_data()), | |
|
575 | level=strategies.integers(min_value=1, max_value=5), | |
|
576 | read_size=strategies.integers(min_value=1, max_value=1048576), | |
|
577 | write_size=strategies.integers(min_value=1, max_value=1048576), | |
|
578 | ) | |
|
482 | 579 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
483 | 580 | refctx = zstd.ZstdCompressor(level=level) |
|
484 | 581 | ref_frame = refctx.compress(original) |
@@ -487,20 +584,27 class TestCompressor_copy_stream_fuzzing | |||
|
487 | 584 | source = io.BytesIO(original) |
|
488 | 585 | dest = io.BytesIO() |
|
489 | 586 | |
|
490 | cctx.copy_stream(source, dest, size=len(original), read_size=read_size, | |
|
491 | write_size=write_size) | |
|
587 | cctx.copy_stream( | |
|
588 | source, dest, size=len(original), read_size=read_size, write_size=write_size | |
|
589 | ) | |
|
492 | 590 | |
|
493 | 591 | self.assertEqual(dest.getvalue(), ref_frame) |
|
494 | 592 | |
|
495 | 593 | |
|
496 |
@unittest.skipUnless( |
|
|
594 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
497 | 595 | @make_cffi |
|
498 |
class TestCompressor_compressobj_fuzzing( |
|
|
596 | class TestCompressor_compressobj_fuzzing(TestCase): | |
|
499 | 597 | @hypothesis.settings( |
|
500 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
501 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
502 | level=strategies.integers(min_value=1, max_value=5), | |
|
503 | chunk_sizes=strategies.data()) | |
|
598 | suppress_health_check=[ | |
|
599 | hypothesis.HealthCheck.large_base_example, | |
|
600 | hypothesis.HealthCheck.too_slow, | |
|
601 | ] | |
|
602 | ) | |
|
603 | @hypothesis.given( | |
|
604 | original=strategies.sampled_from(random_input_data()), | |
|
605 | level=strategies.integers(min_value=1, max_value=5), | |
|
606 | chunk_sizes=strategies.data(), | |
|
607 | ) | |
|
504 | 608 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
505 | 609 | refctx = zstd.ZstdCompressor(level=level) |
|
506 | 610 | ref_frame = refctx.compress(original) |
@@ -512,7 +616,7 class TestCompressor_compressobj_fuzzing | |||
|
512 | 616 | i = 0 |
|
513 | 617 | while True: |
|
514 | 618 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
515 | source = original[i:i + chunk_size] | |
|
619 | source = original[i : i + chunk_size] | |
|
516 | 620 | if not source: |
|
517 | 621 | break |
|
518 | 622 | |
@@ -521,14 +625,20 class TestCompressor_compressobj_fuzzing | |||
|
521 | 625 | |
|
522 | 626 | chunks.append(cobj.flush()) |
|
523 | 627 | |
|
524 |
self.assertEqual(b |
|
|
628 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
525 | 629 | |
|
526 | 630 | @hypothesis.settings( |
|
527 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
528 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
529 | level=strategies.integers(min_value=1, max_value=5), | |
|
530 | chunk_sizes=strategies.data(), | |
|
531 | flushes=strategies.data()) | |
|
631 | suppress_health_check=[ | |
|
632 | hypothesis.HealthCheck.large_base_example, | |
|
633 | hypothesis.HealthCheck.too_slow, | |
|
634 | ] | |
|
635 | ) | |
|
636 | @hypothesis.given( | |
|
637 | original=strategies.sampled_from(random_input_data()), | |
|
638 | level=strategies.integers(min_value=1, max_value=5), | |
|
639 | chunk_sizes=strategies.data(), | |
|
640 | flushes=strategies.data(), | |
|
641 | ) | |
|
532 | 642 | def test_flush_block(self, original, level, chunk_sizes, flushes): |
|
533 | 643 | cctx = zstd.ZstdCompressor(level=level) |
|
534 | 644 | cobj = cctx.compressobj() |
@@ -541,7 +651,7 class TestCompressor_compressobj_fuzzing | |||
|
541 | 651 | i = 0 |
|
542 | 652 | while True: |
|
543 | 653 | input_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
544 | source = original[i:i + input_size] | |
|
654 | source = original[i : i + input_size] | |
|
545 | 655 | if not source: |
|
546 | 656 | break |
|
547 | 657 | |
@@ -558,24 +668,28 class TestCompressor_compressobj_fuzzing | |||
|
558 | 668 | compressed_chunks.append(chunk) |
|
559 | 669 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
560 | 670 | |
|
561 |
self.assertEqual(b |
|
|
671 | self.assertEqual(b"".join(decompressed_chunks), original[0:i]) | |
|
562 | 672 | |
|
563 | 673 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH) |
|
564 | 674 | compressed_chunks.append(chunk) |
|
565 | 675 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
566 | 676 | |
|
567 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), | |
|
568 | max_output_size=len(original)), | |
|
569 |
|
|
|
570 | self.assertEqual(b''.join(decompressed_chunks), original) | |
|
677 | self.assertEqual( | |
|
678 | dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), | |
|
679 | original, | |
|
680 | ) | |
|
681 | self.assertEqual(b"".join(decompressed_chunks), original) | |
|
682 | ||
|
571 | 683 | |
|
572 |
@unittest.skipUnless( |
|
|
684 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
573 | 685 | @make_cffi |
|
574 |
class TestCompressor_read_to_iter_fuzzing( |
|
|
575 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
576 | level=strategies.integers(min_value=1, max_value=5), | |
|
577 |
|
|
|
578 |
|
|
|
686 | class TestCompressor_read_to_iter_fuzzing(TestCase): | |
|
687 | @hypothesis.given( | |
|
688 | original=strategies.sampled_from(random_input_data()), | |
|
689 | level=strategies.integers(min_value=1, max_value=5), | |
|
690 | read_size=strategies.integers(min_value=1, max_value=4096), | |
|
691 | write_size=strategies.integers(min_value=1, max_value=4096), | |
|
692 | ) | |
|
579 | 693 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
580 | 694 | refcctx = zstd.ZstdCompressor(level=level) |
|
581 | 695 | ref_frame = refcctx.compress(original) |
@@ -583,32 +697,35 class TestCompressor_read_to_iter_fuzzin | |||
|
583 | 697 | source = io.BytesIO(original) |
|
584 | 698 | |
|
585 | 699 | cctx = zstd.ZstdCompressor(level=level) |
|
586 | chunks = list(cctx.read_to_iter(source, size=len(original), | |
|
587 | read_size=read_size, | |
|
588 | write_size=write_size)) | |
|
700 | chunks = list( | |
|
701 | cctx.read_to_iter( | |
|
702 | source, size=len(original), read_size=read_size, write_size=write_size | |
|
703 | ) | |
|
704 | ) | |
|
589 | 705 | |
|
590 |
self.assertEqual(b |
|
|
706 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
591 | 707 | |
|
592 | 708 | |
|
593 |
@unittest.skipUnless( |
|
|
594 |
class TestCompressor_multi_compress_to_buffer_fuzzing( |
|
|
595 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), | |
|
596 | min_size=1, max_size=1024), | |
|
597 | threads=strategies.integers(min_value=1, max_value=8), | |
|
598 | use_dict=strategies.booleans()) | |
|
709 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
710 | class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase): | |
|
711 | @hypothesis.given( | |
|
712 | original=strategies.lists( | |
|
713 | strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 | |
|
714 | ), | |
|
715 | threads=strategies.integers(min_value=1, max_value=8), | |
|
716 | use_dict=strategies.booleans(), | |
|
717 | ) | |
|
599 | 718 | def test_data_equivalence(self, original, threads, use_dict): |
|
600 | 719 | kwargs = {} |
|
601 | 720 | |
|
602 | 721 | # Use a content dictionary because it is cheap to create. |
|
603 | 722 | if use_dict: |
|
604 |
kwargs[ |
|
|
723 | kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) | |
|
605 | 724 | |
|
606 | cctx = zstd.ZstdCompressor(level=1, | |
|
607 | write_checksum=True, | |
|
608 | **kwargs) | |
|
725 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs) | |
|
609 | 726 | |
|
610 |
if not hasattr(cctx, |
|
|
611 |
self.skipTest( |
|
|
727 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
728 | self.skipTest("multi_compress_to_buffer not available") | |
|
612 | 729 | |
|
613 | 730 | result = cctx.multi_compress_to_buffer(original, threads=-1) |
|
614 | 731 | |
@@ -624,17 +741,21 class TestCompressor_multi_compress_to_b | |||
|
624 | 741 | self.assertEqual(dctx.decompress(frame), original[i]) |
|
625 | 742 | |
|
626 | 743 | |
|
627 |
@unittest.skipUnless( |
|
|
744 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
628 | 745 | @make_cffi |
|
629 |
class TestCompressor_chunker_fuzzing( |
|
|
746 | class TestCompressor_chunker_fuzzing(TestCase): | |
|
630 | 747 | @hypothesis.settings( |
|
631 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
632 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
633 | level=strategies.integers(min_value=1, max_value=5), | |
|
634 | chunk_size=strategies.integers( | |
|
635 | min_value=1, | |
|
636 | max_value=32 * 1048576), | |
|
637 | input_sizes=strategies.data()) | |
|
748 | suppress_health_check=[ | |
|
749 | hypothesis.HealthCheck.large_base_example, | |
|
750 | hypothesis.HealthCheck.too_slow, | |
|
751 | ] | |
|
752 | ) | |
|
753 | @hypothesis.given( | |
|
754 | original=strategies.sampled_from(random_input_data()), | |
|
755 | level=strategies.integers(min_value=1, max_value=5), | |
|
756 | chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), | |
|
757 | input_sizes=strategies.data(), | |
|
758 | ) | |
|
638 | 759 | def test_random_input_sizes(self, original, level, chunk_size, input_sizes): |
|
639 | 760 | cctx = zstd.ZstdCompressor(level=level) |
|
640 | 761 | chunker = cctx.chunker(chunk_size=chunk_size) |
@@ -643,7 +764,7 class TestCompressor_chunker_fuzzing(uni | |||
|
643 | 764 | i = 0 |
|
644 | 765 | while True: |
|
645 | 766 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
646 | source = original[i:i + input_size] | |
|
767 | source = original[i : i + input_size] | |
|
647 | 768 | if not source: |
|
648 | 769 | break |
|
649 | 770 | |
@@ -654,23 +775,26 class TestCompressor_chunker_fuzzing(uni | |||
|
654 | 775 | |
|
655 | 776 | dctx = zstd.ZstdDecompressor() |
|
656 | 777 | |
|
657 |
self.assertEqual( |
|
|
658 | max_output_size=len(original)), | |
|
659 | original) | |
|
778 | self.assertEqual( | |
|
779 | dctx.decompress(b"".join(chunks), max_output_size=len(original)), original | |
|
780 | ) | |
|
660 | 781 | |
|
661 | 782 | self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1])) |
|
662 | 783 | |
|
663 | 784 | @hypothesis.settings( |
|
664 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
665 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
666 | level=strategies.integers(min_value=1, max_value=5), | |
|
667 | chunk_size=strategies.integers( | |
|
668 | min_value=1, | |
|
669 | max_value=32 * 1048576), | |
|
670 | input_sizes=strategies.data(), | |
|
671 | flushes=strategies.data()) | |
|
672 | def test_flush_block(self, original, level, chunk_size, input_sizes, | |
|
673 | flushes): | |
|
785 | suppress_health_check=[ | |
|
786 | hypothesis.HealthCheck.large_base_example, | |
|
787 | hypothesis.HealthCheck.too_slow, | |
|
788 | ] | |
|
789 | ) | |
|
790 | @hypothesis.given( | |
|
791 | original=strategies.sampled_from(random_input_data()), | |
|
792 | level=strategies.integers(min_value=1, max_value=5), | |
|
793 | chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), | |
|
794 | input_sizes=strategies.data(), | |
|
795 | flushes=strategies.data(), | |
|
796 | ) | |
|
797 | def test_flush_block(self, original, level, chunk_size, input_sizes, flushes): | |
|
674 | 798 | cctx = zstd.ZstdCompressor(level=level) |
|
675 | 799 | chunker = cctx.chunker(chunk_size=chunk_size) |
|
676 | 800 | |
@@ -682,7 +806,7 class TestCompressor_chunker_fuzzing(uni | |||
|
682 | 806 | i = 0 |
|
683 | 807 | while True: |
|
684 | 808 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
685 | source = original[i:i + input_size] | |
|
809 | source = original[i : i + input_size] | |
|
686 | 810 | if not source: |
|
687 | 811 | break |
|
688 | 812 | |
@@ -690,22 +814,23 class TestCompressor_chunker_fuzzing(uni | |||
|
690 | 814 | |
|
691 | 815 | chunks = list(chunker.compress(source)) |
|
692 | 816 | compressed_chunks.extend(chunks) |
|
693 |
decompressed_chunks.append(dobj.decompress(b |
|
|
817 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
|
694 | 818 | |
|
695 | 819 | if not flushes.draw(strategies.booleans()): |
|
696 | 820 | continue |
|
697 | 821 | |
|
698 | 822 | chunks = list(chunker.flush()) |
|
699 | 823 | compressed_chunks.extend(chunks) |
|
700 |
decompressed_chunks.append(dobj.decompress(b |
|
|
824 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
|
701 | 825 | |
|
702 |
self.assertEqual(b |
|
|
826 | self.assertEqual(b"".join(decompressed_chunks), original[0:i]) | |
|
703 | 827 | |
|
704 | 828 | chunks = list(chunker.finish()) |
|
705 | 829 | compressed_chunks.extend(chunks) |
|
706 |
decompressed_chunks.append(dobj.decompress(b |
|
|
830 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
|
707 | 831 | |
|
708 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), | |
|
709 | max_output_size=len(original)), | |
|
710 |
|
|
|
711 | self.assertEqual(b''.join(decompressed_chunks), original) No newline at end of file | |
|
832 | self.assertEqual( | |
|
833 | dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), | |
|
834 | original, | |
|
835 | ) | |
|
836 | self.assertEqual(b"".join(decompressed_chunks), original) |
@@ -3,29 +3,34 import unittest | |||
|
3 | 3 | |
|
4 | 4 | import zstandard as zstd |
|
5 | 5 | |
|
6 |
from . |
|
|
6 | from .common import ( | |
|
7 | 7 | make_cffi, |
|
8 | TestCase, | |
|
8 | 9 | ) |
|
9 | 10 | |
|
10 | 11 | |
|
11 | 12 | @make_cffi |
|
12 |
class TestCompressionParameters( |
|
|
13 | class TestCompressionParameters(TestCase): | |
|
13 | 14 | def test_bounds(self): |
|
14 |
zstd.ZstdCompressionParameters( |
|
|
15 |
|
|
|
16 |
|
|
|
17 |
|
|
|
18 | min_match=zstd.MINMATCH_MIN + 1, | |
|
19 | target_length=zstd.TARGETLENGTH_MIN, | |
|
20 | strategy=zstd.STRATEGY_FAST) | |
|
15 | zstd.ZstdCompressionParameters( | |
|
16 | window_log=zstd.WINDOWLOG_MIN, | |
|
17 | chain_log=zstd.CHAINLOG_MIN, | |
|
18 | hash_log=zstd.HASHLOG_MIN, | |
|
19 | search_log=zstd.SEARCHLOG_MIN, | |
|
20 | min_match=zstd.MINMATCH_MIN + 1, | |
|
21 | target_length=zstd.TARGETLENGTH_MIN, | |
|
22 | strategy=zstd.STRATEGY_FAST, | |
|
23 | ) | |
|
21 | 24 | |
|
22 |
zstd.ZstdCompressionParameters( |
|
|
23 |
|
|
|
24 |
|
|
|
25 |
|
|
|
26 | min_match=zstd.MINMATCH_MAX - 1, | |
|
27 | target_length=zstd.TARGETLENGTH_MAX, | |
|
28 | strategy=zstd.STRATEGY_BTULTRA2) | |
|
25 | zstd.ZstdCompressionParameters( | |
|
26 | window_log=zstd.WINDOWLOG_MAX, | |
|
27 | chain_log=zstd.CHAINLOG_MAX, | |
|
28 | hash_log=zstd.HASHLOG_MAX, | |
|
29 | search_log=zstd.SEARCHLOG_MAX, | |
|
30 | min_match=zstd.MINMATCH_MAX - 1, | |
|
31 | target_length=zstd.TARGETLENGTH_MAX, | |
|
32 | strategy=zstd.STRATEGY_BTULTRA2, | |
|
33 | ) | |
|
29 | 34 | |
|
30 | 35 | def test_from_level(self): |
|
31 | 36 | p = zstd.ZstdCompressionParameters.from_level(1) |
@@ -37,13 +42,15 class TestCompressionParameters(unittest | |||
|
37 | 42 | self.assertEqual(p.window_log, 19) |
|
38 | 43 | |
|
39 | 44 | def test_members(self): |
|
40 |
p = zstd.ZstdCompressionParameters( |
|
|
41 | chain_log=6, | |
|
42 | hash_log=7, | |
|
43 | search_log=4, | |
|
44 | min_match=5, | |
|
45 | target_length=8, | |
|
46 | strategy=1) | |
|
45 | p = zstd.ZstdCompressionParameters( | |
|
46 | window_log=10, | |
|
47 | chain_log=6, | |
|
48 | hash_log=7, | |
|
49 | search_log=4, | |
|
50 | min_match=5, | |
|
51 | target_length=8, | |
|
52 | strategy=1, | |
|
53 | ) | |
|
47 | 54 | self.assertEqual(p.window_log, 10) |
|
48 | 55 | self.assertEqual(p.chain_log, 6) |
|
49 | 56 | self.assertEqual(p.hash_log, 7) |
@@ -58,8 +65,7 class TestCompressionParameters(unittest | |||
|
58 | 65 | p = zstd.ZstdCompressionParameters(threads=4) |
|
59 | 66 | self.assertEqual(p.threads, 4) |
|
60 | 67 | |
|
61 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, | |
|
62 | overlap_log=6) | |
|
68 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6) | |
|
63 | 69 | self.assertEqual(p.threads, 2) |
|
64 | 70 | self.assertEqual(p.job_size, 1048576) |
|
65 | 71 | self.assertEqual(p.overlap_log, 6) |
@@ -91,20 +97,25 class TestCompressionParameters(unittest | |||
|
91 | 97 | self.assertEqual(p.ldm_hash_rate_log, 8) |
|
92 | 98 | |
|
93 | 99 | def test_estimated_compression_context_size(self): |
|
94 |
p = zstd.ZstdCompressionParameters( |
|
|
95 | chain_log=16, | |
|
96 | hash_log=17, | |
|
97 | search_log=1, | |
|
98 | min_match=5, | |
|
99 | target_length=16, | |
|
100 | strategy=zstd.STRATEGY_DFAST) | |
|
100 | p = zstd.ZstdCompressionParameters( | |
|
101 | window_log=20, | |
|
102 | chain_log=16, | |
|
103 | hash_log=17, | |
|
104 | search_log=1, | |
|
105 | min_match=5, | |
|
106 | target_length=16, | |
|
107 | strategy=zstd.STRATEGY_DFAST, | |
|
108 | ) | |
|
101 | 109 | |
|
102 | 110 | # 32-bit has slightly different values from 64-bit. |
|
103 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, | |
|
104 | delta=250) | |
|
111 | self.assertAlmostEqual( | |
|
112 | p.estimated_compression_context_size(), 1294464, delta=400 | |
|
113 | ) | |
|
105 | 114 | |
|
106 | 115 | def test_strategy(self): |
|
107 | with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'): | |
|
116 | with self.assertRaisesRegex( | |
|
117 | ValueError, "cannot specify both compression_strategy" | |
|
118 | ): | |
|
108 | 119 | zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0) |
|
109 | 120 | |
|
110 | 121 | p = zstd.ZstdCompressionParameters(strategy=2) |
@@ -114,7 +125,9 class TestCompressionParameters(unittest | |||
|
114 | 125 | self.assertEqual(p.compression_strategy, 3) |
|
115 | 126 | |
|
116 | 127 | def test_ldm_hash_rate_log(self): |
|
117 | with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'): | |
|
128 | with self.assertRaisesRegex( | |
|
129 | ValueError, "cannot specify both ldm_hash_rate_log" | |
|
130 | ): | |
|
118 | 131 | zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4) |
|
119 | 132 | |
|
120 | 133 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) |
@@ -124,7 +137,7 class TestCompressionParameters(unittest | |||
|
124 | 137 | self.assertEqual(p.ldm_hash_every_log, 16) |
|
125 | 138 | |
|
126 | 139 | def test_overlap_log(self): |
|
127 |
with self.assertRaisesRegex |
|
|
140 | with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"): | |
|
128 | 141 | zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9) |
|
129 | 142 | |
|
130 | 143 | p = zstd.ZstdCompressionParameters(overlap_log=2) |
@@ -137,7 +150,7 class TestCompressionParameters(unittest | |||
|
137 | 150 | |
|
138 | 151 | |
|
139 | 152 | @make_cffi |
|
140 |
class TestFrameParameters( |
|
|
153 | class TestFrameParameters(TestCase): | |
|
141 | 154 | def test_invalid_type(self): |
|
142 | 155 | with self.assertRaises(TypeError): |
|
143 | 156 | zstd.get_frame_parameters(None) |
@@ -145,71 +158,71 class TestFrameParameters(unittest.TestC | |||
|
145 | 158 | # Python 3 doesn't appear to convert unicode to Py_buffer. |
|
146 | 159 | if sys.version_info[0] >= 3: |
|
147 | 160 | with self.assertRaises(TypeError): |
|
148 |
zstd.get_frame_parameters(u |
|
|
161 | zstd.get_frame_parameters(u"foobarbaz") | |
|
149 | 162 | else: |
|
150 | 163 | # CPython will convert unicode to Py_buffer. But CFFI won't. |
|
151 |
if zstd.backend == |
|
|
164 | if zstd.backend == "cffi": | |
|
152 | 165 | with self.assertRaises(TypeError): |
|
153 |
zstd.get_frame_parameters(u |
|
|
166 | zstd.get_frame_parameters(u"foobarbaz") | |
|
154 | 167 | else: |
|
155 | 168 | with self.assertRaises(zstd.ZstdError): |
|
156 |
zstd.get_frame_parameters(u |
|
|
169 | zstd.get_frame_parameters(u"foobarbaz") | |
|
157 | 170 | |
|
158 | 171 | def test_invalid_input_sizes(self): |
|
159 |
with self.assertRaisesRegex |
|
|
160 |
zstd.get_frame_parameters(b |
|
|
172 | with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): | |
|
173 | zstd.get_frame_parameters(b"") | |
|
161 | 174 | |
|
162 |
with self.assertRaisesRegex |
|
|
175 | with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): | |
|
163 | 176 | zstd.get_frame_parameters(zstd.FRAME_HEADER) |
|
164 | 177 | |
|
165 | 178 | def test_invalid_frame(self): |
|
166 |
with self.assertRaisesRegex |
|
|
167 |
zstd.get_frame_parameters(b |
|
|
179 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
|
180 | zstd.get_frame_parameters(b"foobarbaz") | |
|
168 | 181 | |
|
169 | 182 | def test_attributes(self): |
|
170 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
183 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00") | |
|
171 | 184 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
172 | 185 | self.assertEqual(params.window_size, 1024) |
|
173 | 186 | self.assertEqual(params.dict_id, 0) |
|
174 | 187 | self.assertFalse(params.has_checksum) |
|
175 | 188 | |
|
176 | 189 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
177 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
190 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff") | |
|
178 | 191 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
179 | 192 | self.assertEqual(params.window_size, 1024) |
|
180 | 193 | self.assertEqual(params.dict_id, 255) |
|
181 | 194 | self.assertFalse(params.has_checksum) |
|
182 | 195 | |
|
183 | 196 | # Lowest 3rd bit indicates if checksum is present. |
|
184 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
197 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00") | |
|
185 | 198 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
186 | 199 | self.assertEqual(params.window_size, 1024) |
|
187 | 200 | self.assertEqual(params.dict_id, 0) |
|
188 | 201 | self.assertTrue(params.has_checksum) |
|
189 | 202 | |
|
190 | 203 | # Upper 2 bits indicate content size. |
|
191 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
204 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00") | |
|
192 | 205 | self.assertEqual(params.content_size, 511) |
|
193 | 206 | self.assertEqual(params.window_size, 1024) |
|
194 | 207 | self.assertEqual(params.dict_id, 0) |
|
195 | 208 | self.assertFalse(params.has_checksum) |
|
196 | 209 | |
|
197 | 210 | # Window descriptor is 2nd byte after frame header. |
|
198 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
211 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40") | |
|
199 | 212 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
200 | 213 | self.assertEqual(params.window_size, 262144) |
|
201 | 214 | self.assertEqual(params.dict_id, 0) |
|
202 | 215 | self.assertFalse(params.has_checksum) |
|
203 | 216 | |
|
204 | 217 | # Set multiple things. |
|
205 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
218 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00") | |
|
206 | 219 | self.assertEqual(params.content_size, 272) |
|
207 | 220 | self.assertEqual(params.window_size, 262144) |
|
208 | 221 | self.assertEqual(params.dict_id, 15) |
|
209 | 222 | self.assertTrue(params.has_checksum) |
|
210 | 223 | |
|
211 | 224 | def test_input_types(self): |
|
212 |
v = zstd.FRAME_HEADER + b |
|
|
225 | v = zstd.FRAME_HEADER + b"\x00\x00" | |
|
213 | 226 | |
|
214 | 227 | mutable_array = bytearray(len(v)) |
|
215 | 228 | mutable_array[:] = v |
@@ -7,70 +7,99 try: | |||
|
7 | 7 | import hypothesis |
|
8 | 8 | import hypothesis.strategies as strategies |
|
9 | 9 | except ImportError: |
|
10 |
raise unittest.SkipTest( |
|
|
10 | raise unittest.SkipTest("hypothesis not available") | |
|
11 | 11 | |
|
12 | 12 | import zstandard as zstd |
|
13 | 13 | |
|
14 | 14 | from .common import ( |
|
15 | 15 | make_cffi, |
|
16 | TestCase, | |
|
17 | ) | |
|
18 | ||
|
19 | ||
|
20 | s_windowlog = strategies.integers( | |
|
21 | min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX | |
|
22 | ) | |
|
23 | s_chainlog = strategies.integers( | |
|
24 | min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX | |
|
25 | ) | |
|
26 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX) | |
|
27 | s_searchlog = strategies.integers( | |
|
28 | min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX | |
|
29 | ) | |
|
30 | s_minmatch = strategies.integers( | |
|
31 | min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX | |
|
32 | ) | |
|
33 | s_targetlength = strategies.integers( | |
|
34 | min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX | |
|
35 | ) | |
|
36 | s_strategy = strategies.sampled_from( | |
|
37 | ( | |
|
38 | zstd.STRATEGY_FAST, | |
|
39 | zstd.STRATEGY_DFAST, | |
|
40 | zstd.STRATEGY_GREEDY, | |
|
41 | zstd.STRATEGY_LAZY, | |
|
42 | zstd.STRATEGY_LAZY2, | |
|
43 | zstd.STRATEGY_BTLAZY2, | |
|
44 | zstd.STRATEGY_BTOPT, | |
|
45 | zstd.STRATEGY_BTULTRA, | |
|
46 | zstd.STRATEGY_BTULTRA2, | |
|
47 | ) | |
|
16 | 48 | ) |
|
17 | 49 | |
|
18 | 50 | |
|
19 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, | |
|
20 | max_value=zstd.WINDOWLOG_MAX) | |
|
21 | s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN, | |
|
22 | max_value=zstd.CHAINLOG_MAX) | |
|
23 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, | |
|
24 | max_value=zstd.HASHLOG_MAX) | |
|
25 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, | |
|
26 | max_value=zstd.SEARCHLOG_MAX) | |
|
27 | s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN, | |
|
28 | max_value=zstd.MINMATCH_MAX) | |
|
29 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, | |
|
30 | max_value=zstd.TARGETLENGTH_MAX) | |
|
31 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, | |
|
32 | zstd.STRATEGY_DFAST, | |
|
33 | zstd.STRATEGY_GREEDY, | |
|
34 | zstd.STRATEGY_LAZY, | |
|
35 | zstd.STRATEGY_LAZY2, | |
|
36 | zstd.STRATEGY_BTLAZY2, | |
|
37 | zstd.STRATEGY_BTOPT, | |
|
38 | zstd.STRATEGY_BTULTRA, | |
|
39 | zstd.STRATEGY_BTULTRA2)) | |
|
40 | ||
|
51 | @make_cffi | |
|
52 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
53 | class TestCompressionParametersHypothesis(TestCase): | |
|
54 | @hypothesis.given( | |
|
55 | s_windowlog, | |
|
56 | s_chainlog, | |
|
57 | s_hashlog, | |
|
58 | s_searchlog, | |
|
59 | s_minmatch, | |
|
60 | s_targetlength, | |
|
61 | s_strategy, | |
|
62 | ) | |
|
63 | def test_valid_init( | |
|
64 | self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy | |
|
65 | ): | |
|
66 | zstd.ZstdCompressionParameters( | |
|
67 | window_log=windowlog, | |
|
68 | chain_log=chainlog, | |
|
69 | hash_log=hashlog, | |
|
70 | search_log=searchlog, | |
|
71 | min_match=minmatch, | |
|
72 | target_length=targetlength, | |
|
73 | strategy=strategy, | |
|
74 | ) | |
|
41 | 75 | |
|
42 | @make_cffi | |
|
43 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
|
44 | class TestCompressionParametersHypothesis(unittest.TestCase): | |
|
45 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
|
46 | s_minmatch, s_targetlength, s_strategy) | |
|
47 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, | |
|
48 | minmatch, targetlength, strategy): | |
|
49 | zstd.ZstdCompressionParameters(window_log=windowlog, | |
|
50 | chain_log=chainlog, | |
|
51 | hash_log=hashlog, | |
|
52 | search_log=searchlog, | |
|
53 | min_match=minmatch, | |
|
54 | target_length=targetlength, | |
|
55 | strategy=strategy) | |
|
56 | ||
|
57 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
|
58 | s_minmatch, s_targetlength, s_strategy) | |
|
59 | def test_estimated_compression_context_size(self, windowlog, chainlog, | |
|
60 | hashlog, searchlog, | |
|
61 | minmatch, targetlength, | |
|
62 | strategy): | |
|
63 | if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): | |
|
76 | @hypothesis.given( | |
|
77 | s_windowlog, | |
|
78 | s_chainlog, | |
|
79 | s_hashlog, | |
|
80 | s_searchlog, | |
|
81 | s_minmatch, | |
|
82 | s_targetlength, | |
|
83 | s_strategy, | |
|
84 | ) | |
|
85 | def test_estimated_compression_context_size( | |
|
86 | self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy | |
|
87 | ): | |
|
88 | if minmatch == zstd.MINMATCH_MIN and strategy in ( | |
|
89 | zstd.STRATEGY_FAST, | |
|
90 | zstd.STRATEGY_GREEDY, | |
|
91 | ): | |
|
64 | 92 | minmatch += 1 |
|
65 | 93 | elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST: |
|
66 | 94 | minmatch -= 1 |
|
67 | 95 | |
|
68 |
p = zstd.ZstdCompressionParameters( |
|
|
69 | chain_log=chainlog, | |
|
70 | hash_log=hashlog, | |
|
71 | search_log=searchlog, | |
|
72 | min_match=minmatch, | |
|
73 | target_length=targetlength, | |
|
74 | strategy=strategy) | |
|
96 | p = zstd.ZstdCompressionParameters( | |
|
97 | window_log=windowlog, | |
|
98 | chain_log=chainlog, | |
|
99 | hash_log=hashlog, | |
|
100 | search_log=searchlog, | |
|
101 | min_match=minmatch, | |
|
102 | target_length=targetlength, | |
|
103 | strategy=strategy, | |
|
104 | ) | |
|
75 | 105 | size = p.estimated_compression_context_size() |
|
76 |
This diff has been collapsed as it changes many lines, (729 lines changed) Show them Hide them | |||
@@ -13,6 +13,7 from .common import ( | |||
|
13 | 13 | make_cffi, |
|
14 | 14 | NonClosingBytesIO, |
|
15 | 15 | OpCountingBytesIO, |
|
16 | TestCase, | |
|
16 | 17 | ) |
|
17 | 18 | |
|
18 | 19 | |
@@ -23,62 +24,67 else: | |||
|
23 | 24 | |
|
24 | 25 | |
|
25 | 26 | @make_cffi |
|
26 |
class TestFrameHeaderSize( |
|
|
27 | class TestFrameHeaderSize(TestCase): | |
|
27 | 28 | def test_empty(self): |
|
28 |
with self.assertRaisesRegex |
|
|
29 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |
|
30 | 'is incorrect'): | |
|
31 | zstd.frame_header_size(b'') | |
|
29 | with self.assertRaisesRegex( | |
|
30 | zstd.ZstdError, | |
|
31 | "could not determine frame header size: Src size " "is incorrect", | |
|
32 | ): | |
|
33 | zstd.frame_header_size(b"") | |
|
32 | 34 | |
|
33 | 35 | def test_too_small(self): |
|
34 |
with self.assertRaisesRegex |
|
|
35 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |
|
36 | 'is incorrect'): | |
|
37 | zstd.frame_header_size(b'foob') | |
|
36 | with self.assertRaisesRegex( | |
|
37 | zstd.ZstdError, | |
|
38 | "could not determine frame header size: Src size " "is incorrect", | |
|
39 | ): | |
|
40 | zstd.frame_header_size(b"foob") | |
|
38 | 41 | |
|
39 | 42 | def test_basic(self): |
|
40 | 43 | # It doesn't matter that it isn't a valid frame. |
|
41 |
self.assertEqual(zstd.frame_header_size(b |
|
|
44 | self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6) | |
|
42 | 45 | |
|
43 | 46 | |
|
44 | 47 | @make_cffi |
|
45 |
class TestFrameContentSize( |
|
|
48 | class TestFrameContentSize(TestCase): | |
|
46 | 49 | def test_empty(self): |
|
47 |
with self.assertRaisesRegex |
|
|
48 |
|
|
|
49 | zstd.frame_content_size(b'') | |
|
50 | with self.assertRaisesRegex( | |
|
51 | zstd.ZstdError, "error when determining content size" | |
|
52 | ): | |
|
53 | zstd.frame_content_size(b"") | |
|
50 | 54 | |
|
51 | 55 | def test_too_small(self): |
|
52 |
with self.assertRaisesRegex |
|
|
53 |
|
|
|
54 | zstd.frame_content_size(b'foob') | |
|
56 | with self.assertRaisesRegex( | |
|
57 | zstd.ZstdError, "error when determining content size" | |
|
58 | ): | |
|
59 | zstd.frame_content_size(b"foob") | |
|
55 | 60 | |
|
56 | 61 | def test_bad_frame(self): |
|
57 |
with self.assertRaisesRegex |
|
|
58 |
|
|
|
59 | zstd.frame_content_size(b'invalid frame header') | |
|
62 | with self.assertRaisesRegex( | |
|
63 | zstd.ZstdError, "error when determining content size" | |
|
64 | ): | |
|
65 | zstd.frame_content_size(b"invalid frame header") | |
|
60 | 66 | |
|
61 | 67 | def test_unknown(self): |
|
62 | 68 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
63 |
frame = cctx.compress(b |
|
|
69 | frame = cctx.compress(b"foobar") | |
|
64 | 70 | |
|
65 | 71 | self.assertEqual(zstd.frame_content_size(frame), -1) |
|
66 | 72 | |
|
67 | 73 | def test_empty(self): |
|
68 | 74 | cctx = zstd.ZstdCompressor() |
|
69 |
frame = cctx.compress(b |
|
|
75 | frame = cctx.compress(b"") | |
|
70 | 76 | |
|
71 | 77 | self.assertEqual(zstd.frame_content_size(frame), 0) |
|
72 | 78 | |
|
73 | 79 | def test_basic(self): |
|
74 | 80 | cctx = zstd.ZstdCompressor() |
|
75 |
frame = cctx.compress(b |
|
|
81 | frame = cctx.compress(b"foobar") | |
|
76 | 82 | |
|
77 | 83 | self.assertEqual(zstd.frame_content_size(frame), 6) |
|
78 | 84 | |
|
79 | 85 | |
|
80 | 86 | @make_cffi |
|
81 |
class TestDecompressor( |
|
|
87 | class TestDecompressor(TestCase): | |
|
82 | 88 | def test_memory_size(self): |
|
83 | 89 | dctx = zstd.ZstdDecompressor() |
|
84 | 90 | |
@@ -86,22 +92,26 class TestDecompressor(unittest.TestCase | |||
|
86 | 92 | |
|
87 | 93 | |
|
88 | 94 | @make_cffi |
|
89 |
class TestDecompressor_decompress( |
|
|
95 | class TestDecompressor_decompress(TestCase): | |
|
90 | 96 | def test_empty_input(self): |
|
91 | 97 | dctx = zstd.ZstdDecompressor() |
|
92 | 98 | |
|
93 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
|
94 | dctx.decompress(b'') | |
|
99 | with self.assertRaisesRegex( | |
|
100 | zstd.ZstdError, "error determining content size from frame header" | |
|
101 | ): | |
|
102 | dctx.decompress(b"") | |
|
95 | 103 | |
|
96 | 104 | def test_invalid_input(self): |
|
97 | 105 | dctx = zstd.ZstdDecompressor() |
|
98 | 106 | |
|
99 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
|
100 | dctx.decompress(b'foobar') | |
|
107 | with self.assertRaisesRegex( | |
|
108 | zstd.ZstdError, "error determining content size from frame header" | |
|
109 | ): | |
|
110 | dctx.decompress(b"foobar") | |
|
101 | 111 | |
|
102 | 112 | def test_input_types(self): |
|
103 | 113 | cctx = zstd.ZstdCompressor(level=1) |
|
104 |
compressed = cctx.compress(b |
|
|
114 | compressed = cctx.compress(b"foo") | |
|
105 | 115 | |
|
106 | 116 | mutable_array = bytearray(len(compressed)) |
|
107 | 117 | mutable_array[:] = compressed |
@@ -114,36 +124,38 class TestDecompressor_decompress(unitte | |||
|
114 | 124 | |
|
115 | 125 | dctx = zstd.ZstdDecompressor() |
|
116 | 126 | for source in sources: |
|
117 |
self.assertEqual(dctx.decompress(source), b |
|
|
127 | self.assertEqual(dctx.decompress(source), b"foo") | |
|
118 | 128 | |
|
119 | 129 | def test_no_content_size_in_frame(self): |
|
120 | 130 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
121 |
compressed = cctx.compress(b |
|
|
131 | compressed = cctx.compress(b"foobar") | |
|
122 | 132 | |
|
123 | 133 | dctx = zstd.ZstdDecompressor() |
|
124 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): | |
|
134 | with self.assertRaisesRegex( | |
|
135 | zstd.ZstdError, "could not determine content size in frame header" | |
|
136 | ): | |
|
125 | 137 | dctx.decompress(compressed) |
|
126 | 138 | |
|
127 | 139 | def test_content_size_present(self): |
|
128 | 140 | cctx = zstd.ZstdCompressor() |
|
129 |
compressed = cctx.compress(b |
|
|
141 | compressed = cctx.compress(b"foobar") | |
|
130 | 142 | |
|
131 | 143 | dctx = zstd.ZstdDecompressor() |
|
132 | 144 | decompressed = dctx.decompress(compressed) |
|
133 |
self.assertEqual(decompressed, b |
|
|
145 | self.assertEqual(decompressed, b"foobar") | |
|
134 | 146 | |
|
135 | 147 | def test_empty_roundtrip(self): |
|
136 | 148 | cctx = zstd.ZstdCompressor() |
|
137 |
compressed = cctx.compress(b |
|
|
149 | compressed = cctx.compress(b"") | |
|
138 | 150 | |
|
139 | 151 | dctx = zstd.ZstdDecompressor() |
|
140 | 152 | decompressed = dctx.decompress(compressed) |
|
141 | 153 | |
|
142 |
self.assertEqual(decompressed, b |
|
|
154 | self.assertEqual(decompressed, b"") | |
|
143 | 155 | |
|
144 | 156 | def test_max_output_size(self): |
|
145 | 157 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
146 |
source = b |
|
|
158 | source = b"foobar" * 256 | |
|
147 | 159 | compressed = cctx.compress(source) |
|
148 | 160 | |
|
149 | 161 | dctx = zstd.ZstdDecompressor() |
@@ -152,8 +164,9 class TestDecompressor_decompress(unitte | |||
|
152 | 164 | self.assertEqual(decompressed, source) |
|
153 | 165 | |
|
154 | 166 | # Input size - 1 fails |
|
155 |
with self.assertRaisesRegex |
|
|
156 |
|
|
|
167 | with self.assertRaisesRegex( | |
|
168 | zstd.ZstdError, "decompression error: did not decompress full frame" | |
|
169 | ): | |
|
157 | 170 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
158 | 171 | |
|
159 | 172 | # Input size + 1 works |
@@ -166,24 +179,24 class TestDecompressor_decompress(unitte | |||
|
166 | 179 | |
|
167 | 180 | def test_stupidly_large_output_buffer(self): |
|
168 | 181 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
169 |
compressed = cctx.compress(b |
|
|
182 | compressed = cctx.compress(b"foobar" * 256) | |
|
170 | 183 | dctx = zstd.ZstdDecompressor() |
|
171 | 184 | |
|
172 | 185 | # Will get OverflowError on some Python distributions that can't |
|
173 | 186 | # handle really large integers. |
|
174 | 187 | with self.assertRaises((MemoryError, OverflowError)): |
|
175 | dctx.decompress(compressed, max_output_size=2**62) | |
|
188 | dctx.decompress(compressed, max_output_size=2 ** 62) | |
|
176 | 189 | |
|
177 | 190 | def test_dictionary(self): |
|
178 | 191 | samples = [] |
|
179 | 192 | for i in range(128): |
|
180 |
samples.append(b |
|
|
181 |
samples.append(b |
|
|
182 |
samples.append(b |
|
|
193 | samples.append(b"foo" * 64) | |
|
194 | samples.append(b"bar" * 64) | |
|
195 | samples.append(b"foobar" * 64) | |
|
183 | 196 | |
|
184 | 197 | d = zstd.train_dictionary(8192, samples) |
|
185 | 198 | |
|
186 |
orig = b |
|
|
199 | orig = b"foobar" * 16384 | |
|
187 | 200 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
188 | 201 | compressed = cctx.compress(orig) |
|
189 | 202 | |
@@ -195,13 +208,13 class TestDecompressor_decompress(unitte | |||
|
195 | 208 | def test_dictionary_multiple(self): |
|
196 | 209 | samples = [] |
|
197 | 210 | for i in range(128): |
|
198 |
samples.append(b |
|
|
199 |
samples.append(b |
|
|
200 |
samples.append(b |
|
|
211 | samples.append(b"foo" * 64) | |
|
212 | samples.append(b"bar" * 64) | |
|
213 | samples.append(b"foobar" * 64) | |
|
201 | 214 | |
|
202 | 215 | d = zstd.train_dictionary(8192, samples) |
|
203 | 216 | |
|
204 |
sources = (b |
|
|
217 | sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192) | |
|
205 | 218 | compressed = [] |
|
206 | 219 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
207 | 220 | for source in sources: |
@@ -213,7 +226,7 class TestDecompressor_decompress(unitte | |||
|
213 | 226 | self.assertEqual(decompressed, sources[i]) |
|
214 | 227 | |
|
215 | 228 | def test_max_window_size(self): |
|
216 |
with open(__file__, |
|
|
229 | with open(__file__, "rb") as fh: | |
|
217 | 230 | source = fh.read() |
|
218 | 231 | |
|
219 | 232 | # If we write a content size, the decompressor engages single pass |
@@ -221,15 +234,16 class TestDecompressor_decompress(unitte | |||
|
221 | 234 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
222 | 235 | frame = cctx.compress(source) |
|
223 | 236 | |
|
224 | dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN) | |
|
237 | dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN) | |
|
225 | 238 | |
|
226 |
with self.assertRaisesRegex |
|
|
227 |
zstd.ZstdError, |
|
|
239 | with self.assertRaisesRegex( | |
|
240 | zstd.ZstdError, "decompression error: Frame requires too much memory" | |
|
241 | ): | |
|
228 | 242 | dctx.decompress(frame, max_output_size=len(source)) |
|
229 | 243 | |
|
230 | 244 | |
|
231 | 245 | @make_cffi |
|
232 |
class TestDecompressor_copy_stream( |
|
|
246 | class TestDecompressor_copy_stream(TestCase): | |
|
233 | 247 | def test_no_read(self): |
|
234 | 248 | source = object() |
|
235 | 249 | dest = io.BytesIO() |
@@ -256,12 +270,12 class TestDecompressor_copy_stream(unitt | |||
|
256 | 270 | |
|
257 | 271 | self.assertEqual(r, 0) |
|
258 | 272 | self.assertEqual(w, 0) |
|
259 |
self.assertEqual(dest.getvalue(), b |
|
|
273 | self.assertEqual(dest.getvalue(), b"") | |
|
260 | 274 | |
|
261 | 275 | def test_large_data(self): |
|
262 | 276 | source = io.BytesIO() |
|
263 | 277 | for i in range(255): |
|
264 |
source.write(struct.Struct( |
|
|
278 | source.write(struct.Struct(">B").pack(i) * 16384) | |
|
265 | 279 | source.seek(0) |
|
266 | 280 | |
|
267 | 281 | compressed = io.BytesIO() |
@@ -277,33 +291,32 class TestDecompressor_copy_stream(unitt | |||
|
277 | 291 | self.assertEqual(w, len(source.getvalue())) |
|
278 | 292 | |
|
279 | 293 | def test_read_write_size(self): |
|
280 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress( | |
|
281 | b'foobarfoobar')) | |
|
294 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) | |
|
282 | 295 | |
|
283 | 296 | dest = OpCountingBytesIO() |
|
284 | 297 | dctx = zstd.ZstdDecompressor() |
|
285 | 298 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
286 | 299 | |
|
287 | 300 | self.assertEqual(r, len(source.getvalue())) |
|
288 |
self.assertEqual(w, len(b |
|
|
301 | self.assertEqual(w, len(b"foobarfoobar")) | |
|
289 | 302 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
290 | 303 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
291 | 304 | |
|
292 | 305 | |
|
293 | 306 | @make_cffi |
|
294 |
class TestDecompressor_stream_reader( |
|
|
307 | class TestDecompressor_stream_reader(TestCase): | |
|
295 | 308 | def test_context_manager(self): |
|
296 | 309 | dctx = zstd.ZstdDecompressor() |
|
297 | 310 | |
|
298 |
with dctx.stream_reader(b |
|
|
299 |
with self.assertRaisesRegex |
|
|
311 | with dctx.stream_reader(b"foo") as reader: | |
|
312 | with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): | |
|
300 | 313 | with reader as reader2: |
|
301 | 314 | pass |
|
302 | 315 | |
|
303 | 316 | def test_not_implemented(self): |
|
304 | 317 | dctx = zstd.ZstdDecompressor() |
|
305 | 318 | |
|
306 |
with dctx.stream_reader(b |
|
|
319 | with dctx.stream_reader(b"foo") as reader: | |
|
307 | 320 | with self.assertRaises(io.UnsupportedOperation): |
|
308 | 321 | reader.readline() |
|
309 | 322 | |
@@ -317,7 +330,7 class TestDecompressor_stream_reader(uni | |||
|
317 | 330 | next(reader) |
|
318 | 331 | |
|
319 | 332 | with self.assertRaises(io.UnsupportedOperation): |
|
320 |
reader.write(b |
|
|
333 | reader.write(b"foo") | |
|
321 | 334 | |
|
322 | 335 | with self.assertRaises(io.UnsupportedOperation): |
|
323 | 336 | reader.writelines([]) |
@@ -325,7 +338,7 class TestDecompressor_stream_reader(uni | |||
|
325 | 338 | def test_constant_methods(self): |
|
326 | 339 | dctx = zstd.ZstdDecompressor() |
|
327 | 340 | |
|
328 |
with dctx.stream_reader(b |
|
|
341 | with dctx.stream_reader(b"foo") as reader: | |
|
329 | 342 | self.assertFalse(reader.closed) |
|
330 | 343 | self.assertTrue(reader.readable()) |
|
331 | 344 | self.assertFalse(reader.writable()) |
@@ -340,29 +353,31 class TestDecompressor_stream_reader(uni | |||
|
340 | 353 | def test_read_closed(self): |
|
341 | 354 | dctx = zstd.ZstdDecompressor() |
|
342 | 355 | |
|
343 |
with dctx.stream_reader(b |
|
|
356 | with dctx.stream_reader(b"foo") as reader: | |
|
344 | 357 | reader.close() |
|
345 | 358 | self.assertTrue(reader.closed) |
|
346 |
with self.assertRaisesRegex |
|
|
359 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
347 | 360 | reader.read(1) |
|
348 | 361 | |
|
349 | 362 | def test_read_sizes(self): |
|
350 | 363 | cctx = zstd.ZstdCompressor() |
|
351 |
foo = cctx.compress(b |
|
|
364 | foo = cctx.compress(b"foo") | |
|
352 | 365 | |
|
353 | 366 | dctx = zstd.ZstdDecompressor() |
|
354 | 367 | |
|
355 | 368 | with dctx.stream_reader(foo) as reader: |
|
356 | with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): | |
|
369 | with self.assertRaisesRegex( | |
|
370 | ValueError, "cannot read negative amounts less than -1" | |
|
371 | ): | |
|
357 | 372 | reader.read(-2) |
|
358 | 373 | |
|
359 |
self.assertEqual(reader.read(0), b |
|
|
360 |
self.assertEqual(reader.read(), b |
|
|
374 | self.assertEqual(reader.read(0), b"") | |
|
375 | self.assertEqual(reader.read(), b"foo") | |
|
361 | 376 | |
|
362 | 377 | def test_read_buffer(self): |
|
363 | 378 | cctx = zstd.ZstdCompressor() |
|
364 | 379 | |
|
365 |
source = b |
|
|
380 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
366 | 381 | frame = cctx.compress(source) |
|
367 | 382 | |
|
368 | 383 | dctx = zstd.ZstdDecompressor() |
@@ -376,14 +391,14 class TestDecompressor_stream_reader(uni | |||
|
376 | 391 | self.assertEqual(reader.tell(), len(source)) |
|
377 | 392 | |
|
378 | 393 | # Read after EOF should return empty bytes. |
|
379 |
self.assertEqual(reader.read(1), b |
|
|
394 | self.assertEqual(reader.read(1), b"") | |
|
380 | 395 | self.assertEqual(reader.tell(), len(result)) |
|
381 | 396 | |
|
382 | 397 | self.assertTrue(reader.closed) |
|
383 | 398 | |
|
384 | 399 | def test_read_buffer_small_chunks(self): |
|
385 | 400 | cctx = zstd.ZstdCompressor() |
|
386 |
source = b |
|
|
401 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
387 | 402 | frame = cctx.compress(source) |
|
388 | 403 | |
|
389 | 404 | dctx = zstd.ZstdDecompressor() |
@@ -398,11 +413,11 class TestDecompressor_stream_reader(uni | |||
|
398 | 413 | chunks.append(chunk) |
|
399 | 414 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
400 | 415 | |
|
401 |
self.assertEqual(b |
|
|
416 | self.assertEqual(b"".join(chunks), source) | |
|
402 | 417 | |
|
403 | 418 | def test_read_stream(self): |
|
404 | 419 | cctx = zstd.ZstdCompressor() |
|
405 |
source = b |
|
|
420 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
406 | 421 | frame = cctx.compress(source) |
|
407 | 422 | |
|
408 | 423 | dctx = zstd.ZstdDecompressor() |
@@ -412,7 +427,7 class TestDecompressor_stream_reader(uni | |||
|
412 | 427 | chunk = reader.read(8192) |
|
413 | 428 | self.assertEqual(chunk, source) |
|
414 | 429 | self.assertEqual(reader.tell(), len(source)) |
|
415 |
self.assertEqual(reader.read(1), b |
|
|
430 | self.assertEqual(reader.read(1), b"") | |
|
416 | 431 | self.assertEqual(reader.tell(), len(source)) |
|
417 | 432 | self.assertFalse(reader.closed) |
|
418 | 433 | |
@@ -420,7 +435,7 class TestDecompressor_stream_reader(uni | |||
|
420 | 435 | |
|
421 | 436 | def test_read_stream_small_chunks(self): |
|
422 | 437 | cctx = zstd.ZstdCompressor() |
|
423 |
source = b |
|
|
438 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
424 | 439 | frame = cctx.compress(source) |
|
425 | 440 | |
|
426 | 441 | dctx = zstd.ZstdDecompressor() |
@@ -435,11 +450,11 class TestDecompressor_stream_reader(uni | |||
|
435 | 450 | chunks.append(chunk) |
|
436 | 451 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
437 | 452 | |
|
438 |
self.assertEqual(b |
|
|
453 | self.assertEqual(b"".join(chunks), source) | |
|
439 | 454 | |
|
440 | 455 | def test_read_after_exit(self): |
|
441 | 456 | cctx = zstd.ZstdCompressor() |
|
442 |
frame = cctx.compress(b |
|
|
457 | frame = cctx.compress(b"foo" * 60) | |
|
443 | 458 | |
|
444 | 459 | dctx = zstd.ZstdDecompressor() |
|
445 | 460 | |
@@ -449,45 +464,46 class TestDecompressor_stream_reader(uni | |||
|
449 | 464 | |
|
450 | 465 | self.assertTrue(reader.closed) |
|
451 | 466 | |
|
452 |
with self.assertRaisesRegex |
|
|
467 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
453 | 468 | reader.read(10) |
|
454 | 469 | |
|
455 | 470 | def test_illegal_seeks(self): |
|
456 | 471 | cctx = zstd.ZstdCompressor() |
|
457 |
frame = cctx.compress(b |
|
|
472 | frame = cctx.compress(b"foo" * 60) | |
|
458 | 473 | |
|
459 | 474 | dctx = zstd.ZstdDecompressor() |
|
460 | 475 | |
|
461 | 476 | with dctx.stream_reader(frame) as reader: |
|
462 |
with self.assertRaisesRegex |
|
|
463 | 'cannot seek to negative position'): | |
|
477 | with self.assertRaisesRegex(ValueError, "cannot seek to negative position"): | |
|
464 | 478 | reader.seek(-1, os.SEEK_SET) |
|
465 | 479 | |
|
466 | 480 | reader.read(1) |
|
467 | 481 | |
|
468 |
with self.assertRaisesRegex |
|
|
469 |
ValueError, |
|
|
482 | with self.assertRaisesRegex( | |
|
483 | ValueError, "cannot seek zstd decompression stream backwards" | |
|
484 | ): | |
|
470 | 485 | reader.seek(0, os.SEEK_SET) |
|
471 | 486 | |
|
472 |
with self.assertRaisesRegex |
|
|
473 |
ValueError, |
|
|
487 | with self.assertRaisesRegex( | |
|
488 | ValueError, "cannot seek zstd decompression stream backwards" | |
|
489 | ): | |
|
474 | 490 | reader.seek(-1, os.SEEK_CUR) |
|
475 | 491 | |
|
476 |
with self.assertRaisesRegex |
|
|
477 | ValueError, | |
|
478 | 'zstd decompression streams cannot be seeked with SEEK_END'): | |
|
492 | with self.assertRaisesRegex( | |
|
493 | ValueError, "zstd decompression streams cannot be seeked with SEEK_END" | |
|
494 | ): | |
|
479 | 495 | reader.seek(0, os.SEEK_END) |
|
480 | 496 | |
|
481 | 497 | reader.close() |
|
482 | 498 | |
|
483 |
with self.assertRaisesRegex |
|
|
499 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
484 | 500 | reader.seek(4, os.SEEK_SET) |
|
485 | 501 | |
|
486 |
with self.assertRaisesRegex |
|
|
502 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
487 | 503 | reader.seek(0) |
|
488 | 504 | |
|
489 | 505 | def test_seek(self): |
|
490 |
source = b |
|
|
506 | source = b"foobar" * 60 | |
|
491 | 507 | cctx = zstd.ZstdCompressor() |
|
492 | 508 | frame = cctx.compress(source) |
|
493 | 509 | |
@@ -495,32 +511,32 class TestDecompressor_stream_reader(uni | |||
|
495 | 511 | |
|
496 | 512 | with dctx.stream_reader(frame) as reader: |
|
497 | 513 | reader.seek(3) |
|
498 |
self.assertEqual(reader.read(3), b |
|
|
514 | self.assertEqual(reader.read(3), b"bar") | |
|
499 | 515 | |
|
500 | 516 | reader.seek(4, os.SEEK_CUR) |
|
501 |
self.assertEqual(reader.read(2), b |
|
|
517 | self.assertEqual(reader.read(2), b"ar") | |
|
502 | 518 | |
|
503 | 519 | def test_no_context_manager(self): |
|
504 |
source = b |
|
|
520 | source = b"foobar" * 60 | |
|
505 | 521 | cctx = zstd.ZstdCompressor() |
|
506 | 522 | frame = cctx.compress(source) |
|
507 | 523 | |
|
508 | 524 | dctx = zstd.ZstdDecompressor() |
|
509 | 525 | reader = dctx.stream_reader(frame) |
|
510 | 526 | |
|
511 |
self.assertEqual(reader.read(6), b |
|
|
512 |
self.assertEqual(reader.read(18), b |
|
|
527 | self.assertEqual(reader.read(6), b"foobar") | |
|
528 | self.assertEqual(reader.read(18), b"foobar" * 3) | |
|
513 | 529 | self.assertFalse(reader.closed) |
|
514 | 530 | |
|
515 | 531 | # Calling close prevents subsequent use. |
|
516 | 532 | reader.close() |
|
517 | 533 | self.assertTrue(reader.closed) |
|
518 | 534 | |
|
519 |
with self.assertRaisesRegex |
|
|
535 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
520 | 536 | reader.read(6) |
|
521 | 537 | |
|
522 | 538 | def test_read_after_error(self): |
|
523 |
source = io.BytesIO(b |
|
|
539 | source = io.BytesIO(b"") | |
|
524 | 540 | dctx = zstd.ZstdDecompressor() |
|
525 | 541 | |
|
526 | 542 | reader = dctx.stream_reader(source) |
@@ -529,7 +545,7 class TestDecompressor_stream_reader(uni | |||
|
529 | 545 | reader.read(0) |
|
530 | 546 | |
|
531 | 547 | with reader: |
|
532 |
with self.assertRaisesRegex |
|
|
548 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
533 | 549 | reader.read(100) |
|
534 | 550 | |
|
535 | 551 | def test_partial_read(self): |
@@ -553,87 +569,87 class TestDecompressor_stream_reader(uni | |||
|
553 | 569 | cctx = zstd.ZstdCompressor() |
|
554 | 570 | source = io.BytesIO() |
|
555 | 571 | writer = cctx.stream_writer(source) |
|
556 |
writer.write(b |
|
|
572 | writer.write(b"foo") | |
|
557 | 573 | writer.flush(zstd.FLUSH_FRAME) |
|
558 |
writer.write(b |
|
|
574 | writer.write(b"bar") | |
|
559 | 575 | writer.flush(zstd.FLUSH_FRAME) |
|
560 | 576 | |
|
561 | 577 | dctx = zstd.ZstdDecompressor() |
|
562 | 578 | |
|
563 | 579 | reader = dctx.stream_reader(source.getvalue()) |
|
564 |
self.assertEqual(reader.read(2), b |
|
|
565 |
self.assertEqual(reader.read(2), b |
|
|
566 |
self.assertEqual(reader.read(2), b |
|
|
567 |
self.assertEqual(reader.read(2), b |
|
|
580 | self.assertEqual(reader.read(2), b"fo") | |
|
581 | self.assertEqual(reader.read(2), b"o") | |
|
582 | self.assertEqual(reader.read(2), b"ba") | |
|
583 | self.assertEqual(reader.read(2), b"r") | |
|
568 | 584 | |
|
569 | 585 | source.seek(0) |
|
570 | 586 | reader = dctx.stream_reader(source) |
|
571 |
self.assertEqual(reader.read(2), b |
|
|
572 |
self.assertEqual(reader.read(2), b |
|
|
573 |
self.assertEqual(reader.read(2), b |
|
|
574 |
self.assertEqual(reader.read(2), b |
|
|
587 | self.assertEqual(reader.read(2), b"fo") | |
|
588 | self.assertEqual(reader.read(2), b"o") | |
|
589 | self.assertEqual(reader.read(2), b"ba") | |
|
590 | self.assertEqual(reader.read(2), b"r") | |
|
575 | 591 | |
|
576 | 592 | reader = dctx.stream_reader(source.getvalue()) |
|
577 |
self.assertEqual(reader.read(3), b |
|
|
578 |
self.assertEqual(reader.read(3), b |
|
|
593 | self.assertEqual(reader.read(3), b"foo") | |
|
594 | self.assertEqual(reader.read(3), b"bar") | |
|
579 | 595 | |
|
580 | 596 | source.seek(0) |
|
581 | 597 | reader = dctx.stream_reader(source) |
|
582 |
self.assertEqual(reader.read(3), b |
|
|
583 |
self.assertEqual(reader.read(3), b |
|
|
598 | self.assertEqual(reader.read(3), b"foo") | |
|
599 | self.assertEqual(reader.read(3), b"bar") | |
|
584 | 600 | |
|
585 | 601 | reader = dctx.stream_reader(source.getvalue()) |
|
586 |
self.assertEqual(reader.read(4), b |
|
|
587 |
self.assertEqual(reader.read(4), b |
|
|
602 | self.assertEqual(reader.read(4), b"foo") | |
|
603 | self.assertEqual(reader.read(4), b"bar") | |
|
588 | 604 | |
|
589 | 605 | source.seek(0) |
|
590 | 606 | reader = dctx.stream_reader(source) |
|
591 |
self.assertEqual(reader.read(4), b |
|
|
592 |
self.assertEqual(reader.read(4), b |
|
|
607 | self.assertEqual(reader.read(4), b"foo") | |
|
608 | self.assertEqual(reader.read(4), b"bar") | |
|
593 | 609 | |
|
594 | 610 | reader = dctx.stream_reader(source.getvalue()) |
|
595 |
self.assertEqual(reader.read(128), b |
|
|
596 |
self.assertEqual(reader.read(128), b |
|
|
611 | self.assertEqual(reader.read(128), b"foo") | |
|
612 | self.assertEqual(reader.read(128), b"bar") | |
|
597 | 613 | |
|
598 | 614 | source.seek(0) |
|
599 | 615 | reader = dctx.stream_reader(source) |
|
600 |
self.assertEqual(reader.read(128), b |
|
|
601 |
self.assertEqual(reader.read(128), b |
|
|
616 | self.assertEqual(reader.read(128), b"foo") | |
|
617 | self.assertEqual(reader.read(128), b"bar") | |
|
602 | 618 | |
|
603 | 619 | # Now tests for reads spanning frames. |
|
604 | 620 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
605 |
self.assertEqual(reader.read(3), b |
|
|
606 |
self.assertEqual(reader.read(3), b |
|
|
621 | self.assertEqual(reader.read(3), b"foo") | |
|
622 | self.assertEqual(reader.read(3), b"bar") | |
|
607 | 623 | |
|
608 | 624 | source.seek(0) |
|
609 | 625 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
610 |
self.assertEqual(reader.read(3), b |
|
|
611 |
self.assertEqual(reader.read(3), b |
|
|
626 | self.assertEqual(reader.read(3), b"foo") | |
|
627 | self.assertEqual(reader.read(3), b"bar") | |
|
612 | 628 | |
|
613 | 629 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
614 |
self.assertEqual(reader.read(6), b |
|
|
630 | self.assertEqual(reader.read(6), b"foobar") | |
|
615 | 631 | |
|
616 | 632 | source.seek(0) |
|
617 | 633 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
618 |
self.assertEqual(reader.read(6), b |
|
|
634 | self.assertEqual(reader.read(6), b"foobar") | |
|
619 | 635 | |
|
620 | 636 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
621 |
self.assertEqual(reader.read(7), b |
|
|
637 | self.assertEqual(reader.read(7), b"foobar") | |
|
622 | 638 | |
|
623 | 639 | source.seek(0) |
|
624 | 640 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
625 |
self.assertEqual(reader.read(7), b |
|
|
641 | self.assertEqual(reader.read(7), b"foobar") | |
|
626 | 642 | |
|
627 | 643 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
628 |
self.assertEqual(reader.read(128), b |
|
|
644 | self.assertEqual(reader.read(128), b"foobar") | |
|
629 | 645 | |
|
630 | 646 | source.seek(0) |
|
631 | 647 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
632 |
self.assertEqual(reader.read(128), b |
|
|
648 | self.assertEqual(reader.read(128), b"foobar") | |
|
633 | 649 | |
|
634 | 650 | def test_readinto(self): |
|
635 | 651 | cctx = zstd.ZstdCompressor() |
|
636 |
foo = cctx.compress(b |
|
|
652 | foo = cctx.compress(b"foo") | |
|
637 | 653 | |
|
638 | 654 | dctx = zstd.ZstdDecompressor() |
|
639 | 655 | |
@@ -641,116 +657,116 class TestDecompressor_stream_reader(uni | |||
|
641 | 657 | # The exact exception varies based on the backend. |
|
642 | 658 | reader = dctx.stream_reader(foo) |
|
643 | 659 | with self.assertRaises(Exception): |
|
644 |
reader.readinto(b |
|
|
660 | reader.readinto(b"foobar") | |
|
645 | 661 | |
|
646 | 662 | # readinto() with sufficiently large destination. |
|
647 | 663 | b = bytearray(1024) |
|
648 | 664 | reader = dctx.stream_reader(foo) |
|
649 | 665 | self.assertEqual(reader.readinto(b), 3) |
|
650 |
self.assertEqual(b[0:3], b |
|
|
666 | self.assertEqual(b[0:3], b"foo") | |
|
651 | 667 | self.assertEqual(reader.readinto(b), 0) |
|
652 |
self.assertEqual(b[0:3], b |
|
|
668 | self.assertEqual(b[0:3], b"foo") | |
|
653 | 669 | |
|
654 | 670 | # readinto() with small reads. |
|
655 | 671 | b = bytearray(1024) |
|
656 | 672 | reader = dctx.stream_reader(foo, read_size=1) |
|
657 | 673 | self.assertEqual(reader.readinto(b), 3) |
|
658 |
self.assertEqual(b[0:3], b |
|
|
674 | self.assertEqual(b[0:3], b"foo") | |
|
659 | 675 | |
|
660 | 676 | # Too small destination buffer. |
|
661 | 677 | b = bytearray(2) |
|
662 | 678 | reader = dctx.stream_reader(foo) |
|
663 | 679 | self.assertEqual(reader.readinto(b), 2) |
|
664 |
self.assertEqual(b[:], b |
|
|
680 | self.assertEqual(b[:], b"fo") | |
|
665 | 681 | |
|
666 | 682 | def test_readinto1(self): |
|
667 | 683 | cctx = zstd.ZstdCompressor() |
|
668 |
foo = cctx.compress(b |
|
|
684 | foo = cctx.compress(b"foo") | |
|
669 | 685 | |
|
670 | 686 | dctx = zstd.ZstdDecompressor() |
|
671 | 687 | |
|
672 | 688 | reader = dctx.stream_reader(foo) |
|
673 | 689 | with self.assertRaises(Exception): |
|
674 |
reader.readinto1(b |
|
|
690 | reader.readinto1(b"foobar") | |
|
675 | 691 | |
|
676 | 692 | # Sufficiently large destination. |
|
677 | 693 | b = bytearray(1024) |
|
678 | 694 | reader = dctx.stream_reader(foo) |
|
679 | 695 | self.assertEqual(reader.readinto1(b), 3) |
|
680 |
self.assertEqual(b[0:3], b |
|
|
696 | self.assertEqual(b[0:3], b"foo") | |
|
681 | 697 | self.assertEqual(reader.readinto1(b), 0) |
|
682 |
self.assertEqual(b[0:3], b |
|
|
698 | self.assertEqual(b[0:3], b"foo") | |
|
683 | 699 | |
|
684 | 700 | # readinto() with small reads. |
|
685 | 701 | b = bytearray(1024) |
|
686 | 702 | reader = dctx.stream_reader(foo, read_size=1) |
|
687 | 703 | self.assertEqual(reader.readinto1(b), 3) |
|
688 |
self.assertEqual(b[0:3], b |
|
|
704 | self.assertEqual(b[0:3], b"foo") | |
|
689 | 705 | |
|
690 | 706 | # Too small destination buffer. |
|
691 | 707 | b = bytearray(2) |
|
692 | 708 | reader = dctx.stream_reader(foo) |
|
693 | 709 | self.assertEqual(reader.readinto1(b), 2) |
|
694 |
self.assertEqual(b[:], b |
|
|
710 | self.assertEqual(b[:], b"fo") | |
|
695 | 711 | |
|
696 | 712 | def test_readall(self): |
|
697 | 713 | cctx = zstd.ZstdCompressor() |
|
698 |
foo = cctx.compress(b |
|
|
714 | foo = cctx.compress(b"foo") | |
|
699 | 715 | |
|
700 | 716 | dctx = zstd.ZstdDecompressor() |
|
701 | 717 | reader = dctx.stream_reader(foo) |
|
702 | 718 | |
|
703 |
self.assertEqual(reader.readall(), b |
|
|
719 | self.assertEqual(reader.readall(), b"foo") | |
|
704 | 720 | |
|
705 | 721 | def test_read1(self): |
|
706 | 722 | cctx = zstd.ZstdCompressor() |
|
707 |
foo = cctx.compress(b |
|
|
723 | foo = cctx.compress(b"foo") | |
|
708 | 724 | |
|
709 | 725 | dctx = zstd.ZstdDecompressor() |
|
710 | 726 | |
|
711 | 727 | b = OpCountingBytesIO(foo) |
|
712 | 728 | reader = dctx.stream_reader(b) |
|
713 | 729 | |
|
714 |
self.assertEqual(reader.read1(), b |
|
|
730 | self.assertEqual(reader.read1(), b"foo") | |
|
715 | 731 | self.assertEqual(b._read_count, 1) |
|
716 | 732 | |
|
717 | 733 | b = OpCountingBytesIO(foo) |
|
718 | 734 | reader = dctx.stream_reader(b) |
|
719 | 735 | |
|
720 |
self.assertEqual(reader.read1(0), b |
|
|
721 |
self.assertEqual(reader.read1(2), b |
|
|
736 | self.assertEqual(reader.read1(0), b"") | |
|
737 | self.assertEqual(reader.read1(2), b"fo") | |
|
722 | 738 | self.assertEqual(b._read_count, 1) |
|
723 |
self.assertEqual(reader.read1(1), b |
|
|
739 | self.assertEqual(reader.read1(1), b"o") | |
|
724 | 740 | self.assertEqual(b._read_count, 1) |
|
725 |
self.assertEqual(reader.read1(1), b |
|
|
741 | self.assertEqual(reader.read1(1), b"") | |
|
726 | 742 | self.assertEqual(b._read_count, 2) |
|
727 | 743 | |
|
728 | 744 | def test_read_lines(self): |
|
729 | 745 | cctx = zstd.ZstdCompressor() |
|
730 |
source = b |
|
|
746 | source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024)) | |
|
731 | 747 | |
|
732 | 748 | frame = cctx.compress(source) |
|
733 | 749 | |
|
734 | 750 | dctx = zstd.ZstdDecompressor() |
|
735 | 751 | reader = dctx.stream_reader(frame) |
|
736 |
tr = io.TextIOWrapper(reader, encoding= |
|
|
752 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
|
737 | 753 | |
|
738 | 754 | lines = [] |
|
739 | 755 | for line in tr: |
|
740 |
lines.append(line.encode( |
|
|
756 | lines.append(line.encode("utf-8")) | |
|
741 | 757 | |
|
742 | 758 | self.assertEqual(len(lines), 1024) |
|
743 |
self.assertEqual(b |
|
|
759 | self.assertEqual(b"".join(lines), source) | |
|
744 | 760 | |
|
745 | 761 | reader = dctx.stream_reader(frame) |
|
746 |
tr = io.TextIOWrapper(reader, encoding= |
|
|
762 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
|
747 | 763 | |
|
748 | 764 | lines = tr.readlines() |
|
749 | 765 | self.assertEqual(len(lines), 1024) |
|
750 |
self.assertEqual( |
|
|
766 | self.assertEqual("".join(lines).encode("utf-8"), source) | |
|
751 | 767 | |
|
752 | 768 | reader = dctx.stream_reader(frame) |
|
753 |
tr = io.TextIOWrapper(reader, encoding= |
|
|
769 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
|
754 | 770 | |
|
755 | 771 | lines = [] |
|
756 | 772 | while True: |
@@ -758,26 +774,26 class TestDecompressor_stream_reader(uni | |||
|
758 | 774 | if not line: |
|
759 | 775 | break |
|
760 | 776 | |
|
761 |
lines.append(line.encode( |
|
|
777 | lines.append(line.encode("utf-8")) | |
|
762 | 778 | |
|
763 | 779 | self.assertEqual(len(lines), 1024) |
|
764 |
self.assertEqual(b |
|
|
780 | self.assertEqual(b"".join(lines), source) | |
|
765 | 781 | |
|
766 | 782 | |
|
767 | 783 | @make_cffi |
|
768 |
class TestDecompressor_decompressobj( |
|
|
784 | class TestDecompressor_decompressobj(TestCase): | |
|
769 | 785 | def test_simple(self): |
|
770 |
data = zstd.ZstdCompressor(level=1).compress(b |
|
|
786 | data = zstd.ZstdCompressor(level=1).compress(b"foobar") | |
|
771 | 787 | |
|
772 | 788 | dctx = zstd.ZstdDecompressor() |
|
773 | 789 | dobj = dctx.decompressobj() |
|
774 |
self.assertEqual(dobj.decompress(data), b |
|
|
790 | self.assertEqual(dobj.decompress(data), b"foobar") | |
|
775 | 791 | self.assertIsNone(dobj.flush()) |
|
776 | 792 | self.assertIsNone(dobj.flush(10)) |
|
777 | 793 | self.assertIsNone(dobj.flush(length=100)) |
|
778 | 794 | |
|
779 | 795 | def test_input_types(self): |
|
780 |
compressed = zstd.ZstdCompressor(level=1).compress(b |
|
|
796 | compressed = zstd.ZstdCompressor(level=1).compress(b"foo") | |
|
781 | 797 | |
|
782 | 798 | dctx = zstd.ZstdDecompressor() |
|
783 | 799 | |
@@ -795,28 +811,28 class TestDecompressor_decompressobj(uni | |||
|
795 | 811 | self.assertIsNone(dobj.flush()) |
|
796 | 812 | self.assertIsNone(dobj.flush(10)) |
|
797 | 813 | self.assertIsNone(dobj.flush(length=100)) |
|
798 |
self.assertEqual(dobj.decompress(source), b |
|
|
814 | self.assertEqual(dobj.decompress(source), b"foo") | |
|
799 | 815 | self.assertIsNone(dobj.flush()) |
|
800 | 816 | |
|
801 | 817 | def test_reuse(self): |
|
802 |
data = zstd.ZstdCompressor(level=1).compress(b |
|
|
818 | data = zstd.ZstdCompressor(level=1).compress(b"foobar") | |
|
803 | 819 | |
|
804 | 820 | dctx = zstd.ZstdDecompressor() |
|
805 | 821 | dobj = dctx.decompressobj() |
|
806 | 822 | dobj.decompress(data) |
|
807 | 823 | |
|
808 |
with self.assertRaisesRegex |
|
|
824 | with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"): | |
|
809 | 825 | dobj.decompress(data) |
|
810 | 826 | self.assertIsNone(dobj.flush()) |
|
811 | 827 | |
|
812 | 828 | def test_bad_write_size(self): |
|
813 | 829 | dctx = zstd.ZstdDecompressor() |
|
814 | 830 | |
|
815 |
with self.assertRaisesRegex |
|
|
831 | with self.assertRaisesRegex(ValueError, "write_size must be positive"): | |
|
816 | 832 | dctx.decompressobj(write_size=0) |
|
817 | 833 | |
|
818 | 834 | def test_write_size(self): |
|
819 |
source = b |
|
|
835 | source = b"foo" * 64 + b"bar" * 128 | |
|
820 | 836 | data = zstd.ZstdCompressor(level=1).compress(source) |
|
821 | 837 | |
|
822 | 838 | dctx = zstd.ZstdDecompressor() |
@@ -836,7 +852,7 def decompress_via_writer(data): | |||
|
836 | 852 | |
|
837 | 853 | |
|
838 | 854 | @make_cffi |
|
839 |
class TestDecompressor_stream_writer( |
|
|
855 | class TestDecompressor_stream_writer(TestCase): | |
|
840 | 856 | def test_io_api(self): |
|
841 | 857 | buffer = io.BytesIO() |
|
842 | 858 | dctx = zstd.ZstdDecompressor() |
@@ -908,14 +924,14 class TestDecompressor_stream_writer(uni | |||
|
908 | 924 | writer.fileno() |
|
909 | 925 | |
|
910 | 926 | def test_fileno_file(self): |
|
911 |
with tempfile.TemporaryFile( |
|
|
927 | with tempfile.TemporaryFile("wb") as tf: | |
|
912 | 928 | dctx = zstd.ZstdDecompressor() |
|
913 | 929 | writer = dctx.stream_writer(tf) |
|
914 | 930 | |
|
915 | 931 | self.assertEqual(writer.fileno(), tf.fileno()) |
|
916 | 932 | |
|
917 | 933 | def test_close(self): |
|
918 |
foo = zstd.ZstdCompressor().compress(b |
|
|
934 | foo = zstd.ZstdCompressor().compress(b"foo") | |
|
919 | 935 | |
|
920 | 936 | buffer = NonClosingBytesIO() |
|
921 | 937 | dctx = zstd.ZstdDecompressor() |
@@ -928,17 +944,17 class TestDecompressor_stream_writer(uni | |||
|
928 | 944 | self.assertTrue(writer.closed) |
|
929 | 945 | self.assertTrue(buffer.closed) |
|
930 | 946 | |
|
931 |
with self.assertRaisesRegex |
|
|
932 |
writer.write(b |
|
|
947 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
948 | writer.write(b"") | |
|
933 | 949 | |
|
934 |
with self.assertRaisesRegex |
|
|
950 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
935 | 951 | writer.flush() |
|
936 | 952 | |
|
937 |
with self.assertRaisesRegex |
|
|
953 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
938 | 954 | with writer: |
|
939 | 955 | pass |
|
940 | 956 | |
|
941 |
self.assertEqual(buffer.getvalue(), b |
|
|
957 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
942 | 958 | |
|
943 | 959 | # Context manager exit should close stream. |
|
944 | 960 | buffer = NonClosingBytesIO() |
@@ -948,7 +964,7 class TestDecompressor_stream_writer(uni | |||
|
948 | 964 | writer.write(foo) |
|
949 | 965 | |
|
950 | 966 | self.assertTrue(writer.closed) |
|
951 |
self.assertEqual(buffer.getvalue(), b |
|
|
967 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
952 | 968 | |
|
953 | 969 | def test_flush(self): |
|
954 | 970 | buffer = OpCountingBytesIO() |
@@ -962,12 +978,12 class TestDecompressor_stream_writer(uni | |||
|
962 | 978 | |
|
963 | 979 | def test_empty_roundtrip(self): |
|
964 | 980 | cctx = zstd.ZstdCompressor() |
|
965 |
empty = cctx.compress(b |
|
|
966 |
self.assertEqual(decompress_via_writer(empty), b |
|
|
981 | empty = cctx.compress(b"") | |
|
982 | self.assertEqual(decompress_via_writer(empty), b"") | |
|
967 | 983 | |
|
968 | 984 | def test_input_types(self): |
|
969 | 985 | cctx = zstd.ZstdCompressor(level=1) |
|
970 |
compressed = cctx.compress(b |
|
|
986 | compressed = cctx.compress(b"foo") | |
|
971 | 987 | |
|
972 | 988 | mutable_array = bytearray(len(compressed)) |
|
973 | 989 | mutable_array[:] = compressed |
@@ -984,25 +1000,25 class TestDecompressor_stream_writer(uni | |||
|
984 | 1000 | |
|
985 | 1001 | decompressor = dctx.stream_writer(buffer) |
|
986 | 1002 | decompressor.write(source) |
|
987 |
self.assertEqual(buffer.getvalue(), b |
|
|
1003 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
988 | 1004 | |
|
989 | 1005 | buffer = NonClosingBytesIO() |
|
990 | 1006 | |
|
991 | 1007 | with dctx.stream_writer(buffer) as decompressor: |
|
992 | 1008 | self.assertEqual(decompressor.write(source), 3) |
|
993 | 1009 | |
|
994 |
self.assertEqual(buffer.getvalue(), b |
|
|
1010 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
995 | 1011 | |
|
996 | 1012 | buffer = io.BytesIO() |
|
997 | 1013 | writer = dctx.stream_writer(buffer, write_return_read=True) |
|
998 | 1014 | self.assertEqual(writer.write(source), len(source)) |
|
999 |
self.assertEqual(buffer.getvalue(), b |
|
|
1015 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
1000 | 1016 | |
|
1001 | 1017 | def test_large_roundtrip(self): |
|
1002 | 1018 | chunks = [] |
|
1003 | 1019 | for i in range(255): |
|
1004 |
chunks.append(struct.Struct( |
|
|
1005 |
orig = b |
|
|
1020 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
|
1021 | orig = b"".join(chunks) | |
|
1006 | 1022 | cctx = zstd.ZstdCompressor() |
|
1007 | 1023 | compressed = cctx.compress(orig) |
|
1008 | 1024 | |
@@ -1012,9 +1028,9 class TestDecompressor_stream_writer(uni | |||
|
1012 | 1028 | chunks = [] |
|
1013 | 1029 | for i in range(255): |
|
1014 | 1030 | for j in range(255): |
|
1015 |
chunks.append(struct.Struct( |
|
|
1031 | chunks.append(struct.Struct(">B").pack(j) * i) | |
|
1016 | 1032 | |
|
1017 |
orig = b |
|
|
1033 | orig = b"".join(chunks) | |
|
1018 | 1034 | cctx = zstd.ZstdCompressor() |
|
1019 | 1035 | compressed = cctx.compress(orig) |
|
1020 | 1036 | |
@@ -1042,13 +1058,13 class TestDecompressor_stream_writer(uni | |||
|
1042 | 1058 | def test_dictionary(self): |
|
1043 | 1059 | samples = [] |
|
1044 | 1060 | for i in range(128): |
|
1045 |
samples.append(b |
|
|
1046 |
samples.append(b |
|
|
1047 |
samples.append(b |
|
|
1061 | samples.append(b"foo" * 64) | |
|
1062 | samples.append(b"bar" * 64) | |
|
1063 | samples.append(b"foobar" * 64) | |
|
1048 | 1064 | |
|
1049 | 1065 | d = zstd.train_dictionary(8192, samples) |
|
1050 | 1066 | |
|
1051 |
orig = b |
|
|
1067 | orig = b"foobar" * 16384 | |
|
1052 | 1068 | buffer = NonClosingBytesIO() |
|
1053 | 1069 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
1054 | 1070 | with cctx.stream_writer(buffer) as compressor: |
@@ -1083,22 +1099,22 class TestDecompressor_stream_writer(uni | |||
|
1083 | 1099 | self.assertGreater(size, 100000) |
|
1084 | 1100 | |
|
1085 | 1101 | def test_write_size(self): |
|
1086 |
source = zstd.ZstdCompressor().compress(b |
|
|
1102 | source = zstd.ZstdCompressor().compress(b"foobarfoobar") | |
|
1087 | 1103 | dest = OpCountingBytesIO() |
|
1088 | 1104 | dctx = zstd.ZstdDecompressor() |
|
1089 | 1105 | with dctx.stream_writer(dest, write_size=1) as decompressor: |
|
1090 |
s = struct.Struct( |
|
|
1106 | s = struct.Struct(">B") | |
|
1091 | 1107 | for c in source: |
|
1092 | 1108 | if not isinstance(c, str): |
|
1093 | 1109 | c = s.pack(c) |
|
1094 | 1110 | decompressor.write(c) |
|
1095 | 1111 | |
|
1096 |
self.assertEqual(dest.getvalue(), b |
|
|
1112 | self.assertEqual(dest.getvalue(), b"foobarfoobar") | |
|
1097 | 1113 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
1098 | 1114 | |
|
1099 | 1115 | |
|
1100 | 1116 | @make_cffi |
|
1101 |
class TestDecompressor_read_to_iter( |
|
|
1117 | class TestDecompressor_read_to_iter(TestCase): | |
|
1102 | 1118 | def test_type_validation(self): |
|
1103 | 1119 | dctx = zstd.ZstdDecompressor() |
|
1104 | 1120 | |
@@ -1106,10 +1122,10 class TestDecompressor_read_to_iter(unit | |||
|
1106 | 1122 | dctx.read_to_iter(io.BytesIO()) |
|
1107 | 1123 | |
|
1108 | 1124 | # Buffer protocol works. |
|
1109 |
dctx.read_to_iter(b |
|
|
1125 | dctx.read_to_iter(b"foobar") | |
|
1110 | 1126 | |
|
1111 |
with self.assertRaisesRegex |
|
|
1112 |
b |
|
|
1127 | with self.assertRaisesRegex(ValueError, "must pass an object with a read"): | |
|
1128 | b"".join(dctx.read_to_iter(True)) | |
|
1113 | 1129 | |
|
1114 | 1130 | def test_empty_input(self): |
|
1115 | 1131 | dctx = zstd.ZstdDecompressor() |
@@ -1120,25 +1136,25 class TestDecompressor_read_to_iter(unit | |||
|
1120 | 1136 | with self.assertRaises(StopIteration): |
|
1121 | 1137 | next(it) |
|
1122 | 1138 | |
|
1123 |
it = dctx.read_to_iter(b |
|
|
1139 | it = dctx.read_to_iter(b"") | |
|
1124 | 1140 | with self.assertRaises(StopIteration): |
|
1125 | 1141 | next(it) |
|
1126 | 1142 | |
|
1127 | 1143 | def test_invalid_input(self): |
|
1128 | 1144 | dctx = zstd.ZstdDecompressor() |
|
1129 | 1145 | |
|
1130 |
source = io.BytesIO(b |
|
|
1146 | source = io.BytesIO(b"foobar") | |
|
1131 | 1147 | it = dctx.read_to_iter(source) |
|
1132 |
with self.assertRaisesRegex |
|
|
1148 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
|
1133 | 1149 | next(it) |
|
1134 | 1150 | |
|
1135 |
it = dctx.read_to_iter(b |
|
|
1136 |
with self.assertRaisesRegex |
|
|
1151 | it = dctx.read_to_iter(b"foobar") | |
|
1152 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
|
1137 | 1153 | next(it) |
|
1138 | 1154 | |
|
1139 | 1155 | def test_empty_roundtrip(self): |
|
1140 | 1156 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1141 |
empty = cctx.compress(b |
|
|
1157 | empty = cctx.compress(b"") | |
|
1142 | 1158 | |
|
1143 | 1159 | source = io.BytesIO(empty) |
|
1144 | 1160 | source.seek(0) |
@@ -1157,24 +1173,28 class TestDecompressor_read_to_iter(unit | |||
|
1157 | 1173 | def test_skip_bytes_too_large(self): |
|
1158 | 1174 | dctx = zstd.ZstdDecompressor() |
|
1159 | 1175 | |
|
1160 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): | |
|
1161 | b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) | |
|
1176 | with self.assertRaisesRegex( | |
|
1177 | ValueError, "skip_bytes must be smaller than read_size" | |
|
1178 | ): | |
|
1179 | b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1)) | |
|
1162 | 1180 | |
|
1163 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): | |
|
1164 | b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) | |
|
1181 | with self.assertRaisesRegex( | |
|
1182 | ValueError, "skip_bytes larger than first input chunk" | |
|
1183 | ): | |
|
1184 | b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10)) | |
|
1165 | 1185 | |
|
1166 | 1186 | def test_skip_bytes(self): |
|
1167 | 1187 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1168 |
compressed = cctx.compress(b |
|
|
1188 | compressed = cctx.compress(b"foobar") | |
|
1169 | 1189 | |
|
1170 | 1190 | dctx = zstd.ZstdDecompressor() |
|
1171 |
output = b |
|
|
1172 |
self.assertEqual(output, b |
|
|
1191 | output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3)) | |
|
1192 | self.assertEqual(output, b"foobar") | |
|
1173 | 1193 | |
|
1174 | 1194 | def test_large_output(self): |
|
1175 | 1195 | source = io.BytesIO() |
|
1176 |
source.write(b |
|
|
1177 |
source.write(b |
|
|
1196 | source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
|
1197 | source.write(b"o") | |
|
1178 | 1198 | source.seek(0) |
|
1179 | 1199 | |
|
1180 | 1200 | cctx = zstd.ZstdCompressor(level=1) |
@@ -1191,7 +1211,7 class TestDecompressor_read_to_iter(unit | |||
|
1191 | 1211 | with self.assertRaises(StopIteration): |
|
1192 | 1212 | next(it) |
|
1193 | 1213 | |
|
1194 |
decompressed = b |
|
|
1214 | decompressed = b"".join(chunks) | |
|
1195 | 1215 | self.assertEqual(decompressed, source.getvalue()) |
|
1196 | 1216 | |
|
1197 | 1217 | # And again with buffer protocol. |
@@ -1203,12 +1223,12 class TestDecompressor_read_to_iter(unit | |||
|
1203 | 1223 | with self.assertRaises(StopIteration): |
|
1204 | 1224 | next(it) |
|
1205 | 1225 | |
|
1206 |
decompressed = b |
|
|
1226 | decompressed = b"".join(chunks) | |
|
1207 | 1227 | self.assertEqual(decompressed, source.getvalue()) |
|
1208 | 1228 | |
|
1209 |
@unittest.skipUnless( |
|
|
1229 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
1210 | 1230 | def test_large_input(self): |
|
1211 |
bytes = list(struct.Struct( |
|
|
1231 | bytes = list(struct.Struct(">B").pack(i) for i in range(256)) | |
|
1212 | 1232 | compressed = NonClosingBytesIO() |
|
1213 | 1233 | input_size = 0 |
|
1214 | 1234 | cctx = zstd.ZstdCompressor(level=1) |
@@ -1217,14 +1237,18 class TestDecompressor_read_to_iter(unit | |||
|
1217 | 1237 | compressor.write(random.choice(bytes)) |
|
1218 | 1238 | input_size += 1 |
|
1219 | 1239 | |
|
1220 | have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1240 | have_compressed = ( | |
|
1241 | len(compressed.getvalue()) | |
|
1242 | > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1243 | ) | |
|
1221 | 1244 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 |
|
1222 | 1245 | if have_compressed and have_raw: |
|
1223 | 1246 | break |
|
1224 | 1247 | |
|
1225 | 1248 | compressed = io.BytesIO(compressed.getvalue()) |
|
1226 |
self.assertGreater( |
|
|
1227 |
|
|
|
1249 | self.assertGreater( | |
|
1250 | len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1251 | ) | |
|
1228 | 1252 | |
|
1229 | 1253 | dctx = zstd.ZstdDecompressor() |
|
1230 | 1254 | it = dctx.read_to_iter(compressed) |
@@ -1237,7 +1261,7 class TestDecompressor_read_to_iter(unit | |||
|
1237 | 1261 | with self.assertRaises(StopIteration): |
|
1238 | 1262 | next(it) |
|
1239 | 1263 | |
|
1240 |
decompressed = b |
|
|
1264 | decompressed = b"".join(chunks) | |
|
1241 | 1265 | self.assertEqual(len(decompressed), input_size) |
|
1242 | 1266 | |
|
1243 | 1267 | # And again with buffer protocol. |
@@ -1251,7 +1275,7 class TestDecompressor_read_to_iter(unit | |||
|
1251 | 1275 | with self.assertRaises(StopIteration): |
|
1252 | 1276 | next(it) |
|
1253 | 1277 | |
|
1254 |
decompressed = b |
|
|
1278 | decompressed = b"".join(chunks) | |
|
1255 | 1279 | self.assertEqual(len(decompressed), input_size) |
|
1256 | 1280 | |
|
1257 | 1281 | def test_interesting(self): |
@@ -1263,22 +1287,23 class TestDecompressor_read_to_iter(unit | |||
|
1263 | 1287 | compressed = NonClosingBytesIO() |
|
1264 | 1288 | with cctx.stream_writer(compressed) as compressor: |
|
1265 | 1289 | for i in range(256): |
|
1266 |
chunk = b |
|
|
1290 | chunk = b"\0" * 1024 | |
|
1267 | 1291 | compressor.write(chunk) |
|
1268 | 1292 | source.write(chunk) |
|
1269 | 1293 | |
|
1270 | 1294 | dctx = zstd.ZstdDecompressor() |
|
1271 | 1295 | |
|
1272 |
simple = dctx.decompress( |
|
|
1273 |
|
|
|
1296 | simple = dctx.decompress( | |
|
1297 | compressed.getvalue(), max_output_size=len(source.getvalue()) | |
|
1298 | ) | |
|
1274 | 1299 | self.assertEqual(simple, source.getvalue()) |
|
1275 | 1300 | |
|
1276 | 1301 | compressed = io.BytesIO(compressed.getvalue()) |
|
1277 |
streamed = b |
|
|
1302 | streamed = b"".join(dctx.read_to_iter(compressed)) | |
|
1278 | 1303 | self.assertEqual(streamed, source.getvalue()) |
|
1279 | 1304 | |
|
1280 | 1305 | def test_read_write_size(self): |
|
1281 |
source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b |
|
|
1306 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) | |
|
1282 | 1307 | dctx = zstd.ZstdDecompressor() |
|
1283 | 1308 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): |
|
1284 | 1309 | self.assertEqual(len(chunk), 1) |
@@ -1287,97 +1312,110 class TestDecompressor_read_to_iter(unit | |||
|
1287 | 1312 | |
|
1288 | 1313 | def test_magic_less(self): |
|
1289 | 1314 | params = zstd.CompressionParameters.from_level( |
|
1290 |
1, format=zstd.FORMAT_ZSTD1_MAGICLESS |
|
|
1315 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS | |
|
1316 | ) | |
|
1291 | 1317 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1292 |
frame = cctx.compress(b |
|
|
1318 | frame = cctx.compress(b"foobar") | |
|
1293 | 1319 | |
|
1294 |
self.assertNotEqual(frame[0:4], b |
|
|
1320 | self.assertNotEqual(frame[0:4], b"\x28\xb5\x2f\xfd") | |
|
1295 | 1321 | |
|
1296 | 1322 | dctx = zstd.ZstdDecompressor() |
|
1297 |
with self.assertRaisesRegex |
|
|
1298 |
zstd.ZstdError, |
|
|
1323 | with self.assertRaisesRegex( | |
|
1324 | zstd.ZstdError, "error determining content size from frame header" | |
|
1325 | ): | |
|
1299 | 1326 | dctx.decompress(frame) |
|
1300 | 1327 | |
|
1301 | 1328 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) |
|
1302 |
res = b |
|
|
1303 |
self.assertEqual(res, b |
|
|
1329 | res = b"".join(dctx.read_to_iter(frame)) | |
|
1330 | self.assertEqual(res, b"foobar") | |
|
1304 | 1331 | |
|
1305 | 1332 | |
|
1306 | 1333 | @make_cffi |
|
1307 |
class TestDecompressor_content_dict_chain( |
|
|
1334 | class TestDecompressor_content_dict_chain(TestCase): | |
|
1308 | 1335 | def test_bad_inputs_simple(self): |
|
1309 | 1336 | dctx = zstd.ZstdDecompressor() |
|
1310 | 1337 | |
|
1311 | 1338 | with self.assertRaises(TypeError): |
|
1312 |
dctx.decompress_content_dict_chain(b |
|
|
1339 | dctx.decompress_content_dict_chain(b"foo") | |
|
1313 | 1340 | |
|
1314 | 1341 | with self.assertRaises(TypeError): |
|
1315 |
dctx.decompress_content_dict_chain((b |
|
|
1342 | dctx.decompress_content_dict_chain((b"foo", b"bar")) | |
|
1316 | 1343 | |
|
1317 |
with self.assertRaisesRegex |
|
|
1344 | with self.assertRaisesRegex(ValueError, "empty input chain"): | |
|
1318 | 1345 | dctx.decompress_content_dict_chain([]) |
|
1319 | 1346 | |
|
1320 |
with self.assertRaisesRegex |
|
|
1321 |
dctx.decompress_content_dict_chain([u |
|
|
1347 | with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): | |
|
1348 | dctx.decompress_content_dict_chain([u"foo"]) | |
|
1322 | 1349 | |
|
1323 |
with self.assertRaisesRegex |
|
|
1350 | with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): | |
|
1324 | 1351 | dctx.decompress_content_dict_chain([True]) |
|
1325 | 1352 | |
|
1326 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): | |
|
1353 | with self.assertRaisesRegex( | |
|
1354 | ValueError, "chunk 0 is too small to contain a zstd frame" | |
|
1355 | ): | |
|
1327 | 1356 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) |
|
1328 | 1357 | |
|
1329 |
with self.assertRaisesRegex |
|
|
1330 |
dctx.decompress_content_dict_chain([b |
|
|
1358 | with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"): | |
|
1359 | dctx.decompress_content_dict_chain([b"foo" * 8]) | |
|
1331 | 1360 | |
|
1332 |
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b |
|
|
1361 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) | |
|
1333 | 1362 | |
|
1334 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): | |
|
1363 | with self.assertRaisesRegex( | |
|
1364 | ValueError, "chunk 0 missing content size in frame" | |
|
1365 | ): | |
|
1335 | 1366 | dctx.decompress_content_dict_chain([no_size]) |
|
1336 | 1367 | |
|
1337 | 1368 | # Corrupt first frame. |
|
1338 |
frame = zstd.ZstdCompressor().compress(b |
|
|
1369 | frame = zstd.ZstdCompressor().compress(b"foo" * 64) | |
|
1339 | 1370 | frame = frame[0:12] + frame[15:] |
|
1340 |
with self.assertRaisesRegex |
|
|
1341 |
|
|
|
1371 | with self.assertRaisesRegex( | |
|
1372 | zstd.ZstdError, "chunk 0 did not decompress full frame" | |
|
1373 | ): | |
|
1342 | 1374 | dctx.decompress_content_dict_chain([frame]) |
|
1343 | 1375 | |
|
1344 | 1376 | def test_bad_subsequent_input(self): |
|
1345 |
initial = zstd.ZstdCompressor().compress(b |
|
|
1377 | initial = zstd.ZstdCompressor().compress(b"foo" * 64) | |
|
1346 | 1378 | |
|
1347 | 1379 | dctx = zstd.ZstdDecompressor() |
|
1348 | 1380 | |
|
1349 |
with self.assertRaisesRegex |
|
|
1350 |
dctx.decompress_content_dict_chain([initial, u |
|
|
1381 | with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): | |
|
1382 | dctx.decompress_content_dict_chain([initial, u"foo"]) | |
|
1351 | 1383 | |
|
1352 |
with self.assertRaisesRegex |
|
|
1384 | with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): | |
|
1353 | 1385 | dctx.decompress_content_dict_chain([initial, None]) |
|
1354 | 1386 | |
|
1355 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): | |
|
1387 | with self.assertRaisesRegex( | |
|
1388 | ValueError, "chunk 1 is too small to contain a zstd frame" | |
|
1389 | ): | |
|
1356 | 1390 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) |
|
1357 | 1391 | |
|
1358 |
with self.assertRaisesRegex |
|
|
1359 |
dctx.decompress_content_dict_chain([initial, b |
|
|
1392 | with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"): | |
|
1393 | dctx.decompress_content_dict_chain([initial, b"foo" * 8]) | |
|
1360 | 1394 | |
|
1361 |
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b |
|
|
1395 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) | |
|
1362 | 1396 | |
|
1363 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): | |
|
1397 | with self.assertRaisesRegex( | |
|
1398 | ValueError, "chunk 1 missing content size in frame" | |
|
1399 | ): | |
|
1364 | 1400 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
1365 | 1401 | |
|
1366 | 1402 | # Corrupt second frame. |
|
1367 |
cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b |
|
|
1368 |
frame = cctx.compress(b |
|
|
1403 | cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64)) | |
|
1404 | frame = cctx.compress(b"bar" * 64) | |
|
1369 | 1405 | frame = frame[0:12] + frame[15:] |
|
1370 | 1406 | |
|
1371 | with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): | |
|
1407 | with self.assertRaisesRegex( | |
|
1408 | zstd.ZstdError, "chunk 1 did not decompress full frame" | |
|
1409 | ): | |
|
1372 | 1410 | dctx.decompress_content_dict_chain([initial, frame]) |
|
1373 | 1411 | |
|
1374 | 1412 | def test_simple(self): |
|
1375 | 1413 | original = [ |
|
1376 |
b |
|
|
1377 |
b |
|
|
1378 |
b |
|
|
1379 |
b |
|
|
1380 |
b |
|
|
1414 | b"foo" * 64, | |
|
1415 | b"foobar" * 64, | |
|
1416 | b"baz" * 64, | |
|
1417 | b"foobaz" * 64, | |
|
1418 | b"foobarbaz" * 64, | |
|
1381 | 1419 | ] |
|
1382 | 1420 | |
|
1383 | 1421 | chunks = [] |
@@ -1396,12 +1434,12 class TestDecompressor_content_dict_chai | |||
|
1396 | 1434 | |
|
1397 | 1435 | |
|
1398 | 1436 | # TODO enable for CFFI |
|
1399 |
class TestDecompressor_multi_decompress_to_buffer( |
|
|
1437 | class TestDecompressor_multi_decompress_to_buffer(TestCase): | |
|
1400 | 1438 | def test_invalid_inputs(self): |
|
1401 | 1439 | dctx = zstd.ZstdDecompressor() |
|
1402 | 1440 | |
|
1403 |
if not hasattr(dctx, |
|
|
1404 |
self.skipTest( |
|
|
1441 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1442 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1405 | 1443 | |
|
1406 | 1444 | with self.assertRaises(TypeError): |
|
1407 | 1445 | dctx.multi_decompress_to_buffer(True) |
@@ -1409,22 +1447,24 class TestDecompressor_multi_decompress_ | |||
|
1409 | 1447 | with self.assertRaises(TypeError): |
|
1410 | 1448 | dctx.multi_decompress_to_buffer((1, 2)) |
|
1411 | 1449 | |
|
1412 |
with self.assertRaisesRegex |
|
|
1413 |
dctx.multi_decompress_to_buffer([u |
|
|
1450 | with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): | |
|
1451 | dctx.multi_decompress_to_buffer([u"foo"]) | |
|
1414 | 1452 | |
|
1415 | with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'): | |
|
1416 | dctx.multi_decompress_to_buffer([b'foobarbaz']) | |
|
1453 | with self.assertRaisesRegex( | |
|
1454 | ValueError, "could not determine decompressed size of item 0" | |
|
1455 | ): | |
|
1456 | dctx.multi_decompress_to_buffer([b"foobarbaz"]) | |
|
1417 | 1457 | |
|
1418 | 1458 | def test_list_input(self): |
|
1419 | 1459 | cctx = zstd.ZstdCompressor() |
|
1420 | 1460 | |
|
1421 |
original = [b |
|
|
1461 | original = [b"foo" * 4, b"bar" * 6] | |
|
1422 | 1462 | frames = [cctx.compress(d) for d in original] |
|
1423 | 1463 | |
|
1424 | 1464 | dctx = zstd.ZstdDecompressor() |
|
1425 | 1465 | |
|
1426 |
if not hasattr(dctx, |
|
|
1427 |
self.skipTest( |
|
|
1466 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1467 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1428 | 1468 | |
|
1429 | 1469 | result = dctx.multi_decompress_to_buffer(frames) |
|
1430 | 1470 | |
@@ -1442,14 +1482,14 class TestDecompressor_multi_decompress_ | |||
|
1442 | 1482 | def test_list_input_frame_sizes(self): |
|
1443 | 1483 | cctx = zstd.ZstdCompressor() |
|
1444 | 1484 | |
|
1445 |
original = [b |
|
|
1485 | original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] | |
|
1446 | 1486 | frames = [cctx.compress(d) for d in original] |
|
1447 |
sizes = struct.pack( |
|
|
1487 | sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) | |
|
1448 | 1488 | |
|
1449 | 1489 | dctx = zstd.ZstdDecompressor() |
|
1450 | 1490 | |
|
1451 |
if not hasattr(dctx, |
|
|
1452 |
self.skipTest( |
|
|
1491 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1492 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1453 | 1493 | |
|
1454 | 1494 | result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
1455 | 1495 | |
@@ -1462,16 +1502,18 class TestDecompressor_multi_decompress_ | |||
|
1462 | 1502 | def test_buffer_with_segments_input(self): |
|
1463 | 1503 | cctx = zstd.ZstdCompressor() |
|
1464 | 1504 | |
|
1465 |
original = [b |
|
|
1505 | original = [b"foo" * 4, b"bar" * 6] | |
|
1466 | 1506 | frames = [cctx.compress(d) for d in original] |
|
1467 | 1507 | |
|
1468 | 1508 | dctx = zstd.ZstdDecompressor() |
|
1469 | 1509 | |
|
1470 |
if not hasattr(dctx, |
|
|
1471 |
self.skipTest( |
|
|
1510 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1511 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1472 | 1512 | |
|
1473 | segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1])) | |
|
1474 | b = zstd.BufferWithSegments(b''.join(frames), segments) | |
|
1513 | segments = struct.pack( | |
|
1514 | "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]) | |
|
1515 | ) | |
|
1516 | b = zstd.BufferWithSegments(b"".join(frames), segments) | |
|
1475 | 1517 | |
|
1476 | 1518 | result = dctx.multi_decompress_to_buffer(b) |
|
1477 | 1519 | |
@@ -1483,19 +1525,25 class TestDecompressor_multi_decompress_ | |||
|
1483 | 1525 | |
|
1484 | 1526 | def test_buffer_with_segments_sizes(self): |
|
1485 | 1527 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1486 |
original = [b |
|
|
1528 | original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] | |
|
1487 | 1529 | frames = [cctx.compress(d) for d in original] |
|
1488 |
sizes = struct.pack( |
|
|
1530 | sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) | |
|
1489 | 1531 | |
|
1490 | 1532 | dctx = zstd.ZstdDecompressor() |
|
1491 | 1533 | |
|
1492 |
if not hasattr(dctx, |
|
|
1493 |
self.skipTest( |
|
|
1534 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1535 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1494 | 1536 | |
|
1495 |
segments = struct.pack( |
|
|
1496 | len(frames[0]), len(frames[1]), | |
|
1497 | len(frames[0]) + len(frames[1]), len(frames[2])) | |
|
1498 | b = zstd.BufferWithSegments(b''.join(frames), segments) | |
|
1537 | segments = struct.pack( | |
|
1538 | "=QQQQQQ", | |
|
1539 | 0, | |
|
1540 | len(frames[0]), | |
|
1541 | len(frames[0]), | |
|
1542 | len(frames[1]), | |
|
1543 | len(frames[0]) + len(frames[1]), | |
|
1544 | len(frames[2]), | |
|
1545 | ) | |
|
1546 | b = zstd.BufferWithSegments(b"".join(frames), segments) | |
|
1499 | 1547 | |
|
1500 | 1548 | result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) |
|
1501 | 1549 | |
@@ -1509,15 +1557,15 class TestDecompressor_multi_decompress_ | |||
|
1509 | 1557 | cctx = zstd.ZstdCompressor() |
|
1510 | 1558 | |
|
1511 | 1559 | original = [ |
|
1512 |
b |
|
|
1513 |
b |
|
|
1514 |
b |
|
|
1515 |
b |
|
|
1516 |
b |
|
|
1560 | b"foo0" * 2, | |
|
1561 | b"foo1" * 3, | |
|
1562 | b"foo2" * 4, | |
|
1563 | b"foo3" * 5, | |
|
1564 | b"foo4" * 6, | |
|
1517 | 1565 | ] |
|
1518 | 1566 | |
|
1519 |
if not hasattr(cctx, |
|
|
1520 |
self.skipTest( |
|
|
1567 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1568 | self.skipTest("multi_compress_to_buffer not available") | |
|
1521 | 1569 | |
|
1522 | 1570 | frames = cctx.multi_compress_to_buffer(original) |
|
1523 | 1571 | |
@@ -1532,16 +1580,24 class TestDecompressor_multi_decompress_ | |||
|
1532 | 1580 | self.assertEqual(data, decompressed[i].tobytes()) |
|
1533 | 1581 | |
|
1534 | 1582 | # And a manual mode. |
|
1535 |
b = b |
|
|
1536 |
b1 = zstd.BufferWithSegments( |
|
|
1537 | 0, len(frames[0]), | |
|
1538 | len(frames[0]), len(frames[1]))) | |
|
1583 | b = b"".join([frames[0].tobytes(), frames[1].tobytes()]) | |
|
1584 | b1 = zstd.BufferWithSegments( | |
|
1585 | b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])) | |
|
1586 | ) | |
|
1539 | 1587 | |
|
1540 |
b = b |
|
|
1541 |
b2 = zstd.BufferWithSegments( |
|
|
1542 | 0, len(frames[2]), | |
|
1543 | len(frames[2]), len(frames[3]), | |
|
1544 | len(frames[2]) + len(frames[3]), len(frames[4]))) | |
|
1588 | b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) | |
|
1589 | b2 = zstd.BufferWithSegments( | |
|
1590 | b, | |
|
1591 | struct.pack( | |
|
1592 | "=QQQQQQ", | |
|
1593 | 0, | |
|
1594 | len(frames[2]), | |
|
1595 | len(frames[2]), | |
|
1596 | len(frames[3]), | |
|
1597 | len(frames[2]) + len(frames[3]), | |
|
1598 | len(frames[4]), | |
|
1599 | ), | |
|
1600 | ) | |
|
1545 | 1601 | |
|
1546 | 1602 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1547 | 1603 | |
@@ -1560,8 +1616,8 class TestDecompressor_multi_decompress_ | |||
|
1560 | 1616 | |
|
1561 | 1617 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1562 | 1618 | |
|
1563 |
if not hasattr(dctx, |
|
|
1564 |
self.skipTest( |
|
|
1619 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1620 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1565 | 1621 | |
|
1566 | 1622 | result = dctx.multi_decompress_to_buffer(frames) |
|
1567 | 1623 | |
@@ -1571,41 +1627,44 class TestDecompressor_multi_decompress_ | |||
|
1571 | 1627 | cctx = zstd.ZstdCompressor() |
|
1572 | 1628 | |
|
1573 | 1629 | frames = [] |
|
1574 |
frames.extend(cctx.compress(b |
|
|
1575 |
frames.extend(cctx.compress(b |
|
|
1630 | frames.extend(cctx.compress(b"x" * 64) for i in range(256)) | |
|
1631 | frames.extend(cctx.compress(b"y" * 64) for i in range(256)) | |
|
1576 | 1632 | |
|
1577 | 1633 | dctx = zstd.ZstdDecompressor() |
|
1578 | 1634 | |
|
1579 |
if not hasattr(dctx, |
|
|
1580 |
self.skipTest( |
|
|
1635 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1636 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1581 | 1637 | |
|
1582 | 1638 | result = dctx.multi_decompress_to_buffer(frames, threads=-1) |
|
1583 | 1639 | |
|
1584 | 1640 | self.assertEqual(len(result), len(frames)) |
|
1585 | 1641 | self.assertEqual(result.size(), 2 * 64 * 256) |
|
1586 |
self.assertEqual(result[0].tobytes(), b |
|
|
1587 |
self.assertEqual(result[256].tobytes(), b |
|
|
1642 | self.assertEqual(result[0].tobytes(), b"x" * 64) | |
|
1643 | self.assertEqual(result[256].tobytes(), b"y" * 64) | |
|
1588 | 1644 | |
|
1589 | 1645 | def test_item_failure(self): |
|
1590 | 1646 | cctx = zstd.ZstdCompressor() |
|
1591 |
frames = [cctx.compress(b |
|
|
1647 | frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)] | |
|
1592 | 1648 | |
|
1593 |
frames[1] = frames[1][0:15] + b |
|
|
1649 | frames[1] = frames[1][0:15] + b"extra" + frames[1][15:] | |
|
1594 | 1650 | |
|
1595 | 1651 | dctx = zstd.ZstdDecompressor() |
|
1596 | 1652 | |
|
1597 |
if not hasattr(dctx, |
|
|
1598 |
self.skipTest( |
|
|
1653 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1654 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1599 | 1655 | |
|
1600 |
with self.assertRaisesRegex |
|
|
1601 | 'error decompressing item 1: (' | |
|
1602 | 'Corrupted block|' | |
|
1603 | 'Destination buffer is too small)'): | |
|
1656 | with self.assertRaisesRegex( | |
|
1657 | zstd.ZstdError, | |
|
1658 | "error decompressing item 1: (" | |
|
1659 | "Corrupted block|" | |
|
1660 | "Destination buffer is too small)", | |
|
1661 | ): | |
|
1604 | 1662 | dctx.multi_decompress_to_buffer(frames) |
|
1605 | 1663 | |
|
1606 |
with self.assertRaisesRegex |
|
|
1607 | 'error decompressing item 1: (' | |
|
1608 | 'Corrupted block|' | |
|
1609 | 'Destination buffer is too small)'): | |
|
1664 | with self.assertRaisesRegex( | |
|
1665 | zstd.ZstdError, | |
|
1666 | "error decompressing item 1: (" | |
|
1667 | "Corrupted block|" | |
|
1668 | "Destination buffer is too small)", | |
|
1669 | ): | |
|
1610 | 1670 | dctx.multi_decompress_to_buffer(frames, threads=2) |
|
1611 |
@@ -6,29 +6,37 try: | |||
|
6 | 6 | import hypothesis |
|
7 | 7 | import hypothesis.strategies as strategies |
|
8 | 8 | except ImportError: |
|
9 |
raise unittest.SkipTest( |
|
|
9 | raise unittest.SkipTest("hypothesis not available") | |
|
10 | 10 | |
|
11 | 11 | import zstandard as zstd |
|
12 | 12 | |
|
13 |
from . |
|
|
13 | from .common import ( | |
|
14 | 14 | make_cffi, |
|
15 | 15 | NonClosingBytesIO, |
|
16 | 16 | random_input_data, |
|
17 | TestCase, | |
|
17 | 18 | ) |
|
18 | 19 | |
|
19 | 20 | |
|
20 |
@unittest.skipUnless( |
|
|
21 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
21 | 22 | @make_cffi |
|
22 |
class TestDecompressor_stream_reader_fuzzing( |
|
|
23 | class TestDecompressor_stream_reader_fuzzing(TestCase): | |
|
23 | 24 | @hypothesis.settings( |
|
24 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
25 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
26 | level=strategies.integers(min_value=1, max_value=5), | |
|
27 | streaming=strategies.booleans(), | |
|
28 | source_read_size=strategies.integers(1, 1048576), | |
|
29 | read_sizes=strategies.data()) | |
|
30 | def test_stream_source_read_variance(self, original, level, streaming, | |
|
31 | source_read_size, read_sizes): | |
|
25 | suppress_health_check=[ | |
|
26 | hypothesis.HealthCheck.large_base_example, | |
|
27 | hypothesis.HealthCheck.too_slow, | |
|
28 | ] | |
|
29 | ) | |
|
30 | @hypothesis.given( | |
|
31 | original=strategies.sampled_from(random_input_data()), | |
|
32 | level=strategies.integers(min_value=1, max_value=5), | |
|
33 | streaming=strategies.booleans(), | |
|
34 | source_read_size=strategies.integers(1, 1048576), | |
|
35 | read_sizes=strategies.data(), | |
|
36 | ) | |
|
37 | def test_stream_source_read_variance( | |
|
38 | self, original, level, streaming, source_read_size, read_sizes | |
|
39 | ): | |
|
32 | 40 | cctx = zstd.ZstdCompressor(level=level) |
|
33 | 41 | |
|
34 | 42 | if streaming: |
@@ -53,18 +61,22 class TestDecompressor_stream_reader_fuz | |||
|
53 | 61 | |
|
54 | 62 | chunks.append(chunk) |
|
55 | 63 | |
|
56 |
self.assertEqual(b |
|
|
64 | self.assertEqual(b"".join(chunks), original) | |
|
57 | 65 | |
|
58 | 66 | # Similar to above except we have a constant read() size. |
|
59 | 67 | @hypothesis.settings( |
|
60 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
61 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
62 | level=strategies.integers(min_value=1, max_value=5), | |
|
63 | streaming=strategies.booleans(), | |
|
64 | source_read_size=strategies.integers(1, 1048576), | |
|
65 | read_size=strategies.integers(-1, 131072)) | |
|
66 | def test_stream_source_read_size(self, original, level, streaming, | |
|
67 | source_read_size, read_size): | |
|
68 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
69 | ) | |
|
70 | @hypothesis.given( | |
|
71 | original=strategies.sampled_from(random_input_data()), | |
|
72 | level=strategies.integers(min_value=1, max_value=5), | |
|
73 | streaming=strategies.booleans(), | |
|
74 | source_read_size=strategies.integers(1, 1048576), | |
|
75 | read_size=strategies.integers(-1, 131072), | |
|
76 | ) | |
|
77 | def test_stream_source_read_size( | |
|
78 | self, original, level, streaming, source_read_size, read_size | |
|
79 | ): | |
|
68 | 80 | if read_size == 0: |
|
69 | 81 | read_size = 1 |
|
70 | 82 | |
@@ -91,17 +103,24 class TestDecompressor_stream_reader_fuz | |||
|
91 | 103 | |
|
92 | 104 | chunks.append(chunk) |
|
93 | 105 | |
|
94 |
self.assertEqual(b |
|
|
106 | self.assertEqual(b"".join(chunks), original) | |
|
95 | 107 | |
|
96 | 108 | @hypothesis.settings( |
|
97 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
98 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
99 | level=strategies.integers(min_value=1, max_value=5), | |
|
100 | streaming=strategies.booleans(), | |
|
101 | source_read_size=strategies.integers(1, 1048576), | |
|
102 | read_sizes=strategies.data()) | |
|
103 | def test_buffer_source_read_variance(self, original, level, streaming, | |
|
104 | source_read_size, read_sizes): | |
|
109 | suppress_health_check=[ | |
|
110 | hypothesis.HealthCheck.large_base_example, | |
|
111 | hypothesis.HealthCheck.too_slow, | |
|
112 | ] | |
|
113 | ) | |
|
114 | @hypothesis.given( | |
|
115 | original=strategies.sampled_from(random_input_data()), | |
|
116 | level=strategies.integers(min_value=1, max_value=5), | |
|
117 | streaming=strategies.booleans(), | |
|
118 | source_read_size=strategies.integers(1, 1048576), | |
|
119 | read_sizes=strategies.data(), | |
|
120 | ) | |
|
121 | def test_buffer_source_read_variance( | |
|
122 | self, original, level, streaming, source_read_size, read_sizes | |
|
123 | ): | |
|
105 | 124 | cctx = zstd.ZstdCompressor(level=level) |
|
106 | 125 | |
|
107 | 126 | if streaming: |
@@ -125,18 +144,22 class TestDecompressor_stream_reader_fuz | |||
|
125 | 144 | |
|
126 | 145 | chunks.append(chunk) |
|
127 | 146 | |
|
128 |
self.assertEqual(b |
|
|
147 | self.assertEqual(b"".join(chunks), original) | |
|
129 | 148 | |
|
130 | 149 | # Similar to above except we have a constant read() size. |
|
131 | 150 | @hypothesis.settings( |
|
132 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
133 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
134 | level=strategies.integers(min_value=1, max_value=5), | |
|
135 | streaming=strategies.booleans(), | |
|
136 | source_read_size=strategies.integers(1, 1048576), | |
|
137 | read_size=strategies.integers(-1, 131072)) | |
|
138 | def test_buffer_source_constant_read_size(self, original, level, streaming, | |
|
139 | source_read_size, read_size): | |
|
151 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
152 | ) | |
|
153 | @hypothesis.given( | |
|
154 | original=strategies.sampled_from(random_input_data()), | |
|
155 | level=strategies.integers(min_value=1, max_value=5), | |
|
156 | streaming=strategies.booleans(), | |
|
157 | source_read_size=strategies.integers(1, 1048576), | |
|
158 | read_size=strategies.integers(-1, 131072), | |
|
159 | ) | |
|
160 | def test_buffer_source_constant_read_size( | |
|
161 | self, original, level, streaming, source_read_size, read_size | |
|
162 | ): | |
|
140 | 163 | if read_size == 0: |
|
141 | 164 | read_size = -1 |
|
142 | 165 | |
@@ -162,16 +185,18 class TestDecompressor_stream_reader_fuz | |||
|
162 | 185 | |
|
163 | 186 | chunks.append(chunk) |
|
164 | 187 | |
|
165 |
self.assertEqual(b |
|
|
188 | self.assertEqual(b"".join(chunks), original) | |
|
166 | 189 | |
|
167 | 190 | @hypothesis.settings( |
|
168 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
169 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
170 | level=strategies.integers(min_value=1, max_value=5), | |
|
171 | streaming=strategies.booleans(), | |
|
172 | source_read_size=strategies.integers(1, 1048576)) | |
|
173 | def test_stream_source_readall(self, original, level, streaming, | |
|
174 | source_read_size): | |
|
191 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
192 | ) | |
|
193 | @hypothesis.given( | |
|
194 | original=strategies.sampled_from(random_input_data()), | |
|
195 | level=strategies.integers(min_value=1, max_value=5), | |
|
196 | streaming=strategies.booleans(), | |
|
197 | source_read_size=strategies.integers(1, 1048576), | |
|
198 | ) | |
|
199 | def test_stream_source_readall(self, original, level, streaming, source_read_size): | |
|
175 | 200 | cctx = zstd.ZstdCompressor(level=level) |
|
176 | 201 | |
|
177 | 202 | if streaming: |
@@ -190,14 +215,21 class TestDecompressor_stream_reader_fuz | |||
|
190 | 215 | self.assertEqual(data, original) |
|
191 | 216 | |
|
192 | 217 | @hypothesis.settings( |
|
193 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
194 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
195 | level=strategies.integers(min_value=1, max_value=5), | |
|
196 | streaming=strategies.booleans(), | |
|
197 | source_read_size=strategies.integers(1, 1048576), | |
|
198 | read_sizes=strategies.data()) | |
|
199 | def test_stream_source_read1_variance(self, original, level, streaming, | |
|
200 | source_read_size, read_sizes): | |
|
218 | suppress_health_check=[ | |
|
219 | hypothesis.HealthCheck.large_base_example, | |
|
220 | hypothesis.HealthCheck.too_slow, | |
|
221 | ] | |
|
222 | ) | |
|
223 | @hypothesis.given( | |
|
224 | original=strategies.sampled_from(random_input_data()), | |
|
225 | level=strategies.integers(min_value=1, max_value=5), | |
|
226 | streaming=strategies.booleans(), | |
|
227 | source_read_size=strategies.integers(1, 1048576), | |
|
228 | read_sizes=strategies.data(), | |
|
229 | ) | |
|
230 | def test_stream_source_read1_variance( | |
|
231 | self, original, level, streaming, source_read_size, read_sizes | |
|
232 | ): | |
|
201 | 233 | cctx = zstd.ZstdCompressor(level=level) |
|
202 | 234 | |
|
203 | 235 | if streaming: |
@@ -222,17 +254,24 class TestDecompressor_stream_reader_fuz | |||
|
222 | 254 | |
|
223 | 255 | chunks.append(chunk) |
|
224 | 256 | |
|
225 |
self.assertEqual(b |
|
|
257 | self.assertEqual(b"".join(chunks), original) | |
|
226 | 258 | |
|
227 | 259 | @hypothesis.settings( |
|
228 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
229 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
230 | level=strategies.integers(min_value=1, max_value=5), | |
|
231 | streaming=strategies.booleans(), | |
|
232 | source_read_size=strategies.integers(1, 1048576), | |
|
233 | read_sizes=strategies.data()) | |
|
234 | def test_stream_source_readinto1_variance(self, original, level, streaming, | |
|
235 | source_read_size, read_sizes): | |
|
260 | suppress_health_check=[ | |
|
261 | hypothesis.HealthCheck.large_base_example, | |
|
262 | hypothesis.HealthCheck.too_slow, | |
|
263 | ] | |
|
264 | ) | |
|
265 | @hypothesis.given( | |
|
266 | original=strategies.sampled_from(random_input_data()), | |
|
267 | level=strategies.integers(min_value=1, max_value=5), | |
|
268 | streaming=strategies.booleans(), | |
|
269 | source_read_size=strategies.integers(1, 1048576), | |
|
270 | read_sizes=strategies.data(), | |
|
271 | ) | |
|
272 | def test_stream_source_readinto1_variance( | |
|
273 | self, original, level, streaming, source_read_size, read_sizes | |
|
274 | ): | |
|
236 | 275 | cctx = zstd.ZstdCompressor(level=level) |
|
237 | 276 | |
|
238 | 277 | if streaming: |
@@ -259,18 +298,24 class TestDecompressor_stream_reader_fuz | |||
|
259 | 298 | |
|
260 | 299 | chunks.append(bytes(b[0:count])) |
|
261 | 300 | |
|
262 |
self.assertEqual(b |
|
|
301 | self.assertEqual(b"".join(chunks), original) | |
|
263 | 302 | |
|
264 | 303 | @hypothesis.settings( |
|
265 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
304 | suppress_health_check=[ | |
|
305 | hypothesis.HealthCheck.large_base_example, | |
|
306 | hypothesis.HealthCheck.too_slow, | |
|
307 | ] | |
|
308 | ) | |
|
266 | 309 | @hypothesis.given( |
|
267 | 310 | original=strategies.sampled_from(random_input_data()), |
|
268 | 311 | level=strategies.integers(min_value=1, max_value=5), |
|
269 | 312 | source_read_size=strategies.integers(1, 1048576), |
|
270 | 313 | seek_amounts=strategies.data(), |
|
271 |
read_sizes=strategies.data() |
|
|
272 | def test_relative_seeks(self, original, level, source_read_size, seek_amounts, | |
|
273 | read_sizes): | |
|
314 | read_sizes=strategies.data(), | |
|
315 | ) | |
|
316 | def test_relative_seeks( | |
|
317 | self, original, level, source_read_size, seek_amounts, read_sizes | |
|
318 | ): | |
|
274 | 319 | cctx = zstd.ZstdCompressor(level=level) |
|
275 | 320 | frame = cctx.compress(original) |
|
276 | 321 | |
@@ -288,18 +333,24 class TestDecompressor_stream_reader_fuz | |||
|
288 | 333 | if not chunk: |
|
289 | 334 | break |
|
290 | 335 | |
|
291 | self.assertEqual(original[offset:offset + len(chunk)], chunk) | |
|
336 | self.assertEqual(original[offset : offset + len(chunk)], chunk) | |
|
292 | 337 | |
|
293 | 338 | @hypothesis.settings( |
|
294 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
339 | suppress_health_check=[ | |
|
340 | hypothesis.HealthCheck.large_base_example, | |
|
341 | hypothesis.HealthCheck.too_slow, | |
|
342 | ] | |
|
343 | ) | |
|
295 | 344 | @hypothesis.given( |
|
296 | 345 | originals=strategies.data(), |
|
297 | 346 | frame_count=strategies.integers(min_value=2, max_value=10), |
|
298 | 347 | level=strategies.integers(min_value=1, max_value=5), |
|
299 | 348 | source_read_size=strategies.integers(1, 1048576), |
|
300 |
read_sizes=strategies.data() |
|
|
301 | def test_multiple_frames(self, originals, frame_count, level, | |
|
302 | source_read_size, read_sizes): | |
|
349 | read_sizes=strategies.data(), | |
|
350 | ) | |
|
351 | def test_multiple_frames( | |
|
352 | self, originals, frame_count, level, source_read_size, read_sizes | |
|
353 | ): | |
|
303 | 354 | |
|
304 | 355 | cctx = zstd.ZstdCompressor(level=level) |
|
305 | 356 | source = io.BytesIO() |
@@ -314,8 +365,9 class TestDecompressor_stream_reader_fuz | |||
|
314 | 365 | |
|
315 | 366 | dctx = zstd.ZstdDecompressor() |
|
316 | 367 | buffer.seek(0) |
|
317 |
reader = dctx.stream_reader( |
|
|
318 | read_across_frames=True) | |
|
368 | reader = dctx.stream_reader( | |
|
369 | buffer, read_size=source_read_size, read_across_frames=True | |
|
370 | ) | |
|
319 | 371 | |
|
320 | 372 | chunks = [] |
|
321 | 373 | |
@@ -328,16 +380,24 class TestDecompressor_stream_reader_fuz | |||
|
328 | 380 | |
|
329 | 381 | chunks.append(chunk) |
|
330 | 382 | |
|
331 |
self.assertEqual(source.getvalue(), b |
|
|
383 | self.assertEqual(source.getvalue(), b"".join(chunks)) | |
|
332 | 384 | |
|
333 | 385 | |
|
334 |
@unittest.skipUnless( |
|
|
386 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
335 | 387 | @make_cffi |
|
336 |
class TestDecompressor_stream_writer_fuzzing( |
|
|
337 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
338 | level=strategies.integers(min_value=1, max_value=5), | |
|
339 | write_size=strategies.integers(min_value=1, max_value=8192), | |
|
340 | input_sizes=strategies.data()) | |
|
388 | class TestDecompressor_stream_writer_fuzzing(TestCase): | |
|
389 | @hypothesis.settings( | |
|
390 | suppress_health_check=[ | |
|
391 | hypothesis.HealthCheck.large_base_example, | |
|
392 | hypothesis.HealthCheck.too_slow, | |
|
393 | ] | |
|
394 | ) | |
|
395 | @hypothesis.given( | |
|
396 | original=strategies.sampled_from(random_input_data()), | |
|
397 | level=strategies.integers(min_value=1, max_value=5), | |
|
398 | write_size=strategies.integers(min_value=1, max_value=8192), | |
|
399 | input_sizes=strategies.data(), | |
|
400 | ) | |
|
341 | 401 | def test_write_size_variance(self, original, level, write_size, input_sizes): |
|
342 | 402 | cctx = zstd.ZstdCompressor(level=level) |
|
343 | 403 | frame = cctx.compress(original) |
@@ -358,13 +418,21 class TestDecompressor_stream_writer_fuz | |||
|
358 | 418 | self.assertEqual(dest.getvalue(), original) |
|
359 | 419 | |
|
360 | 420 | |
|
361 |
@unittest.skipUnless( |
|
|
421 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
362 | 422 | @make_cffi |
|
363 |
class TestDecompressor_copy_stream_fuzzing( |
|
|
364 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
365 | level=strategies.integers(min_value=1, max_value=5), | |
|
366 | read_size=strategies.integers(min_value=1, max_value=8192), | |
|
367 | write_size=strategies.integers(min_value=1, max_value=8192)) | |
|
423 | class TestDecompressor_copy_stream_fuzzing(TestCase): | |
|
424 | @hypothesis.settings( | |
|
425 | suppress_health_check=[ | |
|
426 | hypothesis.HealthCheck.large_base_example, | |
|
427 | hypothesis.HealthCheck.too_slow, | |
|
428 | ] | |
|
429 | ) | |
|
430 | @hypothesis.given( | |
|
431 | original=strategies.sampled_from(random_input_data()), | |
|
432 | level=strategies.integers(min_value=1, max_value=5), | |
|
433 | read_size=strategies.integers(min_value=1, max_value=8192), | |
|
434 | write_size=strategies.integers(min_value=1, max_value=8192), | |
|
435 | ) | |
|
368 | 436 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
369 | 437 | cctx = zstd.ZstdCompressor(level=level) |
|
370 | 438 | frame = cctx.compress(original) |
@@ -378,12 +446,20 class TestDecompressor_copy_stream_fuzzi | |||
|
378 | 446 | self.assertEqual(dest.getvalue(), original) |
|
379 | 447 | |
|
380 | 448 | |
|
381 |
@unittest.skipUnless( |
|
|
449 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
382 | 450 | @make_cffi |
|
383 |
class TestDecompressor_decompressobj_fuzzing( |
|
|
384 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
385 | level=strategies.integers(min_value=1, max_value=5), | |
|
386 | chunk_sizes=strategies.data()) | |
|
451 | class TestDecompressor_decompressobj_fuzzing(TestCase): | |
|
452 | @hypothesis.settings( | |
|
453 | suppress_health_check=[ | |
|
454 | hypothesis.HealthCheck.large_base_example, | |
|
455 | hypothesis.HealthCheck.too_slow, | |
|
456 | ] | |
|
457 | ) | |
|
458 | @hypothesis.given( | |
|
459 | original=strategies.sampled_from(random_input_data()), | |
|
460 | level=strategies.integers(min_value=1, max_value=5), | |
|
461 | chunk_sizes=strategies.data(), | |
|
462 | ) | |
|
387 | 463 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
388 | 464 | cctx = zstd.ZstdCompressor(level=level) |
|
389 | 465 | frame = cctx.compress(original) |
@@ -402,13 +478,22 class TestDecompressor_decompressobj_fuz | |||
|
402 | 478 | |
|
403 | 479 | chunks.append(dobj.decompress(chunk)) |
|
404 | 480 | |
|
405 |
self.assertEqual(b |
|
|
481 | self.assertEqual(b"".join(chunks), original) | |
|
406 | 482 | |
|
407 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
408 | level=strategies.integers(min_value=1, max_value=5), | |
|
409 | write_size=strategies.integers(min_value=1, | |
|
410 | max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
411 | chunk_sizes=strategies.data()) | |
|
483 | @hypothesis.settings( | |
|
484 | suppress_health_check=[ | |
|
485 | hypothesis.HealthCheck.large_base_example, | |
|
486 | hypothesis.HealthCheck.too_slow, | |
|
487 | ] | |
|
488 | ) | |
|
489 | @hypothesis.given( | |
|
490 | original=strategies.sampled_from(random_input_data()), | |
|
491 | level=strategies.integers(min_value=1, max_value=5), | |
|
492 | write_size=strategies.integers( | |
|
493 | min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
|
494 | ), | |
|
495 | chunk_sizes=strategies.data(), | |
|
496 | ) | |
|
412 | 497 | def test_random_output_sizes(self, original, level, write_size, chunk_sizes): |
|
413 | 498 | cctx = zstd.ZstdCompressor(level=level) |
|
414 | 499 | frame = cctx.compress(original) |
@@ -427,16 +512,18 class TestDecompressor_decompressobj_fuz | |||
|
427 | 512 | |
|
428 | 513 | chunks.append(dobj.decompress(chunk)) |
|
429 | 514 | |
|
430 |
self.assertEqual(b |
|
|
515 | self.assertEqual(b"".join(chunks), original) | |
|
431 | 516 | |
|
432 | 517 | |
|
433 |
@unittest.skipUnless( |
|
|
518 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
434 | 519 | @make_cffi |
|
435 |
class TestDecompressor_read_to_iter_fuzzing( |
|
|
436 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
437 | level=strategies.integers(min_value=1, max_value=5), | |
|
438 |
|
|
|
439 |
|
|
|
520 | class TestDecompressor_read_to_iter_fuzzing(TestCase): | |
|
521 | @hypothesis.given( | |
|
522 | original=strategies.sampled_from(random_input_data()), | |
|
523 | level=strategies.integers(min_value=1, max_value=5), | |
|
524 | read_size=strategies.integers(min_value=1, max_value=4096), | |
|
525 | write_size=strategies.integers(min_value=1, max_value=4096), | |
|
526 | ) | |
|
440 | 527 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
441 | 528 | cctx = zstd.ZstdCompressor(level=level) |
|
442 | 529 | frame = cctx.compress(original) |
@@ -444,29 +531,33 class TestDecompressor_read_to_iter_fuzz | |||
|
444 | 531 | source = io.BytesIO(frame) |
|
445 | 532 | |
|
446 | 533 | dctx = zstd.ZstdDecompressor() |
|
447 | chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size)) | |
|
534 | chunks = list( | |
|
535 | dctx.read_to_iter(source, read_size=read_size, write_size=write_size) | |
|
536 | ) | |
|
448 | 537 | |
|
449 |
self.assertEqual(b |
|
|
538 | self.assertEqual(b"".join(chunks), original) | |
|
450 | 539 | |
|
451 | 540 | |
|
452 |
@unittest.skipUnless( |
|
|
453 |
class TestDecompressor_multi_decompress_to_buffer_fuzzing( |
|
|
454 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), | |
|
455 | min_size=1, max_size=1024), | |
|
456 | threads=strategies.integers(min_value=1, max_value=8), | |
|
457 | use_dict=strategies.booleans()) | |
|
541 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
542 | class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase): | |
|
543 | @hypothesis.given( | |
|
544 | original=strategies.lists( | |
|
545 | strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 | |
|
546 | ), | |
|
547 | threads=strategies.integers(min_value=1, max_value=8), | |
|
548 | use_dict=strategies.booleans(), | |
|
549 | ) | |
|
458 | 550 | def test_data_equivalence(self, original, threads, use_dict): |
|
459 | 551 | kwargs = {} |
|
460 | 552 | if use_dict: |
|
461 |
kwargs[ |
|
|
553 | kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) | |
|
462 | 554 | |
|
463 |
cctx = zstd.ZstdCompressor( |
|
|
464 | write_content_size=True, | |
|
465 | write_checksum=True, | |
|
466 | **kwargs) | |
|
555 | cctx = zstd.ZstdCompressor( | |
|
556 | level=1, write_content_size=True, write_checksum=True, **kwargs | |
|
557 | ) | |
|
467 | 558 | |
|
468 |
if not hasattr(cctx, |
|
|
469 |
self.skipTest( |
|
|
559 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
560 | self.skipTest("multi_compress_to_buffer not available") | |
|
470 | 561 | |
|
471 | 562 | frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1) |
|
472 | 563 |
@@ -2,14 +2,14 import unittest | |||
|
2 | 2 | |
|
3 | 3 | import zstandard as zstd |
|
4 | 4 | |
|
5 |
from . |
|
|
5 | from .common import ( | |
|
6 | 6 | make_cffi, |
|
7 | TestCase, | |
|
7 | 8 | ) |
|
8 | 9 | |
|
9 | 10 | |
|
10 | 11 | @make_cffi |
|
11 |
class TestSizes( |
|
|
12 | class TestSizes(TestCase): | |
|
12 | 13 | def test_decompression_size(self): |
|
13 | 14 | size = zstd.estimate_decompression_context_size() |
|
14 | 15 | self.assertGreater(size, 100000) |
|
15 |
@@ -4,65 +4,66 import unittest | |||
|
4 | 4 | |
|
5 | 5 | import zstandard as zstd |
|
6 | 6 | |
|
7 |
from . |
|
|
7 | from .common import ( | |
|
8 | 8 | make_cffi, |
|
9 | TestCase, | |
|
9 | 10 | ) |
|
10 | 11 | |
|
11 | 12 | |
|
12 | 13 | @make_cffi |
|
13 |
class TestModuleAttributes( |
|
|
14 | class TestModuleAttributes(TestCase): | |
|
14 | 15 | def test_version(self): |
|
15 |
self.assertEqual(zstd.ZSTD_VERSION, (1, 4, |
|
|
16 | self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4)) | |
|
16 | 17 | |
|
17 |
self.assertEqual(zstd.__version__, |
|
|
18 | self.assertEqual(zstd.__version__, "0.13.0") | |
|
18 | 19 | |
|
19 | 20 | def test_constants(self): |
|
20 | 21 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
21 |
self.assertEqual(zstd.FRAME_HEADER, b |
|
|
22 | self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd") | |
|
22 | 23 | |
|
23 | 24 | def test_hasattr(self): |
|
24 | 25 | attrs = ( |
|
25 |
|
|
|
26 |
|
|
|
27 |
|
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
|
40 |
|
|
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 |
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
26 | "CONTENTSIZE_UNKNOWN", | |
|
27 | "CONTENTSIZE_ERROR", | |
|
28 | "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
29 | "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
30 | "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
31 | "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
32 | "MAGIC_NUMBER", | |
|
33 | "FLUSH_BLOCK", | |
|
34 | "FLUSH_FRAME", | |
|
35 | "BLOCKSIZELOG_MAX", | |
|
36 | "BLOCKSIZE_MAX", | |
|
37 | "WINDOWLOG_MIN", | |
|
38 | "WINDOWLOG_MAX", | |
|
39 | "CHAINLOG_MIN", | |
|
40 | "CHAINLOG_MAX", | |
|
41 | "HASHLOG_MIN", | |
|
42 | "HASHLOG_MAX", | |
|
43 | "HASHLOG3_MAX", | |
|
44 | "MINMATCH_MIN", | |
|
45 | "MINMATCH_MAX", | |
|
46 | "SEARCHLOG_MIN", | |
|
47 | "SEARCHLOG_MAX", | |
|
48 | "SEARCHLENGTH_MIN", | |
|
49 | "SEARCHLENGTH_MAX", | |
|
50 | "TARGETLENGTH_MIN", | |
|
51 | "TARGETLENGTH_MAX", | |
|
52 | "LDM_MINMATCH_MIN", | |
|
53 | "LDM_MINMATCH_MAX", | |
|
54 | "LDM_BUCKETSIZELOG_MAX", | |
|
55 | "STRATEGY_FAST", | |
|
56 | "STRATEGY_DFAST", | |
|
57 | "STRATEGY_GREEDY", | |
|
58 | "STRATEGY_LAZY", | |
|
59 | "STRATEGY_LAZY2", | |
|
60 | "STRATEGY_BTLAZY2", | |
|
61 | "STRATEGY_BTOPT", | |
|
62 | "STRATEGY_BTULTRA", | |
|
63 | "STRATEGY_BTULTRA2", | |
|
64 | "DICT_TYPE_AUTO", | |
|
65 | "DICT_TYPE_RAWCONTENT", | |
|
66 | "DICT_TYPE_FULLDICT", | |
|
66 | 67 | ) |
|
67 | 68 | |
|
68 | 69 | for a in attrs: |
@@ -4,10 +4,11 import unittest | |||
|
4 | 4 | |
|
5 | 5 | import zstandard as zstd |
|
6 | 6 | |
|
7 |
from . |
|
|
7 | from .common import ( | |
|
8 | 8 | generate_samples, |
|
9 | 9 | make_cffi, |
|
10 | 10 | random_input_data, |
|
11 | TestCase, | |
|
11 | 12 | ) |
|
12 | 13 | |
|
13 | 14 | if sys.version_info[0] >= 3: |
@@ -17,24 +18,24 else: | |||
|
17 | 18 | |
|
18 | 19 | |
|
19 | 20 | @make_cffi |
|
20 |
class TestTrainDictionary( |
|
|
21 | class TestTrainDictionary(TestCase): | |
|
21 | 22 | def test_no_args(self): |
|
22 | 23 | with self.assertRaises(TypeError): |
|
23 | 24 | zstd.train_dictionary() |
|
24 | 25 | |
|
25 | 26 | def test_bad_args(self): |
|
26 | 27 | with self.assertRaises(TypeError): |
|
27 |
zstd.train_dictionary(8192, u |
|
|
28 | zstd.train_dictionary(8192, u"foo") | |
|
28 | 29 | |
|
29 | 30 | with self.assertRaises(ValueError): |
|
30 |
zstd.train_dictionary(8192, [u |
|
|
31 | zstd.train_dictionary(8192, [u"foo"]) | |
|
31 | 32 | |
|
32 | 33 | def test_no_params(self): |
|
33 | 34 | d = zstd.train_dictionary(8192, random_input_data()) |
|
34 | 35 | self.assertIsInstance(d.dict_id(), int_type) |
|
35 | 36 | |
|
36 | 37 | # The dictionary ID may be different across platforms. |
|
37 |
expected = b |
|
|
38 | expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id()) | |
|
38 | 39 | |
|
39 | 40 | data = d.as_bytes() |
|
40 | 41 | self.assertEqual(data[0:8], expected) |
@@ -44,46 +45,48 class TestTrainDictionary(unittest.TestC | |||
|
44 | 45 | self.assertIsInstance(d.dict_id(), int_type) |
|
45 | 46 | |
|
46 | 47 | data = d.as_bytes() |
|
47 |
self.assertEqual(data[0:4], b |
|
|
48 | self.assertEqual(data[0:4], b"\x37\xa4\x30\xec") | |
|
48 | 49 | |
|
49 | 50 | self.assertEqual(d.k, 64) |
|
50 | 51 | self.assertEqual(d.d, 16) |
|
51 | 52 | |
|
52 | 53 | def test_set_dict_id(self): |
|
53 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, | |
|
54 | dict_id=42) | |
|
54 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42) | |
|
55 | 55 | self.assertEqual(d.dict_id(), 42) |
|
56 | 56 | |
|
57 | 57 | def test_optimize(self): |
|
58 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, | |
|
59 | d=16) | |
|
58 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16) | |
|
60 | 59 | |
|
61 | 60 | # This varies by platform. |
|
62 | 61 | self.assertIn(d.k, (50, 2000)) |
|
63 | 62 | self.assertEqual(d.d, 16) |
|
64 | 63 | |
|
64 | ||
|
65 | 65 | @make_cffi |
|
66 |
class TestCompressionDict( |
|
|
66 | class TestCompressionDict(TestCase): | |
|
67 | 67 | def test_bad_mode(self): |
|
68 |
with self.assertRaisesRegex |
|
|
69 |
zstd.ZstdCompressionDict(b |
|
|
68 | with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"): | |
|
69 | zstd.ZstdCompressionDict(b"foo", dict_type=42) | |
|
70 | 70 | |
|
71 | 71 | def test_bad_precompute_compress(self): |
|
72 | 72 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
73 | 73 | |
|
74 |
with self.assertRaisesRegex |
|
|
74 | with self.assertRaisesRegex(ValueError, "must specify one of level or "): | |
|
75 | 75 | d.precompute_compress() |
|
76 | 76 | |
|
77 |
with self.assertRaisesRegex |
|
|
78 |
d.precompute_compress( |
|
|
79 |
|
|
|
77 | with self.assertRaisesRegex(ValueError, "must only specify one of level or "): | |
|
78 | d.precompute_compress( | |
|
79 | level=3, compression_params=zstd.CompressionParameters() | |
|
80 | ) | |
|
80 | 81 | |
|
81 | 82 | def test_precompute_compress_rawcontent(self): |
|
82 |
d = zstd.ZstdCompressionDict( |
|
|
83 |
|
|
|
83 | d = zstd.ZstdCompressionDict( | |
|
84 | b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT | |
|
85 | ) | |
|
84 | 86 | d.precompute_compress(level=1) |
|
85 | 87 | |
|
86 |
d = zstd.ZstdCompressionDict( |
|
|
87 |
|
|
|
88 | with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'): | |
|
88 | d = zstd.ZstdCompressionDict( | |
|
89 | b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT | |
|
90 | ) | |
|
91 | with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"): | |
|
89 | 92 | d.precompute_compress(level=1) |
@@ -28,38 +28,48 import platform | |||
|
28 | 28 | # defining a variable and `setup.py` could write the file with whatever |
|
29 | 29 | # policy was specified at build time. Until someone needs it, we go with |
|
30 | 30 | # the hacky but simple environment variable approach. |
|
31 |
_module_policy = os.environ.get( |
|
|
31 | _module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default") | |
|
32 | 32 | |
|
33 |
if _module_policy == |
|
|
34 |
if platform.python_implementation() in ( |
|
|
33 | if _module_policy == "default": | |
|
34 | if platform.python_implementation() in ("CPython",): | |
|
35 | 35 | from zstd import * |
|
36 | backend = 'cext' | |
|
37 | elif platform.python_implementation() in ('PyPy',): | |
|
36 | ||
|
37 | backend = "cext" | |
|
38 | elif platform.python_implementation() in ("PyPy",): | |
|
38 | 39 | from .cffi import * |
|
39 | backend = 'cffi' | |
|
40 | ||
|
41 | backend = "cffi" | |
|
40 | 42 | else: |
|
41 | 43 | try: |
|
42 | 44 | from zstd import * |
|
43 | backend = 'cext' | |
|
45 | ||
|
46 | backend = "cext" | |
|
44 | 47 | except ImportError: |
|
45 | 48 | from .cffi import * |
|
46 | backend = 'cffi' | |
|
47 | elif _module_policy == 'cffi_fallback': | |
|
49 | ||
|
50 | backend = "cffi" | |
|
51 | elif _module_policy == "cffi_fallback": | |
|
48 | 52 | try: |
|
49 | 53 | from zstd import * |
|
50 | backend = 'cext' | |
|
54 | ||
|
55 | backend = "cext" | |
|
51 | 56 | except ImportError: |
|
52 | 57 | from .cffi import * |
|
53 | backend = 'cffi' | |
|
54 | elif _module_policy == 'cext': | |
|
58 | ||
|
59 | backend = "cffi" | |
|
60 | elif _module_policy == "cext": | |
|
55 | 61 | from zstd import * |
|
56 | backend = 'cext' | |
|
57 | elif _module_policy == 'cffi': | |
|
62 | ||
|
63 | backend = "cext" | |
|
64 | elif _module_policy == "cffi": | |
|
58 | 65 | from .cffi import * |
|
59 | backend = 'cffi' | |
|
66 | ||
|
67 | backend = "cffi" | |
|
60 | 68 | else: |
|
61 | raise ImportError('unknown module import policy: %s; use default, cffi_fallback, ' | |
|
62 | 'cext, or cffi' % _module_policy) | |
|
69 | raise ImportError( | |
|
70 | "unknown module import policy: %s; use default, cffi_fallback, " | |
|
71 | "cext, or cffi" % _module_policy | |
|
72 | ) | |
|
63 | 73 | |
|
64 | 74 | # Keep this in sync with python-zstandard.h. |
|
65 |
__version__ = |
|
|
75 | __version__ = "0.13.0" |
This diff has been collapsed as it changes many lines, (1196 lines changed) Show them Hide them | |||
@@ -14,68 +14,67 from __future__ import absolute_import, | |||
|
14 | 14 | #'BufferSegments', |
|
15 | 15 | #'BufferWithSegments', |
|
16 | 16 | #'BufferWithSegmentsCollection', |
|
17 |
|
|
|
18 |
|
|
|
19 |
|
|
|
20 |
|
|
|
21 |
|
|
|
22 |
|
|
|
23 |
|
|
|
24 |
|
|
|
25 |
|
|
|
26 |
|
|
|
27 |
|
|
|
28 |
|
|
|
29 | ||
|
17 | "CompressionParameters", | |
|
18 | "ZstdCompressionDict", | |
|
19 | "ZstdCompressionParameters", | |
|
20 | "ZstdCompressor", | |
|
21 | "ZstdError", | |
|
22 | "ZstdDecompressor", | |
|
23 | "FrameParameters", | |
|
24 | "estimate_decompression_context_size", | |
|
25 | "frame_content_size", | |
|
26 | "frame_header_size", | |
|
27 | "get_frame_parameters", | |
|
28 | "train_dictionary", | |
|
30 | 29 | # Constants. |
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
|
40 |
|
|
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 |
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
|
71 |
|
|
|
72 |
|
|
|
73 |
|
|
|
74 |
|
|
|
75 |
|
|
|
76 |
|
|
|
77 |
|
|
|
78 |
|
|
|
30 | "FLUSH_BLOCK", | |
|
31 | "FLUSH_FRAME", | |
|
32 | "COMPRESSOBJ_FLUSH_FINISH", | |
|
33 | "COMPRESSOBJ_FLUSH_BLOCK", | |
|
34 | "ZSTD_VERSION", | |
|
35 | "FRAME_HEADER", | |
|
36 | "CONTENTSIZE_UNKNOWN", | |
|
37 | "CONTENTSIZE_ERROR", | |
|
38 | "MAX_COMPRESSION_LEVEL", | |
|
39 | "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
40 | "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
41 | "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
42 | "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
43 | "MAGIC_NUMBER", | |
|
44 | "BLOCKSIZELOG_MAX", | |
|
45 | "BLOCKSIZE_MAX", | |
|
46 | "WINDOWLOG_MIN", | |
|
47 | "WINDOWLOG_MAX", | |
|
48 | "CHAINLOG_MIN", | |
|
49 | "CHAINLOG_MAX", | |
|
50 | "HASHLOG_MIN", | |
|
51 | "HASHLOG_MAX", | |
|
52 | "HASHLOG3_MAX", | |
|
53 | "MINMATCH_MIN", | |
|
54 | "MINMATCH_MAX", | |
|
55 | "SEARCHLOG_MIN", | |
|
56 | "SEARCHLOG_MAX", | |
|
57 | "SEARCHLENGTH_MIN", | |
|
58 | "SEARCHLENGTH_MAX", | |
|
59 | "TARGETLENGTH_MIN", | |
|
60 | "TARGETLENGTH_MAX", | |
|
61 | "LDM_MINMATCH_MIN", | |
|
62 | "LDM_MINMATCH_MAX", | |
|
63 | "LDM_BUCKETSIZELOG_MAX", | |
|
64 | "STRATEGY_FAST", | |
|
65 | "STRATEGY_DFAST", | |
|
66 | "STRATEGY_GREEDY", | |
|
67 | "STRATEGY_LAZY", | |
|
68 | "STRATEGY_LAZY2", | |
|
69 | "STRATEGY_BTLAZY2", | |
|
70 | "STRATEGY_BTOPT", | |
|
71 | "STRATEGY_BTULTRA", | |
|
72 | "STRATEGY_BTULTRA2", | |
|
73 | "DICT_TYPE_AUTO", | |
|
74 | "DICT_TYPE_RAWCONTENT", | |
|
75 | "DICT_TYPE_FULLDICT", | |
|
76 | "FORMAT_ZSTD1", | |
|
77 | "FORMAT_ZSTD1_MAGICLESS", | |
|
79 | 78 | ] |
|
80 | 79 | |
|
81 | 80 | import io |
@@ -105,10 +104,14 new_nonzero = ffi.new_allocator(should_c | |||
|
105 | 104 | |
|
106 | 105 | MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel() |
|
107 | 106 | MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER |
|
108 |
FRAME_HEADER = b |
|
|
107 | FRAME_HEADER = b"\x28\xb5\x2f\xfd" | |
|
109 | 108 | CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSIZE_UNKNOWN |
|
110 | 109 | CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR |
|
111 | ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE) | |
|
110 | ZSTD_VERSION = ( | |
|
111 | lib.ZSTD_VERSION_MAJOR, | |
|
112 | lib.ZSTD_VERSION_MINOR, | |
|
113 | lib.ZSTD_VERSION_RELEASE, | |
|
114 | ) | |
|
112 | 115 | |
|
113 | 116 | BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX |
|
114 | 117 | BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX |
@@ -165,9 +168,9 def _cpu_count(): | |||
|
165 | 168 | # Linux. |
|
166 | 169 | try: |
|
167 | 170 | if sys.version_info[0] == 2: |
|
168 |
return os.sysconf(b |
|
|
171 | return os.sysconf(b"SC_NPROCESSORS_ONLN") | |
|
169 | 172 | else: |
|
170 |
return os.sysconf( |
|
|
173 | return os.sysconf("SC_NPROCESSORS_ONLN") | |
|
171 | 174 | except (AttributeError, ValueError): |
|
172 | 175 | pass |
|
173 | 176 | |
@@ -183,7 +186,8 def _zstd_error(zresult): | |||
|
183 | 186 | # Resolves to bytes on Python 2 and 3. We use the string for formatting |
|
184 | 187 | # into error messages, which will be literal unicode. So convert it to |
|
185 | 188 | # unicode. |
|
186 |
return ffi.string(lib.ZSTD_getErrorName(zresult)).decode( |
|
|
189 | return ffi.string(lib.ZSTD_getErrorName(zresult)).decode("utf-8") | |
|
190 | ||
|
187 | 191 | |
|
188 | 192 | def _make_cctx_params(params): |
|
189 | 193 | res = lib.ZSTD_createCCtxParams() |
@@ -221,19 +225,20 def _make_cctx_params(params): | |||
|
221 | 225 | |
|
222 | 226 | return res |
|
223 | 227 | |
|
228 | ||
|
224 | 229 | class ZstdCompressionParameters(object): |
|
225 | 230 | @staticmethod |
|
226 | 231 | def from_level(level, source_size=0, dict_size=0, **kwargs): |
|
227 | 232 | params = lib.ZSTD_getCParams(level, source_size, dict_size) |
|
228 | 233 | |
|
229 | 234 | args = { |
|
230 |
|
|
|
231 |
|
|
|
232 |
|
|
|
233 |
|
|
|
234 |
|
|
|
235 |
|
|
|
236 |
|
|
|
235 | "window_log": "windowLog", | |
|
236 | "chain_log": "chainLog", | |
|
237 | "hash_log": "hashLog", | |
|
238 | "search_log": "searchLog", | |
|
239 | "min_match": "minMatch", | |
|
240 | "target_length": "targetLength", | |
|
241 | "compression_strategy": "strategy", | |
|
237 | 242 | } |
|
238 | 243 | |
|
239 | 244 | for arg, attr in args.items(): |
@@ -242,14 +247,33 class ZstdCompressionParameters(object): | |||
|
242 | 247 | |
|
243 | 248 | return ZstdCompressionParameters(**kwargs) |
|
244 | 249 | |
|
245 | def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0, | |
|
246 | chain_log=0, search_log=0, min_match=0, target_length=0, | |
|
247 | strategy=-1, compression_strategy=-1, | |
|
248 | write_content_size=1, write_checksum=0, | |
|
249 | write_dict_id=0, job_size=0, overlap_log=-1, | |
|
250 | overlap_size_log=-1, force_max_window=0, enable_ldm=0, | |
|
251 | ldm_hash_log=0, ldm_min_match=0, ldm_bucket_size_log=0, | |
|
252 | ldm_hash_rate_log=-1, ldm_hash_every_log=-1, threads=0): | |
|
250 | def __init__( | |
|
251 | self, | |
|
252 | format=0, | |
|
253 | compression_level=0, | |
|
254 | window_log=0, | |
|
255 | hash_log=0, | |
|
256 | chain_log=0, | |
|
257 | search_log=0, | |
|
258 | min_match=0, | |
|
259 | target_length=0, | |
|
260 | strategy=-1, | |
|
261 | compression_strategy=-1, | |
|
262 | write_content_size=1, | |
|
263 | write_checksum=0, | |
|
264 | write_dict_id=0, | |
|
265 | job_size=0, | |
|
266 | overlap_log=-1, | |
|
267 | overlap_size_log=-1, | |
|
268 | force_max_window=0, | |
|
269 | enable_ldm=0, | |
|
270 | ldm_hash_log=0, | |
|
271 | ldm_min_match=0, | |
|
272 | ldm_bucket_size_log=0, | |
|
273 | ldm_hash_rate_log=-1, | |
|
274 | ldm_hash_every_log=-1, | |
|
275 | threads=0, | |
|
276 | ): | |
|
253 | 277 | |
|
254 | 278 | params = lib.ZSTD_createCCtxParams() |
|
255 | 279 | if params == ffi.NULL: |
@@ -267,7 +291,9 class ZstdCompressionParameters(object): | |||
|
267 | 291 | _set_compression_parameter(params, lib.ZSTD_c_nbWorkers, threads) |
|
268 | 292 | |
|
269 | 293 | _set_compression_parameter(params, lib.ZSTD_c_format, format) |
|
270 | _set_compression_parameter(params, lib.ZSTD_c_compressionLevel, compression_level) | |
|
294 | _set_compression_parameter( | |
|
295 | params, lib.ZSTD_c_compressionLevel, compression_level | |
|
296 | ) | |
|
271 | 297 | _set_compression_parameter(params, lib.ZSTD_c_windowLog, window_log) |
|
272 | 298 | _set_compression_parameter(params, lib.ZSTD_c_hashLog, hash_log) |
|
273 | 299 | _set_compression_parameter(params, lib.ZSTD_c_chainLog, chain_log) |
@@ -276,7 +302,7 class ZstdCompressionParameters(object): | |||
|
276 | 302 | _set_compression_parameter(params, lib.ZSTD_c_targetLength, target_length) |
|
277 | 303 | |
|
278 | 304 | if strategy != -1 and compression_strategy != -1: |
|
279 |
raise ValueError( |
|
|
305 | raise ValueError("cannot specify both compression_strategy and strategy") | |
|
280 | 306 | |
|
281 | 307 | if compression_strategy != -1: |
|
282 | 308 | strategy = compression_strategy |
@@ -284,13 +310,15 class ZstdCompressionParameters(object): | |||
|
284 | 310 | strategy = 0 |
|
285 | 311 | |
|
286 | 312 | _set_compression_parameter(params, lib.ZSTD_c_strategy, strategy) |
|
287 | _set_compression_parameter(params, lib.ZSTD_c_contentSizeFlag, write_content_size) | |
|
313 | _set_compression_parameter( | |
|
314 | params, lib.ZSTD_c_contentSizeFlag, write_content_size | |
|
315 | ) | |
|
288 | 316 | _set_compression_parameter(params, lib.ZSTD_c_checksumFlag, write_checksum) |
|
289 | 317 | _set_compression_parameter(params, lib.ZSTD_c_dictIDFlag, write_dict_id) |
|
290 | 318 | _set_compression_parameter(params, lib.ZSTD_c_jobSize, job_size) |
|
291 | 319 | |
|
292 | 320 | if overlap_log != -1 and overlap_size_log != -1: |
|
293 |
raise ValueError( |
|
|
321 | raise ValueError("cannot specify both overlap_log and overlap_size_log") | |
|
294 | 322 | |
|
295 | 323 | if overlap_size_log != -1: |
|
296 | 324 | overlap_log = overlap_size_log |
@@ -299,13 +327,19 class ZstdCompressionParameters(object): | |||
|
299 | 327 | |
|
300 | 328 | _set_compression_parameter(params, lib.ZSTD_c_overlapLog, overlap_log) |
|
301 | 329 | _set_compression_parameter(params, lib.ZSTD_c_forceMaxWindow, force_max_window) |
|
302 | _set_compression_parameter(params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm) | |
|
330 | _set_compression_parameter( | |
|
331 | params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm | |
|
332 | ) | |
|
303 | 333 | _set_compression_parameter(params, lib.ZSTD_c_ldmHashLog, ldm_hash_log) |
|
304 | 334 | _set_compression_parameter(params, lib.ZSTD_c_ldmMinMatch, ldm_min_match) |
|
305 | _set_compression_parameter(params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log) | |
|
335 | _set_compression_parameter( | |
|
336 | params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log | |
|
337 | ) | |
|
306 | 338 | |
|
307 | 339 | if ldm_hash_rate_log != -1 and ldm_hash_every_log != -1: |
|
308 | raise ValueError('cannot specify both ldm_hash_rate_log and ldm_hash_every_log') | |
|
340 | raise ValueError( | |
|
341 | "cannot specify both ldm_hash_rate_log and ldm_hash_every_log" | |
|
342 | ) | |
|
309 | 343 | |
|
310 | 344 | if ldm_hash_every_log != -1: |
|
311 | 345 | ldm_hash_rate_log = ldm_hash_every_log |
@@ -380,7 +414,9 class ZstdCompressionParameters(object): | |||
|
380 | 414 | |
|
381 | 415 | @property |
|
382 | 416 | def enable_ldm(self): |
|
383 |
return _get_compression_parameter( |
|
|
417 | return _get_compression_parameter( | |
|
418 | self._params, lib.ZSTD_c_enableLongDistanceMatching | |
|
419 | ) | |
|
384 | 420 | |
|
385 | 421 | @property |
|
386 | 422 | def ldm_hash_log(self): |
@@ -409,8 +445,10 class ZstdCompressionParameters(object): | |||
|
409 | 445 | def estimated_compression_context_size(self): |
|
410 | 446 | return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self._params) |
|
411 | 447 | |
|
448 | ||
|
412 | 449 | CompressionParameters = ZstdCompressionParameters |
|
413 | 450 | |
|
451 | ||
|
414 | 452 | def estimate_decompression_context_size(): |
|
415 | 453 | return lib.ZSTD_estimateDCtxSize() |
|
416 | 454 | |
@@ -418,24 +456,25 def estimate_decompression_context_size( | |||
|
418 | 456 | def _set_compression_parameter(params, param, value): |
|
419 | 457 | zresult = lib.ZSTD_CCtxParams_setParameter(params, param, value) |
|
420 | 458 | if lib.ZSTD_isError(zresult): |
|
421 | raise ZstdError('unable to set compression context parameter: %s' % | |
|
422 |
|
|
|
459 | raise ZstdError( | |
|
460 | "unable to set compression context parameter: %s" % _zstd_error(zresult) | |
|
461 | ) | |
|
423 | 462 | |
|
424 | 463 | |
|
425 | 464 | def _get_compression_parameter(params, param): |
|
426 |
result = ffi.new( |
|
|
465 | result = ffi.new("int *") | |
|
427 | 466 | |
|
428 | 467 | zresult = lib.ZSTD_CCtxParams_getParameter(params, param, result) |
|
429 | 468 | if lib.ZSTD_isError(zresult): |
|
430 | raise ZstdError('unable to get compression context parameter: %s' % | |
|
431 |
|
|
|
469 | raise ZstdError( | |
|
470 | "unable to get compression context parameter: %s" % _zstd_error(zresult) | |
|
471 | ) | |
|
432 | 472 | |
|
433 | 473 | return result[0] |
|
434 | 474 | |
|
435 | 475 | |
|
436 | 476 | class ZstdCompressionWriter(object): |
|
437 | def __init__(self, compressor, writer, source_size, write_size, | |
|
438 | write_return_read): | |
|
477 | def __init__(self, compressor, writer, source_size, write_size, write_return_read): | |
|
439 | 478 | self._compressor = compressor |
|
440 | 479 | self._writer = writer |
|
441 | 480 | self._write_size = write_size |
@@ -444,24 +483,22 class ZstdCompressionWriter(object): | |||
|
444 | 483 | self._closed = False |
|
445 | 484 | self._bytes_compressed = 0 |
|
446 | 485 | |
|
447 |
self._dst_buffer = ffi.new( |
|
|
448 |
self._out_buffer = ffi.new( |
|
|
486 | self._dst_buffer = ffi.new("char[]", write_size) | |
|
487 | self._out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
449 | 488 | self._out_buffer.dst = self._dst_buffer |
|
450 | 489 | self._out_buffer.size = len(self._dst_buffer) |
|
451 | 490 | self._out_buffer.pos = 0 |
|
452 | 491 | |
|
453 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx, | |
|
454 | source_size) | |
|
492 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx, source_size) | |
|
455 | 493 | if lib.ZSTD_isError(zresult): |
|
456 |
raise ZstdError( |
|
|
457 | _zstd_error(zresult)) | |
|
494 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
458 | 495 | |
|
459 | 496 | def __enter__(self): |
|
460 | 497 | if self._closed: |
|
461 |
raise ValueError( |
|
|
498 | raise ValueError("stream is closed") | |
|
462 | 499 | |
|
463 | 500 | if self._entered: |
|
464 |
raise ZstdError( |
|
|
501 | raise ZstdError("cannot __enter__ multiple times") | |
|
465 | 502 | |
|
466 | 503 | self._entered = True |
|
467 | 504 | return self |
@@ -480,11 +517,11 class ZstdCompressionWriter(object): | |||
|
480 | 517 | return lib.ZSTD_sizeof_CCtx(self._compressor._cctx) |
|
481 | 518 | |
|
482 | 519 | def fileno(self): |
|
483 |
f = getattr(self._writer, |
|
|
520 | f = getattr(self._writer, "fileno", None) | |
|
484 | 521 | if f: |
|
485 | 522 | return f() |
|
486 | 523 | else: |
|
487 |
raise OSError( |
|
|
524 | raise OSError("fileno not available on underlying writer") | |
|
488 | 525 | |
|
489 | 526 | def close(self): |
|
490 | 527 | if self._closed: |
@@ -496,7 +533,7 class ZstdCompressionWriter(object): | |||
|
496 | 533 | self._closed = True |
|
497 | 534 | |
|
498 | 535 | # Call close() on underlying stream as well. |
|
499 |
f = getattr(self._writer, |
|
|
536 | f = getattr(self._writer, "close", None) | |
|
500 | 537 | if f: |
|
501 | 538 | f() |
|
502 | 539 | |
@@ -529,7 +566,7 class ZstdCompressionWriter(object): | |||
|
529 | 566 | return True |
|
530 | 567 | |
|
531 | 568 | def writelines(self, lines): |
|
532 |
raise NotImplementedError( |
|
|
569 | raise NotImplementedError("writelines() is not yet implemented") | |
|
533 | 570 | |
|
534 | 571 | def read(self, size=-1): |
|
535 | 572 | raise io.UnsupportedOperation() |
@@ -542,13 +579,13 class ZstdCompressionWriter(object): | |||
|
542 | 579 | |
|
543 | 580 | def write(self, data): |
|
544 | 581 | if self._closed: |
|
545 |
raise ValueError( |
|
|
582 | raise ValueError("stream is closed") | |
|
546 | 583 | |
|
547 | 584 | total_write = 0 |
|
548 | 585 | |
|
549 | 586 | data_buffer = ffi.from_buffer(data) |
|
550 | 587 | |
|
551 |
in_buffer = ffi.new( |
|
|
588 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
552 | 589 | in_buffer.src = data_buffer |
|
553 | 590 | in_buffer.size = len(data_buffer) |
|
554 | 591 | in_buffer.pos = 0 |
@@ -557,12 +594,11 class ZstdCompressionWriter(object): | |||
|
557 | 594 | out_buffer.pos = 0 |
|
558 | 595 | |
|
559 | 596 | while in_buffer.pos < in_buffer.size: |
|
560 |
zresult = lib.ZSTD_compressStream2( |
|
|
561 | out_buffer, in_buffer, | |
|
562 | lib.ZSTD_e_continue) | |
|
597 | zresult = lib.ZSTD_compressStream2( | |
|
598 | self._compressor._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue | |
|
599 | ) | |
|
563 | 600 | if lib.ZSTD_isError(zresult): |
|
564 |
raise ZstdError( |
|
|
565 | _zstd_error(zresult)) | |
|
601 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
566 | 602 | |
|
567 | 603 | if out_buffer.pos: |
|
568 | 604 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
@@ -581,28 +617,27 class ZstdCompressionWriter(object): | |||
|
581 | 617 | elif flush_mode == FLUSH_FRAME: |
|
582 | 618 | flush = lib.ZSTD_e_end |
|
583 | 619 | else: |
|
584 |
raise ValueError( |
|
|
620 | raise ValueError("unknown flush_mode: %r" % flush_mode) | |
|
585 | 621 | |
|
586 | 622 | if self._closed: |
|
587 |
raise ValueError( |
|
|
623 | raise ValueError("stream is closed") | |
|
588 | 624 | |
|
589 | 625 | total_write = 0 |
|
590 | 626 | |
|
591 | 627 | out_buffer = self._out_buffer |
|
592 | 628 | out_buffer.pos = 0 |
|
593 | 629 | |
|
594 |
in_buffer = ffi.new( |
|
|
630 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
595 | 631 | in_buffer.src = ffi.NULL |
|
596 | 632 | in_buffer.size = 0 |
|
597 | 633 | in_buffer.pos = 0 |
|
598 | 634 | |
|
599 | 635 | while True: |
|
600 |
zresult = lib.ZSTD_compressStream2( |
|
|
601 | out_buffer, in_buffer, | |
|
602 | flush) | |
|
636 | zresult = lib.ZSTD_compressStream2( | |
|
637 | self._compressor._cctx, out_buffer, in_buffer, flush | |
|
638 | ) | |
|
603 | 639 | if lib.ZSTD_isError(zresult): |
|
604 |
raise ZstdError( |
|
|
605 | _zstd_error(zresult)) | |
|
640 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
606 | 641 | |
|
607 | 642 | if out_buffer.pos: |
|
608 | 643 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
@@ -622,10 +657,10 class ZstdCompressionWriter(object): | |||
|
622 | 657 | class ZstdCompressionObj(object): |
|
623 | 658 | def compress(self, data): |
|
624 | 659 | if self._finished: |
|
625 |
raise ZstdError( |
|
|
660 | raise ZstdError("cannot call compress() after compressor finished") | |
|
626 | 661 | |
|
627 | 662 | data_buffer = ffi.from_buffer(data) |
|
628 |
source = ffi.new( |
|
|
663 | source = ffi.new("ZSTD_inBuffer *") | |
|
629 | 664 | source.src = data_buffer |
|
630 | 665 | source.size = len(data_buffer) |
|
631 | 666 | source.pos = 0 |
@@ -633,26 +668,24 class ZstdCompressionObj(object): | |||
|
633 | 668 | chunks = [] |
|
634 | 669 | |
|
635 | 670 | while source.pos < len(data): |
|
636 |
zresult = lib.ZSTD_compressStream2( |
|
|
637 | self._out, | |
|
638 | source, | |
|
639 | lib.ZSTD_e_continue) | |
|
671 | zresult = lib.ZSTD_compressStream2( | |
|
672 | self._compressor._cctx, self._out, source, lib.ZSTD_e_continue | |
|
673 | ) | |
|
640 | 674 | if lib.ZSTD_isError(zresult): |
|
641 |
raise ZstdError( |
|
|
642 | _zstd_error(zresult)) | |
|
675 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
643 | 676 | |
|
644 | 677 | if self._out.pos: |
|
645 | 678 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) |
|
646 | 679 | self._out.pos = 0 |
|
647 | 680 | |
|
648 |
return b |
|
|
681 | return b"".join(chunks) | |
|
649 | 682 | |
|
650 | 683 | def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH): |
|
651 | 684 | if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK): |
|
652 |
raise ValueError( |
|
|
685 | raise ValueError("flush mode not recognized") | |
|
653 | 686 | |
|
654 | 687 | if self._finished: |
|
655 |
raise ZstdError( |
|
|
688 | raise ZstdError("compressor object already finished") | |
|
656 | 689 | |
|
657 | 690 | if flush_mode == COMPRESSOBJ_FLUSH_BLOCK: |
|
658 | 691 | z_flush_mode = lib.ZSTD_e_flush |
@@ -660,11 +693,11 class ZstdCompressionObj(object): | |||
|
660 | 693 | z_flush_mode = lib.ZSTD_e_end |
|
661 | 694 | self._finished = True |
|
662 | 695 | else: |
|
663 |
raise ZstdError( |
|
|
696 | raise ZstdError("unhandled flush mode") | |
|
664 | 697 | |
|
665 | 698 | assert self._out.pos == 0 |
|
666 | 699 | |
|
667 |
in_buffer = ffi.new( |
|
|
700 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
668 | 701 | in_buffer.src = ffi.NULL |
|
669 | 702 | in_buffer.size = 0 |
|
670 | 703 | in_buffer.pos = 0 |
@@ -672,13 +705,13 class ZstdCompressionObj(object): | |||
|
672 | 705 | chunks = [] |
|
673 | 706 | |
|
674 | 707 | while True: |
|
675 |
zresult = lib.ZSTD_compressStream2( |
|
|
676 | self._out, | |
|
677 | in_buffer, | |
|
678 | z_flush_mode) | |
|
708 | zresult = lib.ZSTD_compressStream2( | |
|
709 | self._compressor._cctx, self._out, in_buffer, z_flush_mode | |
|
710 | ) | |
|
679 | 711 | if lib.ZSTD_isError(zresult): |
|
680 |
raise ZstdError( |
|
|
681 |
|
|
|
712 | raise ZstdError( | |
|
713 | "error ending compression stream: %s" % _zstd_error(zresult) | |
|
714 | ) | |
|
682 | 715 | |
|
683 | 716 | if self._out.pos: |
|
684 | 717 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) |
@@ -687,19 +720,19 class ZstdCompressionObj(object): | |||
|
687 | 720 | if not zresult: |
|
688 | 721 | break |
|
689 | 722 | |
|
690 |
return b |
|
|
723 | return b"".join(chunks) | |
|
691 | 724 | |
|
692 | 725 | |
|
693 | 726 | class ZstdCompressionChunker(object): |
|
694 | 727 | def __init__(self, compressor, chunk_size): |
|
695 | 728 | self._compressor = compressor |
|
696 |
self._out = ffi.new( |
|
|
697 |
self._dst_buffer = ffi.new( |
|
|
729 | self._out = ffi.new("ZSTD_outBuffer *") | |
|
730 | self._dst_buffer = ffi.new("char[]", chunk_size) | |
|
698 | 731 | self._out.dst = self._dst_buffer |
|
699 | 732 | self._out.size = chunk_size |
|
700 | 733 | self._out.pos = 0 |
|
701 | 734 | |
|
702 |
self._in = ffi.new( |
|
|
735 | self._in = ffi.new("ZSTD_inBuffer *") | |
|
703 | 736 | self._in.src = ffi.NULL |
|
704 | 737 | self._in.size = 0 |
|
705 | 738 | self._in.pos = 0 |
@@ -707,11 +740,13 class ZstdCompressionChunker(object): | |||
|
707 | 740 | |
|
708 | 741 | def compress(self, data): |
|
709 | 742 | if self._finished: |
|
710 |
raise ZstdError( |
|
|
743 | raise ZstdError("cannot call compress() after compression finished") | |
|
711 | 744 | |
|
712 | 745 | if self._in.src != ffi.NULL: |
|
713 | raise ZstdError('cannot perform operation before consuming output ' | |
|
714 | 'from previous operation') | |
|
746 | raise ZstdError( | |
|
747 | "cannot perform operation before consuming output " | |
|
748 | "from previous operation" | |
|
749 | ) | |
|
715 | 750 | |
|
716 | 751 | data_buffer = ffi.from_buffer(data) |
|
717 | 752 | |
@@ -723,10 +758,9 class ZstdCompressionChunker(object): | |||
|
723 | 758 | self._in.pos = 0 |
|
724 | 759 | |
|
725 | 760 | while self._in.pos < self._in.size: |
|
726 |
zresult = lib.ZSTD_compressStream2( |
|
|
727 | self._out, | |
|
728 | self._in, | |
|
729 | lib.ZSTD_e_continue) | |
|
761 | zresult = lib.ZSTD_compressStream2( | |
|
762 | self._compressor._cctx, self._out, self._in, lib.ZSTD_e_continue | |
|
763 | ) | |
|
730 | 764 | |
|
731 | 765 | if self._in.pos == self._in.size: |
|
732 | 766 | self._in.src = ffi.NULL |
@@ -734,8 +768,7 class ZstdCompressionChunker(object): | |||
|
734 | 768 | self._in.pos = 0 |
|
735 | 769 | |
|
736 | 770 | if lib.ZSTD_isError(zresult): |
|
737 |
raise ZstdError( |
|
|
738 | _zstd_error(zresult)) | |
|
771 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
739 | 772 | |
|
740 | 773 | if self._out.pos == self._out.size: |
|
741 | 774 | yield ffi.buffer(self._out.dst, self._out.pos)[:] |
@@ -743,18 +776,19 class ZstdCompressionChunker(object): | |||
|
743 | 776 | |
|
744 | 777 | def flush(self): |
|
745 | 778 | if self._finished: |
|
746 |
raise ZstdError( |
|
|
779 | raise ZstdError("cannot call flush() after compression finished") | |
|
747 | 780 | |
|
748 | 781 | if self._in.src != ffi.NULL: |
|
749 | raise ZstdError('cannot call flush() before consuming output from ' | |
|
750 |
|
|
|
782 | raise ZstdError( | |
|
783 | "cannot call flush() before consuming output from " "previous operation" | |
|
784 | ) | |
|
751 | 785 | |
|
752 | 786 | while True: |
|
753 |
zresult = lib.ZSTD_compressStream2( |
|
|
754 | self._out, self._in, | |
|
755 | lib.ZSTD_e_flush) | |
|
787 | zresult = lib.ZSTD_compressStream2( | |
|
788 | self._compressor._cctx, self._out, self._in, lib.ZSTD_e_flush | |
|
789 | ) | |
|
756 | 790 | if lib.ZSTD_isError(zresult): |
|
757 |
raise ZstdError( |
|
|
791 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
758 | 792 | |
|
759 | 793 | if self._out.pos: |
|
760 | 794 | yield ffi.buffer(self._out.dst, self._out.pos)[:] |
@@ -765,18 +799,20 class ZstdCompressionChunker(object): | |||
|
765 | 799 | |
|
766 | 800 | def finish(self): |
|
767 | 801 | if self._finished: |
|
768 |
raise ZstdError( |
|
|
802 | raise ZstdError("cannot call finish() after compression finished") | |
|
769 | 803 | |
|
770 | 804 | if self._in.src != ffi.NULL: |
|
771 | raise ZstdError('cannot call finish() before consuming output from ' | |
|
772 | 'previous operation') | |
|
805 | raise ZstdError( | |
|
806 | "cannot call finish() before consuming output from " | |
|
807 | "previous operation" | |
|
808 | ) | |
|
773 | 809 | |
|
774 | 810 | while True: |
|
775 |
zresult = lib.ZSTD_compressStream2( |
|
|
776 | self._out, self._in, | |
|
777 | lib.ZSTD_e_end) | |
|
811 | zresult = lib.ZSTD_compressStream2( | |
|
812 | self._compressor._cctx, self._out, self._in, lib.ZSTD_e_end | |
|
813 | ) | |
|
778 | 814 | if lib.ZSTD_isError(zresult): |
|
779 |
raise ZstdError( |
|
|
815 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
780 | 816 | |
|
781 | 817 | if self._out.pos: |
|
782 | 818 | yield ffi.buffer(self._out.dst, self._out.pos)[:] |
@@ -798,13 +834,13 class ZstdCompressionReader(object): | |||
|
798 | 834 | self._finished_input = False |
|
799 | 835 | self._finished_output = False |
|
800 | 836 | |
|
801 |
self._in_buffer = ffi.new( |
|
|
837 | self._in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
802 | 838 | # Holds a ref so backing bytes in self._in_buffer stay alive. |
|
803 | 839 | self._source_buffer = None |
|
804 | 840 | |
|
805 | 841 | def __enter__(self): |
|
806 | 842 | if self._entered: |
|
807 |
raise ValueError( |
|
|
843 | raise ValueError("cannot __enter__ multiple times") | |
|
808 | 844 | |
|
809 | 845 | self._entered = True |
|
810 | 846 | return self |
@@ -833,10 +869,10 class ZstdCompressionReader(object): | |||
|
833 | 869 | raise io.UnsupportedOperation() |
|
834 | 870 | |
|
835 | 871 | def write(self, data): |
|
836 |
raise OSError( |
|
|
872 | raise OSError("stream is not writable") | |
|
837 | 873 | |
|
838 | 874 | def writelines(self, ignored): |
|
839 |
raise OSError( |
|
|
875 | raise OSError("stream is not writable") | |
|
840 | 876 | |
|
841 | 877 | def isatty(self): |
|
842 | 878 | return False |
@@ -865,7 +901,7 class ZstdCompressionReader(object): | |||
|
865 | 901 | |
|
866 | 902 | chunks.append(chunk) |
|
867 | 903 | |
|
868 |
return b |
|
|
904 | return b"".join(chunks) | |
|
869 | 905 | |
|
870 | 906 | def __iter__(self): |
|
871 | 907 | raise io.UnsupportedOperation() |
@@ -879,7 +915,7 class ZstdCompressionReader(object): | |||
|
879 | 915 | if self._finished_input: |
|
880 | 916 | return |
|
881 | 917 | |
|
882 |
if hasattr(self._source, |
|
|
918 | if hasattr(self._source, "read"): | |
|
883 | 919 | data = self._source.read(self._read_size) |
|
884 | 920 | |
|
885 | 921 | if not data: |
@@ -902,9 +938,9 class ZstdCompressionReader(object): | |||
|
902 | 938 | |
|
903 | 939 | old_pos = out_buffer.pos |
|
904 | 940 | |
|
905 |
zresult = lib.ZSTD_compressStream2( |
|
|
906 | out_buffer, self._in_buffer, | |
|
907 | lib.ZSTD_e_continue) | |
|
941 | zresult = lib.ZSTD_compressStream2( | |
|
942 | self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_continue | |
|
943 | ) | |
|
908 | 944 | |
|
909 | 945 | self._bytes_compressed += out_buffer.pos - old_pos |
|
910 | 946 | |
@@ -914,31 +950,30 class ZstdCompressionReader(object): | |||
|
914 | 950 | self._in_buffer.size = 0 |
|
915 | 951 | self._source_buffer = None |
|
916 | 952 | |
|
917 |
if not hasattr(self._source, |
|
|
953 | if not hasattr(self._source, "read"): | |
|
918 | 954 | self._finished_input = True |
|
919 | 955 | |
|
920 | 956 | if lib.ZSTD_isError(zresult): |
|
921 |
raise ZstdError( |
|
|
922 | _zstd_error(zresult)) | |
|
957 | raise ZstdError("zstd compress error: %s", _zstd_error(zresult)) | |
|
923 | 958 | |
|
924 | 959 | return out_buffer.pos and out_buffer.pos == out_buffer.size |
|
925 | 960 | |
|
926 | 961 | def read(self, size=-1): |
|
927 | 962 | if self._closed: |
|
928 |
raise ValueError( |
|
|
963 | raise ValueError("stream is closed") | |
|
929 | 964 | |
|
930 | 965 | if size < -1: |
|
931 |
raise ValueError( |
|
|
966 | raise ValueError("cannot read negative amounts less than -1") | |
|
932 | 967 | |
|
933 | 968 | if size == -1: |
|
934 | 969 | return self.readall() |
|
935 | 970 | |
|
936 | 971 | if self._finished_output or size == 0: |
|
937 |
return b |
|
|
972 | return b"" | |
|
938 | 973 | |
|
939 | 974 | # Need a dedicated ref to dest buffer otherwise it gets collected. |
|
940 |
dst_buffer = ffi.new( |
|
|
941 |
out_buffer = ffi.new( |
|
|
975 | dst_buffer = ffi.new("char[]", size) | |
|
976 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
942 | 977 | out_buffer.dst = dst_buffer |
|
943 | 978 | out_buffer.size = size |
|
944 | 979 | out_buffer.pos = 0 |
@@ -955,15 +990,14 class ZstdCompressionReader(object): | |||
|
955 | 990 | # EOF |
|
956 | 991 | old_pos = out_buffer.pos |
|
957 | 992 | |
|
958 |
zresult = lib.ZSTD_compressStream2( |
|
|
959 | out_buffer, self._in_buffer, | |
|
960 | lib.ZSTD_e_end) | |
|
993 | zresult = lib.ZSTD_compressStream2( | |
|
994 | self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end | |
|
995 | ) | |
|
961 | 996 | |
|
962 | 997 | self._bytes_compressed += out_buffer.pos - old_pos |
|
963 | 998 | |
|
964 | 999 | if lib.ZSTD_isError(zresult): |
|
965 |
raise ZstdError( |
|
|
966 | _zstd_error(zresult)) | |
|
1000 | raise ZstdError("error ending compression stream: %s", _zstd_error(zresult)) | |
|
967 | 1001 | |
|
968 | 1002 | if zresult == 0: |
|
969 | 1003 | self._finished_output = True |
@@ -972,20 +1006,20 class ZstdCompressionReader(object): | |||
|
972 | 1006 | |
|
973 | 1007 | def read1(self, size=-1): |
|
974 | 1008 | if self._closed: |
|
975 |
raise ValueError( |
|
|
1009 | raise ValueError("stream is closed") | |
|
976 | 1010 | |
|
977 | 1011 | if size < -1: |
|
978 |
raise ValueError( |
|
|
1012 | raise ValueError("cannot read negative amounts less than -1") | |
|
979 | 1013 | |
|
980 | 1014 | if self._finished_output or size == 0: |
|
981 |
return b |
|
|
1015 | return b"" | |
|
982 | 1016 | |
|
983 | 1017 | # -1 returns arbitrary number of bytes. |
|
984 | 1018 | if size == -1: |
|
985 | 1019 | size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
986 | 1020 | |
|
987 |
dst_buffer = ffi.new( |
|
|
988 |
out_buffer = ffi.new( |
|
|
1021 | dst_buffer = ffi.new("char[]", size) | |
|
1022 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
989 | 1023 | out_buffer.dst = dst_buffer |
|
990 | 1024 | out_buffer.size = size |
|
991 | 1025 | out_buffer.pos = 0 |
@@ -1020,15 +1054,16 class ZstdCompressionReader(object): | |||
|
1020 | 1054 | # EOF. |
|
1021 | 1055 | old_pos = out_buffer.pos |
|
1022 | 1056 | |
|
1023 |
zresult = lib.ZSTD_compressStream2( |
|
|
1024 | out_buffer, self._in_buffer, | |
|
1025 | lib.ZSTD_e_end) | |
|
1057 | zresult = lib.ZSTD_compressStream2( | |
|
1058 | self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end | |
|
1059 | ) | |
|
1026 | 1060 | |
|
1027 | 1061 | self._bytes_compressed += out_buffer.pos - old_pos |
|
1028 | 1062 | |
|
1029 | 1063 | if lib.ZSTD_isError(zresult): |
|
1030 | raise ZstdError('error ending compression stream: %s' % | |
|
1031 |
|
|
|
1064 | raise ZstdError( | |
|
1065 | "error ending compression stream: %s" % _zstd_error(zresult) | |
|
1066 | ) | |
|
1032 | 1067 | |
|
1033 | 1068 | if zresult == 0: |
|
1034 | 1069 | self._finished_output = True |
@@ -1037,15 +1072,15 class ZstdCompressionReader(object): | |||
|
1037 | 1072 | |
|
1038 | 1073 | def readinto(self, b): |
|
1039 | 1074 | if self._closed: |
|
1040 |
raise ValueError( |
|
|
1075 | raise ValueError("stream is closed") | |
|
1041 | 1076 | |
|
1042 | 1077 | if self._finished_output: |
|
1043 | 1078 | return 0 |
|
1044 | 1079 | |
|
1045 | 1080 | # TODO use writable=True once we require CFFI >= 1.12. |
|
1046 | 1081 | dest_buffer = ffi.from_buffer(b) |
|
1047 |
ffi.memmove(b, b |
|
|
1048 |
out_buffer = ffi.new( |
|
|
1082 | ffi.memmove(b, b"", 0) | |
|
1083 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1049 | 1084 | out_buffer.dst = dest_buffer |
|
1050 | 1085 | out_buffer.size = len(dest_buffer) |
|
1051 | 1086 | out_buffer.pos = 0 |
@@ -1060,15 +1095,14 class ZstdCompressionReader(object): | |||
|
1060 | 1095 | |
|
1061 | 1096 | # EOF. |
|
1062 | 1097 | old_pos = out_buffer.pos |
|
1063 |
zresult = lib.ZSTD_compressStream2( |
|
|
1064 | out_buffer, self._in_buffer, | |
|
1065 | lib.ZSTD_e_end) | |
|
1098 | zresult = lib.ZSTD_compressStream2( | |
|
1099 | self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end | |
|
1100 | ) | |
|
1066 | 1101 | |
|
1067 | 1102 | self._bytes_compressed += out_buffer.pos - old_pos |
|
1068 | 1103 | |
|
1069 | 1104 | if lib.ZSTD_isError(zresult): |
|
1070 |
raise ZstdError( |
|
|
1071 | _zstd_error(zresult)) | |
|
1105 | raise ZstdError("error ending compression stream: %s", _zstd_error(zresult)) | |
|
1072 | 1106 | |
|
1073 | 1107 | if zresult == 0: |
|
1074 | 1108 | self._finished_output = True |
@@ -1077,16 +1111,16 class ZstdCompressionReader(object): | |||
|
1077 | 1111 | |
|
1078 | 1112 | def readinto1(self, b): |
|
1079 | 1113 | if self._closed: |
|
1080 |
raise ValueError( |
|
|
1114 | raise ValueError("stream is closed") | |
|
1081 | 1115 | |
|
1082 | 1116 | if self._finished_output: |
|
1083 | 1117 | return 0 |
|
1084 | 1118 | |
|
1085 | 1119 | # TODO use writable=True once we require CFFI >= 1.12. |
|
1086 | 1120 | dest_buffer = ffi.from_buffer(b) |
|
1087 |
ffi.memmove(b, b |
|
|
1088 | ||
|
1089 |
out_buffer = ffi.new( |
|
|
1121 | ffi.memmove(b, b"", 0) | |
|
1122 | ||
|
1123 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1090 | 1124 | out_buffer.dst = dest_buffer |
|
1091 | 1125 | out_buffer.size = len(dest_buffer) |
|
1092 | 1126 | out_buffer.pos = 0 |
@@ -1107,15 +1141,16 class ZstdCompressionReader(object): | |||
|
1107 | 1141 | # EOF. |
|
1108 | 1142 | old_pos = out_buffer.pos |
|
1109 | 1143 | |
|
1110 |
zresult = lib.ZSTD_compressStream2( |
|
|
1111 | out_buffer, self._in_buffer, | |
|
1112 | lib.ZSTD_e_end) | |
|
1144 | zresult = lib.ZSTD_compressStream2( | |
|
1145 | self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end | |
|
1146 | ) | |
|
1113 | 1147 | |
|
1114 | 1148 | self._bytes_compressed += out_buffer.pos - old_pos |
|
1115 | 1149 | |
|
1116 | 1150 | if lib.ZSTD_isError(zresult): |
|
1117 | raise ZstdError('error ending compression stream: %s' % | |
|
1118 |
|
|
|
1151 | raise ZstdError( | |
|
1152 | "error ending compression stream: %s" % _zstd_error(zresult) | |
|
1153 | ) | |
|
1119 | 1154 | |
|
1120 | 1155 | if zresult == 0: |
|
1121 | 1156 | self._finished_output = True |
@@ -1124,29 +1159,35 class ZstdCompressionReader(object): | |||
|
1124 | 1159 | |
|
1125 | 1160 | |
|
1126 | 1161 | class ZstdCompressor(object): |
|
1127 | def __init__(self, level=3, dict_data=None, compression_params=None, | |
|
1128 | write_checksum=None, write_content_size=None, | |
|
1129 | write_dict_id=None, threads=0): | |
|
1162 | def __init__( | |
|
1163 | self, | |
|
1164 | level=3, | |
|
1165 | dict_data=None, | |
|
1166 | compression_params=None, | |
|
1167 | write_checksum=None, | |
|
1168 | write_content_size=None, | |
|
1169 | write_dict_id=None, | |
|
1170 | threads=0, | |
|
1171 | ): | |
|
1130 | 1172 | if level > lib.ZSTD_maxCLevel(): |
|
1131 |
raise ValueError( |
|
|
1173 | raise ValueError("level must be less than %d" % lib.ZSTD_maxCLevel()) | |
|
1132 | 1174 | |
|
1133 | 1175 | if threads < 0: |
|
1134 | 1176 | threads = _cpu_count() |
|
1135 | 1177 | |
|
1136 | 1178 | if compression_params and write_checksum is not None: |
|
1137 |
raise ValueError( |
|
|
1138 | 'write_checksum') | |
|
1179 | raise ValueError("cannot define compression_params and " "write_checksum") | |
|
1139 | 1180 | |
|
1140 | 1181 | if compression_params and write_content_size is not None: |
|
1141 | raise ValueError('cannot define compression_params and ' | |
|
1142 |
|
|
|
1182 | raise ValueError( | |
|
1183 | "cannot define compression_params and " "write_content_size" | |
|
1184 | ) | |
|
1143 | 1185 | |
|
1144 | 1186 | if compression_params and write_dict_id is not None: |
|
1145 |
raise ValueError( |
|
|
1146 | 'write_dict_id') | |
|
1187 | raise ValueError("cannot define compression_params and " "write_dict_id") | |
|
1147 | 1188 | |
|
1148 | 1189 | if compression_params and threads: |
|
1149 |
raise ValueError( |
|
|
1190 | raise ValueError("cannot define compression_params and threads") | |
|
1150 | 1191 | |
|
1151 | 1192 | if compression_params: |
|
1152 | 1193 | self._params = _make_cctx_params(compression_params) |
@@ -1160,27 +1201,24 class ZstdCompressor(object): | |||
|
1160 | 1201 | |
|
1161 | 1202 | self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams) |
|
1162 | 1203 | |
|
1163 | _set_compression_parameter(self._params, | |
|
1164 | lib.ZSTD_c_compressionLevel, | |
|
1165 | level) | |
|
1204 | _set_compression_parameter(self._params, lib.ZSTD_c_compressionLevel, level) | |
|
1166 | 1205 | |
|
1167 | 1206 | _set_compression_parameter( |
|
1168 | 1207 | self._params, |
|
1169 | 1208 | lib.ZSTD_c_contentSizeFlag, |
|
1170 |
write_content_size if write_content_size is not None else 1 |
|
|
1171 | ||
|
1172 | _set_compression_parameter(self._params, | |
|
1173 | lib.ZSTD_c_checksumFlag, | |
|
1174 | 1 if write_checksum else 0) | |
|
1175 | ||
|
1176 | _set_compression_parameter(self._params, | |
|
1177 | lib.ZSTD_c_dictIDFlag, | |
|
1178 |
|
|
|
1209 | write_content_size if write_content_size is not None else 1, | |
|
1210 | ) | |
|
1211 | ||
|
1212 | _set_compression_parameter( | |
|
1213 | self._params, lib.ZSTD_c_checksumFlag, 1 if write_checksum else 0 | |
|
1214 | ) | |
|
1215 | ||
|
1216 | _set_compression_parameter( | |
|
1217 | self._params, lib.ZSTD_c_dictIDFlag, 1 if write_dict_id else 0 | |
|
1218 | ) | |
|
1179 | 1219 | |
|
1180 | 1220 | if threads: |
|
1181 | _set_compression_parameter(self._params, | |
|
1182 | lib.ZSTD_c_nbWorkers, | |
|
1183 | threads) | |
|
1221 | _set_compression_parameter(self._params, lib.ZSTD_c_nbWorkers, threads) | |
|
1184 | 1222 | |
|
1185 | 1223 | cctx = lib.ZSTD_createCCtx() |
|
1186 | 1224 | if cctx == ffi.NULL: |
@@ -1194,15 +1232,16 class ZstdCompressor(object): | |||
|
1194 | 1232 | try: |
|
1195 | 1233 | self._setup_cctx() |
|
1196 | 1234 | finally: |
|
1197 |
self._cctx = ffi.gc( |
|
|
1198 |
|
|
|
1235 | self._cctx = ffi.gc( | |
|
1236 | cctx, lib.ZSTD_freeCCtx, size=lib.ZSTD_sizeof_CCtx(cctx) | |
|
1237 | ) | |
|
1199 | 1238 | |
|
1200 | 1239 | def _setup_cctx(self): |
|
1201 | zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx, | |
|
1202 | self._params) | |
|
1240 | zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx, self._params) | |
|
1203 | 1241 | if lib.ZSTD_isError(zresult): |
|
1204 | raise ZstdError('could not set compression parameters: %s' % | |
|
1205 |
|
|
|
1242 | raise ZstdError( | |
|
1243 | "could not set compression parameters: %s" % _zstd_error(zresult) | |
|
1244 | ) | |
|
1206 | 1245 | |
|
1207 | 1246 | dict_data = self._dict_data |
|
1208 | 1247 | |
@@ -1211,12 +1250,17 class ZstdCompressor(object): | |||
|
1211 | 1250 | zresult = lib.ZSTD_CCtx_refCDict(self._cctx, dict_data._cdict) |
|
1212 | 1251 | else: |
|
1213 | 1252 | zresult = lib.ZSTD_CCtx_loadDictionary_advanced( |
|
1214 |
self._cctx, |
|
|
1215 |
|
|
|
1253 | self._cctx, | |
|
1254 | dict_data.as_bytes(), | |
|
1255 | len(dict_data), | |
|
1256 | lib.ZSTD_dlm_byRef, | |
|
1257 | dict_data._dict_type, | |
|
1258 | ) | |
|
1216 | 1259 | |
|
1217 | 1260 | if lib.ZSTD_isError(zresult): |
|
1218 | raise ZstdError('could not load compression dictionary: %s' % | |
|
1219 |
|
|
|
1261 | raise ZstdError( | |
|
1262 | "could not load compression dictionary: %s" % _zstd_error(zresult) | |
|
1263 | ) | |
|
1220 | 1264 | |
|
1221 | 1265 | def memory_size(self): |
|
1222 | 1266 | return lib.ZSTD_sizeof_CCtx(self._cctx) |
@@ -1227,15 +1271,14 class ZstdCompressor(object): | |||
|
1227 | 1271 | data_buffer = ffi.from_buffer(data) |
|
1228 | 1272 | |
|
1229 | 1273 | dest_size = lib.ZSTD_compressBound(len(data_buffer)) |
|
1230 |
out = new_nonzero( |
|
|
1274 | out = new_nonzero("char[]", dest_size) | |
|
1231 | 1275 | |
|
1232 | 1276 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, len(data_buffer)) |
|
1233 | 1277 | if lib.ZSTD_isError(zresult): |
|
1234 |
raise ZstdError( |
|
|
1235 | _zstd_error(zresult)) | |
|
1236 | ||
|
1237 |
|
|
|
1238 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
|
1278 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
1279 | ||
|
1280 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1281 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
1239 | 1282 | |
|
1240 | 1283 | out_buffer.dst = out |
|
1241 | 1284 | out_buffer.size = dest_size |
@@ -1245,16 +1288,14 class ZstdCompressor(object): | |||
|
1245 | 1288 | in_buffer.size = len(data_buffer) |
|
1246 | 1289 | in_buffer.pos = 0 |
|
1247 | 1290 | |
|
1248 |
zresult = lib.ZSTD_compressStream2( |
|
|
1249 | out_buffer, | |
|
1250 | in_buffer, | |
|
1251 | lib.ZSTD_e_end) | |
|
1291 | zresult = lib.ZSTD_compressStream2( | |
|
1292 | self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end | |
|
1293 | ) | |
|
1252 | 1294 | |
|
1253 | 1295 | if lib.ZSTD_isError(zresult): |
|
1254 |
raise ZstdError( |
|
|
1255 | _zstd_error(zresult)) | |
|
1296 | raise ZstdError("cannot compress: %s" % _zstd_error(zresult)) | |
|
1256 | 1297 | elif zresult: |
|
1257 |
raise ZstdError( |
|
|
1298 | raise ZstdError("unexpected partial frame flush") | |
|
1258 | 1299 | |
|
1259 | 1300 | return ffi.buffer(out, out_buffer.pos)[:] |
|
1260 | 1301 | |
@@ -1266,12 +1307,11 class ZstdCompressor(object): | |||
|
1266 | 1307 | |
|
1267 | 1308 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) |
|
1268 | 1309 | if lib.ZSTD_isError(zresult): |
|
1269 |
raise ZstdError( |
|
|
1270 | _zstd_error(zresult)) | |
|
1310 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
1271 | 1311 | |
|
1272 | 1312 | cobj = ZstdCompressionObj() |
|
1273 |
cobj._out = ffi.new( |
|
|
1274 |
cobj._dst_buffer = ffi.new( |
|
|
1313 | cobj._out = ffi.new("ZSTD_outBuffer *") | |
|
1314 | cobj._dst_buffer = ffi.new("char[]", COMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
|
1275 | 1315 | cobj._out.dst = cobj._dst_buffer |
|
1276 | 1316 | cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1277 | 1317 | cobj._out.pos = 0 |
@@ -1288,19 +1328,23 class ZstdCompressor(object): | |||
|
1288 | 1328 | |
|
1289 | 1329 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) |
|
1290 | 1330 | if lib.ZSTD_isError(zresult): |
|
1291 |
raise ZstdError( |
|
|
1292 | _zstd_error(zresult)) | |
|
1331 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
1293 | 1332 | |
|
1294 | 1333 | return ZstdCompressionChunker(self, chunk_size=chunk_size) |
|
1295 | 1334 | |
|
1296 |
def copy_stream( |
|
|
1297 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
1298 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
|
1299 | ||
|
1300 | if not hasattr(ifh, 'read'): | |
|
1301 | raise ValueError('first argument must have a read() method') | |
|
1302 | if not hasattr(ofh, 'write'): | |
|
1303 | raise ValueError('second argument must have a write() method') | |
|
1335 | def copy_stream( | |
|
1336 | self, | |
|
1337 | ifh, | |
|
1338 | ofh, | |
|
1339 | size=-1, | |
|
1340 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
1341 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
1342 | ): | |
|
1343 | ||
|
1344 | if not hasattr(ifh, "read"): | |
|
1345 | raise ValueError("first argument must have a read() method") | |
|
1346 | if not hasattr(ofh, "write"): | |
|
1347 | raise ValueError("second argument must have a write() method") | |
|
1304 | 1348 | |
|
1305 | 1349 | lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) |
|
1306 | 1350 | |
@@ -1309,13 +1353,12 class ZstdCompressor(object): | |||
|
1309 | 1353 | |
|
1310 | 1354 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) |
|
1311 | 1355 | if lib.ZSTD_isError(zresult): |
|
1312 |
raise ZstdError( |
|
|
1313 | _zstd_error(zresult)) | |
|
1314 | ||
|
1315 |
|
|
|
1316 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
|
1317 | ||
|
1318 | dst_buffer = ffi.new('char[]', write_size) | |
|
1356 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
1357 | ||
|
1358 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
1359 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1360 | ||
|
1361 | dst_buffer = ffi.new("char[]", write_size) | |
|
1319 | 1362 | out_buffer.dst = dst_buffer |
|
1320 | 1363 | out_buffer.size = write_size |
|
1321 | 1364 | out_buffer.pos = 0 |
@@ -1334,13 +1377,11 class ZstdCompressor(object): | |||
|
1334 | 1377 | in_buffer.pos = 0 |
|
1335 | 1378 | |
|
1336 | 1379 | while in_buffer.pos < in_buffer.size: |
|
1337 |
zresult = lib.ZSTD_compressStream2( |
|
|
1338 | out_buffer, | |
|
1339 | in_buffer, | |
|
1340 | lib.ZSTD_e_continue) | |
|
1380 | zresult = lib.ZSTD_compressStream2( | |
|
1381 | self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue | |
|
1382 | ) | |
|
1341 | 1383 | if lib.ZSTD_isError(zresult): |
|
1342 |
raise ZstdError( |
|
|
1343 | _zstd_error(zresult)) | |
|
1384 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
1344 | 1385 | |
|
1345 | 1386 | if out_buffer.pos: |
|
1346 | 1387 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
@@ -1349,13 +1390,13 class ZstdCompressor(object): | |||
|
1349 | 1390 | |
|
1350 | 1391 | # We've finished reading. Flush the compressor. |
|
1351 | 1392 | while True: |
|
1352 |
zresult = lib.ZSTD_compressStream2( |
|
|
1353 | out_buffer, | |
|
1354 | in_buffer, | |
|
1355 | lib.ZSTD_e_end) | |
|
1393 | zresult = lib.ZSTD_compressStream2( | |
|
1394 | self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end | |
|
1395 | ) | |
|
1356 | 1396 | if lib.ZSTD_isError(zresult): |
|
1357 |
raise ZstdError( |
|
|
1358 |
|
|
|
1397 | raise ZstdError( | |
|
1398 | "error ending compression stream: %s" % _zstd_error(zresult) | |
|
1399 | ) | |
|
1359 | 1400 | |
|
1360 | 1401 | if out_buffer.pos: |
|
1361 | 1402 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
@@ -1367,8 +1408,9 class ZstdCompressor(object): | |||
|
1367 | 1408 | |
|
1368 | 1409 | return total_read, total_write |
|
1369 | 1410 | |
|
1370 |
def stream_reader( |
|
|
1371 |
|
|
|
1411 | def stream_reader( | |
|
1412 | self, source, size=-1, read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1413 | ): | |
|
1372 | 1414 | lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) |
|
1373 | 1415 | |
|
1374 | 1416 | try: |
@@ -1381,40 +1423,48 class ZstdCompressor(object): | |||
|
1381 | 1423 | |
|
1382 | 1424 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) |
|
1383 | 1425 | if lib.ZSTD_isError(zresult): |
|
1384 |
raise ZstdError( |
|
|
1385 | _zstd_error(zresult)) | |
|
1426 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
1386 | 1427 | |
|
1387 | 1428 | return ZstdCompressionReader(self, source, read_size) |
|
1388 | 1429 | |
|
1389 |
def stream_writer( |
|
|
1390 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
1391 | write_return_read=False): | |
|
1392 | ||
|
1393 | if not hasattr(writer, 'write'): | |
|
1394 | raise ValueError('must pass an object with a write() method') | |
|
1430 | def stream_writer( | |
|
1431 | self, | |
|
1432 | writer, | |
|
1433 | size=-1, | |
|
1434 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
1435 | write_return_read=False, | |
|
1436 | ): | |
|
1437 | ||
|
1438 | if not hasattr(writer, "write"): | |
|
1439 | raise ValueError("must pass an object with a write() method") | |
|
1395 | 1440 | |
|
1396 | 1441 | lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) |
|
1397 | 1442 | |
|
1398 | 1443 | if size < 0: |
|
1399 | 1444 | size = lib.ZSTD_CONTENTSIZE_UNKNOWN |
|
1400 | 1445 | |
|
1401 | return ZstdCompressionWriter(self, writer, size, write_size, | |
|
1402 | write_return_read) | |
|
1446 | return ZstdCompressionWriter(self, writer, size, write_size, write_return_read) | |
|
1403 | 1447 | |
|
1404 | 1448 | write_to = stream_writer |
|
1405 | 1449 | |
|
1406 |
def read_to_iter( |
|
|
1407 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
1408 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
|
1409 | if hasattr(reader, 'read'): | |
|
1450 | def read_to_iter( | |
|
1451 | self, | |
|
1452 | reader, | |
|
1453 | size=-1, | |
|
1454 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
1455 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
1456 | ): | |
|
1457 | if hasattr(reader, "read"): | |
|
1410 | 1458 | have_read = True |
|
1411 |
elif hasattr(reader, |
|
|
1459 | elif hasattr(reader, "__getitem__"): | |
|
1412 | 1460 | have_read = False |
|
1413 | 1461 | buffer_offset = 0 |
|
1414 | 1462 | size = len(reader) |
|
1415 | 1463 | else: |
|
1416 | raise ValueError('must pass an object with a read() method or ' | |
|
1417 | 'conforms to buffer protocol') | |
|
1464 | raise ValueError( | |
|
1465 | "must pass an object with a read() method or " | |
|
1466 | "conforms to buffer protocol" | |
|
1467 | ) | |
|
1418 | 1468 | |
|
1419 | 1469 | lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only) |
|
1420 | 1470 | |
@@ -1423,17 +1473,16 class ZstdCompressor(object): | |||
|
1423 | 1473 | |
|
1424 | 1474 | zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size) |
|
1425 | 1475 | if lib.ZSTD_isError(zresult): |
|
1426 |
raise ZstdError( |
|
|
1427 | _zstd_error(zresult)) | |
|
1428 | ||
|
1429 |
|
|
|
1430 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
|
1476 | raise ZstdError("error setting source size: %s" % _zstd_error(zresult)) | |
|
1477 | ||
|
1478 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
1479 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1431 | 1480 | |
|
1432 | 1481 | in_buffer.src = ffi.NULL |
|
1433 | 1482 | in_buffer.size = 0 |
|
1434 | 1483 | in_buffer.pos = 0 |
|
1435 | 1484 | |
|
1436 |
dst_buffer = ffi.new( |
|
|
1485 | dst_buffer = ffi.new("char[]", write_size) | |
|
1437 | 1486 | out_buffer.dst = dst_buffer |
|
1438 | 1487 | out_buffer.size = write_size |
|
1439 | 1488 | out_buffer.pos = 0 |
@@ -1449,7 +1498,7 class ZstdCompressor(object): | |||
|
1449 | 1498 | else: |
|
1450 | 1499 | remaining = len(reader) - buffer_offset |
|
1451 | 1500 | slice_size = min(remaining, read_size) |
|
1452 | read_result = reader[buffer_offset:buffer_offset + slice_size] | |
|
1501 | read_result = reader[buffer_offset : buffer_offset + slice_size] | |
|
1453 | 1502 | buffer_offset += slice_size |
|
1454 | 1503 | |
|
1455 | 1504 | # No new input data. Break out of the read loop. |
@@ -1464,11 +1513,11 class ZstdCompressor(object): | |||
|
1464 | 1513 | in_buffer.pos = 0 |
|
1465 | 1514 | |
|
1466 | 1515 | while in_buffer.pos < in_buffer.size: |
|
1467 |
zresult = lib.ZSTD_compressStream2( |
|
|
1468 | lib.ZSTD_e_continue) | |
|
1516 | zresult = lib.ZSTD_compressStream2( | |
|
1517 | self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue | |
|
1518 | ) | |
|
1469 | 1519 | if lib.ZSTD_isError(zresult): |
|
1470 |
raise ZstdError( |
|
|
1471 | _zstd_error(zresult)) | |
|
1520 | raise ZstdError("zstd compress error: %s" % _zstd_error(zresult)) | |
|
1472 | 1521 | |
|
1473 | 1522 | if out_buffer.pos: |
|
1474 | 1523 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] |
@@ -1484,13 +1533,13 class ZstdCompressor(object): | |||
|
1484 | 1533 | # remains. |
|
1485 | 1534 | while True: |
|
1486 | 1535 | assert out_buffer.pos == 0 |
|
1487 |
zresult = lib.ZSTD_compressStream2( |
|
|
1488 | out_buffer, | |
|
1489 | in_buffer, | |
|
1490 | lib.ZSTD_e_end) | |
|
1536 | zresult = lib.ZSTD_compressStream2( | |
|
1537 | self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end | |
|
1538 | ) | |
|
1491 | 1539 | if lib.ZSTD_isError(zresult): |
|
1492 |
raise ZstdError( |
|
|
1493 |
|
|
|
1540 | raise ZstdError( | |
|
1541 | "error ending compression stream: %s" % _zstd_error(zresult) | |
|
1542 | ) | |
|
1494 | 1543 | |
|
1495 | 1544 | if out_buffer.pos: |
|
1496 | 1545 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] |
@@ -1522,7 +1571,7 def frame_content_size(data): | |||
|
1522 | 1571 | size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer)) |
|
1523 | 1572 | |
|
1524 | 1573 | if size == lib.ZSTD_CONTENTSIZE_ERROR: |
|
1525 |
raise ZstdError( |
|
|
1574 | raise ZstdError("error when determining content size") | |
|
1526 | 1575 | elif size == lib.ZSTD_CONTENTSIZE_UNKNOWN: |
|
1527 | 1576 | return -1 |
|
1528 | 1577 | else: |
@@ -1534,24 +1583,23 def frame_header_size(data): | |||
|
1534 | 1583 | |
|
1535 | 1584 | zresult = lib.ZSTD_frameHeaderSize(data_buffer, len(data_buffer)) |
|
1536 | 1585 | if lib.ZSTD_isError(zresult): |
|
1537 | raise ZstdError('could not determine frame header size: %s' % | |
|
1538 |
|
|
|
1586 | raise ZstdError( | |
|
1587 | "could not determine frame header size: %s" % _zstd_error(zresult) | |
|
1588 | ) | |
|
1539 | 1589 | |
|
1540 | 1590 | return zresult |
|
1541 | 1591 | |
|
1542 | 1592 | |
|
1543 | 1593 | def get_frame_parameters(data): |
|
1544 |
params = ffi.new( |
|
|
1594 | params = ffi.new("ZSTD_frameHeader *") | |
|
1545 | 1595 | |
|
1546 | 1596 | data_buffer = ffi.from_buffer(data) |
|
1547 | 1597 | zresult = lib.ZSTD_getFrameHeader(params, data_buffer, len(data_buffer)) |
|
1548 | 1598 | if lib.ZSTD_isError(zresult): |
|
1549 |
raise ZstdError( |
|
|
1550 | _zstd_error(zresult)) | |
|
1599 | raise ZstdError("cannot get frame parameters: %s" % _zstd_error(zresult)) | |
|
1551 | 1600 | |
|
1552 | 1601 | if zresult: |
|
1553 |
raise ZstdError( |
|
|
1554 | zresult) | |
|
1602 | raise ZstdError("not enough data for frame parameters; need %d bytes" % zresult) | |
|
1555 | 1603 | |
|
1556 | 1604 | return FrameParameters(params[0]) |
|
1557 | 1605 | |
@@ -1563,10 +1611,10 class ZstdCompressionDict(object): | |||
|
1563 | 1611 | self.k = k |
|
1564 | 1612 | self.d = d |
|
1565 | 1613 | |
|
1566 | if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT, | |
|
1567 | DICT_TYPE_FULLDICT): | |
|
1568 |
|
|
|
1569 | 'DICT_TYPE_* constants') | |
|
1614 | if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT, DICT_TYPE_FULLDICT): | |
|
1615 | raise ValueError( | |
|
1616 | "invalid dictionary load mode: %d; must use " "DICT_TYPE_* constants" | |
|
1617 | ) | |
|
1570 | 1618 | |
|
1571 | 1619 | self._dict_type = dict_type |
|
1572 | 1620 | self._cdict = None |
@@ -1582,16 +1630,15 class ZstdCompressionDict(object): | |||
|
1582 | 1630 | |
|
1583 | 1631 | def precompute_compress(self, level=0, compression_params=None): |
|
1584 | 1632 | if level and compression_params: |
|
1585 |
raise ValueError( |
|
|
1586 | 'compression_params') | |
|
1633 | raise ValueError("must only specify one of level or " "compression_params") | |
|
1587 | 1634 | |
|
1588 | 1635 | if not level and not compression_params: |
|
1589 |
raise ValueError( |
|
|
1636 | raise ValueError("must specify one of level or compression_params") | |
|
1590 | 1637 | |
|
1591 | 1638 | if level: |
|
1592 | 1639 | cparams = lib.ZSTD_getCParams(level, 0, len(self._data)) |
|
1593 | 1640 | else: |
|
1594 |
cparams = ffi.new( |
|
|
1641 | cparams = ffi.new("ZSTD_compressionParameters") | |
|
1595 | 1642 | cparams.chainLog = compression_params.chain_log |
|
1596 | 1643 | cparams.hashLog = compression_params.hash_log |
|
1597 | 1644 | cparams.minMatch = compression_params.min_match |
@@ -1600,59 +1647,75 class ZstdCompressionDict(object): | |||
|
1600 | 1647 | cparams.targetLength = compression_params.target_length |
|
1601 | 1648 | cparams.windowLog = compression_params.window_log |
|
1602 | 1649 | |
|
1603 |
cdict = lib.ZSTD_createCDict_advanced( |
|
|
1604 | lib.ZSTD_dlm_byRef, | |
|
1605 | self._dict_type, | |
|
1606 | cparams, | |
|
1607 | lib.ZSTD_defaultCMem) | |
|
1650 | cdict = lib.ZSTD_createCDict_advanced( | |
|
1651 | self._data, | |
|
1652 | len(self._data), | |
|
1653 | lib.ZSTD_dlm_byRef, | |
|
1654 | self._dict_type, | |
|
1655 | cparams, | |
|
1656 | lib.ZSTD_defaultCMem, | |
|
1657 | ) | |
|
1608 | 1658 | if cdict == ffi.NULL: |
|
1609 |
raise ZstdError( |
|
|
1610 | ||
|
1611 |
self._cdict = ffi.gc( |
|
|
1612 |
|
|
|
1659 | raise ZstdError("unable to precompute dictionary") | |
|
1660 | ||
|
1661 | self._cdict = ffi.gc( | |
|
1662 | cdict, lib.ZSTD_freeCDict, size=lib.ZSTD_sizeof_CDict(cdict) | |
|
1663 | ) | |
|
1613 | 1664 | |
|
1614 | 1665 | @property |
|
1615 | 1666 | def _ddict(self): |
|
1616 |
ddict = lib.ZSTD_createDDict_advanced( |
|
|
1617 | lib.ZSTD_dlm_byRef, | |
|
1618 | self._dict_type, | |
|
1619 | lib.ZSTD_defaultCMem) | |
|
1667 | ddict = lib.ZSTD_createDDict_advanced( | |
|
1668 | self._data, | |
|
1669 | len(self._data), | |
|
1670 | lib.ZSTD_dlm_byRef, | |
|
1671 | self._dict_type, | |
|
1672 | lib.ZSTD_defaultCMem, | |
|
1673 | ) | |
|
1620 | 1674 | |
|
1621 | 1675 | if ddict == ffi.NULL: |
|
1622 |
raise ZstdError( |
|
|
1623 | ||
|
1624 | ddict = ffi.gc(ddict, lib.ZSTD_freeDDict, | |
|
1625 | size=lib.ZSTD_sizeof_DDict(ddict)) | |
|
1626 | self.__dict__['_ddict'] = ddict | |
|
1676 | raise ZstdError("could not create decompression dict") | |
|
1677 | ||
|
1678 | ddict = ffi.gc(ddict, lib.ZSTD_freeDDict, size=lib.ZSTD_sizeof_DDict(ddict)) | |
|
1679 | self.__dict__["_ddict"] = ddict | |
|
1627 | 1680 | |
|
1628 | 1681 | return ddict |
|
1629 | 1682 | |
|
1630 | def train_dictionary(dict_size, samples, k=0, d=0, notifications=0, dict_id=0, | |
|
1631 | level=0, steps=0, threads=0): | |
|
1683 | ||
|
1684 | def train_dictionary( | |
|
1685 | dict_size, | |
|
1686 | samples, | |
|
1687 | k=0, | |
|
1688 | d=0, | |
|
1689 | notifications=0, | |
|
1690 | dict_id=0, | |
|
1691 | level=0, | |
|
1692 | steps=0, | |
|
1693 | threads=0, | |
|
1694 | ): | |
|
1632 | 1695 | if not isinstance(samples, list): |
|
1633 |
raise TypeError( |
|
|
1696 | raise TypeError("samples must be a list") | |
|
1634 | 1697 | |
|
1635 | 1698 | if threads < 0: |
|
1636 | 1699 | threads = _cpu_count() |
|
1637 | 1700 | |
|
1638 | 1701 | total_size = sum(map(len, samples)) |
|
1639 | 1702 | |
|
1640 |
samples_buffer = new_nonzero( |
|
|
1641 |
sample_sizes = new_nonzero( |
|
|
1703 | samples_buffer = new_nonzero("char[]", total_size) | |
|
1704 | sample_sizes = new_nonzero("size_t[]", len(samples)) | |
|
1642 | 1705 | |
|
1643 | 1706 | offset = 0 |
|
1644 | 1707 | for i, sample in enumerate(samples): |
|
1645 | 1708 | if not isinstance(sample, bytes_type): |
|
1646 |
raise ValueError( |
|
|
1709 | raise ValueError("samples must be bytes") | |
|
1647 | 1710 | |
|
1648 | 1711 | l = len(sample) |
|
1649 | 1712 | ffi.memmove(samples_buffer + offset, sample, l) |
|
1650 | 1713 | offset += l |
|
1651 | 1714 | sample_sizes[i] = l |
|
1652 | 1715 | |
|
1653 |
dict_data = new_nonzero( |
|
|
1654 | ||
|
1655 |
dparams = ffi.new( |
|
|
1716 | dict_data = new_nonzero("char[]", dict_size) | |
|
1717 | ||
|
1718 | dparams = ffi.new("ZDICT_cover_params_t *")[0] | |
|
1656 | 1719 | dparams.k = k |
|
1657 | 1720 | dparams.d = d |
|
1658 | 1721 | dparams.steps = steps |
@@ -1661,34 +1724,51 def train_dictionary(dict_size, samples, | |||
|
1661 | 1724 | dparams.zParams.dictID = dict_id |
|
1662 | 1725 | dparams.zParams.compressionLevel = level |
|
1663 | 1726 | |
|
1664 | if (not dparams.k and not dparams.d and not dparams.steps | |
|
1665 | and not dparams.nbThreads and not dparams.zParams.notificationLevel | |
|
1727 | if ( | |
|
1728 | not dparams.k | |
|
1729 | and not dparams.d | |
|
1730 | and not dparams.steps | |
|
1731 | and not dparams.nbThreads | |
|
1732 | and not dparams.zParams.notificationLevel | |
|
1666 | 1733 | and not dparams.zParams.dictID |
|
1667 |
and not dparams.zParams.compressionLevel |
|
|
1734 | and not dparams.zParams.compressionLevel | |
|
1735 | ): | |
|
1668 | 1736 | zresult = lib.ZDICT_trainFromBuffer( |
|
1669 |
ffi.addressof(dict_data), |
|
|
1737 | ffi.addressof(dict_data), | |
|
1738 | dict_size, | |
|
1670 | 1739 | ffi.addressof(samples_buffer), |
|
1671 |
ffi.addressof(sample_sizes, 0), |
|
|
1740 | ffi.addressof(sample_sizes, 0), | |
|
1741 | len(samples), | |
|
1742 | ) | |
|
1672 | 1743 | elif dparams.steps or dparams.nbThreads: |
|
1673 | 1744 | zresult = lib.ZDICT_optimizeTrainFromBuffer_cover( |
|
1674 |
ffi.addressof(dict_data), |
|
|
1745 | ffi.addressof(dict_data), | |
|
1746 | dict_size, | |
|
1675 | 1747 | ffi.addressof(samples_buffer), |
|
1676 |
ffi.addressof(sample_sizes, 0), |
|
|
1677 | ffi.addressof(dparams)) | |
|
1748 | ffi.addressof(sample_sizes, 0), | |
|
1749 | len(samples), | |
|
1750 | ffi.addressof(dparams), | |
|
1751 | ) | |
|
1678 | 1752 | else: |
|
1679 | 1753 | zresult = lib.ZDICT_trainFromBuffer_cover( |
|
1680 |
ffi.addressof(dict_data), |
|
|
1754 | ffi.addressof(dict_data), | |
|
1755 | dict_size, | |
|
1681 | 1756 | ffi.addressof(samples_buffer), |
|
1682 |
ffi.addressof(sample_sizes, 0), |
|
|
1683 |
|
|
|
1757 | ffi.addressof(sample_sizes, 0), | |
|
1758 | len(samples), | |
|
1759 | dparams, | |
|
1760 | ) | |
|
1684 | 1761 | |
|
1685 | 1762 | if lib.ZDICT_isError(zresult): |
|
1686 |
msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode( |
|
|
1687 |
raise ZstdError( |
|
|
1688 | ||
|
1689 |
return ZstdCompressionDict( |
|
|
1690 | dict_type=DICT_TYPE_FULLDICT, | |
|
1691 | k=dparams.k, d=dparams.d) | |
|
1763 | msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode("utf-8") | |
|
1764 | raise ZstdError("cannot train dict: %s" % msg) | |
|
1765 | ||
|
1766 | return ZstdCompressionDict( | |
|
1767 | ffi.buffer(dict_data, zresult)[:], | |
|
1768 | dict_type=DICT_TYPE_FULLDICT, | |
|
1769 | k=dparams.k, | |
|
1770 | d=dparams.d, | |
|
1771 | ) | |
|
1692 | 1772 | |
|
1693 | 1773 | |
|
1694 | 1774 | class ZstdDecompressionObj(object): |
@@ -1699,21 +1779,21 class ZstdDecompressionObj(object): | |||
|
1699 | 1779 | |
|
1700 | 1780 | def decompress(self, data): |
|
1701 | 1781 | if self._finished: |
|
1702 |
raise ZstdError( |
|
|
1703 | ||
|
1704 |
in_buffer = ffi.new( |
|
|
1705 |
out_buffer = ffi.new( |
|
|
1782 | raise ZstdError("cannot use a decompressobj multiple times") | |
|
1783 | ||
|
1784 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
1785 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1706 | 1786 | |
|
1707 | 1787 | data_buffer = ffi.from_buffer(data) |
|
1708 | 1788 | |
|
1709 | 1789 | if len(data_buffer) == 0: |
|
1710 |
return b |
|
|
1790 | return b"" | |
|
1711 | 1791 | |
|
1712 | 1792 | in_buffer.src = data_buffer |
|
1713 | 1793 | in_buffer.size = len(data_buffer) |
|
1714 | 1794 | in_buffer.pos = 0 |
|
1715 | 1795 | |
|
1716 |
dst_buffer = ffi.new( |
|
|
1796 | dst_buffer = ffi.new("char[]", self._write_size) | |
|
1717 | 1797 | out_buffer.dst = dst_buffer |
|
1718 | 1798 | out_buffer.size = len(dst_buffer) |
|
1719 | 1799 | out_buffer.pos = 0 |
@@ -1721,11 +1801,11 class ZstdDecompressionObj(object): | |||
|
1721 | 1801 | chunks = [] |
|
1722 | 1802 | |
|
1723 | 1803 | while True: |
|
1724 |
zresult = lib.ZSTD_decompressStream( |
|
|
1725 | out_buffer, in_buffer) | |
|
1804 | zresult = lib.ZSTD_decompressStream( | |
|
1805 | self._decompressor._dctx, out_buffer, in_buffer | |
|
1806 | ) | |
|
1726 | 1807 | if lib.ZSTD_isError(zresult): |
|
1727 |
raise ZstdError( |
|
|
1728 | _zstd_error(zresult)) | |
|
1808 | raise ZstdError("zstd decompressor error: %s" % _zstd_error(zresult)) | |
|
1729 | 1809 | |
|
1730 | 1810 | if zresult == 0: |
|
1731 | 1811 | self._finished = True |
@@ -1734,13 +1814,14 class ZstdDecompressionObj(object): | |||
|
1734 | 1814 | if out_buffer.pos: |
|
1735 | 1815 | chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
|
1736 | 1816 | |
|
1737 |
if |
|
|
1738 |
|
|
|
1817 | if zresult == 0 or ( | |
|
1818 | in_buffer.pos == in_buffer.size and out_buffer.pos == 0 | |
|
1819 | ): | |
|
1739 | 1820 | break |
|
1740 | 1821 | |
|
1741 | 1822 | out_buffer.pos = 0 |
|
1742 | 1823 | |
|
1743 |
return b |
|
|
1824 | return b"".join(chunks) | |
|
1744 | 1825 | |
|
1745 | 1826 | def flush(self, length=0): |
|
1746 | 1827 | pass |
@@ -1757,13 +1838,13 class ZstdDecompressionReader(object): | |||
|
1757 | 1838 | self._bytes_decompressed = 0 |
|
1758 | 1839 | self._finished_input = False |
|
1759 | 1840 | self._finished_output = False |
|
1760 |
self._in_buffer = ffi.new( |
|
|
1841 | self._in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
1761 | 1842 | # Holds a ref to self._in_buffer.src. |
|
1762 | 1843 | self._source_buffer = None |
|
1763 | 1844 | |
|
1764 | 1845 | def __enter__(self): |
|
1765 | 1846 | if self._entered: |
|
1766 |
raise ValueError( |
|
|
1847 | raise ValueError("cannot __enter__ multiple times") | |
|
1767 | 1848 | |
|
1768 | 1849 | self._entered = True |
|
1769 | 1850 | return self |
@@ -1824,7 +1905,7 class ZstdDecompressionReader(object): | |||
|
1824 | 1905 | |
|
1825 | 1906 | chunks.append(chunk) |
|
1826 | 1907 | |
|
1827 |
return b |
|
|
1908 | return b"".join(chunks) | |
|
1828 | 1909 | |
|
1829 | 1910 | def __iter__(self): |
|
1830 | 1911 | raise io.UnsupportedOperation() |
@@ -1844,7 +1925,7 class ZstdDecompressionReader(object): | |||
|
1844 | 1925 | return |
|
1845 | 1926 | |
|
1846 | 1927 | # Else populate the input buffer from our source. |
|
1847 |
if hasattr(self._source, |
|
|
1928 | if hasattr(self._source, "read"): | |
|
1848 | 1929 | data = self._source.read(self._read_size) |
|
1849 | 1930 | |
|
1850 | 1931 | if not data: |
@@ -1866,8 +1947,9 class ZstdDecompressionReader(object): | |||
|
1866 | 1947 | |
|
1867 | 1948 | Returns True if data in output buffer should be emitted. |
|
1868 | 1949 | """ |
|
1869 |
zresult = lib.ZSTD_decompressStream( |
|
|
1870 | out_buffer, self._in_buffer) | |
|
1950 | zresult = lib.ZSTD_decompressStream( | |
|
1951 | self._decompressor._dctx, out_buffer, self._in_buffer | |
|
1952 | ) | |
|
1871 | 1953 | |
|
1872 | 1954 | if self._in_buffer.pos == self._in_buffer.size: |
|
1873 | 1955 | self._in_buffer.src = ffi.NULL |
@@ -1875,38 +1957,39 class ZstdDecompressionReader(object): | |||
|
1875 | 1957 | self._in_buffer.size = 0 |
|
1876 | 1958 | self._source_buffer = None |
|
1877 | 1959 | |
|
1878 |
if not hasattr(self._source, |
|
|
1960 | if not hasattr(self._source, "read"): | |
|
1879 | 1961 | self._finished_input = True |
|
1880 | 1962 | |
|
1881 | 1963 | if lib.ZSTD_isError(zresult): |
|
1882 |
raise ZstdError( |
|
|
1883 | _zstd_error(zresult)) | |
|
1964 | raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult)) | |
|
1884 | 1965 | |
|
1885 | 1966 | # Emit data if there is data AND either: |
|
1886 | 1967 | # a) output buffer is full (read amount is satisfied) |
|
1887 | 1968 | # b) we're at end of a frame and not in frame spanning mode |
|
1888 |
return |
|
|
1889 |
|
|
|
1890 | zresult == 0 and not self._read_across_frames)) | |
|
1969 | return out_buffer.pos and ( | |
|
1970 | out_buffer.pos == out_buffer.size | |
|
1971 | or zresult == 0 | |
|
1972 | and not self._read_across_frames | |
|
1973 | ) | |
|
1891 | 1974 | |
|
1892 | 1975 | def read(self, size=-1): |
|
1893 | 1976 | if self._closed: |
|
1894 |
raise ValueError( |
|
|
1977 | raise ValueError("stream is closed") | |
|
1895 | 1978 | |
|
1896 | 1979 | if size < -1: |
|
1897 |
raise ValueError( |
|
|
1980 | raise ValueError("cannot read negative amounts less than -1") | |
|
1898 | 1981 | |
|
1899 | 1982 | if size == -1: |
|
1900 | 1983 | # This is recursive. But it gets the job done. |
|
1901 | 1984 | return self.readall() |
|
1902 | 1985 | |
|
1903 | 1986 | if self._finished_output or size == 0: |
|
1904 |
return b |
|
|
1987 | return b"" | |
|
1905 | 1988 | |
|
1906 | 1989 | # We /could/ call into readinto() here. But that introduces more |
|
1907 | 1990 | # overhead. |
|
1908 |
dst_buffer = ffi.new( |
|
|
1909 |
out_buffer = ffi.new( |
|
|
1991 | dst_buffer = ffi.new("char[]", size) | |
|
1992 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1910 | 1993 | out_buffer.dst = dst_buffer |
|
1911 | 1994 | out_buffer.size = size |
|
1912 | 1995 | out_buffer.pos = 0 |
@@ -1927,15 +2010,15 class ZstdDecompressionReader(object): | |||
|
1927 | 2010 | |
|
1928 | 2011 | def readinto(self, b): |
|
1929 | 2012 | if self._closed: |
|
1930 |
raise ValueError( |
|
|
2013 | raise ValueError("stream is closed") | |
|
1931 | 2014 | |
|
1932 | 2015 | if self._finished_output: |
|
1933 | 2016 | return 0 |
|
1934 | 2017 | |
|
1935 | 2018 | # TODO use writable=True once we require CFFI >= 1.12. |
|
1936 | 2019 | dest_buffer = ffi.from_buffer(b) |
|
1937 |
ffi.memmove(b, b |
|
|
1938 |
out_buffer = ffi.new( |
|
|
2020 | ffi.memmove(b, b"", 0) | |
|
2021 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1939 | 2022 | out_buffer.dst = dest_buffer |
|
1940 | 2023 | out_buffer.size = len(dest_buffer) |
|
1941 | 2024 | out_buffer.pos = 0 |
@@ -1956,20 +2039,20 class ZstdDecompressionReader(object): | |||
|
1956 | 2039 | |
|
1957 | 2040 | def read1(self, size=-1): |
|
1958 | 2041 | if self._closed: |
|
1959 |
raise ValueError( |
|
|
2042 | raise ValueError("stream is closed") | |
|
1960 | 2043 | |
|
1961 | 2044 | if size < -1: |
|
1962 |
raise ValueError( |
|
|
2045 | raise ValueError("cannot read negative amounts less than -1") | |
|
1963 | 2046 | |
|
1964 | 2047 | if self._finished_output or size == 0: |
|
1965 |
return b |
|
|
2048 | return b"" | |
|
1966 | 2049 | |
|
1967 | 2050 | # -1 returns arbitrary number of bytes. |
|
1968 | 2051 | if size == -1: |
|
1969 | 2052 | size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1970 | 2053 | |
|
1971 |
dst_buffer = ffi.new( |
|
|
1972 |
out_buffer = ffi.new( |
|
|
2054 | dst_buffer = ffi.new("char[]", size) | |
|
2055 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
1973 | 2056 | out_buffer.dst = dst_buffer |
|
1974 | 2057 | out_buffer.size = size |
|
1975 | 2058 | out_buffer.pos = 0 |
@@ -1990,16 +2073,16 class ZstdDecompressionReader(object): | |||
|
1990 | 2073 | |
|
1991 | 2074 | def readinto1(self, b): |
|
1992 | 2075 | if self._closed: |
|
1993 |
raise ValueError( |
|
|
2076 | raise ValueError("stream is closed") | |
|
1994 | 2077 | |
|
1995 | 2078 | if self._finished_output: |
|
1996 | 2079 | return 0 |
|
1997 | 2080 | |
|
1998 | 2081 | # TODO use writable=True once we require CFFI >= 1.12. |
|
1999 | 2082 | dest_buffer = ffi.from_buffer(b) |
|
2000 |
ffi.memmove(b, b |
|
|
2001 | ||
|
2002 |
out_buffer = ffi.new( |
|
|
2083 | ffi.memmove(b, b"", 0) | |
|
2084 | ||
|
2085 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
2003 | 2086 | out_buffer.dst = dest_buffer |
|
2004 | 2087 | out_buffer.size = len(dest_buffer) |
|
2005 | 2088 | out_buffer.pos = 0 |
@@ -2016,33 +2099,31 class ZstdDecompressionReader(object): | |||
|
2016 | 2099 | |
|
2017 | 2100 | def seek(self, pos, whence=os.SEEK_SET): |
|
2018 | 2101 | if self._closed: |
|
2019 |
raise ValueError( |
|
|
2102 | raise ValueError("stream is closed") | |
|
2020 | 2103 | |
|
2021 | 2104 | read_amount = 0 |
|
2022 | 2105 | |
|
2023 | 2106 | if whence == os.SEEK_SET: |
|
2024 | 2107 | if pos < 0: |
|
2025 |
raise ValueError( |
|
|
2108 | raise ValueError("cannot seek to negative position with SEEK_SET") | |
|
2026 | 2109 | |
|
2027 | 2110 | if pos < self._bytes_decompressed: |
|
2028 |
raise ValueError( |
|
|
2029 | 'backwards') | |
|
2111 | raise ValueError("cannot seek zstd decompression stream " "backwards") | |
|
2030 | 2112 | |
|
2031 | 2113 | read_amount = pos - self._bytes_decompressed |
|
2032 | 2114 | |
|
2033 | 2115 | elif whence == os.SEEK_CUR: |
|
2034 | 2116 | if pos < 0: |
|
2035 |
raise ValueError( |
|
|
2036 | 'backwards') | |
|
2117 | raise ValueError("cannot seek zstd decompression stream " "backwards") | |
|
2037 | 2118 | |
|
2038 | 2119 | read_amount = pos |
|
2039 | 2120 | elif whence == os.SEEK_END: |
|
2040 | raise ValueError('zstd decompression streams cannot be seeked ' | |
|
2041 | 'with SEEK_END') | |
|
2121 | raise ValueError( | |
|
2122 | "zstd decompression streams cannot be seeked " "with SEEK_END" | |
|
2123 | ) | |
|
2042 | 2124 | |
|
2043 | 2125 | while read_amount: |
|
2044 | result = self.read(min(read_amount, | |
|
2045 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
2126 | result = self.read(min(read_amount, DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
2046 | 2127 | |
|
2047 | 2128 | if not result: |
|
2048 | 2129 | break |
@@ -2051,6 +2132,7 class ZstdDecompressionReader(object): | |||
|
2051 | 2132 | |
|
2052 | 2133 | return self._bytes_decompressed |
|
2053 | 2134 | |
|
2135 | ||
|
2054 | 2136 | class ZstdDecompressionWriter(object): |
|
2055 | 2137 | def __init__(self, decompressor, writer, write_size, write_return_read): |
|
2056 | 2138 | decompressor._ensure_dctx() |
@@ -2064,10 +2146,10 class ZstdDecompressionWriter(object): | |||
|
2064 | 2146 | |
|
2065 | 2147 | def __enter__(self): |
|
2066 | 2148 | if self._closed: |
|
2067 |
raise ValueError( |
|
|
2149 | raise ValueError("stream is closed") | |
|
2068 | 2150 | |
|
2069 | 2151 | if self._entered: |
|
2070 |
raise ZstdError( |
|
|
2152 | raise ZstdError("cannot __enter__ multiple times") | |
|
2071 | 2153 | |
|
2072 | 2154 | self._entered = True |
|
2073 | 2155 | |
@@ -2089,7 +2171,7 class ZstdDecompressionWriter(object): | |||
|
2089 | 2171 | finally: |
|
2090 | 2172 | self._closed = True |
|
2091 | 2173 | |
|
2092 |
f = getattr(self._writer, |
|
|
2174 | f = getattr(self._writer, "close", None) | |
|
2093 | 2175 | if f: |
|
2094 | 2176 | f() |
|
2095 | 2177 | |
@@ -2098,17 +2180,17 class ZstdDecompressionWriter(object): | |||
|
2098 | 2180 | return self._closed |
|
2099 | 2181 | |
|
2100 | 2182 | def fileno(self): |
|
2101 |
f = getattr(self._writer, |
|
|
2183 | f = getattr(self._writer, "fileno", None) | |
|
2102 | 2184 | if f: |
|
2103 | 2185 | return f() |
|
2104 | 2186 | else: |
|
2105 |
raise OSError( |
|
|
2187 | raise OSError("fileno not available on underlying writer") | |
|
2106 | 2188 | |
|
2107 | 2189 | def flush(self): |
|
2108 | 2190 | if self._closed: |
|
2109 |
raise ValueError( |
|
|
2110 | ||
|
2111 |
f = getattr(self._writer, |
|
|
2191 | raise ValueError("stream is closed") | |
|
2192 | ||
|
2193 | f = getattr(self._writer, "flush", None) | |
|
2112 | 2194 | if f: |
|
2113 | 2195 | return f() |
|
2114 | 2196 | |
@@ -2153,19 +2235,19 class ZstdDecompressionWriter(object): | |||
|
2153 | 2235 | |
|
2154 | 2236 | def write(self, data): |
|
2155 | 2237 | if self._closed: |
|
2156 |
raise ValueError( |
|
|
2238 | raise ValueError("stream is closed") | |
|
2157 | 2239 | |
|
2158 | 2240 | total_write = 0 |
|
2159 | 2241 | |
|
2160 |
in_buffer = ffi.new( |
|
|
2161 |
out_buffer = ffi.new( |
|
|
2242 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
2243 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
2162 | 2244 | |
|
2163 | 2245 | data_buffer = ffi.from_buffer(data) |
|
2164 | 2246 | in_buffer.src = data_buffer |
|
2165 | 2247 | in_buffer.size = len(data_buffer) |
|
2166 | 2248 | in_buffer.pos = 0 |
|
2167 | 2249 | |
|
2168 |
dst_buffer = ffi.new( |
|
|
2250 | dst_buffer = ffi.new("char[]", self._write_size) | |
|
2169 | 2251 | out_buffer.dst = dst_buffer |
|
2170 | 2252 | out_buffer.size = len(dst_buffer) |
|
2171 | 2253 | out_buffer.pos = 0 |
@@ -2175,8 +2257,7 class ZstdDecompressionWriter(object): | |||
|
2175 | 2257 | while in_buffer.pos < in_buffer.size: |
|
2176 | 2258 | zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer) |
|
2177 | 2259 | if lib.ZSTD_isError(zresult): |
|
2178 |
raise ZstdError( |
|
|
2179 | _zstd_error(zresult)) | |
|
2260 | raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult)) | |
|
2180 | 2261 | |
|
2181 | 2262 | if out_buffer.pos: |
|
2182 | 2263 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
@@ -2206,8 +2287,9 class ZstdDecompressor(object): | |||
|
2206 | 2287 | try: |
|
2207 | 2288 | self._ensure_dctx() |
|
2208 | 2289 | finally: |
|
2209 |
self._dctx = ffi.gc( |
|
|
2210 |
|
|
|
2290 | self._dctx = ffi.gc( | |
|
2291 | dctx, lib.ZSTD_freeDCtx, size=lib.ZSTD_sizeof_DCtx(dctx) | |
|
2292 | ) | |
|
2211 | 2293 | |
|
2212 | 2294 | def memory_size(self): |
|
2213 | 2295 | return lib.ZSTD_sizeof_DCtx(self._dctx) |
@@ -2220,85 +2302,96 class ZstdDecompressor(object): | |||
|
2220 | 2302 | output_size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer)) |
|
2221 | 2303 | |
|
2222 | 2304 | if output_size == lib.ZSTD_CONTENTSIZE_ERROR: |
|
2223 |
raise ZstdError( |
|
|
2305 | raise ZstdError("error determining content size from frame header") | |
|
2224 | 2306 | elif output_size == 0: |
|
2225 |
return b |
|
|
2307 | return b"" | |
|
2226 | 2308 | elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN: |
|
2227 | 2309 | if not max_output_size: |
|
2228 |
raise ZstdError( |
|
|
2229 | ||
|
2230 |
result_buffer = ffi.new( |
|
|
2310 | raise ZstdError("could not determine content size in frame header") | |
|
2311 | ||
|
2312 | result_buffer = ffi.new("char[]", max_output_size) | |
|
2231 | 2313 | result_size = max_output_size |
|
2232 | 2314 | output_size = 0 |
|
2233 | 2315 | else: |
|
2234 |
result_buffer = ffi.new( |
|
|
2316 | result_buffer = ffi.new("char[]", output_size) | |
|
2235 | 2317 | result_size = output_size |
|
2236 | 2318 | |
|
2237 |
out_buffer = ffi.new( |
|
|
2319 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
2238 | 2320 | out_buffer.dst = result_buffer |
|
2239 | 2321 | out_buffer.size = result_size |
|
2240 | 2322 | out_buffer.pos = 0 |
|
2241 | 2323 | |
|
2242 |
in_buffer = ffi.new( |
|
|
2324 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
2243 | 2325 | in_buffer.src = data_buffer |
|
2244 | 2326 | in_buffer.size = len(data_buffer) |
|
2245 | 2327 | in_buffer.pos = 0 |
|
2246 | 2328 | |
|
2247 | 2329 | zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) |
|
2248 | 2330 | if lib.ZSTD_isError(zresult): |
|
2249 |
raise ZstdError( |
|
|
2250 | _zstd_error(zresult)) | |
|
2331 | raise ZstdError("decompression error: %s" % _zstd_error(zresult)) | |
|
2251 | 2332 | elif zresult: |
|
2252 |
raise ZstdError( |
|
|
2333 | raise ZstdError("decompression error: did not decompress full frame") | |
|
2253 | 2334 | elif output_size and out_buffer.pos != output_size: |
|
2254 | raise ZstdError('decompression error: decompressed %d bytes; expected %d' % | |
|
2255 | (zresult, output_size)) | |
|
2335 | raise ZstdError( | |
|
2336 | "decompression error: decompressed %d bytes; expected %d" | |
|
2337 | % (zresult, output_size) | |
|
2338 | ) | |
|
2256 | 2339 | |
|
2257 | 2340 | return ffi.buffer(result_buffer, out_buffer.pos)[:] |
|
2258 | 2341 | |
|
2259 | def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
2260 | read_across_frames=False): | |
|
2342 | def stream_reader( | |
|
2343 | self, | |
|
2344 | source, | |
|
2345 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
2346 | read_across_frames=False, | |
|
2347 | ): | |
|
2261 | 2348 | self._ensure_dctx() |
|
2262 | 2349 | return ZstdDecompressionReader(self, source, read_size, read_across_frames) |
|
2263 | 2350 | |
|
2264 | 2351 | def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): |
|
2265 | 2352 | if write_size < 1: |
|
2266 |
raise ValueError( |
|
|
2353 | raise ValueError("write_size must be positive") | |
|
2267 | 2354 | |
|
2268 | 2355 | self._ensure_dctx() |
|
2269 | 2356 | return ZstdDecompressionObj(self, write_size=write_size) |
|
2270 | 2357 | |
|
2271 | def read_to_iter(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
2272 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
2273 | skip_bytes=0): | |
|
2358 | def read_to_iter( | |
|
2359 | self, | |
|
2360 | reader, | |
|
2361 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
2362 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
2363 | skip_bytes=0, | |
|
2364 | ): | |
|
2274 | 2365 | if skip_bytes >= read_size: |
|
2275 |
raise ValueError( |
|
|
2276 | ||
|
2277 |
if hasattr(reader, |
|
|
2366 | raise ValueError("skip_bytes must be smaller than read_size") | |
|
2367 | ||
|
2368 | if hasattr(reader, "read"): | |
|
2278 | 2369 | have_read = True |
|
2279 |
elif hasattr(reader, |
|
|
2370 | elif hasattr(reader, "__getitem__"): | |
|
2280 | 2371 | have_read = False |
|
2281 | 2372 | buffer_offset = 0 |
|
2282 | 2373 | size = len(reader) |
|
2283 | 2374 | else: |
|
2284 | raise ValueError('must pass an object with a read() method or ' | |
|
2285 | 'conforms to buffer protocol') | |
|
2375 | raise ValueError( | |
|
2376 | "must pass an object with a read() method or " | |
|
2377 | "conforms to buffer protocol" | |
|
2378 | ) | |
|
2286 | 2379 | |
|
2287 | 2380 | if skip_bytes: |
|
2288 | 2381 | if have_read: |
|
2289 | 2382 | reader.read(skip_bytes) |
|
2290 | 2383 | else: |
|
2291 | 2384 | if skip_bytes > size: |
|
2292 |
raise ValueError( |
|
|
2385 | raise ValueError("skip_bytes larger than first input chunk") | |
|
2293 | 2386 | |
|
2294 | 2387 | buffer_offset = skip_bytes |
|
2295 | 2388 | |
|
2296 | 2389 | self._ensure_dctx() |
|
2297 | 2390 | |
|
2298 |
in_buffer = ffi.new( |
|
|
2299 |
out_buffer = ffi.new( |
|
|
2300 | ||
|
2301 |
dst_buffer = ffi.new( |
|
|
2391 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
2392 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
2393 | ||
|
2394 | dst_buffer = ffi.new("char[]", write_size) | |
|
2302 | 2395 | out_buffer.dst = dst_buffer |
|
2303 | 2396 | out_buffer.size = len(dst_buffer) |
|
2304 | 2397 | out_buffer.pos = 0 |
@@ -2311,7 +2404,7 class ZstdDecompressor(object): | |||
|
2311 | 2404 | else: |
|
2312 | 2405 | remaining = size - buffer_offset |
|
2313 | 2406 | slice_size = min(remaining, read_size) |
|
2314 | read_result = reader[buffer_offset:buffer_offset + slice_size] | |
|
2407 | read_result = reader[buffer_offset : buffer_offset + slice_size] | |
|
2315 | 2408 | buffer_offset += slice_size |
|
2316 | 2409 | |
|
2317 | 2410 | # No new input. Break out of read loop. |
@@ -2330,8 +2423,7 class ZstdDecompressor(object): | |||
|
2330 | 2423 | |
|
2331 | 2424 | zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) |
|
2332 | 2425 | if lib.ZSTD_isError(zresult): |
|
2333 |
raise ZstdError( |
|
|
2334 | _zstd_error(zresult)) | |
|
2426 | raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult)) | |
|
2335 | 2427 | |
|
2336 | 2428 | if out_buffer.pos: |
|
2337 | 2429 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] |
@@ -2348,30 +2440,37 class ZstdDecompressor(object): | |||
|
2348 | 2440 | |
|
2349 | 2441 | read_from = read_to_iter |
|
2350 | 2442 | |
|
2351 | def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
2352 | write_return_read=False): | |
|
2353 | if not hasattr(writer, 'write'): | |
|
2354 | raise ValueError('must pass an object with a write() method') | |
|
2355 | ||
|
2356 | return ZstdDecompressionWriter(self, writer, write_size, | |
|
2357 | write_return_read) | |
|
2443 | def stream_writer( | |
|
2444 | self, | |
|
2445 | writer, | |
|
2446 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
2447 | write_return_read=False, | |
|
2448 | ): | |
|
2449 | if not hasattr(writer, "write"): | |
|
2450 | raise ValueError("must pass an object with a write() method") | |
|
2451 | ||
|
2452 | return ZstdDecompressionWriter(self, writer, write_size, write_return_read) | |
|
2358 | 2453 | |
|
2359 | 2454 | write_to = stream_writer |
|
2360 | 2455 | |
|
2361 |
def copy_stream( |
|
|
2362 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
2363 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
|
2364 | if not hasattr(ifh, 'read'): | |
|
2365 | raise ValueError('first argument must have a read() method') | |
|
2366 | if not hasattr(ofh, 'write'): | |
|
2367 | raise ValueError('second argument must have a write() method') | |
|
2456 | def copy_stream( | |
|
2457 | self, | |
|
2458 | ifh, | |
|
2459 | ofh, | |
|
2460 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
|
2461 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
|
2462 | ): | |
|
2463 | if not hasattr(ifh, "read"): | |
|
2464 | raise ValueError("first argument must have a read() method") | |
|
2465 | if not hasattr(ofh, "write"): | |
|
2466 | raise ValueError("second argument must have a write() method") | |
|
2368 | 2467 | |
|
2369 | 2468 | self._ensure_dctx() |
|
2370 | 2469 | |
|
2371 |
in_buffer = ffi.new( |
|
|
2372 |
out_buffer = ffi.new( |
|
|
2373 | ||
|
2374 |
dst_buffer = ffi.new( |
|
|
2470 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
2471 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
2472 | ||
|
2473 | dst_buffer = ffi.new("char[]", write_size) | |
|
2375 | 2474 | out_buffer.dst = dst_buffer |
|
2376 | 2475 | out_buffer.size = write_size |
|
2377 | 2476 | out_buffer.pos = 0 |
@@ -2394,8 +2493,9 class ZstdDecompressor(object): | |||
|
2394 | 2493 | while in_buffer.pos < in_buffer.size: |
|
2395 | 2494 | zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) |
|
2396 | 2495 | if lib.ZSTD_isError(zresult): |
|
2397 |
raise ZstdError( |
|
|
2398 |
|
|
|
2496 | raise ZstdError( | |
|
2497 | "zstd decompressor error: %s" % _zstd_error(zresult) | |
|
2498 | ) | |
|
2399 | 2499 | |
|
2400 | 2500 | if out_buffer.pos: |
|
2401 | 2501 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
@@ -2408,48 +2508,47 class ZstdDecompressor(object): | |||
|
2408 | 2508 | |
|
2409 | 2509 | def decompress_content_dict_chain(self, frames): |
|
2410 | 2510 | if not isinstance(frames, list): |
|
2411 |
raise TypeError( |
|
|
2511 | raise TypeError("argument must be a list") | |
|
2412 | 2512 | |
|
2413 | 2513 | if not frames: |
|
2414 |
raise ValueError( |
|
|
2514 | raise ValueError("empty input chain") | |
|
2415 | 2515 | |
|
2416 | 2516 | # First chunk should not be using a dictionary. We handle it specially. |
|
2417 | 2517 | chunk = frames[0] |
|
2418 | 2518 | if not isinstance(chunk, bytes_type): |
|
2419 |
raise ValueError( |
|
|
2519 | raise ValueError("chunk 0 must be bytes") | |
|
2420 | 2520 | |
|
2421 | 2521 | # All chunks should be zstd frames and should have content size set. |
|
2422 | 2522 | chunk_buffer = ffi.from_buffer(chunk) |
|
2423 |
params = ffi.new( |
|
|
2523 | params = ffi.new("ZSTD_frameHeader *") | |
|
2424 | 2524 | zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer)) |
|
2425 | 2525 | if lib.ZSTD_isError(zresult): |
|
2426 |
raise ValueError( |
|
|
2526 | raise ValueError("chunk 0 is not a valid zstd frame") | |
|
2427 | 2527 | elif zresult: |
|
2428 |
raise ValueError( |
|
|
2528 | raise ValueError("chunk 0 is too small to contain a zstd frame") | |
|
2429 | 2529 | |
|
2430 | 2530 | if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN: |
|
2431 |
raise ValueError( |
|
|
2531 | raise ValueError("chunk 0 missing content size in frame") | |
|
2432 | 2532 | |
|
2433 | 2533 | self._ensure_dctx(load_dict=False) |
|
2434 | 2534 | |
|
2435 |
last_buffer = ffi.new( |
|
|
2436 | ||
|
2437 |
out_buffer = ffi.new( |
|
|
2535 | last_buffer = ffi.new("char[]", params.frameContentSize) | |
|
2536 | ||
|
2537 | out_buffer = ffi.new("ZSTD_outBuffer *") | |
|
2438 | 2538 | out_buffer.dst = last_buffer |
|
2439 | 2539 | out_buffer.size = len(last_buffer) |
|
2440 | 2540 | out_buffer.pos = 0 |
|
2441 | 2541 | |
|
2442 |
in_buffer = ffi.new( |
|
|
2542 | in_buffer = ffi.new("ZSTD_inBuffer *") | |
|
2443 | 2543 | in_buffer.src = chunk_buffer |
|
2444 | 2544 | in_buffer.size = len(chunk_buffer) |
|
2445 | 2545 | in_buffer.pos = 0 |
|
2446 | 2546 | |
|
2447 | 2547 | zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) |
|
2448 | 2548 | if lib.ZSTD_isError(zresult): |
|
2449 |
raise ZstdError( |
|
|
2450 | _zstd_error(zresult)) | |
|
2549 | raise ZstdError("could not decompress chunk 0: %s" % _zstd_error(zresult)) | |
|
2451 | 2550 | elif zresult: |
|
2452 |
raise ZstdError( |
|
|
2551 | raise ZstdError("chunk 0 did not decompress full frame") | |
|
2453 | 2552 | |
|
2454 | 2553 | # Special case of chain length of 1 |
|
2455 | 2554 | if len(frames) == 1: |
@@ -2459,19 +2558,19 class ZstdDecompressor(object): | |||
|
2459 | 2558 | while i < len(frames): |
|
2460 | 2559 | chunk = frames[i] |
|
2461 | 2560 | if not isinstance(chunk, bytes_type): |
|
2462 |
raise ValueError( |
|
|
2561 | raise ValueError("chunk %d must be bytes" % i) | |
|
2463 | 2562 | |
|
2464 | 2563 | chunk_buffer = ffi.from_buffer(chunk) |
|
2465 | 2564 | zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer)) |
|
2466 | 2565 | if lib.ZSTD_isError(zresult): |
|
2467 |
raise ValueError( |
|
|
2566 | raise ValueError("chunk %d is not a valid zstd frame" % i) | |
|
2468 | 2567 | elif zresult: |
|
2469 |
raise ValueError( |
|
|
2568 | raise ValueError("chunk %d is too small to contain a zstd frame" % i) | |
|
2470 | 2569 | |
|
2471 | 2570 | if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN: |
|
2472 |
raise ValueError( |
|
|
2473 | ||
|
2474 |
dest_buffer = ffi.new( |
|
|
2571 | raise ValueError("chunk %d missing content size in frame" % i) | |
|
2572 | ||
|
2573 | dest_buffer = ffi.new("char[]", params.frameContentSize) | |
|
2475 | 2574 | |
|
2476 | 2575 | out_buffer.dst = dest_buffer |
|
2477 | 2576 | out_buffer.size = len(dest_buffer) |
@@ -2483,10 +2582,11 class ZstdDecompressor(object): | |||
|
2483 | 2582 | |
|
2484 | 2583 | zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer) |
|
2485 | 2584 | if lib.ZSTD_isError(zresult): |
|
2486 |
raise ZstdError( |
|
|
2487 |
|
|
|
2585 | raise ZstdError( | |
|
2586 | "could not decompress chunk %d: %s" % _zstd_error(zresult) | |
|
2587 | ) | |
|
2488 | 2588 | elif zresult: |
|
2489 |
raise ZstdError( |
|
|
2589 | raise ZstdError("chunk %d did not decompress full frame" % i) | |
|
2490 | 2590 | |
|
2491 | 2591 | last_buffer = dest_buffer |
|
2492 | 2592 | i += 1 |
@@ -2497,19 +2597,19 class ZstdDecompressor(object): | |||
|
2497 | 2597 | lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only) |
|
2498 | 2598 | |
|
2499 | 2599 | if self._max_window_size: |
|
2500 | zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx, | |
|
2501 | self._max_window_size) | |
|
2600 | zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx, self._max_window_size) | |
|
2502 | 2601 | if lib.ZSTD_isError(zresult): |
|
2503 |
raise ZstdError( |
|
|
2504 |
|
|
|
2602 | raise ZstdError( | |
|
2603 | "unable to set max window size: %s" % _zstd_error(zresult) | |
|
2604 | ) | |
|
2505 | 2605 | |
|
2506 | 2606 | zresult = lib.ZSTD_DCtx_setFormat(self._dctx, self._format) |
|
2507 | 2607 | if lib.ZSTD_isError(zresult): |
|
2508 |
raise ZstdError( |
|
|
2509 | _zstd_error(zresult)) | |
|
2608 | raise ZstdError("unable to set decoding format: %s" % _zstd_error(zresult)) | |
|
2510 | 2609 | |
|
2511 | 2610 | if self._dict_data and load_dict: |
|
2512 | 2611 | zresult = lib.ZSTD_DCtx_refDDict(self._dctx, self._dict_data._ddict) |
|
2513 | 2612 | if lib.ZSTD_isError(zresult): |
|
2514 | raise ZstdError('unable to reference prepared dictionary: %s' % | |
|
2515 |
|
|
|
2613 | raise ZstdError( | |
|
2614 | "unable to reference prepared dictionary: %s" % _zstd_error(zresult) | |
|
2615 | ) |
@@ -210,7 +210,7 void zstd_module_init(PyObject* m) { | |||
|
210 | 210 | We detect this mismatch here and refuse to load the module if this |
|
211 | 211 | scenario is detected. |
|
212 | 212 | */ |
|
213 |
if (ZSTD_VERSION_NUMBER != 1040 |
|
|
213 | if (ZSTD_VERSION_NUMBER != 10404 || ZSTD_versionNumber() != 10404) { | |
|
214 | 214 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
215 | 215 | return; |
|
216 | 216 | } |
@@ -164,7 +164,7 MEM_STATIC unsigned BIT_highbit32 (U32 v | |||
|
164 | 164 | _BitScanReverse ( &r, val ); |
|
165 | 165 | return (unsigned) r; |
|
166 | 166 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
|
167 |
return |
|
|
167 | return __builtin_clz (val) ^ 31; | |
|
168 | 168 | # elif defined(__ICCARM__) /* IAR Intrinsic */ |
|
169 | 169 | return 31 - __CLZ(val); |
|
170 | 170 | # else /* Software version */ |
@@ -244,9 +244,9 MEM_STATIC void BIT_flushBitsFast(BIT_CS | |||
|
244 | 244 | { |
|
245 | 245 | size_t const nbBytes = bitC->bitPos >> 3; |
|
246 | 246 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
|
247 | assert(bitC->ptr <= bitC->endPtr); | |
|
247 | 248 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
248 | 249 | bitC->ptr += nbBytes; |
|
249 | assert(bitC->ptr <= bitC->endPtr); | |
|
250 | 250 | bitC->bitPos &= 7; |
|
251 | 251 | bitC->bitContainer >>= nbBytes*8; |
|
252 | 252 | } |
@@ -260,6 +260,7 MEM_STATIC void BIT_flushBits(BIT_CStrea | |||
|
260 | 260 | { |
|
261 | 261 | size_t const nbBytes = bitC->bitPos >> 3; |
|
262 | 262 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
|
263 | assert(bitC->ptr <= bitC->endPtr); | |
|
263 | 264 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
264 | 265 | bitC->ptr += nbBytes; |
|
265 | 266 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; |
@@ -61,6 +61,13 | |||
|
61 | 61 | # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR |
|
62 | 62 | #endif |
|
63 | 63 | |
|
64 | /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ | |
|
65 | #if defined(__GNUC__) | |
|
66 | # define UNUSED_ATTR __attribute__((unused)) | |
|
67 | #else | |
|
68 | # define UNUSED_ATTR | |
|
69 | #endif | |
|
70 | ||
|
64 | 71 | /* force no inlining */ |
|
65 | 72 | #ifdef _MSC_VER |
|
66 | 73 | # define FORCE_NOINLINE static __declspec(noinline) |
@@ -127,9 +134,14 | |||
|
127 | 134 | } \ |
|
128 | 135 | } |
|
129 | 136 | |
|
130 |
/* vectorization |
|
|
137 | /* vectorization | |
|
138 | * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ | |
|
131 | 139 | #if !defined(__clang__) && defined(__GNUC__) |
|
132 | # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) | |
|
140 | # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) | |
|
141 | # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) | |
|
142 | # else | |
|
143 | # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") | |
|
144 | # endif | |
|
133 | 145 | #else |
|
134 | 146 | # define DONT_VECTORIZE |
|
135 | 147 | #endif |
@@ -308,7 +308,7 If there is an error, the function will | |||
|
308 | 308 | *******************************************/ |
|
309 | 309 | /* FSE buffer bounds */ |
|
310 | 310 | #define FSE_NCOUNTBOUND 512 |
|
311 | #define FSE_BLOCKBOUND(size) (size + (size>>7)) | |
|
311 | #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) | |
|
312 | 312 | #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ |
|
313 | 313 | |
|
314 | 314 | /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ |
@@ -52,7 +52,9 | |||
|
52 | 52 | #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ |
|
53 | 53 | |
|
54 | 54 | /* check and forward error code */ |
|
55 | #ifndef CHECK_F | |
|
55 | 56 | #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } |
|
57 | #endif | |
|
56 | 58 | |
|
57 | 59 | |
|
58 | 60 | /* ************************************************************** |
@@ -47,6 +47,79 extern "C" { | |||
|
47 | 47 | #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } |
|
48 | 48 | MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } |
|
49 | 49 | |
|
50 | /* detects whether we are being compiled under msan */ | |
|
51 | #if defined (__has_feature) | |
|
52 | # if __has_feature(memory_sanitizer) | |
|
53 | # define MEMORY_SANITIZER 1 | |
|
54 | # endif | |
|
55 | #endif | |
|
56 | ||
|
57 | #if defined (MEMORY_SANITIZER) | |
|
58 | /* Not all platforms that support msan provide sanitizers/msan_interface.h. | |
|
59 | * We therefore declare the functions we need ourselves, rather than trying to | |
|
60 | * include the header file... */ | |
|
61 | ||
|
62 | #include <stdint.h> /* intptr_t */ | |
|
63 | ||
|
64 | /* Make memory region fully initialized (without changing its contents). */ | |
|
65 | void __msan_unpoison(const volatile void *a, size_t size); | |
|
66 | ||
|
67 | /* Make memory region fully uninitialized (without changing its contents). | |
|
68 | This is a legacy interface that does not update origin information. Use | |
|
69 | __msan_allocated_memory() instead. */ | |
|
70 | void __msan_poison(const volatile void *a, size_t size); | |
|
71 | ||
|
72 | /* Returns the offset of the first (at least partially) poisoned byte in the | |
|
73 | memory range, or -1 if the whole range is good. */ | |
|
74 | intptr_t __msan_test_shadow(const volatile void *x, size_t size); | |
|
75 | #endif | |
|
76 | ||
|
77 | /* detects whether we are being compiled under asan */ | |
|
78 | #if defined (__has_feature) | |
|
79 | # if __has_feature(address_sanitizer) | |
|
80 | # define ADDRESS_SANITIZER 1 | |
|
81 | # endif | |
|
82 | #elif defined(__SANITIZE_ADDRESS__) | |
|
83 | # define ADDRESS_SANITIZER 1 | |
|
84 | #endif | |
|
85 | ||
|
86 | #if defined (ADDRESS_SANITIZER) | |
|
87 | /* Not all platforms that support asan provide sanitizers/asan_interface.h. | |
|
88 | * We therefore declare the functions we need ourselves, rather than trying to | |
|
89 | * include the header file... */ | |
|
90 | ||
|
91 | /** | |
|
92 | * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable. | |
|
93 | * | |
|
94 | * This memory must be previously allocated by your program. Instrumented | |
|
95 | * code is forbidden from accessing addresses in this region until it is | |
|
96 | * unpoisoned. This function is not guaranteed to poison the entire region - | |
|
97 | * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan | |
|
98 | * alignment restrictions. | |
|
99 | * | |
|
100 | * \note This function is not thread-safe because no two threads can poison or | |
|
101 | * unpoison memory in the same memory region simultaneously. | |
|
102 | * | |
|
103 | * \param addr Start of memory region. | |
|
104 | * \param size Size of memory region. */ | |
|
105 | void __asan_poison_memory_region(void const volatile *addr, size_t size); | |
|
106 | ||
|
107 | /** | |
|
108 | * Marks a memory region (<c>[addr, addr+size)</c>) as addressable. | |
|
109 | * | |
|
110 | * This memory must be previously allocated by your program. Accessing | |
|
111 | * addresses in this region is allowed until this region is poisoned again. | |
|
112 | * This function could unpoison a super-region of <c>[addr, addr+size)</c> due | |
|
113 | * to ASan alignment restrictions. | |
|
114 | * | |
|
115 | * \note This function is not thread-safe because no two threads can | |
|
116 | * poison or unpoison memory in the same memory region simultaneously. | |
|
117 | * | |
|
118 | * \param addr Start of memory region. | |
|
119 | * \param size Size of memory region. */ | |
|
120 | void __asan_unpoison_memory_region(void const volatile *addr, size_t size); | |
|
121 | #endif | |
|
122 | ||
|
50 | 123 | |
|
51 | 124 | /*-************************************************************** |
|
52 | 125 | * Basic Types |
@@ -127,9 +127,13 POOL_ctx* POOL_create_advanced(size_t nu | |||
|
127 | 127 | ctx->queueTail = 0; |
|
128 | 128 | ctx->numThreadsBusy = 0; |
|
129 | 129 | ctx->queueEmpty = 1; |
|
130 | (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); | |
|
131 | (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); | |
|
132 |
|
|
|
130 | { | |
|
131 | int error = 0; | |
|
132 | error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); | |
|
133 | error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); | |
|
134 | error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); | |
|
135 | if (error) { POOL_free(ctx); return NULL; } | |
|
136 | } | |
|
133 | 137 | ctx->shutdown = 0; |
|
134 | 138 | /* Allocate space for the thread handles */ |
|
135 | 139 | ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); |
@@ -14,6 +14,8 | |||
|
14 | 14 | * This file will hold wrapper for systems, which do not support pthreads |
|
15 | 15 | */ |
|
16 | 16 | |
|
17 | #include "threading.h" | |
|
18 | ||
|
17 | 19 | /* create fake symbol to avoid empty translation unit warning */ |
|
18 | 20 | int g_ZSTD_threading_useless_symbol; |
|
19 | 21 | |
@@ -28,7 +30,6 int g_ZSTD_threading_useless_symbol; | |||
|
28 | 30 | /* === Dependencies === */ |
|
29 | 31 | #include <process.h> |
|
30 | 32 | #include <errno.h> |
|
31 | #include "threading.h" | |
|
32 | 33 | |
|
33 | 34 | |
|
34 | 35 | /* === Implementation === */ |
@@ -73,3 +74,47 int ZSTD_pthread_join(ZSTD_pthread_t thr | |||
|
73 | 74 | } |
|
74 | 75 | |
|
75 | 76 | #endif /* ZSTD_MULTITHREAD */ |
|
77 | ||
|
78 | #if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) | |
|
79 | ||
|
80 | #include <stdlib.h> | |
|
81 | ||
|
82 | int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) | |
|
83 | { | |
|
84 | *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); | |
|
85 | if (!*mutex) | |
|
86 | return 1; | |
|
87 | return pthread_mutex_init(*mutex, attr); | |
|
88 | } | |
|
89 | ||
|
90 | int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) | |
|
91 | { | |
|
92 | if (!*mutex) | |
|
93 | return 0; | |
|
94 | { | |
|
95 | int const ret = pthread_mutex_destroy(*mutex); | |
|
96 | free(*mutex); | |
|
97 | return ret; | |
|
98 | } | |
|
99 | } | |
|
100 | ||
|
101 | int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) | |
|
102 | { | |
|
103 | *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t)); | |
|
104 | if (!*cond) | |
|
105 | return 1; | |
|
106 | return pthread_cond_init(*cond, attr); | |
|
107 | } | |
|
108 | ||
|
109 | int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) | |
|
110 | { | |
|
111 | if (!*cond) | |
|
112 | return 0; | |
|
113 | { | |
|
114 | int const ret = pthread_cond_destroy(*cond); | |
|
115 | free(*cond); | |
|
116 | return ret; | |
|
117 | } | |
|
118 | } | |
|
119 | ||
|
120 | #endif |
@@ -13,6 +13,8 | |||
|
13 | 13 | #ifndef THREADING_H_938743 |
|
14 | 14 | #define THREADING_H_938743 |
|
15 | 15 | |
|
16 | #include "debug.h" | |
|
17 | ||
|
16 | 18 | #if defined (__cplusplus) |
|
17 | 19 | extern "C" { |
|
18 | 20 | #endif |
@@ -75,10 +77,12 int ZSTD_pthread_join(ZSTD_pthread_t thr | |||
|
75 | 77 | */ |
|
76 | 78 | |
|
77 | 79 | |
|
78 | #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ | |
|
80 | #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ | |
|
79 | 81 | /* === POSIX Systems === */ |
|
80 | 82 | # include <pthread.h> |
|
81 | 83 | |
|
84 | #if DEBUGLEVEL < 1 | |
|
85 | ||
|
82 | 86 | #define ZSTD_pthread_mutex_t pthread_mutex_t |
|
83 | 87 | #define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) |
|
84 | 88 | #define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) |
@@ -96,6 +100,33 int ZSTD_pthread_join(ZSTD_pthread_t thr | |||
|
96 | 100 | #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) |
|
97 | 101 | #define ZSTD_pthread_join(a, b) pthread_join((a),(b)) |
|
98 | 102 | |
|
103 | #else /* DEBUGLEVEL >= 1 */ | |
|
104 | ||
|
105 | /* Debug implementation of threading. | |
|
106 | * In this implementation we use pointers for mutexes and condition variables. | |
|
107 | * This way, if we forget to init/destroy them the program will crash or ASAN | |
|
108 | * will report leaks. | |
|
109 | */ | |
|
110 | ||
|
111 | #define ZSTD_pthread_mutex_t pthread_mutex_t* | |
|
112 | int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); | |
|
113 | int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); | |
|
114 | #define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) | |
|
115 | #define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) | |
|
116 | ||
|
117 | #define ZSTD_pthread_cond_t pthread_cond_t* | |
|
118 | int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); | |
|
119 | int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); | |
|
120 | #define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) | |
|
121 | #define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) | |
|
122 | #define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) | |
|
123 | ||
|
124 | #define ZSTD_pthread_t pthread_t | |
|
125 | #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) | |
|
126 | #define ZSTD_pthread_join(a, b) pthread_join((a),(b)) | |
|
127 | ||
|
128 | #endif | |
|
129 | ||
|
99 | 130 | #else /* ZSTD_MULTITHREAD not defined */ |
|
100 | 131 | /* No multithreading support */ |
|
101 | 132 |
@@ -197,79 +197,56 static void ZSTD_copy8(void* dst, const | |||
|
197 | 197 | static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } |
|
198 | 198 | #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } |
|
199 | 199 | |
|
200 |
#define WILDCOPY_OVERLENGTH |
|
|
201 | #define VECLEN 16 | |
|
200 | #define WILDCOPY_OVERLENGTH 32 | |
|
201 | #define WILDCOPY_VECLEN 16 | |
|
202 | 202 | |
|
203 | 203 | typedef enum { |
|
204 | 204 | ZSTD_no_overlap, |
|
205 |
ZSTD_overlap_src_before_dst |
|
|
205 | ZSTD_overlap_src_before_dst | |
|
206 | 206 | /* ZSTD_overlap_dst_before_src, */ |
|
207 | 207 | } ZSTD_overlap_e; |
|
208 | 208 | |
|
209 | 209 | /*! ZSTD_wildcopy() : |
|
210 |
* |
|
|
210 | * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) | |
|
211 | * @param ovtype controls the overlap detection | |
|
212 | * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. | |
|
213 | * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. | |
|
214 | * The src buffer must be before the dst buffer. | |
|
215 | */ | |
|
211 | 216 | MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE |
|
212 | void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) | |
|
217 | void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) | |
|
213 | 218 | { |
|
214 | 219 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; |
|
215 | 220 | const BYTE* ip = (const BYTE*)src; |
|
216 | 221 | BYTE* op = (BYTE*)dst; |
|
217 | 222 | BYTE* const oend = op + length; |
|
218 | 223 | |
|
219 |
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < - |
|
|
220 | if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { | |
|
221 | do | |
|
222 | COPY8(op, ip) | |
|
223 | while (op < oend); | |
|
224 | } | |
|
225 | else { | |
|
226 | if ((length & 8) == 0) | |
|
227 | COPY8(op, ip); | |
|
228 | do { | |
|
229 | COPY16(op, ip); | |
|
230 | } | |
|
231 | while (op < oend); | |
|
232 | } | |
|
233 | } | |
|
234 | ||
|
235 | /*! ZSTD_wildcopy_16min() : | |
|
236 | * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ | |
|
237 | MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE | |
|
238 | void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) | |
|
239 | { | |
|
240 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; | |
|
241 | const BYTE* ip = (const BYTE*)src; | |
|
242 | BYTE* op = (BYTE*)dst; | |
|
243 | BYTE* const oend = op + length; | |
|
224 | assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); | |
|
244 | 225 | |
|
245 | assert(length >= 8); | |
|
246 | assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); | |
|
247 | ||
|
248 | if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { | |
|
249 | do | |
|
250 | COPY8(op, ip) | |
|
251 | while (op < oend); | |
|
252 | } | |
|
253 | else { | |
|
254 | if ((length & 8) == 0) | |
|
255 | COPY8(op, ip); | |
|
256 | do { | |
|
226 | if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { | |
|
227 | /* Handle short offset copies. */ | |
|
228 | do { | |
|
229 | COPY8(op, ip) | |
|
230 | } while (op < oend); | |
|
231 | } else { | |
|
232 | assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); | |
|
233 | /* Separate out the first two COPY16() calls because the copy length is | |
|
234 | * almost certain to be short, so the branches have different | |
|
235 | * probabilities. | |
|
236 | * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%. | |
|
237 | * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%. | |
|
238 | */ | |
|
257 | 239 | COPY16(op, ip); |
|
258 | } | |
|
259 |
|
|
|
240 | COPY16(op, ip); | |
|
241 | if (op >= oend) return; | |
|
242 | do { | |
|
243 | COPY16(op, ip); | |
|
244 | COPY16(op, ip); | |
|
245 | } | |
|
246 | while (op < oend); | |
|
260 | 247 | } |
|
261 | 248 | } |
|
262 | 249 | |
|
263 | MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ | |
|
264 | { | |
|
265 | const BYTE* ip = (const BYTE*)src; | |
|
266 | BYTE* op = (BYTE*)dst; | |
|
267 | BYTE* const oend = (BYTE*)dstEnd; | |
|
268 | do | |
|
269 | COPY8(op, ip) | |
|
270 | while (op < oend); | |
|
271 | } | |
|
272 | ||
|
273 | 250 | |
|
274 | 251 | /*-******************************************* |
|
275 | 252 | * Private declarations |
@@ -323,7 +300,7 MEM_STATIC U32 ZSTD_highbit32(U32 val) | |||
|
323 | 300 | _BitScanReverse(&r, val); |
|
324 | 301 | return (unsigned)r; |
|
325 | 302 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ |
|
326 |
return |
|
|
303 | return __builtin_clz (val) ^ 31; | |
|
327 | 304 | # elif defined(__ICCARM__) /* IAR Intrinsic */ |
|
328 | 305 | return 31 - __CLZ(val); |
|
329 | 306 | # else /* Software version */ |
This diff has been collapsed as it changes many lines, (1099 lines changed) Show them Hide them | |||
@@ -42,15 +42,15 size_t ZSTD_compressBound(size_t srcSize | |||
|
42 | 42 | * Context memory management |
|
43 | 43 | ***************************************/ |
|
44 | 44 | struct ZSTD_CDict_s { |
|
45 | void* dictBuffer; | |
|
46 | 45 | const void* dictContent; |
|
47 | 46 | size_t dictContentSize; |
|
48 | void* workspace; | |
|
49 |
|
|
|
47 | U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ | |
|
48 | ZSTD_cwksp workspace; | |
|
50 | 49 | ZSTD_matchState_t matchState; |
|
51 | 50 | ZSTD_compressedBlockState_t cBlockState; |
|
52 | 51 | ZSTD_customMem customMem; |
|
53 | 52 | U32 dictID; |
|
53 | int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ | |
|
54 | 54 | }; /* typedef'd to ZSTD_CDict within "zstd.h" */ |
|
55 | 55 | |
|
56 | 56 | ZSTD_CCtx* ZSTD_createCCtx(void) |
@@ -84,23 +84,26 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD | |||
|
84 | 84 | |
|
85 | 85 | ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) |
|
86 | 86 | { |
|
87 | ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; | |
|
87 | ZSTD_cwksp ws; | |
|
88 | ZSTD_CCtx* cctx; | |
|
88 | 89 | if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ |
|
89 | 90 | if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ |
|
90 | memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ | |
|
91 | ZSTD_cwksp_init(&ws, workspace, workspaceSize); | |
|
92 | ||
|
93 | cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); | |
|
94 | if (cctx == NULL) { | |
|
95 | return NULL; | |
|
96 | } | |
|
97 | memset(cctx, 0, sizeof(ZSTD_CCtx)); | |
|
98 | ZSTD_cwksp_move(&cctx->workspace, &ws); | |
|
91 | 99 | cctx->staticSize = workspaceSize; |
|
92 | cctx->workSpace = (void*)(cctx+1); | |
|
93 | cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); | |
|
94 | 100 | |
|
95 | 101 | /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ |
|
96 |
if (cctx->work |
|
|
97 | assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ | |
|
98 |
cctx->blockState. |
|
|
99 | cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; | |
|
100 | { | |
|
101 | void* const ptr = cctx->blockState.nextCBlock + 1; | |
|
102 | cctx->entropyWorkspace = (U32*)ptr; | |
|
103 | } | |
|
102 | if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; | |
|
103 | cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); | |
|
104 | cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); | |
|
105 | cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object( | |
|
106 | &cctx->workspace, HUF_WORKSPACE_SIZE); | |
|
104 | 107 | cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); |
|
105 | 108 | return cctx; |
|
106 | 109 | } |
@@ -128,11 +131,11 static void ZSTD_freeCCtxContent(ZSTD_CC | |||
|
128 | 131 | { |
|
129 | 132 | assert(cctx != NULL); |
|
130 | 133 | assert(cctx->staticSize == 0); |
|
131 | ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; | |
|
132 | 134 | ZSTD_clearAllDicts(cctx); |
|
133 | 135 | #ifdef ZSTD_MULTITHREAD |
|
134 | 136 | ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; |
|
135 | 137 | #endif |
|
138 | ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); | |
|
136 | 139 | } |
|
137 | 140 | |
|
138 | 141 | size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) |
@@ -140,8 +143,13 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) | |||
|
140 | 143 | if (cctx==NULL) return 0; /* support free on NULL */ |
|
141 | 144 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, |
|
142 | 145 | "not compatible with static CCtx"); |
|
143 | ZSTD_freeCCtxContent(cctx); | |
|
144 | ZSTD_free(cctx, cctx->customMem); | |
|
146 | { | |
|
147 | int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); | |
|
148 | ZSTD_freeCCtxContent(cctx); | |
|
149 | if (!cctxInWorkspace) { | |
|
150 | ZSTD_free(cctx, cctx->customMem); | |
|
151 | } | |
|
152 | } | |
|
145 | 153 | return 0; |
|
146 | 154 | } |
|
147 | 155 | |
@@ -160,7 +168,9 static size_t ZSTD_sizeof_mtctx(const ZS | |||
|
160 | 168 | size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) |
|
161 | 169 | { |
|
162 | 170 | if (cctx==NULL) return 0; /* support sizeof on NULL */ |
|
163 | return sizeof(*cctx) + cctx->workSpaceSize | |
|
171 | /* cctx may be in the workspace */ | |
|
172 | return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) | |
|
173 | + ZSTD_cwksp_sizeof(&cctx->workspace) | |
|
164 | 174 | + ZSTD_sizeof_localDict(cctx->localDict) |
|
165 | 175 | + ZSTD_sizeof_mtctx(cctx); |
|
166 | 176 | } |
@@ -229,23 +239,23 size_t ZSTD_CCtxParams_init_advanced(ZST | |||
|
229 | 239 | RETURN_ERROR_IF(!cctxParams, GENERIC); |
|
230 | 240 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); |
|
231 | 241 | memset(cctxParams, 0, sizeof(*cctxParams)); |
|
242 | assert(!ZSTD_checkCParams(params.cParams)); | |
|
232 | 243 | cctxParams->cParams = params.cParams; |
|
233 | 244 | cctxParams->fParams = params.fParams; |
|
234 | 245 | cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ |
|
235 | assert(!ZSTD_checkCParams(params.cParams)); | |
|
236 | 246 | return 0; |
|
237 | 247 | } |
|
238 | 248 | |
|
239 | 249 | /* ZSTD_assignParamsToCCtxParams() : |
|
240 | 250 | * params is presumed valid at this stage */ |
|
241 | 251 | static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( |
|
242 | ZSTD_CCtx_params cctxParams, ZSTD_parameters params) | |
|
252 | const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) | |
|
243 | 253 | { |
|
244 | ZSTD_CCtx_params ret = cctxParams; | |
|
254 | ZSTD_CCtx_params ret = *cctxParams; | |
|
255 | assert(!ZSTD_checkCParams(params.cParams)); | |
|
245 | 256 | ret.cParams = params.cParams; |
|
246 | 257 | ret.fParams = params.fParams; |
|
247 | 258 | ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ |
|
248 | assert(!ZSTD_checkCParams(params.cParams)); | |
|
249 | 259 | return ret; |
|
250 | 260 | } |
|
251 | 261 | |
@@ -378,7 +388,7 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c | |||
|
378 | 388 | case ZSTD_c_forceAttachDict: |
|
379 | 389 | ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); |
|
380 | 390 | bounds.lowerBound = ZSTD_dictDefaultAttach; |
|
381 |
bounds.upperBound = ZSTD_dictForce |
|
|
391 | bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ | |
|
382 | 392 | return bounds; |
|
383 | 393 | |
|
384 | 394 | case ZSTD_c_literalCompressionMode: |
@@ -392,6 +402,11 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c | |||
|
392 | 402 | bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; |
|
393 | 403 | return bounds; |
|
394 | 404 | |
|
405 | case ZSTD_c_srcSizeHint: | |
|
406 | bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; | |
|
407 | bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; | |
|
408 | return bounds; | |
|
409 | ||
|
395 | 410 | default: |
|
396 | 411 | { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; |
|
397 | 412 | return boundError; |
@@ -448,6 +463,7 static int ZSTD_isUpdateAuthorized(ZSTD_ | |||
|
448 | 463 | case ZSTD_c_forceAttachDict: |
|
449 | 464 | case ZSTD_c_literalCompressionMode: |
|
450 | 465 | case ZSTD_c_targetCBlockSize: |
|
466 | case ZSTD_c_srcSizeHint: | |
|
451 | 467 | default: |
|
452 | 468 | return 0; |
|
453 | 469 | } |
@@ -494,6 +510,7 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* | |||
|
494 | 510 | case ZSTD_c_ldmMinMatch: |
|
495 | 511 | case ZSTD_c_ldmBucketSizeLog: |
|
496 | 512 | case ZSTD_c_targetCBlockSize: |
|
513 | case ZSTD_c_srcSizeHint: | |
|
497 | 514 | break; |
|
498 | 515 | |
|
499 | 516 | default: RETURN_ERROR(parameter_unsupported); |
@@ -517,33 +534,33 size_t ZSTD_CCtxParams_setParameter(ZSTD | |||
|
517 | 534 | if (value) { /* 0 : does not change current level */ |
|
518 | 535 | CCtxParams->compressionLevel = value; |
|
519 | 536 | } |
|
520 | if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; | |
|
537 | if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; | |
|
521 | 538 | return 0; /* return type (size_t) cannot represent negative values */ |
|
522 | 539 | } |
|
523 | 540 | |
|
524 | 541 | case ZSTD_c_windowLog : |
|
525 | 542 | if (value!=0) /* 0 => use default */ |
|
526 | 543 | BOUNDCHECK(ZSTD_c_windowLog, value); |
|
527 | CCtxParams->cParams.windowLog = value; | |
|
544 | CCtxParams->cParams.windowLog = (U32)value; | |
|
528 | 545 | return CCtxParams->cParams.windowLog; |
|
529 | 546 | |
|
530 | 547 | case ZSTD_c_hashLog : |
|
531 | 548 | if (value!=0) /* 0 => use default */ |
|
532 | 549 | BOUNDCHECK(ZSTD_c_hashLog, value); |
|
533 | CCtxParams->cParams.hashLog = value; | |
|
550 | CCtxParams->cParams.hashLog = (U32)value; | |
|
534 | 551 | return CCtxParams->cParams.hashLog; |
|
535 | 552 | |
|
536 | 553 | case ZSTD_c_chainLog : |
|
537 | 554 | if (value!=0) /* 0 => use default */ |
|
538 | 555 | BOUNDCHECK(ZSTD_c_chainLog, value); |
|
539 | CCtxParams->cParams.chainLog = value; | |
|
556 | CCtxParams->cParams.chainLog = (U32)value; | |
|
540 | 557 | return CCtxParams->cParams.chainLog; |
|
541 | 558 | |
|
542 | 559 | case ZSTD_c_searchLog : |
|
543 | 560 | if (value!=0) /* 0 => use default */ |
|
544 | 561 | BOUNDCHECK(ZSTD_c_searchLog, value); |
|
545 | CCtxParams->cParams.searchLog = value; | |
|
546 | return value; | |
|
562 | CCtxParams->cParams.searchLog = (U32)value; | |
|
563 | return (size_t)value; | |
|
547 | 564 | |
|
548 | 565 | case ZSTD_c_minMatch : |
|
549 | 566 | if (value!=0) /* 0 => use default */ |
@@ -674,6 +691,12 size_t ZSTD_CCtxParams_setParameter(ZSTD | |||
|
674 | 691 | CCtxParams->targetCBlockSize = value; |
|
675 | 692 | return CCtxParams->targetCBlockSize; |
|
676 | 693 | |
|
694 | case ZSTD_c_srcSizeHint : | |
|
695 | if (value!=0) /* 0 ==> default */ | |
|
696 | BOUNDCHECK(ZSTD_c_srcSizeHint, value); | |
|
697 | CCtxParams->srcSizeHint = value; | |
|
698 | return CCtxParams->srcSizeHint; | |
|
699 | ||
|
677 | 700 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); |
|
678 | 701 | } |
|
679 | 702 | } |
@@ -779,6 +802,9 size_t ZSTD_CCtxParams_getParameter( | |||
|
779 | 802 | case ZSTD_c_targetCBlockSize : |
|
780 | 803 | *value = (int)CCtxParams->targetCBlockSize; |
|
781 | 804 | break; |
|
805 | case ZSTD_c_srcSizeHint : | |
|
806 | *value = (int)CCtxParams->srcSizeHint; | |
|
807 | break; | |
|
782 | 808 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); |
|
783 | 809 | } |
|
784 | 810 | return 0; |
@@ -1029,7 +1055,11 ZSTD_adjustCParams(ZSTD_compressionParam | |||
|
1029 | 1055 | ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( |
|
1030 | 1056 | const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) |
|
1031 | 1057 | { |
|
1032 | ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); | |
|
1058 | ZSTD_compressionParameters cParams; | |
|
1059 | if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { | |
|
1060 | srcSizeHint = CCtxParams->srcSizeHint; | |
|
1061 | } | |
|
1062 | cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); | |
|
1033 | 1063 | if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; |
|
1034 | 1064 | if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; |
|
1035 | 1065 | if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; |
@@ -1049,10 +1079,19 ZSTD_sizeof_matchState(const ZSTD_compre | |||
|
1049 | 1079 | size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); |
|
1050 | 1080 | size_t const hSize = ((size_t)1) << cParams->hashLog; |
|
1051 | 1081 | U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; |
|
1052 | size_t const h3Size = ((size_t)1) << hashLog3; | |
|
1053 | size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); | |
|
1054 | size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32) | |
|
1055 | + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t)); | |
|
1082 | size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; | |
|
1083 | /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't | |
|
1084 | * surrounded by redzones in ASAN. */ | |
|
1085 | size_t const tableSpace = chainSize * sizeof(U32) | |
|
1086 | + hSize * sizeof(U32) | |
|
1087 | + h3Size * sizeof(U32); | |
|
1088 | size_t const optPotentialSpace = | |
|
1089 | ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) | |
|
1090 | + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) | |
|
1091 | + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) | |
|
1092 | + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32)) | |
|
1093 | + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) | |
|
1094 | + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); | |
|
1056 | 1095 | size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) |
|
1057 | 1096 | ? optPotentialSpace |
|
1058 | 1097 | : 0; |
@@ -1069,20 +1108,23 size_t ZSTD_estimateCCtxSize_usingCCtxPa | |||
|
1069 | 1108 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); |
|
1070 | 1109 | U32 const divider = (cParams.minMatch==3) ? 3 : 4; |
|
1071 | 1110 | size_t const maxNbSeq = blockSize / divider; |
|
1072 |
size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize |
|
|
1073 | size_t const entropySpace = HUF_WORKSPACE_SIZE; | |
|
1074 | size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); | |
|
1111 | size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) | |
|
1112 | + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) | |
|
1113 | + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); | |
|
1114 | size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); | |
|
1115 | size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); | |
|
1075 | 1116 | size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); |
|
1076 | 1117 | |
|
1077 | 1118 | size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); |
|
1078 | size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); | |
|
1119 | size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); | |
|
1079 | 1120 | |
|
1080 | 1121 | size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + |
|
1081 | 1122 | matchStateSize + ldmSpace + ldmSeqSpace; |
|
1082 | ||
|
1083 | DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); | |
|
1084 |
DEBUGLOG(5, " |
|
|
1085 | return sizeof(ZSTD_CCtx) + neededSpace; | |
|
1123 | size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); | |
|
1124 | ||
|
1125 | DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace); | |
|
1126 | DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); | |
|
1127 | return cctxSpace + neededSpace; | |
|
1086 | 1128 | } |
|
1087 | 1129 | } |
|
1088 | 1130 | |
@@ -1118,7 +1160,8 size_t ZSTD_estimateCStreamSize_usingCCt | |||
|
1118 | 1160 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); |
|
1119 | 1161 | size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; |
|
1120 | 1162 | size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; |
|
1121 |
size_t const streamingSize = inBuffSize |
|
|
1163 | size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) | |
|
1164 | + ZSTD_cwksp_alloc_size(outBuffSize); | |
|
1122 | 1165 | |
|
1123 | 1166 | return CCtxSize + streamingSize; |
|
1124 | 1167 | } |
@@ -1186,17 +1229,6 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) | |||
|
1186 | 1229 | return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ |
|
1187 | 1230 | } |
|
1188 | 1231 | |
|
1189 | ||
|
1190 | ||
|
1191 | static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, | |
|
1192 | ZSTD_compressionParameters cParams2) | |
|
1193 | { | |
|
1194 | return (cParams1.hashLog == cParams2.hashLog) | |
|
1195 | & (cParams1.chainLog == cParams2.chainLog) | |
|
1196 | & (cParams1.strategy == cParams2.strategy) /* opt parser space */ | |
|
1197 | & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */ | |
|
1198 | } | |
|
1199 | ||
|
1200 | 1232 | static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, |
|
1201 | 1233 | ZSTD_compressionParameters cParams2) |
|
1202 | 1234 | { |
@@ -1211,71 +1243,6 static void ZSTD_assertEqualCParams(ZSTD | |||
|
1211 | 1243 | assert(cParams1.strategy == cParams2.strategy); |
|
1212 | 1244 | } |
|
1213 | 1245 | |
|
1214 | /** The parameters are equivalent if ldm is not enabled in both sets or | |
|
1215 | * all the parameters are equivalent. */ | |
|
1216 | static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, | |
|
1217 | ldmParams_t ldmParams2) | |
|
1218 | { | |
|
1219 | return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || | |
|
1220 | (ldmParams1.enableLdm == ldmParams2.enableLdm && | |
|
1221 | ldmParams1.hashLog == ldmParams2.hashLog && | |
|
1222 | ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && | |
|
1223 | ldmParams1.minMatchLength == ldmParams2.minMatchLength && | |
|
1224 | ldmParams1.hashRateLog == ldmParams2.hashRateLog); | |
|
1225 | } | |
|
1226 | ||
|
1227 | typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; | |
|
1228 | ||
|
1229 | /* ZSTD_sufficientBuff() : | |
|
1230 | * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . | |
|
1231 | * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ | |
|
1232 | static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1, | |
|
1233 | size_t maxNbLit1, | |
|
1234 | ZSTD_buffered_policy_e buffPol2, | |
|
1235 | ZSTD_compressionParameters cParams2, | |
|
1236 | U64 pledgedSrcSize) | |
|
1237 | { | |
|
1238 | size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); | |
|
1239 | size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); | |
|
1240 | size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4); | |
|
1241 | size_t const maxNbLit2 = blockSize2; | |
|
1242 | size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; | |
|
1243 | DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u", | |
|
1244 | (U32)neededBufferSize2, (U32)bufferSize1); | |
|
1245 | DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u", | |
|
1246 | (U32)maxNbSeq2, (U32)maxNbSeq1); | |
|
1247 | DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u", | |
|
1248 | (U32)maxNbLit2, (U32)maxNbLit1); | |
|
1249 | return (maxNbLit2 <= maxNbLit1) | |
|
1250 | & (maxNbSeq2 <= maxNbSeq1) | |
|
1251 | & (neededBufferSize2 <= bufferSize1); | |
|
1252 | } | |
|
1253 | ||
|
1254 | /** Equivalence for resetCCtx purposes */ | |
|
1255 | static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, | |
|
1256 | ZSTD_CCtx_params params2, | |
|
1257 | size_t buffSize1, | |
|
1258 | size_t maxNbSeq1, size_t maxNbLit1, | |
|
1259 | ZSTD_buffered_policy_e buffPol2, | |
|
1260 | U64 pledgedSrcSize) | |
|
1261 | { | |
|
1262 | DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); | |
|
1263 | if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) { | |
|
1264 | DEBUGLOG(4, "ZSTD_equivalentCParams() == 0"); | |
|
1265 | return 0; | |
|
1266 | } | |
|
1267 | if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) { | |
|
1268 | DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0"); | |
|
1269 | return 0; | |
|
1270 | } | |
|
1271 | if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2, | |
|
1272 | params2.cParams, pledgedSrcSize)) { | |
|
1273 | DEBUGLOG(4, "ZSTD_sufficientBuff() == 0"); | |
|
1274 | return 0; | |
|
1275 | } | |
|
1276 | return 1; | |
|
1277 | } | |
|
1278 | ||
|
1279 | 1246 | static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) |
|
1280 | 1247 | { |
|
1281 | 1248 | int i; |
@@ -1301,87 +1268,104 static void ZSTD_invalidateMatchState(ZS | |||
|
1301 | 1268 | ms->dictMatchState = NULL; |
|
1302 | 1269 | } |
|
1303 | 1270 | |
|
1304 | /*! ZSTD_continueCCtx() : | |
|
1305 | * reuse CCtx without reset (note : requires no dictionary) */ | |
|
1306 | static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) | |
|
1307 | { | |
|
1308 | size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); | |
|
1309 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); | |
|
1310 | DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); | |
|
1311 | ||
|
1312 | cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ | |
|
1313 | cctx->appliedParams = params; | |
|
1314 | cctx->blockState.matchState.cParams = params.cParams; | |
|
1315 | cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; | |
|
1316 | cctx->consumedSrcSize = 0; | |
|
1317 | cctx->producedCSize = 0; | |
|
1318 | if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) | |
|
1319 | cctx->appliedParams.fParams.contentSizeFlag = 0; | |
|
1320 | DEBUGLOG(4, "pledged content size : %u ; flag : %u", | |
|
1321 | (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); | |
|
1322 | cctx->stage = ZSTDcs_init; | |
|
1323 | cctx->dictID = 0; | |
|
1324 | if (params.ldmParams.enableLdm) | |
|
1325 | ZSTD_window_clear(&cctx->ldmState.window); | |
|
1326 | ZSTD_referenceExternalSequences(cctx, NULL, 0); | |
|
1327 | ZSTD_invalidateMatchState(&cctx->blockState.matchState); | |
|
1328 | ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); | |
|
1329 | XXH64_reset(&cctx->xxhState, 0); | |
|
1330 | return 0; | |
|
1331 | } | |
|
1332 | ||
|
1333 | typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; | |
|
1334 | ||
|
1335 | typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; | |
|
1336 | ||
|
1337 | static void* | |
|
1271 | /** | |
|
1272 | * Indicates whether this compression proceeds directly from user-provided | |
|
1273 | * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or | |
|
1274 | * whether the context needs to buffer the input/output (ZSTDb_buffered). | |
|
1275 | */ | |
|
1276 | typedef enum { | |
|
1277 | ZSTDb_not_buffered, | |
|
1278 | ZSTDb_buffered | |
|
1279 | } ZSTD_buffered_policy_e; | |
|
1280 | ||
|
1281 | /** | |
|
1282 | * Controls, for this matchState reset, whether the tables need to be cleared / | |
|
1283 | * prepared for the coming compression (ZSTDcrp_makeClean), or whether the | |
|
1284 | * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a | |
|
1285 | * subsequent operation will overwrite the table space anyways (e.g., copying | |
|
1286 | * the matchState contents in from a CDict). | |
|
1287 | */ | |
|
1288 | typedef enum { | |
|
1289 | ZSTDcrp_makeClean, | |
|
1290 | ZSTDcrp_leaveDirty | |
|
1291 | } ZSTD_compResetPolicy_e; | |
|
1292 | ||
|
1293 | /** | |
|
1294 | * Controls, for this matchState reset, whether indexing can continue where it | |
|
1295 | * left off (ZSTDirp_continue), or whether it needs to be restarted from zero | |
|
1296 | * (ZSTDirp_reset). | |
|
1297 | */ | |
|
1298 | typedef enum { | |
|
1299 | ZSTDirp_continue, | |
|
1300 | ZSTDirp_reset | |
|
1301 | } ZSTD_indexResetPolicy_e; | |
|
1302 | ||
|
1303 | typedef enum { | |
|
1304 | ZSTD_resetTarget_CDict, | |
|
1305 | ZSTD_resetTarget_CCtx | |
|
1306 | } ZSTD_resetTarget_e; | |
|
1307 | ||
|
1308 | static size_t | |
|
1338 | 1309 | ZSTD_reset_matchState(ZSTD_matchState_t* ms, |
|
1339 |
|
|
|
1310 | ZSTD_cwksp* ws, | |
|
1340 | 1311 | const ZSTD_compressionParameters* cParams, |
|
1341 |
|
|
|
1312 | const ZSTD_compResetPolicy_e crp, | |
|
1313 | const ZSTD_indexResetPolicy_e forceResetIndex, | |
|
1314 | const ZSTD_resetTarget_e forWho) | |
|
1342 | 1315 | { |
|
1343 | 1316 | size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); |
|
1344 | 1317 | size_t const hSize = ((size_t)1) << cParams->hashLog; |
|
1345 | 1318 | U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; |
|
1346 | size_t const h3Size = ((size_t)1) << hashLog3; | |
|
1347 | size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); | |
|
1348 | ||
|
1349 | assert(((size_t)ptr & 3) == 0); | |
|
1319 | size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; | |
|
1320 | ||
|
1321 | DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); | |
|
1322 | if (forceResetIndex == ZSTDirp_reset) { | |
|
1323 | memset(&ms->window, 0, sizeof(ms->window)); | |
|
1324 | ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ | |
|
1325 | ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ | |
|
1326 | ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ | |
|
1327 | ZSTD_cwksp_mark_tables_dirty(ws); | |
|
1328 | } | |
|
1350 | 1329 | |
|
1351 | 1330 | ms->hashLog3 = hashLog3; |
|
1352 | memset(&ms->window, 0, sizeof(ms->window)); | |
|
1353 | ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ | |
|
1354 | ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ | |
|
1355 | ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ | |
|
1331 | ||
|
1356 | 1332 | ZSTD_invalidateMatchState(ms); |
|
1357 | 1333 | |
|
1334 | assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ | |
|
1335 | ||
|
1336 | ZSTD_cwksp_clear_tables(ws); | |
|
1337 | ||
|
1338 | DEBUGLOG(5, "reserving table space"); | |
|
1339 | /* table Space */ | |
|
1340 | ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); | |
|
1341 | ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); | |
|
1342 | ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); | |
|
1343 | RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, | |
|
1344 | "failed a workspace allocation in ZSTD_reset_matchState"); | |
|
1345 | ||
|
1346 | DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); | |
|
1347 | if (crp!=ZSTDcrp_leaveDirty) { | |
|
1348 | /* reset tables only */ | |
|
1349 | ZSTD_cwksp_clean_tables(ws); | |
|
1350 | } | |
|
1351 | ||
|
1358 | 1352 | /* opt parser space */ |
|
1359 | 1353 | if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { |
|
1360 | 1354 | DEBUGLOG(4, "reserving optimal parser space"); |
|
1361 | ms->opt.litFreq = (unsigned*)ptr; | |
|
1362 | ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); | |
|
1363 | ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); | |
|
1364 | ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); | |
|
1365 | ptr = ms->opt.offCodeFreq + (MaxOff+1); | |
|
1366 | ms->opt.matchTable = (ZSTD_match_t*)ptr; | |
|
1367 | ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; | |
|
1368 | ms->opt.priceTable = (ZSTD_optimal_t*)ptr; | |
|
1369 | ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; | |
|
1355 | ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned)); | |
|
1356 | ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); | |
|
1357 | ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); | |
|
1358 | ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); | |
|
1359 | ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); | |
|
1360 | ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); | |
|
1370 | 1361 | } |
|
1371 | 1362 | |
|
1372 | /* table Space */ | |
|
1373 | DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); | |
|
1374 | assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ | |
|
1375 | if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ | |
|
1376 | ms->hashTable = (U32*)(ptr); | |
|
1377 | ms->chainTable = ms->hashTable + hSize; | |
|
1378 | ms->hashTable3 = ms->chainTable + chainSize; | |
|
1379 | ptr = ms->hashTable3 + h3Size; | |
|
1380 | ||
|
1381 | 1363 | ms->cParams = *cParams; |
|
1382 | 1364 | |
|
1383 | assert(((size_t)ptr & 3) == 0); | |
|
1384 | return ptr; | |
|
1365 | RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, | |
|
1366 | "failed a workspace allocation in ZSTD_reset_matchState"); | |
|
1367 | ||
|
1368 | return 0; | |
|
1385 | 1369 | } |
|
1386 | 1370 | |
|
1387 | 1371 | /* ZSTD_indexTooCloseToMax() : |
@@ -1397,13 +1381,6 static int ZSTD_indexTooCloseToMax(ZSTD_ | |||
|
1397 | 1381 | return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); |
|
1398 | 1382 | } |
|
1399 | 1383 | |
|
1400 | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ | |
|
1401 | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large | |
|
1402 | * during at least this number of times, | |
|
1403 | * context's memory usage is considered wasteful, | |
|
1404 | * because it's sized to handle a worst case scenario which rarely happens. | |
|
1405 | * In which case, resize it down to free some memory */ | |
|
1406 | ||
|
1407 | 1384 | /*! ZSTD_resetCCtx_internal() : |
|
1408 | 1385 | note : `params` are assumed fully validated at this stage */ |
|
1409 | 1386 | static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, |
@@ -1412,30 +1389,12 static size_t ZSTD_resetCCtx_internal(ZS | |||
|
1412 | 1389 | ZSTD_compResetPolicy_e const crp, |
|
1413 | 1390 | ZSTD_buffered_policy_e const zbuff) |
|
1414 | 1391 | { |
|
1392 | ZSTD_cwksp* const ws = &zc->workspace; | |
|
1415 | 1393 | DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", |
|
1416 | 1394 | (U32)pledgedSrcSize, params.cParams.windowLog); |
|
1417 | 1395 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); |
|
1418 | 1396 | |
|
1419 | if (crp == ZSTDcrp_continue) { | |
|
1420 | if (ZSTD_equivalentParams(zc->appliedParams, params, | |
|
1421 | zc->inBuffSize, | |
|
1422 | zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, | |
|
1423 | zbuff, pledgedSrcSize) ) { | |
|
1424 | DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); | |
|
1425 | zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ | |
|
1426 | if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { | |
|
1427 | DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", | |
|
1428 | zc->appliedParams.cParams.windowLog, zc->blockSize); | |
|
1429 | if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { | |
|
1430 | /* prefer a reset, faster than a rescale */ | |
|
1431 | ZSTD_reset_matchState(&zc->blockState.matchState, | |
|
1432 | zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, | |
|
1433 | ¶ms.cParams, | |
|
1434 | crp, ZSTD_resetTarget_CCtx); | |
|
1435 | } | |
|
1436 | return ZSTD_continueCCtx(zc, params, pledgedSrcSize); | |
|
1437 | } } } | |
|
1438 | DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); | |
|
1397 | zc->isFirstBlock = 1; | |
|
1439 | 1398 | |
|
1440 | 1399 | if (params.ldmParams.enableLdm) { |
|
1441 | 1400 | /* Adjust long distance matching parameters */ |
@@ -1449,58 +1408,74 static size_t ZSTD_resetCCtx_internal(ZS | |||
|
1449 | 1408 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); |
|
1450 | 1409 | U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; |
|
1451 | 1410 | size_t const maxNbSeq = blockSize / divider; |
|
1452 |
size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize |
|
|
1411 | size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) | |
|
1412 | + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) | |
|
1413 | + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); | |
|
1453 | 1414 | size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; |
|
1454 | 1415 | size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; |
|
1455 | 1416 | size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); |
|
1456 | 1417 | size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); |
|
1457 | void* ptr; /* used to partition workSpace */ | |
|
1458 | ||
|
1459 | /* Check if workSpace is large enough, alloc a new one if needed */ | |
|
1460 | { size_t const entropySpace = HUF_WORKSPACE_SIZE; | |
|
1461 | size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); | |
|
1462 | size_t const bufferSpace = buffInSize + buffOutSize; | |
|
1418 | ||
|
1419 | ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue; | |
|
1420 | ||
|
1421 | if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { | |
|
1422 | needsIndexReset = ZSTDirp_reset; | |
|
1423 | } | |
|
1424 | ||
|
1425 | ZSTD_cwksp_bump_oversized_duration(ws, 0); | |
|
1426 | ||
|
1427 | /* Check if workspace is large enough, alloc a new one if needed */ | |
|
1428 | { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; | |
|
1429 | size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); | |
|
1430 | size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); | |
|
1431 | size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); | |
|
1463 | 1432 | size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); |
|
1464 | size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); | |
|
1465 | ||
|
1466 |
size_t const neededSpace = |
|
|
1467 | ldmSeqSpace + matchStateSize + tokenSpace + | |
|
1468 | bufferSpace; | |
|
1469 | ||
|
1470 | int const workSpaceTooSmall = zc->workSpaceSize < neededSpace; | |
|
1471 | int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace; | |
|
1472 | int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION); | |
|
1473 | zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0; | |
|
1433 | size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); | |
|
1434 | ||
|
1435 | size_t const neededSpace = | |
|
1436 | cctxSpace + | |
|
1437 | entropySpace + | |
|
1438 | blockStateSpace + | |
|
1439 | ldmSpace + | |
|
1440 | ldmSeqSpace + | |
|
1441 | matchStateSize + | |
|
1442 | tokenSpace + | |
|
1443 | bufferSpace; | |
|
1444 | ||
|
1445 | int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; | |
|
1446 | int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); | |
|
1474 | 1447 | |
|
1475 | 1448 | DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", |
|
1476 | 1449 | neededSpace>>10, matchStateSize>>10, bufferSpace>>10); |
|
1477 | 1450 | DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); |
|
1478 | 1451 | |
|
1479 |
if (work |
|
|
1480 |
DEBUGLOG(4, "Resize work |
|
|
1481 |
|
|
|
1452 | if (workspaceTooSmall || workspaceWasteful) { | |
|
1453 | DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", | |
|
1454 | ZSTD_cwksp_sizeof(ws) >> 10, | |
|
1482 | 1455 | neededSpace >> 10); |
|
1483 | 1456 | |
|
1484 | 1457 | RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); |
|
1485 | 1458 | |
|
1486 | zc->workSpaceSize = 0; | |
|
1487 | ZSTD_free(zc->workSpace, zc->customMem); | |
|
1488 |
|
|
|
1489 | RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); | |
|
1490 | zc->workSpaceSize = neededSpace; | |
|
1491 | zc->workSpaceOversizedDuration = 0; | |
|
1492 | ||
|
1459 | needsIndexReset = ZSTDirp_reset; | |
|
1460 | ||
|
1461 | ZSTD_cwksp_free(ws, zc->customMem); | |
|
1462 | FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem)); | |
|
1463 | ||
|
1464 | DEBUGLOG(5, "reserving object space"); | |
|
1493 | 1465 | /* Statically sized space. |
|
1494 | 1466 | * entropyWorkspace never moves, |
|
1495 | 1467 | * though prev/next block swap places */ |
|
1496 | assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ | |
|
1497 | assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); | |
|
1498 | zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; | |
|
1499 | zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; | |
|
1500 | ptr = zc->blockState.nextCBlock + 1; | |
|
1501 |
zc->entropyWorkspace = (U32*) |
|
|
1468 | assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); | |
|
1469 | zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); | |
|
1470 | RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); | |
|
1471 | zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); | |
|
1472 | RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); | |
|
1473 | zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); | |
|
1474 | RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); | |
|
1502 | 1475 | } } |
|
1503 | 1476 | |
|
1477 | ZSTD_cwksp_clear(ws); | |
|
1478 | ||
|
1504 | 1479 | /* init params */ |
|
1505 | 1480 | zc->appliedParams = params; |
|
1506 | 1481 | zc->blockState.matchState.cParams = params.cParams; |
@@ -1519,58 +1494,58 static size_t ZSTD_resetCCtx_internal(ZS | |||
|
1519 | 1494 | |
|
1520 | 1495 | ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); |
|
1521 | 1496 | |
|
1522 | ptr = ZSTD_reset_matchState(&zc->blockState.matchState, | |
|
1523 | zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, | |
|
1524 | ¶ms.cParams, | |
|
1525 | crp, ZSTD_resetTarget_CCtx); | |
|
1497 | /* ZSTD_wildcopy() is used to copy into the literals buffer, | |
|
1498 | * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. | |
|
1499 | */ | |
|
1500 | zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); | |
|
1501 | zc->seqStore.maxNbLit = blockSize; | |
|
1502 | ||
|
1503 | /* buffers */ | |
|
1504 | zc->inBuffSize = buffInSize; | |
|
1505 | zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); | |
|
1506 | zc->outBuffSize = buffOutSize; | |
|
1507 | zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); | |
|
1508 | ||
|
1509 | /* ldm bucketOffsets table */ | |
|
1510 | if (params.ldmParams.enableLdm) { | |
|
1511 | /* TODO: avoid memset? */ | |
|
1512 | size_t const ldmBucketSize = | |
|
1513 | ((size_t)1) << (params.ldmParams.hashLog - | |
|
1514 | params.ldmParams.bucketSizeLog); | |
|
1515 | zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); | |
|
1516 | memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); | |
|
1517 | } | |
|
1518 | ||
|
1519 | /* sequences storage */ | |
|
1520 | ZSTD_referenceExternalSequences(zc, NULL, 0); | |
|
1521 | zc->seqStore.maxNbSeq = maxNbSeq; | |
|
1522 | zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); | |
|
1523 | zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); | |
|
1524 | zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); | |
|
1525 | zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); | |
|
1526 | ||
|
1527 | FORWARD_IF_ERROR(ZSTD_reset_matchState( | |
|
1528 | &zc->blockState.matchState, | |
|
1529 | ws, | |
|
1530 | ¶ms.cParams, | |
|
1531 | crp, | |
|
1532 | needsIndexReset, | |
|
1533 | ZSTD_resetTarget_CCtx)); | |
|
1526 | 1534 | |
|
1527 | 1535 | /* ldm hash table */ |
|
1528 | /* initialize bucketOffsets table later for pointer alignment */ | |
|
1529 | 1536 | if (params.ldmParams.enableLdm) { |
|
1537 | /* TODO: avoid memset? */ | |
|
1530 | 1538 | size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; |
|
1531 |
|
|
|
1532 | assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ | |
|
1533 | zc->ldmState.hashTable = (ldmEntry_t*)ptr; | |
|
1534 | ptr = zc->ldmState.hashTable + ldmHSize; | |
|
1535 | zc->ldmSequences = (rawSeq*)ptr; | |
|
1536 | ptr = zc->ldmSequences + maxNbLdmSeq; | |
|
1539 | zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); | |
|
1540 | memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); | |
|
1541 | zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); | |
|
1537 | 1542 | zc->maxNbLdmSequences = maxNbLdmSeq; |
|
1538 | 1543 | |
|
1539 | 1544 | memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); |
|
1540 | } | |
|
1541 | assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ | |
|
1542 | ||
|
1543 | /* sequences storage */ | |
|
1544 | zc->seqStore.maxNbSeq = maxNbSeq; | |
|
1545 | zc->seqStore.sequencesStart = (seqDef*)ptr; | |
|
1546 | ptr = zc->seqStore.sequencesStart + maxNbSeq; | |
|
1547 | zc->seqStore.llCode = (BYTE*) ptr; | |
|
1548 | zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; | |
|
1549 | zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; | |
|
1550 | zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; | |
|
1551 | /* ZSTD_wildcopy() is used to copy into the literals buffer, | |
|
1552 | * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. | |
|
1553 | */ | |
|
1554 | zc->seqStore.maxNbLit = blockSize; | |
|
1555 | ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH; | |
|
1556 | ||
|
1557 | /* ldm bucketOffsets table */ | |
|
1558 | if (params.ldmParams.enableLdm) { | |
|
1559 | size_t const ldmBucketSize = | |
|
1560 | ((size_t)1) << (params.ldmParams.hashLog - | |
|
1561 | params.ldmParams.bucketSizeLog); | |
|
1562 | memset(ptr, 0, ldmBucketSize); | |
|
1563 | zc->ldmState.bucketOffsets = (BYTE*)ptr; | |
|
1564 | ptr = zc->ldmState.bucketOffsets + ldmBucketSize; | |
|
1565 | 1545 | ZSTD_window_clear(&zc->ldmState.window); |
|
1566 | 1546 | } |
|
1567 | ZSTD_referenceExternalSequences(zc, NULL, 0); | |
|
1568 | ||
|
1569 | /* buffers */ | |
|
1570 | zc->inBuffSize = buffInSize; | |
|
1571 | zc->inBuff = (char*)ptr; | |
|
1572 | zc->outBuffSize = buffOutSize; | |
|
1573 | zc->outBuff = zc->inBuff + buffInSize; | |
|
1547 | ||
|
1548 | DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); | |
|
1574 | 1549 | |
|
1575 | 1550 | return 0; |
|
1576 | 1551 | } |
@@ -1604,15 +1579,15 static const size_t attachDictSizeCutoff | |||
|
1604 | 1579 | }; |
|
1605 | 1580 | |
|
1606 | 1581 | static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, |
|
1607 | ZSTD_CCtx_params params, | |
|
1582 | const ZSTD_CCtx_params* params, | |
|
1608 | 1583 | U64 pledgedSrcSize) |
|
1609 | 1584 | { |
|
1610 | 1585 | size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; |
|
1611 | 1586 | return ( pledgedSrcSize <= cutoff |
|
1612 | 1587 | || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN |
|
1613 |
|| params |
|
|
1614 |
&& params |
|
|
1615 |
&& !params |
|
|
1588 | || params->attachDictPref == ZSTD_dictForceAttach ) | |
|
1589 | && params->attachDictPref != ZSTD_dictForceCopy | |
|
1590 | && !params->forceWindow; /* dictMatchState isn't correctly | |
|
1616 | 1591 | * handled in _enforceMaxDist */ |
|
1617 | 1592 | } |
|
1618 | 1593 | |
@@ -1630,8 +1605,8 ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCt | |||
|
1630 | 1605 | * has its own tables. */ |
|
1631 | 1606 | params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); |
|
1632 | 1607 | params.cParams.windowLog = windowLog; |
|
1633 | ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
|
1634 |
ZSTDcrp_ |
|
|
1608 | FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
|
1609 | ZSTDcrp_makeClean, zbuff)); | |
|
1635 | 1610 | assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); |
|
1636 | 1611 | } |
|
1637 | 1612 | |
@@ -1679,30 +1654,36 static size_t ZSTD_resetCCtx_byCopyingCD | |||
|
1679 | 1654 | /* Copy only compression parameters related to tables. */ |
|
1680 | 1655 | params.cParams = *cdict_cParams; |
|
1681 | 1656 | params.cParams.windowLog = windowLog; |
|
1682 | ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
|
1683 |
ZSTDcrp_ |
|
|
1657 | FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
|
1658 | ZSTDcrp_leaveDirty, zbuff)); | |
|
1684 | 1659 | assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); |
|
1685 | 1660 | assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); |
|
1686 | 1661 | assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); |
|
1687 | 1662 | } |
|
1688 | 1663 | |
|
1664 | ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); | |
|
1665 | ||
|
1689 | 1666 | /* copy tables */ |
|
1690 | 1667 | { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); |
|
1691 | 1668 | size_t const hSize = (size_t)1 << cdict_cParams->hashLog; |
|
1692 | size_t const tableSpace = (chainSize + hSize) * sizeof(U32); | |
|
1693 | assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ | |
|
1694 | assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); | |
|
1695 | assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ | |
|
1696 | assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); | |
|
1697 | memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ | |
|
1669 | ||
|
1670 | memcpy(cctx->blockState.matchState.hashTable, | |
|
1671 | cdict->matchState.hashTable, | |
|
1672 | hSize * sizeof(U32)); | |
|
1673 | memcpy(cctx->blockState.matchState.chainTable, | |
|
1674 | cdict->matchState.chainTable, | |
|
1675 | chainSize * sizeof(U32)); | |
|
1698 | 1676 | } |
|
1699 | 1677 | |
|
1700 | 1678 | /* Zero the hashTable3, since the cdict never fills it */ |
|
1701 |
{ |
|
|
1679 | { int const h3log = cctx->blockState.matchState.hashLog3; | |
|
1680 | size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; | |
|
1702 | 1681 | assert(cdict->matchState.hashLog3 == 0); |
|
1703 | 1682 | memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); |
|
1704 | 1683 | } |
|
1705 | 1684 | |
|
1685 | ZSTD_cwksp_mark_tables_clean(&cctx->workspace); | |
|
1686 | ||
|
1706 | 1687 | /* copy dictionary offsets */ |
|
1707 | 1688 | { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; |
|
1708 | 1689 | ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; |
@@ -1724,7 +1705,7 static size_t ZSTD_resetCCtx_byCopyingCD | |||
|
1724 | 1705 | * in-place. We decide here which strategy to use. */ |
|
1725 | 1706 | static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, |
|
1726 | 1707 | const ZSTD_CDict* cdict, |
|
1727 | ZSTD_CCtx_params params, | |
|
1708 | const ZSTD_CCtx_params* params, | |
|
1728 | 1709 | U64 pledgedSrcSize, |
|
1729 | 1710 | ZSTD_buffered_policy_e zbuff) |
|
1730 | 1711 | { |
@@ -1734,10 +1715,10 static size_t ZSTD_resetCCtx_usingCDict( | |||
|
1734 | 1715 | |
|
1735 | 1716 | if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { |
|
1736 | 1717 | return ZSTD_resetCCtx_byAttachingCDict( |
|
1737 | cctx, cdict, params, pledgedSrcSize, zbuff); | |
|
1718 | cctx, cdict, *params, pledgedSrcSize, zbuff); | |
|
1738 | 1719 | } else { |
|
1739 | 1720 | return ZSTD_resetCCtx_byCopyingCDict( |
|
1740 | cctx, cdict, params, pledgedSrcSize, zbuff); | |
|
1721 | cctx, cdict, *params, pledgedSrcSize, zbuff); | |
|
1741 | 1722 | } |
|
1742 | 1723 | } |
|
1743 | 1724 | |
@@ -1763,7 +1744,7 static size_t ZSTD_copyCCtx_internal(ZST | |||
|
1763 | 1744 | params.cParams = srcCCtx->appliedParams.cParams; |
|
1764 | 1745 | params.fParams = fParams; |
|
1765 | 1746 | ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, |
|
1766 |
ZSTDcrp_ |
|
|
1747 | ZSTDcrp_leaveDirty, zbuff); | |
|
1767 | 1748 | assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); |
|
1768 | 1749 | assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); |
|
1769 | 1750 | assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); |
@@ -1771,16 +1752,27 static size_t ZSTD_copyCCtx_internal(ZST | |||
|
1771 | 1752 | assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); |
|
1772 | 1753 | } |
|
1773 | 1754 | |
|
1755 | ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); | |
|
1756 | ||
|
1774 | 1757 | /* copy tables */ |
|
1775 | 1758 | { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); |
|
1776 | 1759 | size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; |
|
1777 |
|
|
|
1778 | size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); | |
|
1779 | assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ | |
|
1780 | assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); | |
|
1781 | memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ | |
|
1760 | int const h3log = srcCCtx->blockState.matchState.hashLog3; | |
|
1761 | size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; | |
|
1762 | ||
|
1763 | memcpy(dstCCtx->blockState.matchState.hashTable, | |
|
1764 | srcCCtx->blockState.matchState.hashTable, | |
|
1765 | hSize * sizeof(U32)); | |
|
1766 | memcpy(dstCCtx->blockState.matchState.chainTable, | |
|
1767 | srcCCtx->blockState.matchState.chainTable, | |
|
1768 | chainSize * sizeof(U32)); | |
|
1769 | memcpy(dstCCtx->blockState.matchState.hashTable3, | |
|
1770 | srcCCtx->blockState.matchState.hashTable3, | |
|
1771 | h3Size * sizeof(U32)); | |
|
1782 | 1772 | } |
|
1783 | 1773 | |
|
1774 | ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); | |
|
1775 | ||
|
1784 | 1776 | /* copy dictionary offsets */ |
|
1785 | 1777 | { |
|
1786 | 1778 | const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; |
@@ -1831,6 +1823,20 ZSTD_reduceTable_internal (U32* const ta | |||
|
1831 | 1823 | int rowNb; |
|
1832 | 1824 | assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ |
|
1833 | 1825 | assert(size < (1U<<31)); /* can be casted to int */ |
|
1826 | ||
|
1827 | #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) | |
|
1828 | /* To validate that the table re-use logic is sound, and that we don't | |
|
1829 | * access table space that we haven't cleaned, we re-"poison" the table | |
|
1830 | * space every time we mark it dirty. | |
|
1831 | * | |
|
1832 | * This function however is intended to operate on those dirty tables and | |
|
1833 | * re-clean them. So when this function is used correctly, we can unpoison | |
|
1834 | * the memory it operated on. This introduces a blind spot though, since | |
|
1835 | * if we now try to operate on __actually__ poisoned memory, we will not | |
|
1836 | * detect that. */ | |
|
1837 | __msan_unpoison(table, size * sizeof(U32)); | |
|
1838 | #endif | |
|
1839 | ||
|
1834 | 1840 | for (rowNb=0 ; rowNb < nbRows ; rowNb++) { |
|
1835 | 1841 | int column; |
|
1836 | 1842 | for (column=0; column<ZSTD_ROWSIZE; column++) { |
@@ -1938,7 +1944,7 ZSTD_compressSequences_internal(seqStore | |||
|
1938 | 1944 | ZSTD_entropyCTables_t* nextEntropy, |
|
1939 | 1945 | const ZSTD_CCtx_params* cctxParams, |
|
1940 | 1946 | void* dst, size_t dstCapacity, |
|
1941 |
void* |
|
|
1947 | void* entropyWorkspace, size_t entropyWkspSize, | |
|
1942 | 1948 | const int bmi2) |
|
1943 | 1949 | { |
|
1944 | 1950 | const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; |
@@ -1971,7 +1977,7 ZSTD_compressSequences_internal(seqStore | |||
|
1971 | 1977 | ZSTD_disableLiteralsCompression(cctxParams), |
|
1972 | 1978 | op, dstCapacity, |
|
1973 | 1979 | literals, litSize, |
|
1974 |
|
|
|
1980 | entropyWorkspace, entropyWkspSize, | |
|
1975 | 1981 | bmi2); |
|
1976 | 1982 | FORWARD_IF_ERROR(cSize); |
|
1977 | 1983 | assert(cSize <= dstCapacity); |
@@ -1981,12 +1987,17 ZSTD_compressSequences_internal(seqStore | |||
|
1981 | 1987 | /* Sequences Header */ |
|
1982 | 1988 | RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, |
|
1983 | 1989 | dstSize_tooSmall); |
|
1984 |
if (nbSeq < |
|
|
1990 | if (nbSeq < 128) { | |
|
1985 | 1991 | *op++ = (BYTE)nbSeq; |
|
1986 | else if (nbSeq < LONGNBSEQ) | |
|
1987 |
op[0] = (BYTE)((nbSeq>>8) + 0x80) |
|
|
1988 | else | |
|
1989 | op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; | |
|
1992 | } else if (nbSeq < LONGNBSEQ) { | |
|
1993 | op[0] = (BYTE)((nbSeq>>8) + 0x80); | |
|
1994 | op[1] = (BYTE)nbSeq; | |
|
1995 | op+=2; | |
|
1996 | } else { | |
|
1997 | op[0]=0xFF; | |
|
1998 | MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); | |
|
1999 | op+=3; | |
|
2000 | } | |
|
1990 | 2001 | assert(op <= oend); |
|
1991 | 2002 | if (nbSeq==0) { |
|
1992 | 2003 | /* Copy the old tables over as if we repeated them */ |
@@ -2002,7 +2013,7 ZSTD_compressSequences_internal(seqStore | |||
|
2002 | 2013 | ZSTD_seqToCodes(seqStorePtr); |
|
2003 | 2014 | /* build CTable for Literal Lengths */ |
|
2004 | 2015 | { unsigned max = MaxLL; |
|
2005 |
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, |
|
|
2016 | size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ | |
|
2006 | 2017 | DEBUGLOG(5, "Building LL table"); |
|
2007 | 2018 | nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; |
|
2008 | 2019 | LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, |
@@ -2012,10 +2023,14 ZSTD_compressSequences_internal(seqStore | |||
|
2012 | 2023 | ZSTD_defaultAllowed, strategy); |
|
2013 | 2024 | assert(set_basic < set_compressed && set_rle < set_compressed); |
|
2014 | 2025 | assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2015 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, | |
|
2016 | count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, | |
|
2017 | prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), | |
|
2018 | workspace, wkspSize); | |
|
2026 | { size_t const countSize = ZSTD_buildCTable( | |
|
2027 | op, (size_t)(oend - op), | |
|
2028 | CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, | |
|
2029 | count, max, llCodeTable, nbSeq, | |
|
2030 | LL_defaultNorm, LL_defaultNormLog, MaxLL, | |
|
2031 | prevEntropy->fse.litlengthCTable, | |
|
2032 | sizeof(prevEntropy->fse.litlengthCTable), | |
|
2033 | entropyWorkspace, entropyWkspSize); | |
|
2019 | 2034 | FORWARD_IF_ERROR(countSize); |
|
2020 | 2035 | if (LLtype == set_compressed) |
|
2021 | 2036 | lastNCount = op; |
@@ -2024,7 +2039,8 ZSTD_compressSequences_internal(seqStore | |||
|
2024 | 2039 | } } |
|
2025 | 2040 | /* build CTable for Offsets */ |
|
2026 | 2041 | { unsigned max = MaxOff; |
|
2027 |
size_t const mostFrequent = HIST_countFast_wksp( |
|
|
2042 | size_t const mostFrequent = HIST_countFast_wksp( | |
|
2043 | count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ | |
|
2028 | 2044 | /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ |
|
2029 | 2045 | ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; |
|
2030 | 2046 | DEBUGLOG(5, "Building OF table"); |
@@ -2035,10 +2051,14 ZSTD_compressSequences_internal(seqStore | |||
|
2035 | 2051 | OF_defaultNorm, OF_defaultNormLog, |
|
2036 | 2052 | defaultPolicy, strategy); |
|
2037 | 2053 | assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2038 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, | |
|
2039 | count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, | |
|
2040 | prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), | |
|
2041 | workspace, wkspSize); | |
|
2054 | { size_t const countSize = ZSTD_buildCTable( | |
|
2055 | op, (size_t)(oend - op), | |
|
2056 | CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, | |
|
2057 | count, max, ofCodeTable, nbSeq, | |
|
2058 | OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, | |
|
2059 | prevEntropy->fse.offcodeCTable, | |
|
2060 | sizeof(prevEntropy->fse.offcodeCTable), | |
|
2061 | entropyWorkspace, entropyWkspSize); | |
|
2042 | 2062 | FORWARD_IF_ERROR(countSize); |
|
2043 | 2063 | if (Offtype == set_compressed) |
|
2044 | 2064 | lastNCount = op; |
@@ -2047,7 +2067,8 ZSTD_compressSequences_internal(seqStore | |||
|
2047 | 2067 | } } |
|
2048 | 2068 | /* build CTable for MatchLengths */ |
|
2049 | 2069 | { unsigned max = MaxML; |
|
2050 |
size_t const mostFrequent = HIST_countFast_wksp( |
|
|
2070 | size_t const mostFrequent = HIST_countFast_wksp( | |
|
2071 | count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ | |
|
2051 | 2072 | DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); |
|
2052 | 2073 | nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; |
|
2053 | 2074 | MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, |
@@ -2056,10 +2077,14 ZSTD_compressSequences_internal(seqStore | |||
|
2056 | 2077 | ML_defaultNorm, ML_defaultNormLog, |
|
2057 | 2078 | ZSTD_defaultAllowed, strategy); |
|
2058 | 2079 | assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2059 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, | |
|
2060 | count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, | |
|
2061 | prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), | |
|
2062 | workspace, wkspSize); | |
|
2080 | { size_t const countSize = ZSTD_buildCTable( | |
|
2081 | op, (size_t)(oend - op), | |
|
2082 | CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, | |
|
2083 | count, max, mlCodeTable, nbSeq, | |
|
2084 | ML_defaultNorm, ML_defaultNormLog, MaxML, | |
|
2085 | prevEntropy->fse.matchlengthCTable, | |
|
2086 | sizeof(prevEntropy->fse.matchlengthCTable), | |
|
2087 | entropyWorkspace, entropyWkspSize); | |
|
2063 | 2088 | FORWARD_IF_ERROR(countSize); |
|
2064 | 2089 | if (MLtype == set_compressed) |
|
2065 | 2090 | lastNCount = op; |
@@ -2107,13 +2132,13 ZSTD_compressSequences(seqStore_t* seqSt | |||
|
2107 | 2132 | const ZSTD_CCtx_params* cctxParams, |
|
2108 | 2133 | void* dst, size_t dstCapacity, |
|
2109 | 2134 | size_t srcSize, |
|
2110 |
void* |
|
|
2135 | void* entropyWorkspace, size_t entropyWkspSize, | |
|
2111 | 2136 | int bmi2) |
|
2112 | 2137 | { |
|
2113 | 2138 | size_t const cSize = ZSTD_compressSequences_internal( |
|
2114 | 2139 | seqStorePtr, prevEntropy, nextEntropy, cctxParams, |
|
2115 | 2140 | dst, dstCapacity, |
|
2116 |
|
|
|
2141 | entropyWorkspace, entropyWkspSize, bmi2); | |
|
2117 | 2142 | if (cSize == 0) return 0; |
|
2118 | 2143 | /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. |
|
2119 | 2144 | * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. |
@@ -2264,11 +2289,99 static size_t ZSTD_buildSeqStore(ZSTD_CC | |||
|
2264 | 2289 | return ZSTDbss_compress; |
|
2265 | 2290 | } |
|
2266 | 2291 | |
|
2292 | static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) | |
|
2293 | { | |
|
2294 | const seqStore_t* seqStore = ZSTD_getSeqStore(zc); | |
|
2295 | const seqDef* seqs = seqStore->sequencesStart; | |
|
2296 | size_t seqsSize = seqStore->sequences - seqs; | |
|
2297 | ||
|
2298 | ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; | |
|
2299 | size_t i; size_t position; int repIdx; | |
|
2300 | ||
|
2301 | assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); | |
|
2302 | for (i = 0, position = 0; i < seqsSize; ++i) { | |
|
2303 | outSeqs[i].offset = seqs[i].offset; | |
|
2304 | outSeqs[i].litLength = seqs[i].litLength; | |
|
2305 | outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; | |
|
2306 | ||
|
2307 | if (i == seqStore->longLengthPos) { | |
|
2308 | if (seqStore->longLengthID == 1) { | |
|
2309 | outSeqs[i].litLength += 0x10000; | |
|
2310 | } else if (seqStore->longLengthID == 2) { | |
|
2311 | outSeqs[i].matchLength += 0x10000; | |
|
2312 | } | |
|
2313 | } | |
|
2314 | ||
|
2315 | if (outSeqs[i].offset <= ZSTD_REP_NUM) { | |
|
2316 | outSeqs[i].rep = outSeqs[i].offset; | |
|
2317 | repIdx = (unsigned int)i - outSeqs[i].offset; | |
|
2318 | ||
|
2319 | if (outSeqs[i].litLength == 0) { | |
|
2320 | if (outSeqs[i].offset < 3) { | |
|
2321 | --repIdx; | |
|
2322 | } else { | |
|
2323 | repIdx = (unsigned int)i - 1; | |
|
2324 | } | |
|
2325 | ++outSeqs[i].rep; | |
|
2326 | } | |
|
2327 | assert(repIdx >= -3); | |
|
2328 | outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; | |
|
2329 | if (outSeqs[i].rep == 4) { | |
|
2330 | --outSeqs[i].offset; | |
|
2331 | } | |
|
2332 | } else { | |
|
2333 | outSeqs[i].offset -= ZSTD_REP_NUM; | |
|
2334 | } | |
|
2335 | ||
|
2336 | position += outSeqs[i].litLength; | |
|
2337 | outSeqs[i].matchPos = (unsigned int)position; | |
|
2338 | position += outSeqs[i].matchLength; | |
|
2339 | } | |
|
2340 | zc->seqCollector.seqIndex += seqsSize; | |
|
2341 | } | |
|
2342 | ||
|
2343 | size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, | |
|
2344 | size_t outSeqsSize, const void* src, size_t srcSize) | |
|
2345 | { | |
|
2346 | const size_t dstCapacity = ZSTD_compressBound(srcSize); | |
|
2347 | void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); | |
|
2348 | SeqCollector seqCollector; | |
|
2349 | ||
|
2350 | RETURN_ERROR_IF(dst == NULL, memory_allocation); | |
|
2351 | ||
|
2352 | seqCollector.collectSequences = 1; | |
|
2353 | seqCollector.seqStart = outSeqs; | |
|
2354 | seqCollector.seqIndex = 0; | |
|
2355 | seqCollector.maxSequences = outSeqsSize; | |
|
2356 | zc->seqCollector = seqCollector; | |
|
2357 | ||
|
2358 | ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); | |
|
2359 | ZSTD_free(dst, ZSTD_defaultCMem); | |
|
2360 | return zc->seqCollector.seqIndex; | |
|
2361 | } | |
|
2362 | ||
|
2363 | /* Returns true if the given block is a RLE block */ | |
|
2364 | static int ZSTD_isRLE(const BYTE *ip, size_t length) { | |
|
2365 | size_t i; | |
|
2366 | if (length < 2) return 1; | |
|
2367 | for (i = 1; i < length; ++i) { | |
|
2368 | if (ip[0] != ip[i]) return 0; | |
|
2369 | } | |
|
2370 | return 1; | |
|
2371 | } | |
|
2372 | ||
|
2267 | 2373 | static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, |
|
2268 | 2374 | void* dst, size_t dstCapacity, |
|
2269 | const void* src, size_t srcSize) | |
|
2375 | const void* src, size_t srcSize, U32 frame) | |
|
2270 | 2376 | { |
|
2377 | /* This the upper bound for the length of an rle block. | |
|
2378 | * This isn't the actual upper bound. Finding the real threshold | |
|
2379 | * needs further investigation. | |
|
2380 | */ | |
|
2381 | const U32 rleMaxLength = 25; | |
|
2271 | 2382 | size_t cSize; |
|
2383 | const BYTE* ip = (const BYTE*)src; | |
|
2384 | BYTE* op = (BYTE*)dst; | |
|
2272 | 2385 | DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", |
|
2273 | 2386 | (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, |
|
2274 | 2387 | (unsigned)zc->blockState.matchState.nextToUpdate); |
@@ -2278,6 +2391,11 static size_t ZSTD_compressBlock_interna | |||
|
2278 | 2391 | if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } |
|
2279 | 2392 | } |
|
2280 | 2393 | |
|
2394 | if (zc->seqCollector.collectSequences) { | |
|
2395 | ZSTD_copyBlockSequences(zc); | |
|
2396 | return 0; | |
|
2397 | } | |
|
2398 | ||
|
2281 | 2399 | /* encode sequences and literals */ |
|
2282 | 2400 | cSize = ZSTD_compressSequences(&zc->seqStore, |
|
2283 | 2401 | &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, |
@@ -2287,8 +2405,21 static size_t ZSTD_compressBlock_interna | |||
|
2287 | 2405 | zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, |
|
2288 | 2406 | zc->bmi2); |
|
2289 | 2407 | |
|
2408 | if (frame && | |
|
2409 | /* We don't want to emit our first block as a RLE even if it qualifies because | |
|
2410 | * doing so will cause the decoder (cli only) to throw a "should consume all input error." | |
|
2411 | * This is only an issue for zstd <= v1.4.3 | |
|
2412 | */ | |
|
2413 | !zc->isFirstBlock && | |
|
2414 | cSize < rleMaxLength && | |
|
2415 | ZSTD_isRLE(ip, srcSize)) | |
|
2416 | { | |
|
2417 | cSize = 1; | |
|
2418 | op[0] = ip[0]; | |
|
2419 | } | |
|
2420 | ||
|
2290 | 2421 | out: |
|
2291 |
if (!ZSTD_isError(cSize) && cSize |
|
|
2422 | if (!ZSTD_isError(cSize) && cSize > 1) { | |
|
2292 | 2423 | /* confirm repcodes and entropy tables when emitting a compressed block */ |
|
2293 | 2424 | ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; |
|
2294 | 2425 | zc->blockState.prevCBlock = zc->blockState.nextCBlock; |
@@ -2305,7 +2436,11 out: | |||
|
2305 | 2436 | } |
|
2306 | 2437 | |
|
2307 | 2438 | |
|
2308 |
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, |
|
|
2439 | static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, | |
|
2440 | ZSTD_cwksp* ws, | |
|
2441 | ZSTD_CCtx_params const* params, | |
|
2442 | void const* ip, | |
|
2443 | void const* iend) | |
|
2309 | 2444 | { |
|
2310 | 2445 | if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { |
|
2311 | 2446 | U32 const maxDist = (U32)1 << params->cParams.windowLog; |
@@ -2314,7 +2449,9 static void ZSTD_overflowCorrectIfNeeded | |||
|
2314 | 2449 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); |
|
2315 | 2450 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); |
|
2316 | 2451 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); |
|
2452 | ZSTD_cwksp_mark_tables_dirty(ws); | |
|
2317 | 2453 | ZSTD_reduceIndex(ms, params, correction); |
|
2454 | ZSTD_cwksp_mark_tables_clean(ws); | |
|
2318 | 2455 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; |
|
2319 | 2456 | else ms->nextToUpdate -= correction; |
|
2320 | 2457 | /* invalidate dictionaries on overflow correction */ |
@@ -2323,7 +2460,6 static void ZSTD_overflowCorrectIfNeeded | |||
|
2323 | 2460 | } |
|
2324 | 2461 | } |
|
2325 | 2462 | |
|
2326 | ||
|
2327 | 2463 | /*! ZSTD_compress_frameChunk() : |
|
2328 | 2464 | * Compress a chunk of data into one or multiple blocks. |
|
2329 | 2465 | * All blocks will be terminated, all input will be consumed. |
@@ -2357,7 +2493,8 static size_t ZSTD_compress_frameChunk ( | |||
|
2357 | 2493 | "not enough space to store compressed block"); |
|
2358 | 2494 | if (remaining < blockSize) blockSize = remaining; |
|
2359 | 2495 | |
|
2360 |
ZSTD_overflowCorrectIfNeeded( |
|
|
2496 | ZSTD_overflowCorrectIfNeeded( | |
|
2497 | ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); | |
|
2361 | 2498 | ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); |
|
2362 | 2499 | |
|
2363 | 2500 | /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ |
@@ -2365,15 +2502,16 static size_t ZSTD_compress_frameChunk ( | |||
|
2365 | 2502 | |
|
2366 | 2503 | { size_t cSize = ZSTD_compressBlock_internal(cctx, |
|
2367 | 2504 | op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, |
|
2368 | ip, blockSize); | |
|
2505 | ip, blockSize, 1 /* frame */); | |
|
2369 | 2506 | FORWARD_IF_ERROR(cSize); |
|
2370 | ||
|
2371 | 2507 | if (cSize == 0) { /* block is not compressible */ |
|
2372 | 2508 | cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); |
|
2373 | 2509 | FORWARD_IF_ERROR(cSize); |
|
2374 | 2510 | } else { |
|
2375 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); | |
|
2376 | MEM_writeLE24(op, cBlockHeader24); | |
|
2511 | const U32 cBlockHeader = cSize == 1 ? | |
|
2512 | lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : | |
|
2513 | lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); | |
|
2514 | MEM_writeLE24(op, cBlockHeader); | |
|
2377 | 2515 | cSize += ZSTD_blockHeaderSize; |
|
2378 | 2516 | } |
|
2379 | 2517 | |
@@ -2383,6 +2521,7 static size_t ZSTD_compress_frameChunk ( | |||
|
2383 | 2521 | op += cSize; |
|
2384 | 2522 | assert(dstCapacity >= cSize); |
|
2385 | 2523 | dstCapacity -= cSize; |
|
2524 | cctx->isFirstBlock = 0; | |
|
2386 | 2525 | DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", |
|
2387 | 2526 | (unsigned)cSize); |
|
2388 | 2527 | } } |
@@ -2393,25 +2532,25 static size_t ZSTD_compress_frameChunk ( | |||
|
2393 | 2532 | |
|
2394 | 2533 | |
|
2395 | 2534 | static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, |
|
2396 | ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) | |
|
2535 | const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) | |
|
2397 | 2536 | { BYTE* const op = (BYTE*)dst; |
|
2398 | 2537 | U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ |
|
2399 |
U32 const dictIDSizeCode = params |
|
|
2400 |
U32 const checksumFlag = params |
|
|
2401 |
U32 const windowSize = (U32)1 << params |
|
|
2402 |
U32 const singleSegment = params |
|
|
2403 |
BYTE const windowLogByte = (BYTE)((params |
|
|
2404 |
U32 const fcsCode = params |
|
|
2538 | U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ | |
|
2539 | U32 const checksumFlag = params->fParams.checksumFlag>0; | |
|
2540 | U32 const windowSize = (U32)1 << params->cParams.windowLog; | |
|
2541 | U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); | |
|
2542 | BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); | |
|
2543 | U32 const fcsCode = params->fParams.contentSizeFlag ? | |
|
2405 | 2544 | (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ |
|
2406 | 2545 | BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); |
|
2407 | 2546 | size_t pos=0; |
|
2408 | 2547 | |
|
2409 |
assert(!(params |
|
|
2548 | assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); | |
|
2410 | 2549 | RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); |
|
2411 | 2550 | DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", |
|
2412 |
!params |
|
|
2413 | ||
|
2414 |
if (params |
|
|
2551 | !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); | |
|
2552 | ||
|
2553 | if (params->format == ZSTD_f_zstd1) { | |
|
2415 | 2554 | MEM_writeLE32(dst, ZSTD_MAGICNUMBER); |
|
2416 | 2555 | pos = 4; |
|
2417 | 2556 | } |
@@ -2477,7 +2616,7 static size_t ZSTD_compressContinue_inte | |||
|
2477 | 2616 | "missing init (ZSTD_compressBegin)"); |
|
2478 | 2617 | |
|
2479 | 2618 | if (frame && (cctx->stage==ZSTDcs_init)) { |
|
2480 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, | |
|
2619 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, | |
|
2481 | 2620 | cctx->pledgedSrcSizePlusOne-1, cctx->dictID); |
|
2482 | 2621 | FORWARD_IF_ERROR(fhSize); |
|
2483 | 2622 | assert(fhSize <= dstCapacity); |
@@ -2497,13 +2636,15 static size_t ZSTD_compressContinue_inte | |||
|
2497 | 2636 | |
|
2498 | 2637 | if (!frame) { |
|
2499 | 2638 | /* overflow check and correction for block mode */ |
|
2500 | ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); | |
|
2639 | ZSTD_overflowCorrectIfNeeded( | |
|
2640 | ms, &cctx->workspace, &cctx->appliedParams, | |
|
2641 | src, (BYTE const*)src + srcSize); | |
|
2501 | 2642 | } |
|
2502 | 2643 | |
|
2503 | 2644 | DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); |
|
2504 | 2645 | { size_t const cSize = frame ? |
|
2505 | 2646 | ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : |
|
2506 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); | |
|
2647 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); | |
|
2507 | 2648 | FORWARD_IF_ERROR(cSize); |
|
2508 | 2649 | cctx->consumedSrcSize += srcSize; |
|
2509 | 2650 | cctx->producedCSize += (cSize + fhSize); |
@@ -2550,6 +2691,7 size_t ZSTD_compressBlock(ZSTD_CCtx* cct | |||
|
2550 | 2691 | * @return : 0, or an error code |
|
2551 | 2692 | */ |
|
2552 | 2693 | static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, |
|
2694 | ZSTD_cwksp* ws, | |
|
2553 | 2695 | ZSTD_CCtx_params const* params, |
|
2554 | 2696 | const void* src, size_t srcSize, |
|
2555 | 2697 | ZSTD_dictTableLoadMethod_e dtlm) |
@@ -2570,7 +2712,7 static size_t ZSTD_loadDictionaryContent | |||
|
2570 | 2712 | size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); |
|
2571 | 2713 | const BYTE* const ichunk = ip + chunk; |
|
2572 | 2714 | |
|
2573 | ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); | |
|
2715 | ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); | |
|
2574 | 2716 | |
|
2575 | 2717 | switch(params->cParams.strategy) |
|
2576 | 2718 | { |
@@ -2629,10 +2771,11 static size_t ZSTD_checkDictNCount(short | |||
|
2629 | 2771 | /*! ZSTD_loadZstdDictionary() : |
|
2630 | 2772 | * @return : dictID, or an error code |
|
2631 | 2773 | * assumptions : magic number supposed already checked |
|
2632 | * dictSize supposed > 8 | |
|
2774 | * dictSize supposed >= 8 | |
|
2633 | 2775 | */ |
|
2634 | 2776 | static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, |
|
2635 | 2777 | ZSTD_matchState_t* ms, |
|
2778 | ZSTD_cwksp* ws, | |
|
2636 | 2779 | ZSTD_CCtx_params const* params, |
|
2637 | 2780 | const void* dict, size_t dictSize, |
|
2638 | 2781 | ZSTD_dictTableLoadMethod_e dtlm, |
@@ -2645,7 +2788,7 static size_t ZSTD_loadZstdDictionary(ZS | |||
|
2645 | 2788 | size_t dictID; |
|
2646 | 2789 | |
|
2647 | 2790 | ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); |
|
2648 | assert(dictSize > 8); | |
|
2791 | assert(dictSize >= 8); | |
|
2649 | 2792 | assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); |
|
2650 | 2793 | |
|
2651 | 2794 | dictPtr += 4; /* skip magic number */ |
@@ -2728,7 +2871,8 static size_t ZSTD_loadZstdDictionary(ZS | |||
|
2728 | 2871 | bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; |
|
2729 | 2872 | bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; |
|
2730 | 2873 | bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; |
|
2731 |
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( |
|
|
2874 | FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( | |
|
2875 | ms, ws, params, dictPtr, dictContentSize, dtlm)); | |
|
2732 | 2876 | return dictID; |
|
2733 | 2877 | } |
|
2734 | 2878 | } |
@@ -2738,6 +2882,7 static size_t ZSTD_loadZstdDictionary(ZS | |||
|
2738 | 2882 | static size_t |
|
2739 | 2883 | ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, |
|
2740 | 2884 | ZSTD_matchState_t* ms, |
|
2885 | ZSTD_cwksp* ws, | |
|
2741 | 2886 | const ZSTD_CCtx_params* params, |
|
2742 | 2887 | const void* dict, size_t dictSize, |
|
2743 | 2888 | ZSTD_dictContentType_e dictContentType, |
@@ -2745,27 +2890,35 ZSTD_compress_insertDictionary(ZSTD_comp | |||
|
2745 | 2890 | void* workspace) |
|
2746 | 2891 | { |
|
2747 | 2892 | DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); |
|
2748 |
if ((dict==NULL) || (dictSize< |
|
|
2893 | if ((dict==NULL) || (dictSize<8)) { | |
|
2894 | RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); | |
|
2895 | return 0; | |
|
2896 | } | |
|
2749 | 2897 | |
|
2750 | 2898 | ZSTD_reset_compressedBlockState(bs); |
|
2751 | 2899 | |
|
2752 | 2900 | /* dict restricted modes */ |
|
2753 | 2901 | if (dictContentType == ZSTD_dct_rawContent) |
|
2754 | return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); | |
|
2902 | return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm); | |
|
2755 | 2903 | |
|
2756 | 2904 | if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { |
|
2757 | 2905 | if (dictContentType == ZSTD_dct_auto) { |
|
2758 | 2906 | DEBUGLOG(4, "raw content dictionary detected"); |
|
2759 |
return ZSTD_loadDictionaryContent( |
|
|
2907 | return ZSTD_loadDictionaryContent( | |
|
2908 | ms, ws, params, dict, dictSize, dtlm); | |
|
2760 | 2909 | } |
|
2761 | 2910 | RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); |
|
2762 | 2911 | assert(0); /* impossible */ |
|
2763 | 2912 | } |
|
2764 | 2913 | |
|
2765 | 2914 | /* dict as full zstd dictionary */ |
|
2766 | return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace); | |
|
2915 | return ZSTD_loadZstdDictionary( | |
|
2916 | bs, ms, ws, params, dict, dictSize, dtlm, workspace); | |
|
2767 | 2917 | } |
|
2768 | 2918 | |
|
2919 | #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) | |
|
2920 | #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) | |
|
2921 | ||
|
2769 | 2922 | /*! ZSTD_compressBegin_internal() : |
|
2770 | 2923 | * @return : 0, or an error code */ |
|
2771 | 2924 | static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, |
@@ -2773,23 +2926,34 static size_t ZSTD_compressBegin_interna | |||
|
2773 | 2926 | ZSTD_dictContentType_e dictContentType, |
|
2774 | 2927 | ZSTD_dictTableLoadMethod_e dtlm, |
|
2775 | 2928 | const ZSTD_CDict* cdict, |
|
2776 | ZSTD_CCtx_params params, U64 pledgedSrcSize, | |
|
2929 | const ZSTD_CCtx_params* params, U64 pledgedSrcSize, | |
|
2777 | 2930 | ZSTD_buffered_policy_e zbuff) |
|
2778 | 2931 | { |
|
2779 |
DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params |
|
|
2932 | DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); | |
|
2780 | 2933 | /* params are supposed to be fully validated at this point */ |
|
2781 |
assert(!ZSTD_isError(ZSTD_checkCParams(params |
|
|
2934 | assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); | |
|
2782 | 2935 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
|
2783 | ||
|
2784 |
|
|
|
2936 | if ( (cdict) | |
|
2937 | && (cdict->dictContentSize > 0) | |
|
2938 | && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF | |
|
2939 | || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER | |
|
2940 | || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN | |
|
2941 | || cdict->compressionLevel == 0) | |
|
2942 | && (params->attachDictPref != ZSTD_dictForceLoad) ) { | |
|
2785 | 2943 | return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); |
|
2786 | 2944 | } |
|
2787 | 2945 | |
|
2788 | FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
|
2789 |
ZSTDcrp_ |
|
|
2790 |
{ size_t const dictID = |
|
|
2791 | cctx->blockState.prevCBlock, &cctx->blockState.matchState, | |
|
2792 | ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); | |
|
2946 | FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, | |
|
2947 | ZSTDcrp_makeClean, zbuff) ); | |
|
2948 | { size_t const dictID = cdict ? | |
|
2949 | ZSTD_compress_insertDictionary( | |
|
2950 | cctx->blockState.prevCBlock, &cctx->blockState.matchState, | |
|
2951 | &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, | |
|
2952 | dictContentType, dtlm, cctx->entropyWorkspace) | |
|
2953 | : ZSTD_compress_insertDictionary( | |
|
2954 | cctx->blockState.prevCBlock, &cctx->blockState.matchState, | |
|
2955 | &cctx->workspace, params, dict, dictSize, | |
|
2956 | dictContentType, dtlm, cctx->entropyWorkspace); | |
|
2793 | 2957 | FORWARD_IF_ERROR(dictID); |
|
2794 | 2958 | assert(dictID <= UINT_MAX); |
|
2795 | 2959 | cctx->dictID = (U32)dictID; |
@@ -2802,12 +2966,12 size_t ZSTD_compressBegin_advanced_inter | |||
|
2802 | 2966 | ZSTD_dictContentType_e dictContentType, |
|
2803 | 2967 | ZSTD_dictTableLoadMethod_e dtlm, |
|
2804 | 2968 | const ZSTD_CDict* cdict, |
|
2805 | ZSTD_CCtx_params params, | |
|
2969 | const ZSTD_CCtx_params* params, | |
|
2806 | 2970 | unsigned long long pledgedSrcSize) |
|
2807 | 2971 | { |
|
2808 |
DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params |
|
|
2972 | DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); | |
|
2809 | 2973 | /* compression parameters verification and optimization */ |
|
2810 |
FORWARD_IF_ERROR( ZSTD_checkCParams(params |
|
|
2974 | FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) ); | |
|
2811 | 2975 | return ZSTD_compressBegin_internal(cctx, |
|
2812 | 2976 | dict, dictSize, dictContentType, dtlm, |
|
2813 | 2977 | cdict, |
@@ -2822,21 +2986,21 size_t ZSTD_compressBegin_advanced(ZSTD_ | |||
|
2822 | 2986 | ZSTD_parameters params, unsigned long long pledgedSrcSize) |
|
2823 | 2987 | { |
|
2824 | 2988 | ZSTD_CCtx_params const cctxParams = |
|
2825 | ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); | |
|
2989 | ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); | |
|
2826 | 2990 | return ZSTD_compressBegin_advanced_internal(cctx, |
|
2827 | 2991 | dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, |
|
2828 | 2992 | NULL /*cdict*/, |
|
2829 | cctxParams, pledgedSrcSize); | |
|
2993 | &cctxParams, pledgedSrcSize); | |
|
2830 | 2994 | } |
|
2831 | 2995 | |
|
2832 | 2996 | size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) |
|
2833 | 2997 | { |
|
2834 | 2998 | ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); |
|
2835 | 2999 | ZSTD_CCtx_params const cctxParams = |
|
2836 | ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); | |
|
3000 | ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); | |
|
2837 | 3001 | DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); |
|
2838 | 3002 | return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, |
|
2839 | cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); | |
|
3003 | &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); | |
|
2840 | 3004 | } |
|
2841 | 3005 | |
|
2842 | 3006 | size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) |
@@ -2859,7 +3023,7 static size_t ZSTD_writeEpilogue(ZSTD_CC | |||
|
2859 | 3023 | |
|
2860 | 3024 | /* special case : empty frame */ |
|
2861 | 3025 | if (cctx->stage == ZSTDcs_init) { |
|
2862 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); | |
|
3026 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); | |
|
2863 | 3027 | FORWARD_IF_ERROR(fhSize); |
|
2864 | 3028 | dstCapacity -= fhSize; |
|
2865 | 3029 | op += fhSize; |
@@ -2920,13 +3084,13 static size_t ZSTD_compress_internal (ZS | |||
|
2920 | 3084 | ZSTD_parameters params) |
|
2921 | 3085 | { |
|
2922 | 3086 | ZSTD_CCtx_params const cctxParams = |
|
2923 | ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); | |
|
3087 | ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); | |
|
2924 | 3088 | DEBUGLOG(4, "ZSTD_compress_internal"); |
|
2925 | 3089 | return ZSTD_compress_advanced_internal(cctx, |
|
2926 | 3090 | dst, dstCapacity, |
|
2927 | 3091 | src, srcSize, |
|
2928 | 3092 | dict, dictSize, |
|
2929 | cctxParams); | |
|
3093 | &cctxParams); | |
|
2930 | 3094 | } |
|
2931 | 3095 | |
|
2932 | 3096 | size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, |
@@ -2950,7 +3114,7 size_t ZSTD_compress_advanced_internal( | |||
|
2950 | 3114 | void* dst, size_t dstCapacity, |
|
2951 | 3115 | const void* src, size_t srcSize, |
|
2952 | 3116 | const void* dict,size_t dictSize, |
|
2953 | ZSTD_CCtx_params params) | |
|
3117 | const ZSTD_CCtx_params* params) | |
|
2954 | 3118 | { |
|
2955 | 3119 | DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); |
|
2956 | 3120 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, |
@@ -2966,9 +3130,9 size_t ZSTD_compress_usingDict(ZSTD_CCtx | |||
|
2966 | 3130 | int compressionLevel) |
|
2967 | 3131 | { |
|
2968 | 3132 | ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); |
|
2969 | ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); | |
|
3133 | ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); | |
|
2970 | 3134 | assert(params.fParams.contentSizeFlag == 1); |
|
2971 | return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); | |
|
3135 | return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); | |
|
2972 | 3136 | } |
|
2973 | 3137 | |
|
2974 | 3138 | size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, |
@@ -3003,8 +3167,11 size_t ZSTD_estimateCDictSize_advanced( | |||
|
3003 | 3167 | ZSTD_dictLoadMethod_e dictLoadMethod) |
|
3004 | 3168 | { |
|
3005 | 3169 | DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); |
|
3006 | return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) | |
|
3007 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); | |
|
3170 | return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) | |
|
3171 | + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) | |
|
3172 | + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) | |
|
3173 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 | |
|
3174 | : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); | |
|
3008 | 3175 | } |
|
3009 | 3176 | |
|
3010 | 3177 | size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) |
@@ -3017,7 +3184,9 size_t ZSTD_sizeof_CDict(const ZSTD_CDic | |||
|
3017 | 3184 | { |
|
3018 | 3185 | if (cdict==NULL) return 0; /* support sizeof on NULL */ |
|
3019 | 3186 | DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); |
|
3020 | return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); | |
|
3187 | /* cdict may be in the workspace */ | |
|
3188 | return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) | |
|
3189 | + ZSTD_cwksp_sizeof(&cdict->workspace); | |
|
3021 | 3190 | } |
|
3022 | 3191 | |
|
3023 | 3192 | static size_t ZSTD_initCDict_internal( |
@@ -3031,28 +3200,29 static size_t ZSTD_initCDict_internal( | |||
|
3031 | 3200 | assert(!ZSTD_checkCParams(cParams)); |
|
3032 | 3201 | cdict->matchState.cParams = cParams; |
|
3033 | 3202 | if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { |
|
3034 | cdict->dictBuffer = NULL; | |
|
3035 | 3203 | cdict->dictContent = dictBuffer; |
|
3036 | 3204 | } else { |
|
3037 | void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); | |
|
3038 | cdict->dictBuffer = internalBuffer; | |
|
3205 | void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); | |
|
3206 | RETURN_ERROR_IF(!internalBuffer, memory_allocation); | |
|
3039 | 3207 | cdict->dictContent = internalBuffer; |
|
3040 | RETURN_ERROR_IF(!internalBuffer, memory_allocation); | |
|
3041 | 3208 | memcpy(internalBuffer, dictBuffer, dictSize); |
|
3042 | 3209 | } |
|
3043 | 3210 | cdict->dictContentSize = dictSize; |
|
3044 | 3211 | |
|
3212 | cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); | |
|
3213 | ||
|
3214 | ||
|
3045 | 3215 | /* Reset the state to no dictionary */ |
|
3046 | 3216 | ZSTD_reset_compressedBlockState(&cdict->cBlockState); |
|
3047 | { void* const end = ZSTD_reset_matchState(&cdict->matchState, | |
|
3048 | (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, | |
|
3049 | &cParams, | |
|
3050 | ZSTDcrp_continue, ZSTD_resetTarget_CDict); | |
|
3051 | assert(end == (char*)cdict->workspace + cdict->workspaceSize); | |
|
3052 | (void)end; | |
|
3053 | } | |
|
3217 | FORWARD_IF_ERROR(ZSTD_reset_matchState( | |
|
3218 | &cdict->matchState, | |
|
3219 | &cdict->workspace, | |
|
3220 | &cParams, | |
|
3221 | ZSTDcrp_makeClean, | |
|
3222 | ZSTDirp_reset, | |
|
3223 | ZSTD_resetTarget_CDict)); | |
|
3054 | 3224 | /* (Maybe) load the dictionary |
|
3055 |
* Skips loading the dictionary if it is < |
|
|
3225 | * Skips loading the dictionary if it is < 8 bytes. | |
|
3056 | 3226 | */ |
|
3057 | 3227 | { ZSTD_CCtx_params params; |
|
3058 | 3228 | memset(¶ms, 0, sizeof(params)); |
@@ -3060,9 +3230,9 static size_t ZSTD_initCDict_internal( | |||
|
3060 | 3230 | params.fParams.contentSizeFlag = 1; |
|
3061 | 3231 | params.cParams = cParams; |
|
3062 | 3232 | { size_t const dictID = ZSTD_compress_insertDictionary( |
|
3063 |
&cdict->cBlockState, &cdict->matchState, & |
|
|
3064 | cdict->dictContent, cdict->dictContentSize, | |
|
3065 |
dictContentType, ZSTD_dtlm_full, cdict-> |
|
|
3233 | &cdict->cBlockState, &cdict->matchState, &cdict->workspace, | |
|
3234 | ¶ms, cdict->dictContent, cdict->dictContentSize, | |
|
3235 | dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); | |
|
3066 | 3236 | FORWARD_IF_ERROR(dictID); |
|
3067 | 3237 | assert(dictID <= (size_t)(U32)-1); |
|
3068 | 3238 | cdict->dictID = (U32)dictID; |
@@ -3080,18 +3250,29 ZSTD_CDict* ZSTD_createCDict_advanced(co | |||
|
3080 | 3250 | DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); |
|
3081 | 3251 | if (!customMem.customAlloc ^ !customMem.customFree) return NULL; |
|
3082 | 3252 | |
|
3083 | { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); | |
|
3084 | size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); | |
|
3253 | { size_t const workspaceSize = | |
|
3254 | ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + | |
|
3255 | ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + | |
|
3256 | ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + | |
|
3257 | (dictLoadMethod == ZSTD_dlm_byRef ? 0 | |
|
3258 | : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); | |
|
3085 | 3259 | void* const workspace = ZSTD_malloc(workspaceSize, customMem); |
|
3086 | ||
|
3087 | if (!cdict || !workspace) { | |
|
3088 | ZSTD_free(cdict, customMem); | |
|
3260 | ZSTD_cwksp ws; | |
|
3261 | ZSTD_CDict* cdict; | |
|
3262 | ||
|
3263 | if (!workspace) { | |
|
3089 | 3264 | ZSTD_free(workspace, customMem); |
|
3090 | 3265 | return NULL; |
|
3091 | 3266 | } |
|
3267 | ||
|
3268 | ZSTD_cwksp_init(&ws, workspace, workspaceSize); | |
|
3269 | ||
|
3270 | cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); | |
|
3271 | assert(cdict != NULL); | |
|
3272 | ZSTD_cwksp_move(&cdict->workspace, &ws); | |
|
3092 | 3273 | cdict->customMem = customMem; |
|
3093 | cdict->workspace = workspace; | |
|
3094 | cdict->workspaceSize = workspaceSize; | |
|
3274 | cdict->compressionLevel = 0; /* signals advanced API usage */ | |
|
3275 | ||
|
3095 | 3276 | if (ZSTD_isError( ZSTD_initCDict_internal(cdict, |
|
3096 | 3277 | dictBuffer, dictSize, |
|
3097 | 3278 | dictLoadMethod, dictContentType, |
@@ -3107,9 +3288,12 ZSTD_CDict* ZSTD_createCDict_advanced(co | |||
|
3107 | 3288 | ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) |
|
3108 | 3289 | { |
|
3109 | 3290 | ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); |
|
3110 |
|
|
|
3111 | ZSTD_dlm_byCopy, ZSTD_dct_auto, | |
|
3112 | cParams, ZSTD_defaultCMem); | |
|
3291 | ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, | |
|
3292 | ZSTD_dlm_byCopy, ZSTD_dct_auto, | |
|
3293 | cParams, ZSTD_defaultCMem); | |
|
3294 | if (cdict) | |
|
3295 | cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; | |
|
3296 | return cdict; | |
|
3113 | 3297 | } |
|
3114 | 3298 | |
|
3115 | 3299 | ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) |
@@ -3124,9 +3308,11 size_t ZSTD_freeCDict(ZSTD_CDict* cdict) | |||
|
3124 | 3308 | { |
|
3125 | 3309 | if (cdict==NULL) return 0; /* support free on NULL */ |
|
3126 | 3310 | { ZSTD_customMem const cMem = cdict->customMem; |
|
3127 |
ZSTD_fr |
|
|
3128 |
ZSTD_free(cdict-> |
|
|
3129 | ZSTD_free(cdict, cMem); | |
|
3311 | int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); | |
|
3312 | ZSTD_cwksp_free(&cdict->workspace, cMem); | |
|
3313 | if (!cdictInWorkspace) { | |
|
3314 | ZSTD_free(cdict, cMem); | |
|
3315 | } | |
|
3130 | 3316 | return 0; |
|
3131 | 3317 | } |
|
3132 | 3318 | } |
@@ -3152,28 +3338,30 const ZSTD_CDict* ZSTD_initStaticCDict( | |||
|
3152 | 3338 | ZSTD_compressionParameters cParams) |
|
3153 | 3339 | { |
|
3154 | 3340 | size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); |
|
3155 | size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize) | |
|
3156 | + HUF_WORKSPACE_SIZE + matchStateSize; | |
|
3157 | ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; | |
|
3158 | void* ptr; | |
|
3341 | size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) | |
|
3342 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 | |
|
3343 | : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) | |
|
3344 | + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) | |
|
3345 | + matchStateSize; | |
|
3346 | ZSTD_CDict* cdict; | |
|
3347 | ||
|
3159 | 3348 | if ((size_t)workspace & 7) return NULL; /* 8-aligned */ |
|
3349 | ||
|
3350 | { | |
|
3351 | ZSTD_cwksp ws; | |
|
3352 | ZSTD_cwksp_init(&ws, workspace, workspaceSize); | |
|
3353 | cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); | |
|
3354 | if (cdict == NULL) return NULL; | |
|
3355 | ZSTD_cwksp_move(&cdict->workspace, &ws); | |
|
3356 | } | |
|
3357 | ||
|
3160 | 3358 | DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", |
|
3161 | 3359 | (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); |
|
3162 | 3360 | if (workspaceSize < neededSize) return NULL; |
|
3163 | 3361 | |
|
3164 | if (dictLoadMethod == ZSTD_dlm_byCopy) { | |
|
3165 | memcpy(cdict+1, dict, dictSize); | |
|
3166 | dict = cdict+1; | |
|
3167 | ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; | |
|
3168 | } else { | |
|
3169 | ptr = cdict+1; | |
|
3170 | } | |
|
3171 | cdict->workspace = ptr; | |
|
3172 | cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; | |
|
3173 | ||
|
3174 | 3362 | if (ZSTD_isError( ZSTD_initCDict_internal(cdict, |
|
3175 | 3363 | dict, dictSize, |
|
3176 |
|
|
|
3364 | dictLoadMethod, dictContentType, | |
|
3177 | 3365 | cParams) )) |
|
3178 | 3366 | return NULL; |
|
3179 | 3367 | |
@@ -3195,7 +3383,15 size_t ZSTD_compressBegin_usingCDict_adv | |||
|
3195 | 3383 | DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); |
|
3196 | 3384 | RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); |
|
3197 | 3385 | { ZSTD_CCtx_params params = cctx->requestedParams; |
|
3198 | params.cParams = ZSTD_getCParamsFromCDict(cdict); | |
|
3386 | params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF | |
|
3387 | || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER | |
|
3388 | || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN | |
|
3389 | || cdict->compressionLevel == 0 ) | |
|
3390 | && (params.attachDictPref != ZSTD_dictForceLoad) ? | |
|
3391 | ZSTD_getCParamsFromCDict(cdict) | |
|
3392 | : ZSTD_getCParams(cdict->compressionLevel, | |
|
3393 | pledgedSrcSize, | |
|
3394 | cdict->dictContentSize); | |
|
3199 | 3395 | /* Increase window log to fit the entire dictionary and source if the |
|
3200 | 3396 | * source size is known. Limit the increase to 19, which is the |
|
3201 | 3397 | * window log for compression level 1 with the largest source size. |
@@ -3209,7 +3405,7 size_t ZSTD_compressBegin_usingCDict_adv | |||
|
3209 | 3405 | return ZSTD_compressBegin_internal(cctx, |
|
3210 | 3406 | NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, |
|
3211 | 3407 | cdict, |
|
3212 | params, pledgedSrcSize, | |
|
3408 | ¶ms, pledgedSrcSize, | |
|
3213 | 3409 | ZSTDb_not_buffered); |
|
3214 | 3410 | } |
|
3215 | 3411 | } |
@@ -3300,7 +3496,7 static size_t ZSTD_resetCStream_internal | |||
|
3300 | 3496 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, |
|
3301 | 3497 | dict, dictSize, dictContentType, ZSTD_dtlm_fast, |
|
3302 | 3498 | cdict, |
|
3303 | params, pledgedSrcSize, | |
|
3499 | ¶ms, pledgedSrcSize, | |
|
3304 | 3500 | ZSTDb_buffered) ); |
|
3305 | 3501 | |
|
3306 | 3502 | cctx->inToCompress = 0; |
@@ -3334,13 +3530,14 size_t ZSTD_resetCStream(ZSTD_CStream* z | |||
|
3334 | 3530 | * Assumption 2 : either dict, or cdict, is defined, not both */ |
|
3335 | 3531 | size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, |
|
3336 | 3532 | const void* dict, size_t dictSize, const ZSTD_CDict* cdict, |
|
3337 |
ZSTD_CCtx_params params, |
|
|
3533 | const ZSTD_CCtx_params* params, | |
|
3534 | unsigned long long pledgedSrcSize) | |
|
3338 | 3535 | { |
|
3339 | 3536 | DEBUGLOG(4, "ZSTD_initCStream_internal"); |
|
3340 | 3537 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); |
|
3341 | 3538 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); |
|
3342 |
assert(!ZSTD_isError(ZSTD_checkCParams(params |
|
|
3343 | zcs->requestedParams = params; | |
|
3539 | assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); | |
|
3540 | zcs->requestedParams = *params; | |
|
3344 | 3541 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
|
3345 | 3542 | if (dict) { |
|
3346 | 3543 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); |
@@ -3379,7 +3576,7 size_t ZSTD_initCStream_usingCDict(ZSTD_ | |||
|
3379 | 3576 | /* ZSTD_initCStream_advanced() : |
|
3380 | 3577 | * pledgedSrcSize must be exact. |
|
3381 | 3578 | * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. |
|
3382 |
* dict is loaded with default parameters ZSTD_d |
|
|
3579 | * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ | |
|
3383 | 3580 | size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, |
|
3384 | 3581 | const void* dict, size_t dictSize, |
|
3385 | 3582 | ZSTD_parameters params, unsigned long long pss) |
@@ -3393,7 +3590,7 size_t ZSTD_initCStream_advanced(ZSTD_CS | |||
|
3393 | 3590 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); |
|
3394 | 3591 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); |
|
3395 | 3592 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); |
|
3396 | zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); | |
|
3593 | zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params); | |
|
3397 | 3594 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); |
|
3398 | 3595 | return 0; |
|
3399 | 3596 | } |
@@ -3643,7 +3840,7 size_t ZSTD_compressStream2( ZSTD_CCtx* | |||
|
3643 | 3840 | if (cctx->mtctx == NULL) { |
|
3644 | 3841 | DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", |
|
3645 | 3842 | params.nbWorkers); |
|
3646 | cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); | |
|
3843 | cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); | |
|
3647 | 3844 | RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); |
|
3648 | 3845 | } |
|
3649 | 3846 | /* mt compression */ |
@@ -3771,8 +3968,8 static const ZSTD_compressionParameters | |||
|
3771 | 3968 | { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ |
|
3772 | 3969 | { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ |
|
3773 | 3970 | { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ |
|
3774 |
{ 21, 16, 17, 1, 5, |
|
|
3775 |
{ 21, 18, 18, 1, 5, |
|
|
3971 | { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ | |
|
3972 | { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ | |
|
3776 | 3973 | { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ |
|
3777 | 3974 | { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ |
|
3778 | 3975 | { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ |
@@ -3796,8 +3993,8 static const ZSTD_compressionParameters | |||
|
3796 | 3993 | /* W, C, H, S, L, T, strat */ |
|
3797 | 3994 | { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ |
|
3798 | 3995 | { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ |
|
3799 |
{ 18, 14, 14, 1, 5, |
|
|
3800 |
{ 18, 16, 16, 1, 4, |
|
|
3996 | { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ | |
|
3997 | { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ | |
|
3801 | 3998 | { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ |
|
3802 | 3999 | { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ |
|
3803 | 4000 | { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ |
@@ -3823,8 +4020,8 static const ZSTD_compressionParameters | |||
|
3823 | 4020 | { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ |
|
3824 | 4021 | { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ |
|
3825 | 4022 | { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ |
|
3826 |
{ 17, 15, 16, 2, 5, |
|
|
3827 |
{ 17, 17, 17, 2, 4, |
|
|
4023 | { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ | |
|
4024 | { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ | |
|
3828 | 4025 | { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ |
|
3829 | 4026 | { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ |
|
3830 | 4027 | { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ |
@@ -3849,7 +4046,7 static const ZSTD_compressionParameters | |||
|
3849 | 4046 | { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ |
|
3850 | 4047 | { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ |
|
3851 | 4048 | { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ |
|
3852 |
{ 14, 14, 15, 2, 4, |
|
|
4049 | { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ | |
|
3853 | 4050 | { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ |
|
3854 | 4051 | { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ |
|
3855 | 4052 | { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ |
@@ -19,6 +19,7 | |||
|
19 | 19 | * Dependencies |
|
20 | 20 | ***************************************/ |
|
21 | 21 | #include "zstd_internal.h" |
|
22 | #include "zstd_cwksp.h" | |
|
22 | 23 | #ifdef ZSTD_MULTITHREAD |
|
23 | 24 | # include "zstdmt_compress.h" |
|
24 | 25 | #endif |
@@ -192,6 +193,13 typedef struct { | |||
|
192 | 193 | size_t capacity; /* The capacity starting from `seq` pointer */ |
|
193 | 194 | } rawSeqStore_t; |
|
194 | 195 | |
|
196 | typedef struct { | |
|
197 | int collectSequences; | |
|
198 | ZSTD_Sequence* seqStart; | |
|
199 | size_t seqIndex; | |
|
200 | size_t maxSequences; | |
|
201 | } SeqCollector; | |
|
202 | ||
|
195 | 203 | struct ZSTD_CCtx_params_s { |
|
196 | 204 | ZSTD_format_e format; |
|
197 | 205 | ZSTD_compressionParameters cParams; |
@@ -203,6 +211,9 struct ZSTD_CCtx_params_s { | |||
|
203 | 211 | size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. |
|
204 | 212 | * No target when targetCBlockSize == 0. |
|
205 | 213 | * There is no guarantee on compressed block size */ |
|
214 | int srcSizeHint; /* User's best guess of source size. | |
|
215 | * Hint is not valid when srcSizeHint == 0. | |
|
216 | * There is no guarantee that hint is close to actual source size */ | |
|
206 | 217 | |
|
207 | 218 | ZSTD_dictAttachPref_e attachDictPref; |
|
208 | 219 | ZSTD_literalCompressionMode_e literalCompressionMode; |
@@ -228,9 +239,7 struct ZSTD_CCtx_s { | |||
|
228 | 239 | ZSTD_CCtx_params appliedParams; |
|
229 | 240 | U32 dictID; |
|
230 | 241 | |
|
231 | int workSpaceOversizedDuration; | |
|
232 | void* workSpace; | |
|
233 | size_t workSpaceSize; | |
|
242 | ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ | |
|
234 | 243 | size_t blockSize; |
|
235 | 244 | unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ |
|
236 | 245 | unsigned long long consumedSrcSize; |
@@ -238,6 +247,8 struct ZSTD_CCtx_s { | |||
|
238 | 247 | XXH64_state_t xxhState; |
|
239 | 248 | ZSTD_customMem customMem; |
|
240 | 249 | size_t staticSize; |
|
250 | SeqCollector seqCollector; | |
|
251 | int isFirstBlock; | |
|
241 | 252 | |
|
242 | 253 | seqStore_t seqStore; /* sequences storage ptrs */ |
|
243 | 254 | ldmState_t ldmState; /* long distance matching state */ |
@@ -337,26 +348,57 MEM_STATIC size_t ZSTD_minGain(size_t sr | |||
|
337 | 348 | return (srcSize >> minlog) + 2; |
|
338 | 349 | } |
|
339 | 350 | |
|
351 | /*! ZSTD_safecopyLiterals() : | |
|
352 | * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. | |
|
353 | * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single | |
|
354 | * large copies. | |
|
355 | */ | |
|
356 | static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { | |
|
357 | assert(iend > ilimit_w); | |
|
358 | if (ip <= ilimit_w) { | |
|
359 | ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); | |
|
360 | op += ilimit_w - ip; | |
|
361 | ip = ilimit_w; | |
|
362 | } | |
|
363 | while (ip < iend) *op++ = *ip++; | |
|
364 | } | |
|
365 | ||
|
340 | 366 | /*! ZSTD_storeSeq() : |
|
341 |
* Store a sequence (lit |
|
|
342 |
* `off |
|
|
367 | * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. | |
|
368 | * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). | |
|
343 | 369 | * `mlBase` : matchLength - MINMATCH |
|
370 | * Allowed to overread literals up to litLimit. | |
|
344 | 371 | */ |
|
345 | MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) | |
|
372 | HINT_INLINE UNUSED_ATTR | |
|
373 | void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) | |
|
346 | 374 | { |
|
375 | BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; | |
|
376 | BYTE const* const litEnd = literals + litLength; | |
|
347 | 377 | #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) |
|
348 | 378 | static const BYTE* g_start = NULL; |
|
349 | 379 | if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ |
|
350 | 380 | { U32 const pos = (U32)((const BYTE*)literals - g_start); |
|
351 | 381 | DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", |
|
352 |
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)off |
|
|
382 | pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); | |
|
353 | 383 | } |
|
354 | 384 | #endif |
|
355 | 385 | assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); |
|
356 | 386 | /* copy Literals */ |
|
357 | 387 | assert(seqStorePtr->maxNbLit <= 128 KB); |
|
358 | 388 | assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); |
|
359 | ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap); | |
|
389 | assert(literals + litLength <= litLimit); | |
|
390 | if (litEnd <= litLimit_w) { | |
|
391 | /* Common case we can use wildcopy. | |
|
392 | * First copy 16 bytes, because literals are likely short. | |
|
393 | */ | |
|
394 | assert(WILDCOPY_OVERLENGTH >= 16); | |
|
395 | ZSTD_copy16(seqStorePtr->lit, literals); | |
|
396 | if (litLength > 16) { | |
|
397 | ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); | |
|
398 | } | |
|
399 | } else { | |
|
400 | ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); | |
|
401 | } | |
|
360 | 402 | seqStorePtr->lit += litLength; |
|
361 | 403 | |
|
362 | 404 | /* literal Length */ |
@@ -368,7 +410,7 MEM_STATIC void ZSTD_storeSeq(seqStore_t | |||
|
368 | 410 | seqStorePtr->sequences[0].litLength = (U16)litLength; |
|
369 | 411 | |
|
370 | 412 | /* match offset */ |
|
371 |
seqStorePtr->sequences[0].offset = off |
|
|
413 | seqStorePtr->sequences[0].offset = offCode + 1; | |
|
372 | 414 | |
|
373 | 415 | /* match Length */ |
|
374 | 416 | if (mlBase>0xFFFF) { |
@@ -910,7 +952,7 ZSTD_compressionParameters ZSTD_getCPara | |||
|
910 | 952 | size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, |
|
911 | 953 | const void* dict, size_t dictSize, |
|
912 | 954 | const ZSTD_CDict* cdict, |
|
913 |
ZSTD_CCtx_params |
|
|
955 | const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); | |
|
914 | 956 | |
|
915 | 957 | void ZSTD_resetSeqStore(seqStore_t* ssPtr); |
|
916 | 958 | |
@@ -925,7 +967,7 size_t ZSTD_compressBegin_advanced_inter | |||
|
925 | 967 | ZSTD_dictContentType_e dictContentType, |
|
926 | 968 | ZSTD_dictTableLoadMethod_e dtlm, |
|
927 | 969 | const ZSTD_CDict* cdict, |
|
928 | ZSTD_CCtx_params params, | |
|
970 | const ZSTD_CCtx_params* params, | |
|
929 | 971 | unsigned long long pledgedSrcSize); |
|
930 | 972 | |
|
931 | 973 | /* ZSTD_compress_advanced_internal() : |
@@ -934,7 +976,7 size_t ZSTD_compress_advanced_internal(Z | |||
|
934 | 976 | void* dst, size_t dstCapacity, |
|
935 | 977 | const void* src, size_t srcSize, |
|
936 | 978 | const void* dict,size_t dictSize, |
|
937 | ZSTD_CCtx_params params); | |
|
979 | const ZSTD_CCtx_params* params); | |
|
938 | 980 | |
|
939 | 981 | |
|
940 | 982 | /* ZSTD_writeLastEmptyBlock() : |
@@ -70,7 +70,7 size_t ZSTD_compressLiterals (ZSTD_hufCT | |||
|
70 | 70 | ZSTD_strategy strategy, int disableLiteralCompression, |
|
71 | 71 | void* dst, size_t dstCapacity, |
|
72 | 72 | const void* src, size_t srcSize, |
|
73 |
void* |
|
|
73 | void* entropyWorkspace, size_t entropyWorkspaceSize, | |
|
74 | 74 | const int bmi2) |
|
75 | 75 | { |
|
76 | 76 | size_t const minGain = ZSTD_minGain(srcSize, strategy); |
@@ -99,10 +99,15 size_t ZSTD_compressLiterals (ZSTD_hufCT | |||
|
99 | 99 | { HUF_repeat repeat = prevHuf->repeatMode; |
|
100 | 100 | int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; |
|
101 | 101 | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; |
|
102 | cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |
|
103 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) | |
|
104 |
|
|
|
105 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); | |
|
102 | cLitSize = singleStream ? | |
|
103 | HUF_compress1X_repeat( | |
|
104 | ostart+lhSize, dstCapacity-lhSize, src, srcSize, | |
|
105 | 255, 11, entropyWorkspace, entropyWorkspaceSize, | |
|
106 | (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : | |
|
107 | HUF_compress4X_repeat( | |
|
108 | ostart+lhSize, dstCapacity-lhSize, src, srcSize, | |
|
109 | 255, 11, entropyWorkspace, entropyWorkspaceSize, | |
|
110 | (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); | |
|
106 | 111 | if (repeat != HUF_repeat_none) { |
|
107 | 112 | /* reused the existing table */ |
|
108 | 113 | hType = set_repeat; |
@@ -23,7 +23,7 size_t ZSTD_compressLiterals (ZSTD_hufCT | |||
|
23 | 23 | ZSTD_strategy strategy, int disableLiteralCompression, |
|
24 | 24 | void* dst, size_t dstCapacity, |
|
25 | 25 | const void* src, size_t srcSize, |
|
26 |
void* |
|
|
26 | void* entropyWorkspace, size_t entropyWorkspaceSize, | |
|
27 | 27 | const int bmi2); |
|
28 | 28 | |
|
29 | 29 | #endif /* ZSTD_COMPRESS_LITERALS_H */ |
@@ -222,7 +222,7 ZSTD_buildCTable(void* dst, size_t dstCa | |||
|
222 | 222 | const BYTE* codeTable, size_t nbSeq, |
|
223 | 223 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, |
|
224 | 224 | const FSE_CTable* prevCTable, size_t prevCTableSize, |
|
225 |
void* |
|
|
225 | void* entropyWorkspace, size_t entropyWorkspaceSize) | |
|
226 | 226 | { |
|
227 | 227 | BYTE* op = (BYTE*)dst; |
|
228 | 228 | const BYTE* const oend = op + dstCapacity; |
@@ -238,7 +238,7 ZSTD_buildCTable(void* dst, size_t dstCa | |||
|
238 | 238 | memcpy(nextCTable, prevCTable, prevCTableSize); |
|
239 | 239 | return 0; |
|
240 | 240 | case set_basic: |
|
241 |
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, |
|
|
241 | FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */ | |
|
242 | 242 | return 0; |
|
243 | 243 | case set_compressed: { |
|
244 | 244 | S16 norm[MaxSeq + 1]; |
@@ -252,7 +252,7 ZSTD_buildCTable(void* dst, size_t dstCa | |||
|
252 | 252 | FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); |
|
253 | 253 | { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ |
|
254 | 254 | FORWARD_IF_ERROR(NCountSize); |
|
255 |
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, |
|
|
255 | FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize)); | |
|
256 | 256 | return NCountSize; |
|
257 | 257 | } |
|
258 | 258 | } |
@@ -35,7 +35,7 ZSTD_buildCTable(void* dst, size_t dstCa | |||
|
35 | 35 | const BYTE* codeTable, size_t nbSeq, |
|
36 | 36 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, |
|
37 | 37 | const FSE_CTable* prevCTable, size_t prevCTableSize, |
|
38 |
void* |
|
|
38 | void* entropyWorkspace, size_t entropyWorkspaceSize); | |
|
39 | 39 | |
|
40 | 40 | size_t ZSTD_encodeSequences( |
|
41 | 41 | void* dst, size_t dstCapacity, |
@@ -148,7 +148,7 size_t ZSTD_compressBlock_doubleFast_gen | |||
|
148 | 148 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; |
|
149 | 149 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; |
|
150 | 150 | ip++; |
|
151 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
151 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |
|
152 | 152 | goto _match_stored; |
|
153 | 153 | } |
|
154 | 154 | |
@@ -157,7 +157,7 size_t ZSTD_compressBlock_doubleFast_gen | |||
|
157 | 157 | && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { |
|
158 | 158 | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; |
|
159 | 159 | ip++; |
|
160 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
160 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |
|
161 | 161 | goto _match_stored; |
|
162 | 162 | } |
|
163 | 163 | |
@@ -247,7 +247,7 size_t ZSTD_compressBlock_doubleFast_gen | |||
|
247 | 247 | offset_2 = offset_1; |
|
248 | 248 | offset_1 = offset; |
|
249 | 249 | |
|
250 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
250 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
251 | 251 | |
|
252 | 252 | _match_stored: |
|
253 | 253 | /* match found */ |
@@ -278,7 +278,7 size_t ZSTD_compressBlock_doubleFast_gen | |||
|
278 | 278 | const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; |
|
279 | 279 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; |
|
280 | 280 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
|
281 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); | |
|
281 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |
|
282 | 282 | hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; |
|
283 | 283 | hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; |
|
284 | 284 | ip += repLength2; |
@@ -297,7 +297,7 size_t ZSTD_compressBlock_doubleFast_gen | |||
|
297 | 297 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ |
|
298 | 298 | hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); |
|
299 | 299 | hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); |
|
300 | ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); | |
|
300 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); | |
|
301 | 301 | ip += rLength; |
|
302 | 302 | anchor = ip; |
|
303 | 303 | continue; /* faster when present ... (?) */ |
@@ -411,7 +411,7 static size_t ZSTD_compressBlock_doubleF | |||
|
411 | 411 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
412 | 412 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
413 | 413 | ip++; |
|
414 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
414 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |
|
415 | 415 | } else { |
|
416 | 416 | if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { |
|
417 | 417 | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; |
@@ -422,7 +422,7 static size_t ZSTD_compressBlock_doubleF | |||
|
422 | 422 | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
|
423 | 423 | offset_2 = offset_1; |
|
424 | 424 | offset_1 = offset; |
|
425 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
425 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
426 | 426 | |
|
427 | 427 | } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { |
|
428 | 428 | size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); |
@@ -447,7 +447,7 static size_t ZSTD_compressBlock_doubleF | |||
|
447 | 447 | } |
|
448 | 448 | offset_2 = offset_1; |
|
449 | 449 | offset_1 = offset; |
|
450 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
450 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
451 | 451 | |
|
452 | 452 | } else { |
|
453 | 453 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
@@ -479,7 +479,7 static size_t ZSTD_compressBlock_doubleF | |||
|
479 | 479 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
|
480 | 480 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
|
481 | 481 | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
|
482 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); | |
|
482 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |
|
483 | 483 | hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; |
|
484 | 484 | hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; |
|
485 | 485 | ip += repLength2; |
@@ -8,7 +8,7 | |||
|
8 | 8 | * You may select, at your option, one of the above-listed licenses. |
|
9 | 9 | */ |
|
10 | 10 | |
|
11 | #include "zstd_compress_internal.h" | |
|
11 | #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ | |
|
12 | 12 | #include "zstd_fast.h" |
|
13 | 13 | |
|
14 | 14 | |
@@ -43,8 +43,8 void ZSTD_fillHashTable(ZSTD_matchState_ | |||
|
43 | 43 | } |
|
44 | 44 | |
|
45 | 45 | |
|
46 | FORCE_INLINE_TEMPLATE | |
|
47 |
|
|
|
46 | FORCE_INLINE_TEMPLATE size_t | |
|
47 | ZSTD_compressBlock_fast_generic( | |
|
48 | 48 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
49 | 49 | void const* src, size_t srcSize, |
|
50 | 50 | U32 const mls) |
@@ -74,8 +74,7 size_t ZSTD_compressBlock_fast_generic( | |||
|
74 | 74 | DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); |
|
75 | 75 | ip0 += (ip0 == prefixStart); |
|
76 | 76 | ip1 = ip0 + 1; |
|
77 | { | |
|
78 | U32 const maxRep = (U32)(ip0 - prefixStart); | |
|
77 | { U32 const maxRep = (U32)(ip0 - prefixStart); | |
|
79 | 78 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; |
|
80 | 79 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; |
|
81 | 80 | } |
@@ -118,8 +117,7 size_t ZSTD_compressBlock_fast_generic( | |||
|
118 | 117 | match0 = match1; |
|
119 | 118 | goto _offset; |
|
120 | 119 | } |
|
121 | { | |
|
122 | size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; | |
|
120 | { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; | |
|
123 | 121 | assert(step >= 2); |
|
124 | 122 | ip0 += step; |
|
125 | 123 | ip1 += step; |
@@ -138,7 +136,7 size_t ZSTD_compressBlock_fast_generic( | |||
|
138 | 136 | _match: /* Requires: ip0, match0, offcode */ |
|
139 | 137 | /* Count the forward length */ |
|
140 | 138 | mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; |
|
141 | ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); | |
|
139 | ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); | |
|
142 | 140 | /* match found */ |
|
143 | 141 | ip0 += mLength; |
|
144 | 142 | anchor = ip0; |
@@ -150,16 +148,15 size_t ZSTD_compressBlock_fast_generic( | |||
|
150 | 148 | hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ |
|
151 | 149 | hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); |
|
152 | 150 | |
|
153 | while ( (ip0 <= ilimit) | |
|
154 |
&& ( (offset_2 |
|
|
155 | & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { | |
|
151 | while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */ | |
|
152 | && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { | |
|
156 | 153 | /* store sequence */ |
|
157 | 154 | size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; |
|
158 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |
|
155 | { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ | |
|
159 | 156 | hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); |
|
160 | 157 | ip0 += rLength; |
|
161 | 158 | ip1 = ip0 + 1; |
|
162 | ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); | |
|
159 | ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); | |
|
163 | 160 | anchor = ip0; |
|
164 | 161 | continue; /* faster when present (confirmed on gcc-8) ... (?) */ |
|
165 | 162 | } |
@@ -179,8 +176,7 size_t ZSTD_compressBlock_fast( | |||
|
179 | 176 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
180 | 177 | void const* src, size_t srcSize) |
|
181 | 178 | { |
|
182 | ZSTD_compressionParameters const* cParams = &ms->cParams; | |
|
183 | U32 const mls = cParams->minMatch; | |
|
179 | U32 const mls = ms->cParams.minMatch; | |
|
184 | 180 | assert(ms->dictMatchState == NULL); |
|
185 | 181 | switch(mls) |
|
186 | 182 | { |
@@ -265,7 +261,7 size_t ZSTD_compressBlock_fast_dictMatch | |||
|
265 | 261 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
266 | 262 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
267 | 263 | ip++; |
|
268 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
264 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |
|
269 | 265 | } else if ( (matchIndex <= prefixStartIndex) ) { |
|
270 | 266 | size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); |
|
271 | 267 | U32 const dictMatchIndex = dictHashTable[dictHash]; |
@@ -285,7 +281,7 size_t ZSTD_compressBlock_fast_dictMatch | |||
|
285 | 281 | } /* catch up */ |
|
286 | 282 | offset_2 = offset_1; |
|
287 | 283 | offset_1 = offset; |
|
288 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
284 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
289 | 285 | } |
|
290 | 286 | } else if (MEM_read32(match) != MEM_read32(ip)) { |
|
291 | 287 | /* it's not a match, and we're not going to check the dictionary */ |
@@ -300,7 +296,7 size_t ZSTD_compressBlock_fast_dictMatch | |||
|
300 | 296 | && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
|
301 | 297 | offset_2 = offset_1; |
|
302 | 298 | offset_1 = offset; |
|
303 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
299 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
304 | 300 | } |
|
305 | 301 | |
|
306 | 302 | /* match found */ |
@@ -325,7 +321,7 size_t ZSTD_compressBlock_fast_dictMatch | |||
|
325 | 321 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
|
326 | 322 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
|
327 | 323 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
|
328 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); | |
|
324 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |
|
329 | 325 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
|
330 | 326 | ip += repLength2; |
|
331 | 327 | anchor = ip; |
@@ -348,8 +344,7 size_t ZSTD_compressBlock_fast_dictMatch | |||
|
348 | 344 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
349 | 345 | void const* src, size_t srcSize) |
|
350 | 346 | { |
|
351 | ZSTD_compressionParameters const* cParams = &ms->cParams; | |
|
352 | U32 const mls = cParams->minMatch; | |
|
347 | U32 const mls = ms->cParams.minMatch; | |
|
353 | 348 | assert(ms->dictMatchState != NULL); |
|
354 | 349 | switch(mls) |
|
355 | 350 | { |
@@ -408,16 +403,17 static size_t ZSTD_compressBlock_fast_ex | |||
|
408 | 403 | const U32 repIndex = current + 1 - offset_1; |
|
409 | 404 | const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; |
|
410 | 405 | const BYTE* const repMatch = repBase + repIndex; |
|
411 | size_t mLength; | |
|
412 | 406 | hashTable[h] = current; /* update hash table */ |
|
413 | 407 | assert(offset_1 <= current +1); /* check repIndex */ |
|
414 | 408 | |
|
415 | 409 | if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) |
|
416 | 410 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
|
417 | 411 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
418 |
|
|
|
412 | size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; | |
|
419 | 413 | ip++; |
|
420 |
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, |
|
|
414 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); | |
|
415 | ip += rLength; | |
|
416 | anchor = ip; | |
|
421 | 417 | } else { |
|
422 | 418 | if ( (matchIndex < dictStartIndex) || |
|
423 | 419 | (MEM_read32(match) != MEM_read32(ip)) ) { |
@@ -427,19 +423,15 static size_t ZSTD_compressBlock_fast_ex | |||
|
427 | 423 | } |
|
428 | 424 | { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; |
|
429 | 425 | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; |
|
430 | U32 offset; | |
|
431 | mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |
|
426 | U32 const offset = current - matchIndex; | |
|
427 | size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |
|
432 | 428 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
|
433 | offset = current - matchIndex; | |
|
434 | offset_2 = offset_1; | |
|
435 |
|
|
|
436 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
429 | offset_2 = offset_1; offset_1 = offset; /* update offset history */ | |
|
430 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
431 | ip += mLength; | |
|
432 | anchor = ip; | |
|
437 | 433 | } } |
|
438 | 434 | |
|
439 | /* found a match : store it */ | |
|
440 | ip += mLength; | |
|
441 | anchor = ip; | |
|
442 | ||
|
443 | 435 | if (ip <= ilimit) { |
|
444 | 436 | /* Fill Table */ |
|
445 | 437 | hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; |
@@ -448,13 +440,13 static size_t ZSTD_compressBlock_fast_ex | |||
|
448 | 440 | while (ip <= ilimit) { |
|
449 | 441 | U32 const current2 = (U32)(ip-base); |
|
450 | 442 | U32 const repIndex2 = current2 - offset_2; |
|
451 | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; | |
|
443 | const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; | |
|
452 | 444 | if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ |
|
453 | 445 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
|
454 | 446 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
|
455 | 447 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
|
456 | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |
|
457 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); | |
|
448 | { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ | |
|
449 | ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); | |
|
458 | 450 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
|
459 | 451 | ip += repLength2; |
|
460 | 452 | anchor = ip; |
@@ -476,8 +468,7 size_t ZSTD_compressBlock_fast_extDict( | |||
|
476 | 468 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
477 | 469 | void const* src, size_t srcSize) |
|
478 | 470 | { |
|
479 | ZSTD_compressionParameters const* cParams = &ms->cParams; | |
|
480 | U32 const mls = cParams->minMatch; | |
|
471 | U32 const mls = ms->cParams.minMatch; | |
|
481 | 472 | switch(mls) |
|
482 | 473 | { |
|
483 | 474 | default: /* includes case 3 */ |
@@ -810,7 +810,7 ZSTD_compressBlock_lazy_generic( | |||
|
810 | 810 | /* store sequence */ |
|
811 | 811 | _storeSequence: |
|
812 | 812 | { size_t const litLength = start - anchor; |
|
813 | ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); | |
|
813 | ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); | |
|
814 | 814 | anchor = ip = start + matchLength; |
|
815 | 815 | } |
|
816 | 816 | |
@@ -828,7 +828,7 ZSTD_compressBlock_lazy_generic( | |||
|
828 | 828 | const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; |
|
829 | 829 | matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; |
|
830 | 830 | offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ |
|
831 | ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); | |
|
831 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); | |
|
832 | 832 | ip += matchLength; |
|
833 | 833 | anchor = ip; |
|
834 | 834 | continue; |
@@ -843,7 +843,7 ZSTD_compressBlock_lazy_generic( | |||
|
843 | 843 | /* store sequence */ |
|
844 | 844 | matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; |
|
845 | 845 | offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ |
|
846 | ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); | |
|
846 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); | |
|
847 | 847 | ip += matchLength; |
|
848 | 848 | anchor = ip; |
|
849 | 849 | continue; /* faster when present ... (?) */ |
@@ -1051,7 +1051,7 size_t ZSTD_compressBlock_lazy_extDict_g | |||
|
1051 | 1051 | /* store sequence */ |
|
1052 | 1052 | _storeSequence: |
|
1053 | 1053 | { size_t const litLength = start - anchor; |
|
1054 | ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); | |
|
1054 | ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); | |
|
1055 | 1055 | anchor = ip = start + matchLength; |
|
1056 | 1056 | } |
|
1057 | 1057 | |
@@ -1066,7 +1066,7 size_t ZSTD_compressBlock_lazy_extDict_g | |||
|
1066 | 1066 | const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; |
|
1067 | 1067 | matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; |
|
1068 | 1068 | offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ |
|
1069 | ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); | |
|
1069 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); | |
|
1070 | 1070 | ip += matchLength; |
|
1071 | 1071 | anchor = ip; |
|
1072 | 1072 | continue; /* faster when present ... (?) */ |
@@ -49,9 +49,9 size_t ZSTD_ldm_getTableSize(ldmParams_t | |||
|
49 | 49 | { |
|
50 | 50 | size_t const ldmHSize = ((size_t)1) << params.hashLog; |
|
51 | 51 | size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); |
|
52 | size_t const ldmBucketSize = | |
|
53 | ((size_t)1) << (params.hashLog - ldmBucketSizeLog); | |
|
54 | size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t); | |
|
52 | size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); | |
|
53 | size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) | |
|
54 | + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); | |
|
55 | 55 | return params.enableLdm ? totalSize : 0; |
|
56 | 56 | } |
|
57 | 57 | |
@@ -583,7 +583,7 size_t ZSTD_ldm_blockCompress(rawSeqStor | |||
|
583 | 583 | rep[i] = rep[i-1]; |
|
584 | 584 | rep[0] = sequence.offset; |
|
585 | 585 | /* Store the sequence */ |
|
586 | ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, | |
|
586 | ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, | |
|
587 | 587 | sequence.offset + ZSTD_REP_MOVE, |
|
588 | 588 | sequence.matchLength - MINMATCH); |
|
589 | 589 | ip += sequence.matchLength; |
@@ -1098,7 +1098,7 ZSTD_compressBlock_opt_generic(ZSTD_matc | |||
|
1098 | 1098 | |
|
1099 | 1099 | assert(anchor + llen <= iend); |
|
1100 | 1100 | ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); |
|
1101 | ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH); | |
|
1101 | ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); | |
|
1102 | 1102 | anchor += advance; |
|
1103 | 1103 | ip = anchor; |
|
1104 | 1104 | } } |
@@ -668,7 +668,7 static void ZSTDMT_compressionJob(void* | |||
|
668 | 668 | |
|
669 | 669 | /* init */ |
|
670 | 670 | if (job->cdict) { |
|
671 | size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize); | |
|
671 | size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); | |
|
672 | 672 | assert(job->firstJob); /* only allowed for first job */ |
|
673 | 673 | if (ZSTD_isError(initError)) JOB_ERROR(initError); |
|
674 | 674 | } else { /* srcStart points at reloaded section */ |
@@ -680,7 +680,7 static void ZSTDMT_compressionJob(void* | |||
|
680 | 680 | job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ |
|
681 | 681 | ZSTD_dtlm_fast, |
|
682 | 682 | NULL, /*cdict*/ |
|
683 | jobParams, pledgedSrcSize); | |
|
683 | &jobParams, pledgedSrcSize); | |
|
684 | 684 | if (ZSTD_isError(initError)) JOB_ERROR(initError); |
|
685 | 685 | } } |
|
686 | 686 | |
@@ -927,12 +927,18 static void ZSTDMT_releaseAllJobResource | |||
|
927 | 927 | unsigned jobID; |
|
928 | 928 | DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); |
|
929 | 929 | for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { |
|
930 | /* Copy the mutex/cond out */ | |
|
931 | ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; | |
|
932 | ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; | |
|
933 | ||
|
930 | 934 | DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); |
|
931 | 935 | ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); |
|
932 | mtctx->jobs[jobID].dstBuff = g_nullBuffer; | |
|
933 | mtctx->jobs[jobID].cSize = 0; | |
|
936 | ||
|
937 | /* Clear the job description, but keep the mutex/cond */ | |
|
938 | memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); | |
|
939 | mtctx->jobs[jobID].job_mutex = mutex; | |
|
940 | mtctx->jobs[jobID].job_cond = cond; | |
|
934 | 941 | } |
|
935 | memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); | |
|
936 | 942 | mtctx->inBuff.buffer = g_nullBuffer; |
|
937 | 943 | mtctx->inBuff.filled = 0; |
|
938 | 944 | mtctx->allJobsCompleted = 1; |
@@ -1028,9 +1034,9 size_t ZSTDMT_getMTCtxParameter(ZSTDMT_C | |||
|
1028 | 1034 | |
|
1029 | 1035 | /* Sets parameters relevant to the compression job, |
|
1030 | 1036 | * initializing others to default values. */ |
|
1031 |
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params |
|
|
1037 | static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params) | |
|
1032 | 1038 | { |
|
1033 | ZSTD_CCtx_params jobParams = params; | |
|
1039 | ZSTD_CCtx_params jobParams = *params; | |
|
1034 | 1040 | /* Clear parameters related to multithreading */ |
|
1035 | 1041 | jobParams.forceWindow = 0; |
|
1036 | 1042 | jobParams.nbWorkers = 0; |
@@ -1151,16 +1157,16 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt | |||
|
1151 | 1157 | /* ===== Multi-threaded compression ===== */ |
|
1152 | 1158 | /* ------------------------------------------ */ |
|
1153 | 1159 | |
|
1154 |
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params |
|
|
1160 | static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) | |
|
1155 | 1161 | { |
|
1156 | 1162 | unsigned jobLog; |
|
1157 |
if (params |
|
|
1163 | if (params->ldmParams.enableLdm) { | |
|
1158 | 1164 | /* In Long Range Mode, the windowLog is typically oversized. |
|
1159 | 1165 | * In which case, it's preferable to determine the jobSize |
|
1160 | 1166 | * based on chainLog instead. */ |
|
1161 |
jobLog = MAX(21, params |
|
|
1167 | jobLog = MAX(21, params->cParams.chainLog + 4); | |
|
1162 | 1168 | } else { |
|
1163 |
jobLog = MAX(20, params |
|
|
1169 | jobLog = MAX(20, params->cParams.windowLog + 2); | |
|
1164 | 1170 | } |
|
1165 | 1171 | return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); |
|
1166 | 1172 | } |
@@ -1193,27 +1199,27 static int ZSTDMT_overlapLog(int ovlog, | |||
|
1193 | 1199 | return ovlog; |
|
1194 | 1200 | } |
|
1195 | 1201 | |
|
1196 |
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params |
|
|
1202 | static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) | |
|
1197 | 1203 | { |
|
1198 |
int const overlapRLog = 9 - ZSTDMT_overlapLog(params |
|
|
1199 |
int ovLog = (overlapRLog >= 8) ? 0 : (params |
|
|
1204 | int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); | |
|
1205 | int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); | |
|
1200 | 1206 | assert(0 <= overlapRLog && overlapRLog <= 8); |
|
1201 |
if (params |
|
|
1207 | if (params->ldmParams.enableLdm) { | |
|
1202 | 1208 | /* In Long Range Mode, the windowLog is typically oversized. |
|
1203 | 1209 | * In which case, it's preferable to determine the jobSize |
|
1204 | 1210 | * based on chainLog instead. |
|
1205 | 1211 | * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ |
|
1206 |
ovLog = MIN(params |
|
|
1212 | ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) | |
|
1207 | 1213 | - overlapRLog; |
|
1208 | 1214 | } |
|
1209 | 1215 | assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); |
|
1210 |
DEBUGLOG(4, "overlapLog : %i", params |
|
|
1216 | DEBUGLOG(4, "overlapLog : %i", params->overlapLog); | |
|
1211 | 1217 | DEBUGLOG(4, "overlap size : %i", 1 << ovLog); |
|
1212 | 1218 | return (ovLog==0) ? 0 : (size_t)1 << ovLog; |
|
1213 | 1219 | } |
|
1214 | 1220 | |
|
1215 | 1221 | static unsigned |
|
1216 | ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) | |
|
1222 | ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers) | |
|
1217 | 1223 | { |
|
1218 | 1224 | assert(nbWorkers>0); |
|
1219 | 1225 | { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); |
@@ -1236,9 +1242,9 static size_t ZSTDMT_compress_advanced_i | |||
|
1236 | 1242 | const ZSTD_CDict* cdict, |
|
1237 | 1243 | ZSTD_CCtx_params params) |
|
1238 | 1244 | { |
|
1239 | ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); | |
|
1240 | size_t const overlapSize = ZSTDMT_computeOverlapSize(params); | |
|
1241 | unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); | |
|
1245 | ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(¶ms); | |
|
1246 | size_t const overlapSize = ZSTDMT_computeOverlapSize(¶ms); | |
|
1247 | unsigned const nbJobs = ZSTDMT_computeNbJobs(¶ms, srcSize, params.nbWorkers); | |
|
1242 | 1248 | size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; |
|
1243 | 1249 | size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ |
|
1244 | 1250 | const char* const srcStart = (const char*)src; |
@@ -1256,7 +1262,7 static size_t ZSTDMT_compress_advanced_i | |||
|
1256 | 1262 | ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; |
|
1257 | 1263 | DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); |
|
1258 | 1264 | if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); |
|
1259 | return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); | |
|
1265 | return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams); | |
|
1260 | 1266 | } |
|
1261 | 1267 | |
|
1262 | 1268 | assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ |
@@ -1404,12 +1410,12 size_t ZSTDMT_initCStream_internal( | |||
|
1404 | 1410 | |
|
1405 | 1411 | mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ |
|
1406 | 1412 | if (mtctx->singleBlockingThread) { |
|
1407 | ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); | |
|
1413 | ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(¶ms); | |
|
1408 | 1414 | DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); |
|
1409 | 1415 | assert(singleThreadParams.nbWorkers == 0); |
|
1410 | 1416 | return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], |
|
1411 | 1417 | dict, dictSize, cdict, |
|
1412 | singleThreadParams, pledgedSrcSize); | |
|
1418 | &singleThreadParams, pledgedSrcSize); | |
|
1413 | 1419 | } |
|
1414 | 1420 | |
|
1415 | 1421 | DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); |
@@ -1435,11 +1441,11 size_t ZSTDMT_initCStream_internal( | |||
|
1435 | 1441 | mtctx->cdict = cdict; |
|
1436 | 1442 | } |
|
1437 | 1443 | |
|
1438 | mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params); | |
|
1444 | mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(¶ms); | |
|
1439 | 1445 | DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10)); |
|
1440 | 1446 | mtctx->targetSectionSize = params.jobSize; |
|
1441 | 1447 | if (mtctx->targetSectionSize == 0) { |
|
1442 | mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); | |
|
1448 | mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(¶ms); | |
|
1443 | 1449 | } |
|
1444 | 1450 | assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); |
|
1445 | 1451 |
@@ -61,7 +61,9 | |||
|
61 | 61 | * Error Management |
|
62 | 62 | ****************************************************************/ |
|
63 | 63 | #define HUF_isError ERR_isError |
|
64 | #ifndef CHECK_F | |
|
64 | 65 | #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } |
|
66 | #endif | |
|
65 | 67 | |
|
66 | 68 | |
|
67 | 69 | /* ************************************************************** |
@@ -88,10 +88,7 size_t ZSTD_estimateDCtxSize(void) { ret | |||
|
88 | 88 | |
|
89 | 89 | static size_t ZSTD_startingInputLength(ZSTD_format_e format) |
|
90 | 90 | { |
|
91 |
size_t const startingInputLength = (format |
|
|
92 | ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE : | |
|
93 | ZSTD_FRAMEHEADERSIZE_PREFIX; | |
|
94 | ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); | |
|
91 | size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); | |
|
95 | 92 | /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ |
|
96 | 93 | assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); |
|
97 | 94 | return startingInputLength; |
@@ -376,7 +373,7 unsigned long long ZSTD_findDecompressed | |||
|
376 | 373 | { |
|
377 | 374 | unsigned long long totalDstSize = 0; |
|
378 | 375 | |
|
379 | while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { | |
|
376 | while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { | |
|
380 | 377 | U32 const magicNumber = MEM_readLE32(src); |
|
381 | 378 | |
|
382 | 379 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { |
@@ -629,11 +626,12 static size_t ZSTD_decompressFrame(ZSTD_ | |||
|
629 | 626 | |
|
630 | 627 | /* check */ |
|
631 | 628 | RETURN_ERROR_IF( |
|
632 | remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, | |
|
629 | remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, | |
|
633 | 630 | srcSize_wrong); |
|
634 | 631 | |
|
635 | 632 | /* Frame Header */ |
|
636 |
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize( |
|
|
633 | { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( | |
|
634 | ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); | |
|
637 | 635 | if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; |
|
638 | 636 | RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, |
|
639 | 637 | srcSize_wrong); |
@@ -714,7 +712,7 static size_t ZSTD_decompressMultiFrame( | |||
|
714 | 712 | dictSize = ZSTD_DDict_dictSize(ddict); |
|
715 | 713 | } |
|
716 | 714 | |
|
717 | while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) { | |
|
715 | while (srcSize >= ZSTD_startingInputLength(dctx->format)) { | |
|
718 | 716 | |
|
719 | 717 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) |
|
720 | 718 | if (ZSTD_isLegacy(src, srcSize)) { |
@@ -1098,7 +1096,7 ZSTD_loadDEntropy(ZSTD_entropyDTables_t* | |||
|
1098 | 1096 | size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); |
|
1099 | 1097 | for (i=0; i<3; i++) { |
|
1100 | 1098 | U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; |
|
1101 |
RETURN_ERROR_IF(rep==0 || rep > |
|
|
1099 | RETURN_ERROR_IF(rep==0 || rep > dictContentSize, | |
|
1102 | 1100 | dictionary_corrupted); |
|
1103 | 1101 | entropy->rep[i] = rep; |
|
1104 | 1102 | } } |
@@ -1267,7 +1265,7 size_t ZSTD_DCtx_loadDictionary_advanced | |||
|
1267 | 1265 | { |
|
1268 | 1266 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); |
|
1269 | 1267 | ZSTD_clearDict(dctx); |
|
1270 |
if (dict && dictSize |
|
|
1268 | if (dict && dictSize != 0) { | |
|
1271 | 1269 | dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); |
|
1272 | 1270 | RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); |
|
1273 | 1271 | dctx->ddict = dctx->ddictLocal; |
@@ -1300,14 +1298,14 size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dc | |||
|
1300 | 1298 | |
|
1301 | 1299 | |
|
1302 | 1300 | /* ZSTD_initDStream_usingDict() : |
|
1303 |
* return : expected size, aka ZSTD_ |
|
|
1301 | * return : expected size, aka ZSTD_startingInputLength(). | |
|
1304 | 1302 | * this function cannot fail */ |
|
1305 | 1303 | size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) |
|
1306 | 1304 | { |
|
1307 | 1305 | DEBUGLOG(4, "ZSTD_initDStream_usingDict"); |
|
1308 | 1306 | FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); |
|
1309 | 1307 | FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); |
|
1310 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
|
1308 | return ZSTD_startingInputLength(zds->format); | |
|
1311 | 1309 | } |
|
1312 | 1310 | |
|
1313 | 1311 | /* note : this variant can't fail */ |
@@ -1324,16 +1322,16 size_t ZSTD_initDStream_usingDDict(ZSTD_ | |||
|
1324 | 1322 | { |
|
1325 | 1323 | FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); |
|
1326 | 1324 | FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); |
|
1327 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
|
1325 | return ZSTD_startingInputLength(dctx->format); | |
|
1328 | 1326 | } |
|
1329 | 1327 | |
|
1330 | 1328 | /* ZSTD_resetDStream() : |
|
1331 |
* return : expected size, aka ZSTD_ |
|
|
1329 | * return : expected size, aka ZSTD_startingInputLength(). | |
|
1332 | 1330 | * this function cannot fail */ |
|
1333 | 1331 | size_t ZSTD_resetDStream(ZSTD_DStream* dctx) |
|
1334 | 1332 | { |
|
1335 | 1333 | FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); |
|
1336 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
|
1334 | return ZSTD_startingInputLength(dctx->format); | |
|
1337 | 1335 | } |
|
1338 | 1336 | |
|
1339 | 1337 | |
@@ -1564,7 +1562,7 size_t ZSTD_decompressStream(ZSTD_DStrea | |||
|
1564 | 1562 | zds->lhSize += remainingInput; |
|
1565 | 1563 | } |
|
1566 | 1564 | input->pos = input->size; |
|
1567 | return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ | |
|
1565 | return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ | |
|
1568 | 1566 | } |
|
1569 | 1567 | assert(ip != NULL); |
|
1570 | 1568 | memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; |
@@ -573,38 +573,118 typedef struct { | |||
|
573 | 573 | size_t pos; |
|
574 | 574 | } seqState_t; |
|
575 | 575 | |
|
576 | /*! ZSTD_overlapCopy8() : | |
|
577 | * Copies 8 bytes from ip to op and updates op and ip where ip <= op. | |
|
578 | * If the offset is < 8 then the offset is spread to at least 8 bytes. | |
|
579 | * | |
|
580 | * Precondition: *ip <= *op | |
|
581 | * Postcondition: *op - *op >= 8 | |
|
582 | */ | |
|
583 | static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { | |
|
584 | assert(*ip <= *op); | |
|
585 | if (offset < 8) { | |
|
586 | /* close range match, overlap */ | |
|
587 | static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ | |
|
588 | static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ | |
|
589 | int const sub2 = dec64table[offset]; | |
|
590 | (*op)[0] = (*ip)[0]; | |
|
591 | (*op)[1] = (*ip)[1]; | |
|
592 | (*op)[2] = (*ip)[2]; | |
|
593 | (*op)[3] = (*ip)[3]; | |
|
594 | *ip += dec32table[offset]; | |
|
595 | ZSTD_copy4(*op+4, *ip); | |
|
596 | *ip -= sub2; | |
|
597 | } else { | |
|
598 | ZSTD_copy8(*op, *ip); | |
|
599 | } | |
|
600 | *ip += 8; | |
|
601 | *op += 8; | |
|
602 | assert(*op - *ip >= 8); | |
|
603 | } | |
|
576 | 604 | |
|
577 | /* ZSTD_execSequenceLast7(): | |
|
578 | * exceptional case : decompress a match starting within last 7 bytes of output buffer. | |
|
579 | * requires more careful checks, to ensure there is no overflow. | |
|
580 | * performance does not matter though. | |
|
581 | * note : this case is supposed to be never generated "naturally" by reference encoder, | |
|
582 | * since in most cases it needs at least 8 bytes to look for a match. | |
|
583 | * but it's allowed by the specification. */ | |
|
605 | /*! ZSTD_safecopy() : | |
|
606 | * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer | |
|
607 | * and write up to 16 bytes past oend_w (op >= oend_w is allowed). | |
|
608 | * This function is only called in the uncommon case where the sequence is near the end of the block. It | |
|
609 | * should be fast for a single long sequence, but can be slow for several short sequences. | |
|
610 | * | |
|
611 | * @param ovtype controls the overlap detection | |
|
612 | * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. | |
|
613 | * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. | |
|
614 | * The src buffer must be before the dst buffer. | |
|
615 | */ | |
|
616 | static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { | |
|
617 | ptrdiff_t const diff = op - ip; | |
|
618 | BYTE* const oend = op + length; | |
|
619 | ||
|
620 | assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || | |
|
621 | (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); | |
|
622 | ||
|
623 | if (length < 8) { | |
|
624 | /* Handle short lengths. */ | |
|
625 | while (op < oend) *op++ = *ip++; | |
|
626 | return; | |
|
627 | } | |
|
628 | if (ovtype == ZSTD_overlap_src_before_dst) { | |
|
629 | /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ | |
|
630 | assert(length >= 8); | |
|
631 | ZSTD_overlapCopy8(&op, &ip, diff); | |
|
632 | assert(op - ip >= 8); | |
|
633 | assert(op <= oend); | |
|
634 | } | |
|
635 | ||
|
636 | if (oend <= oend_w) { | |
|
637 | /* No risk of overwrite. */ | |
|
638 | ZSTD_wildcopy(op, ip, length, ovtype); | |
|
639 | return; | |
|
640 | } | |
|
641 | if (op <= oend_w) { | |
|
642 | /* Wildcopy until we get close to the end. */ | |
|
643 | assert(oend > oend_w); | |
|
644 | ZSTD_wildcopy(op, ip, oend_w - op, ovtype); | |
|
645 | ip += oend_w - op; | |
|
646 | op = oend_w; | |
|
647 | } | |
|
648 | /* Handle the leftovers. */ | |
|
649 | while (op < oend) *op++ = *ip++; | |
|
650 | } | |
|
651 | ||
|
652 | /* ZSTD_execSequenceEnd(): | |
|
653 | * This version handles cases that are near the end of the output buffer. It requires | |
|
654 | * more careful checks to make sure there is no overflow. By separating out these hard | |
|
655 | * and unlikely cases, we can speed up the common cases. | |
|
656 | * | |
|
657 | * NOTE: This function needs to be fast for a single long sequence, but doesn't need | |
|
658 | * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). | |
|
659 | */ | |
|
584 | 660 | FORCE_NOINLINE |
|
585 |
size_t ZSTD_execSequence |
|
|
586 |
|
|
|
587 |
|
|
|
588 |
|
|
|
661 | size_t ZSTD_execSequenceEnd(BYTE* op, | |
|
662 | BYTE* const oend, seq_t sequence, | |
|
663 | const BYTE** litPtr, const BYTE* const litLimit, | |
|
664 | const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) | |
|
589 | 665 | { |
|
590 | 666 | BYTE* const oLitEnd = op + sequence.litLength; |
|
591 | 667 | size_t const sequenceLength = sequence.litLength + sequence.matchLength; |
|
592 | 668 | BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ |
|
593 | 669 | const BYTE* const iLitEnd = *litPtr + sequence.litLength; |
|
594 | 670 | const BYTE* match = oLitEnd - sequence.offset; |
|
671 | BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; | |
|
595 | 672 | |
|
596 | /* check */ | |
|
597 | RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); | |
|
673 | /* bounds checks */ | |
|
674 | assert(oLitEnd < oMatchEnd); | |
|
675 | RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); | |
|
598 | 676 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); |
|
599 | 677 | |
|
600 | 678 | /* copy literals */ |
|
601 | while (op < oLitEnd) *op++ = *(*litPtr)++; | |
|
679 | ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); | |
|
680 | op = oLitEnd; | |
|
681 | *litPtr = iLitEnd; | |
|
602 | 682 | |
|
603 | 683 | /* copy Match */ |
|
604 |
if (sequence.offset > (size_t)(oLitEnd - |
|
|
684 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { | |
|
605 | 685 | /* offset beyond prefix */ |
|
606 |
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - v |
|
|
607 |
match = dictEnd - ( |
|
|
686 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); | |
|
687 | match = dictEnd - (prefixStart-match); | |
|
608 | 688 | if (match + sequence.matchLength <= dictEnd) { |
|
609 | 689 | memmove(oLitEnd, match, sequence.matchLength); |
|
610 | 690 | return sequenceLength; |
@@ -614,13 +694,12 size_t ZSTD_execSequenceLast7(BYTE* op, | |||
|
614 | 694 | memmove(oLitEnd, match, length1); |
|
615 | 695 | op = oLitEnd + length1; |
|
616 | 696 | sequence.matchLength -= length1; |
|
617 |
match = |
|
|
697 | match = prefixStart; | |
|
618 | 698 | } } |
|
619 | while (op < oMatchEnd) *op++ = *match++; | |
|
699 | ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); | |
|
620 | 700 | return sequenceLength; |
|
621 | 701 | } |
|
622 | 702 | |
|
623 | ||
|
624 | 703 | HINT_INLINE |
|
625 | 704 | size_t ZSTD_execSequence(BYTE* op, |
|
626 | 705 | BYTE* const oend, seq_t sequence, |
@@ -634,20 +713,29 size_t ZSTD_execSequence(BYTE* op, | |||
|
634 | 713 | const BYTE* const iLitEnd = *litPtr + sequence.litLength; |
|
635 | 714 | const BYTE* match = oLitEnd - sequence.offset; |
|
636 | 715 | |
|
637 | /* check */ | |
|
638 | RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); | |
|
639 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); | |
|
640 |
|
|
|
716 | /* Errors and uncommon cases handled here. */ | |
|
717 | assert(oLitEnd < oMatchEnd); | |
|
718 | if (iLitEnd > litLimit || oMatchEnd > oend_w) | |
|
719 | return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); | |
|
720 | ||
|
721 | /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ | |
|
722 | assert(iLitEnd <= litLimit /* Literal length is in bounds */); | |
|
723 | assert(oLitEnd <= oend_w /* Can wildcopy literals */); | |
|
724 | assert(oMatchEnd <= oend_w /* Can wildcopy matches */); | |
|
641 | 725 | |
|
642 |
/* |
|
|
643 | if (sequence.litLength > 8) | |
|
644 | ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
645 | else | |
|
646 | ZSTD_copy8(op, *litPtr); | |
|
726 | /* Copy Literals: | |
|
727 | * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. | |
|
728 | * We likely don't need the full 32-byte wildcopy. | |
|
729 | */ | |
|
730 | assert(WILDCOPY_OVERLENGTH >= 16); | |
|
731 | ZSTD_copy16(op, (*litPtr)); | |
|
732 | if (sequence.litLength > 16) { | |
|
733 | ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); | |
|
734 | } | |
|
647 | 735 | op = oLitEnd; |
|
648 | 736 | *litPtr = iLitEnd; /* update for next sequence */ |
|
649 | 737 | |
|
650 |
/* |
|
|
738 | /* Copy Match */ | |
|
651 | 739 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { |
|
652 | 740 | /* offset beyond prefix -> go into extDict */ |
|
653 | 741 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); |
@@ -662,123 +750,33 size_t ZSTD_execSequence(BYTE* op, | |||
|
662 | 750 | op = oLitEnd + length1; |
|
663 | 751 | sequence.matchLength -= length1; |
|
664 | 752 | match = prefixStart; |
|
665 | if (op > oend_w || sequence.matchLength < MINMATCH) { | |
|
666 | U32 i; | |
|
667 | for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; | |
|
668 | return sequenceLength; | |
|
669 | } | |
|
670 | 753 | } } |
|
671 | /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ | |
|
672 | ||
|
673 | /* match within prefix */ | |
|
674 | if (sequence.offset < 8) { | |
|
675 | /* close range match, overlap */ | |
|
676 | static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ | |
|
677 | static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ | |
|
678 | int const sub2 = dec64table[sequence.offset]; | |
|
679 | op[0] = match[0]; | |
|
680 | op[1] = match[1]; | |
|
681 | op[2] = match[2]; | |
|
682 | op[3] = match[3]; | |
|
683 | match += dec32table[sequence.offset]; | |
|
684 | ZSTD_copy4(op+4, match); | |
|
685 | match -= sub2; | |
|
686 | } else { | |
|
687 | ZSTD_copy8(op, match); | |
|
688 | } | |
|
689 | op += 8; match += 8; | |
|
690 | ||
|
691 | if (oMatchEnd > oend-(16-MINMATCH)) { | |
|
692 | if (op < oend_w) { | |
|
693 | ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); | |
|
694 | match += oend_w - op; | |
|
695 | op = oend_w; | |
|
696 | } | |
|
697 | while (op < oMatchEnd) *op++ = *match++; | |
|
698 | } else { | |
|
699 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ | |
|
700 | } | |
|
701 | return sequenceLength; | |
|
702 | } | |
|
703 | ||
|
704 | ||
|
705 | HINT_INLINE | |
|
706 | size_t ZSTD_execSequenceLong(BYTE* op, | |
|
707 | BYTE* const oend, seq_t sequence, | |
|
708 | const BYTE** litPtr, const BYTE* const litLimit, | |
|
709 | const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) | |
|
710 | { | |
|
711 | BYTE* const oLitEnd = op + sequence.litLength; | |
|
712 | size_t const sequenceLength = sequence.litLength + sequence.matchLength; | |
|
713 | BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ | |
|
714 | BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; | |
|
715 | const BYTE* const iLitEnd = *litPtr + sequence.litLength; | |
|
716 | const BYTE* match = sequence.match; | |
|
717 | ||
|
718 | /* check */ | |
|
719 | RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); | |
|
720 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); | |
|
721 | if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); | |
|
754 | /* Match within prefix of 1 or more bytes */ | |
|
755 | assert(op <= oMatchEnd); | |
|
756 | assert(oMatchEnd <= oend_w); | |
|
757 | assert(match >= prefixStart); | |
|
758 | assert(sequence.matchLength >= 1); | |
|
722 | 759 | |
|
723 | /* copy Literals */ | |
|
724 | if (sequence.litLength > 8) | |
|
725 | ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
726 | else | |
|
727 | ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ | |
|
728 | ||
|
729 | op = oLitEnd; | |
|
730 | *litPtr = iLitEnd; /* update for next sequence */ | |
|
760 | /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy | |
|
761 | * without overlap checking. | |
|
762 | */ | |
|
763 | if (sequence.offset >= WILDCOPY_VECLEN) { | |
|
764 | /* We bet on a full wildcopy for matches, since we expect matches to be | |
|
765 | * longer than literals (in general). In silesia, ~10% of matches are longer | |
|
766 | * than 16 bytes. | |
|
767 | */ | |
|
768 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); | |
|
769 | return sequenceLength; | |
|
770 | } | |
|
771 | assert(sequence.offset < WILDCOPY_VECLEN); | |
|
731 | 772 | |
|
732 | /* copy Match */ | |
|
733 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { | |
|
734 | /* offset beyond prefix */ | |
|
735 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); | |
|
736 | if (match + sequence.matchLength <= dictEnd) { | |
|
737 | memmove(oLitEnd, match, sequence.matchLength); | |
|
738 | return sequenceLength; | |
|
739 | } | |
|
740 | /* span extDict & currentPrefixSegment */ | |
|
741 | { size_t const length1 = dictEnd - match; | |
|
742 | memmove(oLitEnd, match, length1); | |
|
743 | op = oLitEnd + length1; | |
|
744 | sequence.matchLength -= length1; | |
|
745 | match = prefixStart; | |
|
746 | if (op > oend_w || sequence.matchLength < MINMATCH) { | |
|
747 | U32 i; | |
|
748 | for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; | |
|
749 | return sequenceLength; | |
|
750 | } | |
|
751 | } } | |
|
752 | assert(op <= oend_w); | |
|
753 | assert(sequence.matchLength >= MINMATCH); | |
|
773 | /* Copy 8 bytes and spread the offset to be >= 8. */ | |
|
774 | ZSTD_overlapCopy8(&op, &match, sequence.offset); | |
|
754 | 775 | |
|
755 | /* match within prefix */ | |
|
756 |
if (sequence. |
|
|
757 | /* close range match, overlap */ | |
|
758 | static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ | |
|
759 | static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ | |
|
760 | int const sub2 = dec64table[sequence.offset]; | |
|
761 | op[0] = match[0]; | |
|
762 | op[1] = match[1]; | |
|
763 | op[2] = match[2]; | |
|
764 | op[3] = match[3]; | |
|
765 | match += dec32table[sequence.offset]; | |
|
766 | ZSTD_copy4(op+4, match); | |
|
767 | match -= sub2; | |
|
768 | } else { | |
|
769 | ZSTD_copy8(op, match); | |
|
770 | } | |
|
771 | op += 8; match += 8; | |
|
772 | ||
|
773 | if (oMatchEnd > oend-(16-MINMATCH)) { | |
|
774 | if (op < oend_w) { | |
|
775 | ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); | |
|
776 | match += oend_w - op; | |
|
777 | op = oend_w; | |
|
778 | } | |
|
779 | while (op < oMatchEnd) *op++ = *match++; | |
|
780 | } else { | |
|
781 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ | |
|
776 | /* If the match length is > 8 bytes, then continue with the wildcopy. */ | |
|
777 | if (sequence.matchLength > 8) { | |
|
778 | assert(op < oMatchEnd); | |
|
779 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); | |
|
782 | 780 | } |
|
783 | 781 | return sequenceLength; |
|
784 | 782 | } |
@@ -1098,7 +1096,7 ZSTD_decompressSequencesLong_body( | |||
|
1098 | 1096 | /* decode and decompress */ |
|
1099 | 1097 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { |
|
1100 | 1098 | seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset); |
|
1101 |
size_t const oneSeqSize = ZSTD_execSequence |
|
|
1099 | size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); | |
|
1102 | 1100 | if (ZSTD_isError(oneSeqSize)) return oneSeqSize; |
|
1103 | 1101 | PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ |
|
1104 | 1102 | sequences[seqNb & STORED_SEQS_MASK] = sequence; |
@@ -1109,7 +1107,7 ZSTD_decompressSequencesLong_body( | |||
|
1109 | 1107 | /* finish queue */ |
|
1110 | 1108 | seqNb -= seqAdvance; |
|
1111 | 1109 | for ( ; seqNb<nbSeq ; seqNb++) { |
|
1112 |
size_t const oneSeqSize = ZSTD_execSequence |
|
|
1110 | size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); | |
|
1113 | 1111 | if (ZSTD_isError(oneSeqSize)) return oneSeqSize; |
|
1114 | 1112 | op += oneSeqSize; |
|
1115 | 1113 | } |
@@ -36,16 +36,17 extern "C" { | |||
|
36 | 36 | *****************************************************************/ |
|
37 | 37 | /* Deprecation warnings */ |
|
38 | 38 | /* Should these warnings be a problem, |
|
39 |
|
|
|
40 |
|
|
|
41 |
|
|
|
42 |
|
|
|
39 | * it is generally possible to disable them, | |
|
40 | * typically with -Wno-deprecated-declarations for gcc | |
|
41 | * or _CRT_SECURE_NO_WARNINGS in Visual. | |
|
42 | * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS | |
|
43 | */ | |
|
43 | 44 | #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS |
|
44 | 45 | # define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ |
|
45 | 46 | #else |
|
46 | 47 | # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ |
|
47 | 48 | # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API |
|
48 |
# elif (defined( |
|
|
49 | # elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) | |
|
49 | 50 | # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) |
|
50 | 51 | # elif defined(__GNUC__) && (__GNUC__ >= 3) |
|
51 | 52 | # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) |
@@ -638,8 +638,8 void COVER_warnOnSmallCorpus(size_t maxD | |||
|
638 | 638 | "compared to the source size %u! " |
|
639 | 639 | "size(source)/size(dictionary) = %f, but it should be >= " |
|
640 | 640 | "10! This may lead to a subpar dictionary! We recommend " |
|
641 |
"training on sources at least 10x, and |
|
|
642 | "size of the dictionary!\n", (U32)maxDictSize, | |
|
641 | "training on sources at least 10x, and preferably 100x " | |
|
642 | "the size of the dictionary! \n", (U32)maxDictSize, | |
|
643 | 643 | (U32)nbDmers, ratio); |
|
644 | 644 | } |
|
645 | 645 |
@@ -571,7 +571,7 static void ZDICT_fillNoise(void* buffer | |||
|
571 | 571 | unsigned const prime1 = 2654435761U; |
|
572 | 572 | unsigned const prime2 = 2246822519U; |
|
573 | 573 | unsigned acc = prime1; |
|
574 |
size_t p=0; |
|
|
574 | size_t p=0; | |
|
575 | 575 | for (p=0; p<length; p++) { |
|
576 | 576 | acc *= prime2; |
|
577 | 577 | ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21); |
@@ -15,6 +15,7 extern "C" { | |||
|
15 | 15 | #define ZSTD_H_235446 |
|
16 | 16 | |
|
17 | 17 | /* ====== Dependency ======*/ |
|
18 | #include <limits.h> /* INT_MAX */ | |
|
18 | 19 | #include <stddef.h> /* size_t */ |
|
19 | 20 | |
|
20 | 21 | |
@@ -71,7 +72,7 extern "C" { | |||
|
71 | 72 | /*------ Version ------*/ |
|
72 | 73 | #define ZSTD_VERSION_MAJOR 1 |
|
73 | 74 | #define ZSTD_VERSION_MINOR 4 |
|
74 |
#define ZSTD_VERSION_RELEASE |
|
|
75 | #define ZSTD_VERSION_RELEASE 4 | |
|
75 | 76 | |
|
76 | 77 | #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) |
|
77 | 78 | ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ |
@@ -196,9 +197,13 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(v | |||
|
196 | 197 | ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); |
|
197 | 198 | |
|
198 | 199 | /*! ZSTD_compressCCtx() : |
|
199 | * Same as ZSTD_compress(), using an explicit ZSTD_CCtx | |
|
200 | * The function will compress at requested compression level, | |
|
201 | * ignoring any other parameter */ | |
|
200 | * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. | |
|
201 | * Important : in order to behave similarly to `ZSTD_compress()`, | |
|
202 | * this function compresses at requested compression level, | |
|
203 | * __ignoring any other parameter__ . | |
|
204 | * If any advanced parameter was set using the advanced API, | |
|
205 | * they will all be reset. Only `compressionLevel` remains. | |
|
206 | */ | |
|
202 | 207 | ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, |
|
203 | 208 | void* dst, size_t dstCapacity, |
|
204 | 209 | const void* src, size_t srcSize, |
@@ -233,7 +238,7 ZSTDLIB_API size_t ZSTD_decompressDCtx(Z | |||
|
233 | 238 | * using ZSTD_CCtx_set*() functions. |
|
234 | 239 | * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. |
|
235 | 240 | * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! |
|
236 | * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx() | |
|
241 | * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . | |
|
237 | 242 | * |
|
238 | 243 | * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). |
|
239 | 244 | * |
@@ -261,18 +266,26 typedef enum { | |||
|
261 | 266 | |
|
262 | 267 | /* compression parameters |
|
263 | 268 | * Note: When compressing with a ZSTD_CDict these parameters are superseded |
|
264 |
* by the parameters used to construct the ZSTD_CDict. |
|
|
265 | * for more info (superseded-by-cdict). */ | |
|
266 |
ZSTD_c_compressionLevel=100, /* |
|
|
269 | * by the parameters used to construct the ZSTD_CDict. | |
|
270 | * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ | |
|
271 | ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. | |
|
272 | * Note that exact compression parameters are dynamically determined, | |
|
273 | * depending on both compression level and srcSize (when known). | |
|
267 | 274 | * Default level is ZSTD_CLEVEL_DEFAULT==3. |
|
268 | 275 | * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. |
|
269 | 276 | * Note 1 : it's possible to pass a negative compression level. |
|
270 |
* Note 2 : setting a level sets all |
|
|
277 | * Note 2 : setting a level resets all other compression parameters to default */ | |
|
278 | /* Advanced compression parameters : | |
|
279 | * It's possible to pin down compression parameters to some specific values. | |
|
280 | * In which case, these values are no longer dynamically selected by the compressor */ | |
|
271 | 281 | ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. |
|
282 | * This will set a memory budget for streaming decompression, | |
|
283 | * with larger values requiring more memory | |
|
284 | * and typically compressing more. | |
|
272 | 285 | * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. |
|
273 | 286 | * Special: value 0 means "use default windowLog". |
|
274 | 287 | * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT |
|
275 |
* requires explicitly allowing such |
|
|
288 | * requires explicitly allowing such size at streaming decompression stage. */ | |
|
276 | 289 | ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. |
|
277 | 290 | * Resulting memory usage is (1 << (hashLog+2)). |
|
278 | 291 | * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. |
@@ -283,13 +296,13 typedef enum { | |||
|
283 | 296 | * Resulting memory usage is (1 << (chainLog+2)). |
|
284 | 297 | * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. |
|
285 | 298 | * Larger tables result in better and slower compression. |
|
286 |
* This parameter is useless |
|
|
299 | * This parameter is useless for "fast" strategy. | |
|
287 | 300 | * It's still useful when using "dfast" strategy, |
|
288 | 301 | * in which case it defines a secondary probe table. |
|
289 | 302 | * Special: value 0 means "use default chainLog". */ |
|
290 | 303 | ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. |
|
291 | 304 | * More attempts result in better and slower compression. |
|
292 |
* This parameter is useless |
|
|
305 | * This parameter is useless for "fast" and "dFast" strategies. | |
|
293 | 306 | * Special: value 0 means "use default searchLog". */ |
|
294 | 307 | ZSTD_c_minMatch=105, /* Minimum size of searched matches. |
|
295 | 308 | * Note that Zstandard can still find matches of smaller size, |
@@ -344,7 +357,7 typedef enum { | |||
|
344 | 357 | ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) |
|
345 | 358 | * Content size must be known at the beginning of compression. |
|
346 | 359 | * This is automatically the case when using ZSTD_compress2(), |
|
347 |
* For streaming |
|
|
360 | * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ | |
|
348 | 361 | ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ |
|
349 | 362 | ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ |
|
350 | 363 | |
@@ -363,7 +376,7 typedef enum { | |||
|
363 | 376 | * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. |
|
364 | 377 | * 0 means default, which is dynamically determined based on compression parameters. |
|
365 | 378 | * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. |
|
366 | * The minimum size is automatically and transparently enforced */ | |
|
379 | * The minimum size is automatically and transparently enforced. */ | |
|
367 | 380 | ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. |
|
368 | 381 | * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. |
|
369 | 382 | * It helps preserve compression ratio, while each job is compressed in parallel. |
@@ -386,6 +399,7 typedef enum { | |||
|
386 | 399 | * ZSTD_c_forceAttachDict |
|
387 | 400 | * ZSTD_c_literalCompressionMode |
|
388 | 401 | * ZSTD_c_targetCBlockSize |
|
402 | * ZSTD_c_srcSizeHint | |
|
389 | 403 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. |
|
390 | 404 | * note : never ever use experimentalParam? names directly; |
|
391 | 405 | * also, the enums values themselves are unstable and can still change. |
@@ -396,6 +410,7 typedef enum { | |||
|
396 | 410 | ZSTD_c_experimentalParam4=1001, |
|
397 | 411 | ZSTD_c_experimentalParam5=1002, |
|
398 | 412 | ZSTD_c_experimentalParam6=1003, |
|
413 | ZSTD_c_experimentalParam7=1004 | |
|
399 | 414 | } ZSTD_cParameter; |
|
400 | 415 | |
|
401 | 416 | typedef struct { |
@@ -793,12 +808,17 ZSTDLIB_API size_t ZSTD_decompress_using | |||
|
793 | 808 | typedef struct ZSTD_CDict_s ZSTD_CDict; |
|
794 | 809 | |
|
795 | 810 | /*! ZSTD_createCDict() : |
|
796 |
* When compressing multiple messages |
|
|
797 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. | |
|
811 | * When compressing multiple messages or blocks using the same dictionary, | |
|
812 | * it's recommended to digest the dictionary only once, since it's a costly operation. | |
|
813 | * ZSTD_createCDict() will create a state from digesting a dictionary. | |
|
814 | * The resulting state can be used for future compression operations with very limited startup cost. | |
|
798 | 815 | * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. |
|
799 |
* |
|
|
800 |
* Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate |
|
|
801 |
* Note : A ZSTD_CDict can be created from an empty dictBuffer, |
|
|
816 | * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. | |
|
817 | * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. | |
|
818 | * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, | |
|
819 | * in which case the only thing that it transports is the @compressionLevel. | |
|
820 | * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, | |
|
821 | * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ | |
|
802 | 822 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, |
|
803 | 823 | int compressionLevel); |
|
804 | 824 | |
@@ -925,7 +945,7 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS | |||
|
925 | 945 | * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. |
|
926 | 946 | * It's a CPU consuming operation, with non-negligible impact on latency. |
|
927 | 947 | * If there is a need to use the same prefix multiple times, consider loadDictionary instead. |
|
928 |
* Note 4 : By default, the prefix is interpreted as raw content (ZSTD_d |
|
|
948 | * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). | |
|
929 | 949 | * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ |
|
930 | 950 | ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, |
|
931 | 951 | const void* prefix, size_t prefixSize); |
@@ -969,7 +989,7 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZS | |||
|
969 | 989 | * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. |
|
970 | 990 | * Prefix buffer must remain unmodified up to the end of frame, |
|
971 | 991 | * reached when ZSTD_decompressStream() returns 0. |
|
972 |
* Note 3 : By default, the prefix is treated as raw content (ZSTD_d |
|
|
992 | * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). | |
|
973 | 993 | * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) |
|
974 | 994 | * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. |
|
975 | 995 | * A full dictionary is more costly, as it requires building tables. |
@@ -1014,8 +1034,8 ZSTDLIB_API size_t ZSTD_sizeof_DDict(con | |||
|
1014 | 1034 | * Some of them might be removed in the future (especially when redundant with existing stable functions) |
|
1015 | 1035 | * ***************************************************************************************/ |
|
1016 | 1036 | |
|
1017 | #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */ | |
|
1018 | #define ZSTD_FRAMEHEADERSIZE_MIN 6 | |
|
1037 | #define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ | |
|
1038 | #define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) | |
|
1019 | 1039 | #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ |
|
1020 | 1040 | #define ZSTD_SKIPPABLEHEADERSIZE 8 |
|
1021 | 1041 | |
@@ -1063,6 +1083,8 ZSTDLIB_API size_t ZSTD_sizeof_DDict(con | |||
|
1063 | 1083 | /* Advanced parameter bounds */ |
|
1064 | 1084 | #define ZSTD_TARGETCBLOCKSIZE_MIN 64 |
|
1065 | 1085 | #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX |
|
1086 | #define ZSTD_SRCSIZEHINT_MIN 0 | |
|
1087 | #define ZSTD_SRCSIZEHINT_MAX INT_MAX | |
|
1066 | 1088 | |
|
1067 | 1089 | /* internal */ |
|
1068 | 1090 | #define ZSTD_HASHLOG3_MAX 17 |
@@ -1073,6 +1095,24 ZSTDLIB_API size_t ZSTD_sizeof_DDict(con | |||
|
1073 | 1095 | typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; |
|
1074 | 1096 | |
|
1075 | 1097 | typedef struct { |
|
1098 | unsigned int matchPos; /* Match pos in dst */ | |
|
1099 | /* If seqDef.offset > 3, then this is seqDef.offset - 3 | |
|
1100 | * If seqDef.offset < 3, then this is the corresponding repeat offset | |
|
1101 | * But if seqDef.offset < 3 and litLength == 0, this is the | |
|
1102 | * repeat offset before the corresponding repeat offset | |
|
1103 | * And if seqDef.offset == 3 and litLength == 0, this is the | |
|
1104 | * most recent repeat offset - 1 | |
|
1105 | */ | |
|
1106 | unsigned int offset; | |
|
1107 | unsigned int litLength; /* Literal length */ | |
|
1108 | unsigned int matchLength; /* Match length */ | |
|
1109 | /* 0 when seq not rep and seqDef.offset otherwise | |
|
1110 | * when litLength == 0 this will be <= 4, otherwise <= 3 like normal | |
|
1111 | */ | |
|
1112 | unsigned int rep; | |
|
1113 | } ZSTD_Sequence; | |
|
1114 | ||
|
1115 | typedef struct { | |
|
1076 | 1116 | unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ |
|
1077 | 1117 | unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ |
|
1078 | 1118 | unsigned hashLog; /**< dispatch table : larger == faster, more memory */ |
@@ -1101,21 +1141,12 typedef enum { | |||
|
1101 | 1141 | |
|
1102 | 1142 | typedef enum { |
|
1103 | 1143 | ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ |
|
1104 |
ZSTD_dlm_byRef = 1 |
|
|
1144 | ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ | |
|
1105 | 1145 | } ZSTD_dictLoadMethod_e; |
|
1106 | 1146 | |
|
1107 | 1147 | typedef enum { |
|
1108 | /* Opened question : should we have a format ZSTD_f_auto ? | |
|
1109 | * Today, it would mean exactly the same as ZSTD_f_zstd1. | |
|
1110 | * But, in the future, should several formats become supported, | |
|
1111 | * on the compression side, it would mean "default format". | |
|
1112 | * On the decompression side, it would mean "automatic format detection", | |
|
1113 | * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames". | |
|
1114 | * Since meaning is a little different, another option could be to define different enums for compression and decompression. | |
|
1115 | * This question could be kept for later, when there are actually multiple formats to support, | |
|
1116 | * but there is also the question of pinning enum values, and pinning value `0` is especially important */ | |
|
1117 | 1148 | ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ |
|
1118 |
ZSTD_f_zstd1_magicless = 1 |
|
|
1149 | ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. | |
|
1119 | 1150 | * Useful to save 4 bytes per generated frame. |
|
1120 | 1151 | * Decoder cannot recognise automatically this format, requiring this instruction. */ |
|
1121 | 1152 | } ZSTD_format_e; |
@@ -1126,7 +1157,7 typedef enum { | |||
|
1126 | 1157 | * to evolve and should be considered only in the context of extremely |
|
1127 | 1158 | * advanced performance tuning. |
|
1128 | 1159 | * |
|
1129 |
* Zstd currently supports the use of a CDict in t |
|
|
1160 | * Zstd currently supports the use of a CDict in three ways: | |
|
1130 | 1161 | * |
|
1131 | 1162 | * - The contents of the CDict can be copied into the working context. This |
|
1132 | 1163 | * means that the compression can search both the dictionary and input |
@@ -1142,6 +1173,12 typedef enum { | |||
|
1142 | 1173 | * working context's tables can be reused). For small inputs, this can be |
|
1143 | 1174 | * faster than copying the CDict's tables. |
|
1144 | 1175 | * |
|
1176 | * - The CDict's tables are not used at all, and instead we use the working | |
|
1177 | * context alone to reload the dictionary and use params based on the source | |
|
1178 | * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). | |
|
1179 | * This method is effective when the dictionary sizes are very small relative | |
|
1180 | * to the input size, and the input size is fairly large to begin with. | |
|
1181 | * | |
|
1145 | 1182 | * Zstd has a simple internal heuristic that selects which strategy to use |
|
1146 | 1183 | * at the beginning of a compression. However, if experimentation shows that |
|
1147 | 1184 | * Zstd is making poor choices, it is possible to override that choice with |
@@ -1150,6 +1187,7 typedef enum { | |||
|
1150 | 1187 | ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ |
|
1151 | 1188 | ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ |
|
1152 | 1189 | ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ |
|
1190 | ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ | |
|
1153 | 1191 | } ZSTD_dictAttachPref_e; |
|
1154 | 1192 | |
|
1155 | 1193 | typedef enum { |
@@ -1158,7 +1196,7 typedef enum { | |||
|
1158 | 1196 | * levels will be compressed. */ |
|
1159 | 1197 | ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be |
|
1160 | 1198 | * emitted if Huffman compression is not profitable. */ |
|
1161 |
ZSTD_lcm_uncompressed = 2 |
|
|
1199 | ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ | |
|
1162 | 1200 | } ZSTD_literalCompressionMode_e; |
|
1163 | 1201 | |
|
1164 | 1202 | |
@@ -1210,20 +1248,38 ZSTDLIB_API unsigned long long ZSTD_deco | |||
|
1210 | 1248 | * or an error code (if srcSize is too small) */ |
|
1211 | 1249 | ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); |
|
1212 | 1250 | |
|
1251 | /*! ZSTD_getSequences() : | |
|
1252 | * Extract sequences from the sequence store | |
|
1253 | * zc can be used to insert custom compression params. | |
|
1254 | * This function invokes ZSTD_compress2 | |
|
1255 | * @return : number of sequences extracted | |
|
1256 | */ | |
|
1257 | ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, | |
|
1258 | size_t outSeqsSize, const void* src, size_t srcSize); | |
|
1259 | ||
|
1213 | 1260 | |
|
1214 | 1261 | /*************************************** |
|
1215 | 1262 | * Memory management |
|
1216 | 1263 | ***************************************/ |
|
1217 | 1264 | |
|
1218 | 1265 | /*! ZSTD_estimate*() : |
|
1219 | * These functions make it possible to estimate memory usage | |
|
1220 |
* |
|
|
1221 | * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. | |
|
1222 | * It will also consider src size to be arbitrarily "large", which is worst case. | |
|
1223 | * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. | |
|
1224 | * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. | |
|
1225 | * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
|
1226 | * Note : CCtx size estimation is only correct for single-threaded compression. */ | |
|
1266 | * These functions make it possible to estimate memory usage of a future | |
|
1267 | * {D,C}Ctx, before its creation. | |
|
1268 | * | |
|
1269 | * ZSTD_estimateCCtxSize() will provide a budget large enough for any | |
|
1270 | * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), | |
|
1271 | * this estimate does not include space for a window buffer, so this estimate | |
|
1272 | * is guaranteed to be enough for single-shot compressions, but not streaming | |
|
1273 | * compressions. It will however assume the input may be arbitrarily large, | |
|
1274 | * which is the worst case. If srcSize is known to always be small, | |
|
1275 | * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. | |
|
1276 | * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with | |
|
1277 | * ZSTD_getCParams() to create cParams from compressionLevel. | |
|
1278 | * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with | |
|
1279 | * ZSTD_CCtxParams_setParameter(). | |
|
1280 | * | |
|
1281 | * Note: only single-threaded compression is supported. This function will | |
|
1282 | * return an error code if ZSTD_c_nbWorkers is >= 1. */ | |
|
1227 | 1283 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); |
|
1228 | 1284 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); |
|
1229 | 1285 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); |
@@ -1334,7 +1390,8 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict | |||
|
1334 | 1390 | * Create a digested dictionary for compression |
|
1335 | 1391 | * Dictionary content is just referenced, not duplicated. |
|
1336 | 1392 | * As a consequence, `dictBuffer` **must** outlive CDict, |
|
1337 |
* and its content must remain unmodified throughout the lifetime of CDict. |
|
|
1393 | * and its content must remain unmodified throughout the lifetime of CDict. | |
|
1394 | * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ | |
|
1338 | 1395 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); |
|
1339 | 1396 | |
|
1340 | 1397 | /*! ZSTD_getCParams() : |
@@ -1361,7 +1418,9 ZSTDLIB_API size_t ZSTD_checkCParams(ZST | |||
|
1361 | 1418 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); |
|
1362 | 1419 | |
|
1363 | 1420 | /*! ZSTD_compress_advanced() : |
|
1364 | * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */ | |
|
1421 | * Note : this function is now DEPRECATED. | |
|
1422 | * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. | |
|
1423 | * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ | |
|
1365 | 1424 | ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, |
|
1366 | 1425 | void* dst, size_t dstCapacity, |
|
1367 | 1426 | const void* src, size_t srcSize, |
@@ -1369,7 +1428,9 ZSTDLIB_API size_t ZSTD_compress_advance | |||
|
1369 | 1428 | ZSTD_parameters params); |
|
1370 | 1429 | |
|
1371 | 1430 | /*! ZSTD_compress_usingCDict_advanced() : |
|
1372 | * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */ | |
|
1431 | * Note : this function is now REDUNDANT. | |
|
1432 | * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. | |
|
1433 | * This prototype will be marked as deprecated and generate compilation warning in some future version */ | |
|
1373 | 1434 | ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, |
|
1374 | 1435 | void* dst, size_t dstCapacity, |
|
1375 | 1436 | const void* src, size_t srcSize, |
@@ -1441,6 +1502,12 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_a | |||
|
1441 | 1502 | * There is no guarantee on compressed block size (default:0) */ |
|
1442 | 1503 | #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 |
|
1443 | 1504 | |
|
1505 | /* User's best guess of source size. | |
|
1506 | * Hint is not valid when srcSizeHint == 0. | |
|
1507 | * There is no guarantee that hint is close to actual source size, | |
|
1508 | * but compression ratio may regress significantly if guess considerably underestimates */ | |
|
1509 | #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 | |
|
1510 | ||
|
1444 | 1511 | /*! ZSTD_CCtx_getParameter() : |
|
1445 | 1512 | * Get the requested compression parameter value, selected by enum ZSTD_cParameter, |
|
1446 | 1513 | * and store it into int* value. |
@@ -1613,8 +1680,13 ZSTDLIB_API size_t ZSTD_decompressStream | |||
|
1613 | 1680 | * pledgedSrcSize must be correct. If it is not known at init time, use |
|
1614 | 1681 | * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, |
|
1615 | 1682 | * "0" also disables frame content size field. It may be enabled in the future. |
|
1683 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1616 | 1684 | */ |
|
1617 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); | |
|
1685 | ZSTDLIB_API size_t | |
|
1686 | ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, | |
|
1687 | int compressionLevel, | |
|
1688 | unsigned long long pledgedSrcSize); | |
|
1689 | ||
|
1618 | 1690 | /**! ZSTD_initCStream_usingDict() : |
|
1619 | 1691 | * This function is deprecated, and is equivalent to: |
|
1620 | 1692 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); |
@@ -1623,42 +1695,66 ZSTDLIB_API size_t ZSTD_initCStream_srcS | |||
|
1623 | 1695 | * |
|
1624 | 1696 | * Creates of an internal CDict (incompatible with static CCtx), except if |
|
1625 | 1697 | * dict == NULL or dictSize < 8, in which case no dict is used. |
|
1626 |
* Note: dict is loaded with ZSTD_d |
|
|
1698 | * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if | |
|
1627 | 1699 | * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. |
|
1700 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1628 | 1701 | */ |
|
1629 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); | |
|
1702 | ZSTDLIB_API size_t | |
|
1703 | ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, | |
|
1704 | const void* dict, size_t dictSize, | |
|
1705 | int compressionLevel); | |
|
1706 | ||
|
1630 | 1707 | /**! ZSTD_initCStream_advanced() : |
|
1631 | 1708 | * This function is deprecated, and is approximately equivalent to: |
|
1632 | 1709 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); |
|
1633 |
* |
|
|
1710 | * // Pseudocode: Set each zstd parameter and leave the rest as-is. | |
|
1711 | * for ((param, value) : params) { | |
|
1712 | * ZSTD_CCtx_setParameter(zcs, param, value); | |
|
1713 | * } | |
|
1634 | 1714 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); |
|
1635 | 1715 | * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); |
|
1636 | 1716 | * |
|
1637 | * pledgedSrcSize must be correct. If srcSize is not known at init time, use | |
|
1638 | * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. | |
|
1717 | * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. | |
|
1718 | * pledgedSrcSize must be correct. | |
|
1719 | * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. | |
|
1720 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1639 | 1721 | */ |
|
1640 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, | |
|
1641 | ZSTD_parameters params, unsigned long long pledgedSrcSize); | |
|
1722 | ZSTDLIB_API size_t | |
|
1723 | ZSTD_initCStream_advanced(ZSTD_CStream* zcs, | |
|
1724 | const void* dict, size_t dictSize, | |
|
1725 | ZSTD_parameters params, | |
|
1726 | unsigned long long pledgedSrcSize); | |
|
1727 | ||
|
1642 | 1728 | /**! ZSTD_initCStream_usingCDict() : |
|
1643 | 1729 | * This function is deprecated, and equivalent to: |
|
1644 | 1730 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); |
|
1645 | 1731 | * ZSTD_CCtx_refCDict(zcs, cdict); |
|
1646 | 1732 | * |
|
1647 | 1733 | * note : cdict will just be referenced, and must outlive compression session |
|
1734 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1648 | 1735 | */ |
|
1649 | 1736 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); |
|
1737 | ||
|
1650 | 1738 | /**! ZSTD_initCStream_usingCDict_advanced() : |
|
1651 |
* This function is |
|
|
1739 | * This function is DEPRECATED, and is approximately equivalent to: | |
|
1652 | 1740 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); |
|
1653 |
* |
|
|
1741 | * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. | |
|
1742 | * for ((fParam, value) : fParams) { | |
|
1743 | * ZSTD_CCtx_setParameter(zcs, fParam, value); | |
|
1744 | * } | |
|
1654 | 1745 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); |
|
1655 | 1746 | * ZSTD_CCtx_refCDict(zcs, cdict); |
|
1656 | 1747 | * |
|
1657 | 1748 | * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. |
|
1658 | 1749 | * pledgedSrcSize must be correct. If srcSize is not known at init time, use |
|
1659 | 1750 | * value ZSTD_CONTENTSIZE_UNKNOWN. |
|
1751 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1660 | 1752 | */ |
|
1661 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); | |
|
1753 | ZSTDLIB_API size_t | |
|
1754 | ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, | |
|
1755 | const ZSTD_CDict* cdict, | |
|
1756 | ZSTD_frameParameters fParams, | |
|
1757 | unsigned long long pledgedSrcSize); | |
|
1662 | 1758 | |
|
1663 | 1759 | /*! ZSTD_resetCStream() : |
|
1664 | 1760 | * This function is deprecated, and is equivalent to: |
@@ -1673,6 +1769,7 ZSTDLIB_API size_t ZSTD_initCStream_usin | |||
|
1673 | 1769 | * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, |
|
1674 | 1770 | * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. |
|
1675 | 1771 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) |
|
1772 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1676 | 1773 | */ |
|
1677 | 1774 | ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); |
|
1678 | 1775 | |
@@ -1718,8 +1815,10 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_ | |||
|
1718 | 1815 | * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); |
|
1719 | 1816 | * |
|
1720 | 1817 | * note: no dictionary will be used if dict == NULL or dictSize < 8 |
|
1818 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1721 | 1819 | */ |
|
1722 | 1820 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); |
|
1821 | ||
|
1723 | 1822 | /** |
|
1724 | 1823 | * This function is deprecated, and is equivalent to: |
|
1725 | 1824 | * |
@@ -1727,14 +1826,17 ZSTDLIB_API size_t ZSTD_initDStream_usin | |||
|
1727 | 1826 | * ZSTD_DCtx_refDDict(zds, ddict); |
|
1728 | 1827 | * |
|
1729 | 1828 | * note : ddict is referenced, it must outlive decompression session |
|
1829 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1730 | 1830 | */ |
|
1731 | 1831 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); |
|
1832 | ||
|
1732 | 1833 | /** |
|
1733 | 1834 | * This function is deprecated, and is equivalent to: |
|
1734 | 1835 | * |
|
1735 | 1836 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); |
|
1736 | 1837 | * |
|
1737 | 1838 | * re-use decompression parameters from previous init; saves dictionary loading |
|
1839 | * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x | |
|
1738 | 1840 | */ |
|
1739 | 1841 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); |
|
1740 | 1842 | |
@@ -1908,7 +2010,7 ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne | |||
|
1908 | 2010 | |
|
1909 | 2011 | /*! |
|
1910 | 2012 | Block functions produce and decode raw zstd blocks, without frame metadata. |
|
1911 |
Frame metadata cost is typically ~1 |
|
|
2013 | Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). | |
|
1912 | 2014 | But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. |
|
1913 | 2015 | |
|
1914 | 2016 | A few rules to respect : |
General Comments 0
You need to be logged in to leave comments.
Login now