##// END OF EJS Templates
zstandard: vendor python-zstandard 0.13.0...
Gregory Szorc -
r44446:de783805 default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (535 lines changed) Show them Hide them
@@ -0,0 +1,535
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #ifndef ZSTD_CWKSP_H
12 #define ZSTD_CWKSP_H
13
14 /*-*************************************
15 * Dependencies
16 ***************************************/
17 #include "zstd_internal.h"
18
19 #if defined (__cplusplus)
20 extern "C" {
21 #endif
22
23 /*-*************************************
24 * Constants
25 ***************************************/
26
/* A workspace is deemed "too large" when its free space is at least this many
 * times the amount of space that is actually needed. */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3

/* Number of consecutive "too large" observations that must be exceeded before
 * the workspace is considered wasteful. A workspace that stays oversized for
 * that long was sized for a worst case that rarely occurs, so it is worth
 * shrinking it to give memory back. */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128

/* The workspace behaves like a small private arena allocator. When running
 * under ASAN, each element reserved from it is therefore surrounded by
 * redzones, so that ASAN can report overruns which leave one object but
 * remain inside the workspace.
 *
 * This is the size of one such redzone, in bytes. It may be overridden by
 * defining the macro before this header is included.
 */
#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
47
48 /*-*************************************
49 * Structures
50 ***************************************/
/* Allocation phases of a workspace. The enumerators are declared in the order
 * in which the phases must occur; the code compares them with relational
 * operators, so this ordering is significant. */
typedef enum {
    ZSTD_cwksp_alloc_objects,   /* initial phase: object reservations */
    ZSTD_cwksp_alloc_buffers,   /* unaligned buffer reservations */
    ZSTD_cwksp_alloc_aligned    /* aligned reservations and tables */
} ZSTD_cwksp_alloc_phase_e;
56
57 /**
58 * Zstd fits all its internal datastructures into a single continuous buffer,
59 * so that it only needs to perform a single OS allocation (or so that a buffer
60 * can be provided to it and it can perform no allocations at all). This buffer
61 * is called the workspace.
62 *
63 * Several optimizations complicate that process of allocating memory ranges
64 * from this workspace for each internal datastructure:
65 *
66 * - These different internal datastructures have different setup requirements:
67 *
68 * - The static objects need to be cleared once and can then be trivially
69 * reused for each compression.
70 *
71 * - Various buffers don't need to be initialized at all--they are always
72 * written into before they're read.
73 *
74 * - The matchstate tables have a unique requirement that they don't need
75 * their memory to be totally cleared, but they do need the memory to have
76 * some bound, i.e., a guarantee that all values in the memory they've been
77 * allocated are less than some maximum value (which is the starting value
78 * for the indices that they will then use for compression). When this
79 * guarantee is provided to them, they can use the memory without any setup
80 * work. When it can't, they have to clear the area.
81 *
82 * - These buffers also have different alignment requirements.
83 *
84 * - We would like to reuse the objects in the workspace for multiple
85 * compressions without having to perform any expensive reallocation or
86 * reinitialization work.
87 *
88 * - We would like to be able to efficiently reuse the workspace across
89 * multiple compressions **even when the compression parameters change** and
90 * we need to resize some of the objects (where possible).
91 *
92 * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
93 * abstraction was created. It works as follows:
94 *
95 * Workspace Layout:
96 *
97 * [ ... workspace ... ]
98 * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
99 *
100 * The various objects that live in the workspace are divided into the
101 * following categories, and are allocated separately:
102 *
103 * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
104 * so that literally everything fits in a single buffer. Note: if present,
105 * this must be the first object in the workspace, since ZSTD_free{CCtx,
106 * CDict}() rely on a pointer comparison to see whether one or two frees are
107 * required.
108 *
109 * - Fixed size objects: these are fixed-size, fixed-count objects that are
110 * nonetheless "dynamically" allocated in the workspace so that we can
111 * control how they're initialized separately from the broader ZSTD_CCtx.
112 * Examples:
113 * - Entropy Workspace
114 * - 2 x ZSTD_compressedBlockState_t
115 * - CDict dictionary contents
116 *
117 * - Tables: these are any of several different datastructures (hash tables,
118 * chain tables, binary trees) that all respect a common format: they are
119 * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
120 * Their sizes depend on the cparams.
121 *
122 * - Aligned: these buffers are used for various purposes that require 4 byte
123 * alignment, but don't require any initialization before they're used.
124 *
125 * - Buffers: these buffers are used for various purposes that don't require
126 * any alignment or initialization before they're used. This means they can
127 * be moved around at no cost for a new compression.
128 *
129 * Allocating Memory:
130 *
131 * The various types of objects must be allocated in order, so they can be
132 * correctly packed into the workspace buffer. That order is:
133 *
134 * 1. Objects
135 * 2. Buffers
136 * 3. Aligned
137 * 4. Tables
138 *
139 * Attempts to reserve objects of different types out of order will fail.
140 */
141 typedef struct {
142 void* workspace;
143 void* workspaceEnd;
144
145 void* objectEnd;
146 void* tableEnd;
147 void* tableValidEnd;
148 void* allocStart;
149
150 int allocFailed;
151 int workspaceOversizedDuration;
152 ZSTD_cwksp_alloc_phase_e phase;
153 } ZSTD_cwksp;
154
155 /*-*************************************
156 * Functions
157 ***************************************/
158
159 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
160
161 MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
162 (void)ws;
163 assert(ws->workspace <= ws->objectEnd);
164 assert(ws->objectEnd <= ws->tableEnd);
165 assert(ws->objectEnd <= ws->tableValidEnd);
166 assert(ws->tableEnd <= ws->allocStart);
167 assert(ws->tableValidEnd <= ws->allocStart);
168 assert(ws->allocStart <= ws->workspaceEnd);
169 }
170
/**
 * Rounds `size` up to the next multiple of `align`.
 *
 * `align` must be a non-zero power of 2. This is checked with assert():
 * the previous check `(align & (align-1)) == 0` also accepted 0, for which
 * the function would silently return 0 for every size.
 *
 * @return the smallest multiple of `align` that is >= `size`.
 */
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
    size_t const mask = align - 1;
    assert(align != 0 && (align & mask) == 0);
    return (size + mask) & ~mask;
}
179
/**
 * Returns the number of workspace bytes that reserving an object of `size`
 * bytes will consume. Ordinarily that is just `size`; when the workspace is
 * poisoned for ASAN, each object is padded with a redzone on both sides, so
 * two redzones are added.
 *
 * Tables are not currently redzoned, so the space needed for the matchState
 * tables does not have to go through this function. Everything else does.
 */
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
    size_t footprint = size;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    footprint += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
    return footprint;
}
197
198 MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
199 ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
200 assert(phase >= ws->phase);
201 if (phase > ws->phase) {
202 if (ws->phase < ZSTD_cwksp_alloc_buffers &&
203 phase >= ZSTD_cwksp_alloc_buffers) {
204 ws->tableValidEnd = ws->objectEnd;
205 }
206 if (ws->phase < ZSTD_cwksp_alloc_aligned &&
207 phase >= ZSTD_cwksp_alloc_aligned) {
208 /* If unaligned allocations down from a too-large top have left us
209 * unaligned, we need to realign our alloc ptr. Technically, this
210 * can consume space that is unaccounted for in the neededSpace
211 * calculation. However, I believe this can only happen when the
212 * workspace is too large, and specifically when it is too large
213 * by a larger margin than the space that will be consumed. */
214 /* TODO: cleaner, compiler warning friendly way to do this??? */
215 ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
216 if (ws->allocStart < ws->tableValidEnd) {
217 ws->tableValidEnd = ws->allocStart;
218 }
219 }
220 ws->phase = phase;
221 }
222 }
223
224 /**
225 * Returns whether this object/buffer/etc was allocated in this workspace.
226 */
227 MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
228 return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
229 }
230
231 /**
232 * Internal function. Do not use directly.
233 */
234 MEM_STATIC void* ZSTD_cwksp_reserve_internal(
235 ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
236 void* alloc;
237 void* bottom = ws->tableEnd;
238 ZSTD_cwksp_internal_advance_phase(ws, phase);
239 alloc = (BYTE *)ws->allocStart - bytes;
240
241 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
242 /* over-reserve space */
243 alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
244 #endif
245
246 DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
247 alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
248 ZSTD_cwksp_assert_internal_consistency(ws);
249 assert(alloc >= bottom);
250 if (alloc < bottom) {
251 DEBUGLOG(4, "cwksp: alloc failed!");
252 ws->allocFailed = 1;
253 return NULL;
254 }
255 if (alloc < ws->tableValidEnd) {
256 ws->tableValidEnd = alloc;
257 }
258 ws->allocStart = alloc;
259
260 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
261 /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
262 * either size. */
263 alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
264 __asan_unpoison_memory_region(alloc, bytes);
265 #endif
266
267 return alloc;
268 }
269
270 /**
271 * Reserves and returns unaligned memory.
272 */
273 MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
274 return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
275 }
276
277 /**
278 * Reserves and returns memory sized on and aligned on sizeof(unsigned).
279 */
280 MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
281 assert((bytes & (sizeof(U32)-1)) == 0);
282 return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
283 }
284
285 /**
286 * Aligned on sizeof(unsigned). These buffers have the special property that
287 * their values remain constrained, allowing us to re-use them without
288 * memset()-ing them.
289 */
290 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
291 const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
292 void* alloc = ws->tableEnd;
293 void* end = (BYTE *)alloc + bytes;
294 void* top = ws->allocStart;
295
296 DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
297 alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
298 assert((bytes & (sizeof(U32)-1)) == 0);
299 ZSTD_cwksp_internal_advance_phase(ws, phase);
300 ZSTD_cwksp_assert_internal_consistency(ws);
301 assert(end <= top);
302 if (end > top) {
303 DEBUGLOG(4, "cwksp: table alloc failed!");
304 ws->allocFailed = 1;
305 return NULL;
306 }
307 ws->tableEnd = end;
308
309 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
310 __asan_unpoison_memory_region(alloc, bytes);
311 #endif
312
313 return alloc;
314 }
315
316 /**
317 * Aligned on sizeof(void*).
318 */
319 MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
320 size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
321 void* alloc = ws->objectEnd;
322 void* end = (BYTE*)alloc + roundedBytes;
323
324 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
325 /* over-reserve space */
326 end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
327 #endif
328
329 DEBUGLOG(5,
330 "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
331 alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
332 assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
333 assert((bytes & (sizeof(void*)-1)) == 0);
334 ZSTD_cwksp_assert_internal_consistency(ws);
335 /* we must be in the first phase, no advance is possible */
336 if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
337 DEBUGLOG(4, "cwksp: object alloc failed!");
338 ws->allocFailed = 1;
339 return NULL;
340 }
341 ws->objectEnd = end;
342 ws->tableEnd = end;
343 ws->tableValidEnd = end;
344
345 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
346 /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
347 * either size. */
348 alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
349 __asan_unpoison_memory_region(alloc, bytes);
350 #endif
351
352 return alloc;
353 }
354
355 MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
356 DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
357
358 #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
359 /* To validate that the table re-use logic is sound, and that we don't
360 * access table space that we haven't cleaned, we re-"poison" the table
361 * space every time we mark it dirty. */
362 {
363 size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
364 assert(__msan_test_shadow(ws->objectEnd, size) == -1);
365 __msan_poison(ws->objectEnd, size);
366 }
367 #endif
368
369 assert(ws->tableValidEnd >= ws->objectEnd);
370 assert(ws->tableValidEnd <= ws->allocStart);
371 ws->tableValidEnd = ws->objectEnd;
372 ZSTD_cwksp_assert_internal_consistency(ws);
373 }
374
375 MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
376 DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
377 assert(ws->tableValidEnd >= ws->objectEnd);
378 assert(ws->tableValidEnd <= ws->allocStart);
379 if (ws->tableValidEnd < ws->tableEnd) {
380 ws->tableValidEnd = ws->tableEnd;
381 }
382 ZSTD_cwksp_assert_internal_consistency(ws);
383 }
384
385 /**
386 * Zero the part of the allocated tables not already marked clean.
387 */
388 MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
389 DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
390 assert(ws->tableValidEnd >= ws->objectEnd);
391 assert(ws->tableValidEnd <= ws->allocStart);
392 if (ws->tableValidEnd < ws->tableEnd) {
393 memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
394 }
395 ZSTD_cwksp_mark_tables_clean(ws);
396 }
397
398 /**
399 * Invalidates table allocations.
400 * All other allocations remain valid.
401 */
402 MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
403 DEBUGLOG(4, "cwksp: clearing tables!");
404
405 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
406 {
407 size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
408 __asan_poison_memory_region(ws->objectEnd, size);
409 }
410 #endif
411
412 ws->tableEnd = ws->objectEnd;
413 ZSTD_cwksp_assert_internal_consistency(ws);
414 }
415
416 /**
417 * Invalidates all buffer, aligned, and table allocations.
418 * Object allocations remain valid.
419 */
420 MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
421 DEBUGLOG(4, "cwksp: clearing!");
422
423 #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
424 /* To validate that the context re-use logic is sound, and that we don't
425 * access stuff that this compression hasn't initialized, we re-"poison"
426 * the workspace (or at least the non-static, non-table parts of it)
427 * every time we start a new compression. */
428 {
429 size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
430 __msan_poison(ws->tableValidEnd, size);
431 }
432 #endif
433
434 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
435 {
436 size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
437 __asan_poison_memory_region(ws->objectEnd, size);
438 }
439 #endif
440
441 ws->tableEnd = ws->objectEnd;
442 ws->allocStart = ws->workspaceEnd;
443 ws->allocFailed = 0;
444 if (ws->phase > ZSTD_cwksp_alloc_buffers) {
445 ws->phase = ZSTD_cwksp_alloc_buffers;
446 }
447 ZSTD_cwksp_assert_internal_consistency(ws);
448 }
449
450 /**
451 * The provided workspace takes ownership of the buffer [start, start+size).
452 * Any existing values in the workspace are ignored (the previously managed
453 * buffer, if present, must be separately freed).
454 */
455 MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
456 DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
457 assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
458 ws->workspace = start;
459 ws->workspaceEnd = (BYTE*)start + size;
460 ws->objectEnd = ws->workspace;
461 ws->tableValidEnd = ws->objectEnd;
462 ws->phase = ZSTD_cwksp_alloc_objects;
463 ZSTD_cwksp_clear(ws);
464 ws->workspaceOversizedDuration = 0;
465 ZSTD_cwksp_assert_internal_consistency(ws);
466 }
467
468 MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
469 void* workspace = ZSTD_malloc(size, customMem);
470 DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
471 RETURN_ERROR_IF(workspace == NULL, memory_allocation);
472 ZSTD_cwksp_init(ws, workspace, size);
473 return 0;
474 }
475
476 MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
477 void *ptr = ws->workspace;
478 DEBUGLOG(4, "cwksp: freeing workspace");
479 memset(ws, 0, sizeof(ZSTD_cwksp));
480 ZSTD_free(ptr, customMem);
481 }
482
483 /**
484 * Moves the management of a workspace from one cwksp to another. The src cwksp
485 * is left in an invalid state (src must be re-init()'ed before its used again).
486 */
487 MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
488 *dst = *src;
489 memset(src, 0, sizeof(ZSTD_cwksp));
490 }
491
492 MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
493 return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
494 }
495
496 MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
497 return ws->allocFailed;
498 }
499
500 /*-*************************************
501 * Functions Checking Free Space
502 ***************************************/
503
504 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
505 return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
506 }
507
508 MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
509 return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
510 }
511
512 MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
513 return ZSTD_cwksp_check_available(
514 ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
515 }
516
517 MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
518 return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
519 && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
520 }
521
522 MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
523 ZSTD_cwksp* ws, size_t additionalNeededSpace) {
524 if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
525 ws->workspaceOversizedDuration++;
526 } else {
527 ws->workspaceOversizedDuration = 0;
528 }
529 }
530
531 #if defined (__cplusplus)
532 }
533 #endif
534
535 #endif /* ZSTD_CWKSP_H */
@@ -52,6 +52,7 contrib/python-zstandard/zstd/compress/z
52 52 contrib/python-zstandard/zstd/compress/zstd_compress_literals.h
53 53 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c
54 54 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h
55 contrib/python-zstandard/zstd/compress/zstd_cwksp.h
55 56 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
56 57 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
57 58 contrib/python-zstandard/zstd/compress/zstd_fast.c
@@ -43,13 +43,18 Actions Blocking Release
43 43 * Support modifying compression parameters mid operation when supported by
44 44 zstd API.
45 45 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
46 * Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants.
46 47 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
47 * Support ``ZSTD_c_literalCompressionMode `` compression parameter.
48 * Support ``ZSTD_dictForceLoad`` dictionary compression parameter.
49 * Support ``ZSTD_c_targetCBlockSize`` compression parameter.
50 * Support ``ZSTD_c_literalCompressionMode`` compression parameter.
51 * Support ``ZSTD_c_srcSizeHint`` compression parameter.
48 52 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
49 53 compression parameters.
50 54 * Consider exposing ``ZSTDMT_toFlushNow()``.
51 55 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
52 56 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
57 * Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API.
53 58 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
54 59 * Consider a ``chunker()`` API for decompression.
55 60 * Consider stats for ``chunker()`` API, including finding the last consumed
@@ -67,6 +72,20 Other Actions Not Blocking Release
67 72 * API for ensuring max memory ceiling isn't exceeded.
68 73 * Move off nose for testing.
69 74
75 0.13.0 (released 2019-12-28)
76 ============================
77
78 Changes
79 -------
80
81 * ``pytest-xdist`` ``pytest`` extension is now installed so tests can be
82 run in parallel.
83 * CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels
84 instead of a mix of ``manylinux2010`` and ``manylinux1``.
85 * Official support for Python 3.8 has been added.
86 * Bundled zstandard library upgraded from 1.4.3 to 1.4.4.
87 * Python code has been reformatted with black.
88
70 89 0.12.0 (released 2019-09-15)
71 90 ============================
72 91
@@ -20,7 +20,7 https://github.com/indygreg/python-zstan
20 20 Requirements
21 21 ============
22 22
23 This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7
23 This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8
24 24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
25 25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
26 26
@@ -16,7 +16,7
16 16 #include <zdict.h>
17 17
18 18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.12.0"
19 #define PYTHON_ZSTANDARD_VERSION "0.13.0"
20 20
21 21 typedef enum {
22 22 compressorobj_flush_finish,
@@ -16,80 +16,82 import tempfile
16 16
17 17 HERE = os.path.abspath(os.path.dirname(__file__))
18 18
19 SOURCES = ['zstd/%s' % p for p in (
20 'common/debug.c',
21 'common/entropy_common.c',
22 'common/error_private.c',
23 'common/fse_decompress.c',
24 'common/pool.c',
25 'common/threading.c',
26 'common/xxhash.c',
27 'common/zstd_common.c',
28 'compress/fse_compress.c',
29 'compress/hist.c',
30 'compress/huf_compress.c',
31 'compress/zstd_compress.c',
32 'compress/zstd_compress_literals.c',
33 'compress/zstd_compress_sequences.c',
34 'compress/zstd_double_fast.c',
35 'compress/zstd_fast.c',
36 'compress/zstd_lazy.c',
37 'compress/zstd_ldm.c',
38 'compress/zstd_opt.c',
39 'compress/zstdmt_compress.c',
40 'decompress/huf_decompress.c',
41 'decompress/zstd_ddict.c',
42 'decompress/zstd_decompress.c',
43 'decompress/zstd_decompress_block.c',
44 'dictBuilder/cover.c',
45 'dictBuilder/fastcover.c',
46 'dictBuilder/divsufsort.c',
47 'dictBuilder/zdict.c',
48 )]
19 SOURCES = [
20 "zstd/%s" % p
21 for p in (
22 "common/debug.c",
23 "common/entropy_common.c",
24 "common/error_private.c",
25 "common/fse_decompress.c",
26 "common/pool.c",
27 "common/threading.c",
28 "common/xxhash.c",
29 "common/zstd_common.c",
30 "compress/fse_compress.c",
31 "compress/hist.c",
32 "compress/huf_compress.c",
33 "compress/zstd_compress.c",
34 "compress/zstd_compress_literals.c",
35 "compress/zstd_compress_sequences.c",
36 "compress/zstd_double_fast.c",
37 "compress/zstd_fast.c",
38 "compress/zstd_lazy.c",
39 "compress/zstd_ldm.c",
40 "compress/zstd_opt.c",
41 "compress/zstdmt_compress.c",
42 "decompress/huf_decompress.c",
43 "decompress/zstd_ddict.c",
44 "decompress/zstd_decompress.c",
45 "decompress/zstd_decompress_block.c",
46 "dictBuilder/cover.c",
47 "dictBuilder/fastcover.c",
48 "dictBuilder/divsufsort.c",
49 "dictBuilder/zdict.c",
50 )
51 ]
49 52
50 53 # Headers whose preprocessed output will be fed into cdef().
51 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
52 ('zstd.h',),
53 ('dictBuilder', 'zdict.h'),
54 )]
54 HEADERS = [
55 os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),)
56 ]
55 57
56 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
57 'zstd',
58 'zstd/common',
59 'zstd/compress',
60 'zstd/decompress',
61 'zstd/dictBuilder',
62 )]
58 INCLUDE_DIRS = [
59 os.path.join(HERE, d)
60 for d in (
61 "zstd",
62 "zstd/common",
63 "zstd/compress",
64 "zstd/decompress",
65 "zstd/dictBuilder",
66 )
67 ]
63 68
64 69 # cffi can't parse some of the primitives in zstd.h. So we invoke the
65 70 # preprocessor and feed its output into cffi.
66 71 compiler = distutils.ccompiler.new_compiler()
67 72
68 73 # Needed for MSVC.
69 if hasattr(compiler, 'initialize'):
74 if hasattr(compiler, "initialize"):
70 75 compiler.initialize()
71 76
72 77 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
73 78 # manually.
74 if compiler.compiler_type == 'unix':
75 args = list(compiler.executables['compiler'])
76 args.extend([
77 '-E',
78 '-DZSTD_STATIC_LINKING_ONLY',
79 '-DZDICT_STATIC_LINKING_ONLY',
80 ])
81 elif compiler.compiler_type == 'msvc':
79 if compiler.compiler_type == "unix":
80 args = list(compiler.executables["compiler"])
81 args.extend(
82 ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",]
83 )
84 elif compiler.compiler_type == "msvc":
82 85 args = [compiler.cc]
83 args.extend([
84 '/EP',
85 '/DZSTD_STATIC_LINKING_ONLY',
86 '/DZDICT_STATIC_LINKING_ONLY',
87 ])
86 args.extend(
87 ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",]
88 )
88 89 else:
89 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
90 raise Exception("unsupported compiler type: %s" % compiler.compiler_type)
91
90 92
91 93 def preprocess(path):
92 with open(path, 'rb') as fh:
94 with open(path, "rb") as fh:
93 95 lines = []
94 96 it = iter(fh)
95 97
@@ -104,32 +106,44 def preprocess(path):
104 106 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
105 107 # #define in zstdmt_compress.h and results in a compiler warning. So drop
106 108 # the inline #define.
107 if l.startswith((b'#include <stddef.h>',
108 b'#include "zstd.h"',
109 b'#define ZSTD_STATIC_LINKING_ONLY')):
109 if l.startswith(
110 (
111 b"#include <stddef.h>",
112 b'#include "zstd.h"',
113 b"#define ZSTD_STATIC_LINKING_ONLY",
114 )
115 ):
110 116 continue
111 117
118 # The preprocessor environment on Windows doesn't define include
119 # paths, so the #include of limits.h fails. We work around this
120 # by removing that import and defining INT_MAX ourselves. This is
121 # a bit hacky. But it gets the job done.
122 # TODO make limits.h work on Windows so we ensure INT_MAX is
123 # correct.
124 if l.startswith(b"#include <limits.h>"):
125 l = b"#define INT_MAX 2147483647\n"
126
112 127 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
113 128 # important so just filter it out.
114 if l.startswith(b'ZSTDLIB_API'):
115 l = l[len(b'ZSTDLIB_API '):]
129 if l.startswith(b"ZSTDLIB_API"):
130 l = l[len(b"ZSTDLIB_API ") :]
116 131
117 132 lines.append(l)
118 133
119 fd, input_file = tempfile.mkstemp(suffix='.h')
120 os.write(fd, b''.join(lines))
134 fd, input_file = tempfile.mkstemp(suffix=".h")
135 os.write(fd, b"".join(lines))
121 136 os.close(fd)
122 137
123 138 try:
124 139 env = dict(os.environ)
125 if getattr(compiler, '_paths', None):
126 env['PATH'] = compiler._paths
127 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE,
128 env=env)
140 if getattr(compiler, "_paths", None):
141 env["PATH"] = compiler._paths
142 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env)
129 143 output = process.communicate()[0]
130 144 ret = process.poll()
131 145 if ret:
132 raise Exception('preprocessor exited with error')
146 raise Exception("preprocessor exited with error")
133 147
134 148 return output
135 149 finally:
@@ -141,16 +155,16 def normalize_output(output):
141 155 for line in output.splitlines():
142 156 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
143 157 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
144 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
158 line = line[len(b'__attribute__ ((visibility ("default"))) ') :]
145 159
146 if line.startswith(b'__attribute__((deprecated('):
160 if line.startswith(b"__attribute__((deprecated("):
147 161 continue
148 elif b'__declspec(deprecated(' in line:
162 elif b"__declspec(deprecated(" in line:
149 163 continue
150 164
151 165 lines.append(line)
152 166
153 return b'\n'.join(lines)
167 return b"\n".join(lines)
154 168
155 169
156 170 ffi = cffi.FFI()
@@ -159,18 +173,22 ffi = cffi.FFI()
159 173 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
160 174 # when cffi uses the function. Since we statically link against zstd, even
161 175 # if we use the deprecated functions it shouldn't be a huge problem.
162 ffi.set_source('_zstd_cffi', '''
176 ffi.set_source(
177 "_zstd_cffi",
178 """
163 179 #define MIN(a,b) ((a)<(b) ? (a) : (b))
164 180 #define ZSTD_STATIC_LINKING_ONLY
165 181 #include <zstd.h>
166 182 #define ZDICT_STATIC_LINKING_ONLY
167 183 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
168 184 #include <zdict.h>
169 ''', sources=SOURCES,
170 include_dirs=INCLUDE_DIRS,
171 extra_compile_args=['-DZSTD_MULTITHREAD'])
185 """,
186 sources=SOURCES,
187 include_dirs=INCLUDE_DIRS,
188 extra_compile_args=["-DZSTD_MULTITHREAD"],
189 )
172 190
173 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
191 DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ")
174 192
175 193 sources = []
176 194
@@ -181,27 +199,27 for header in HEADERS:
181 199
182 200 # #define's are effectively erased as part of going through preprocessor.
183 201 # So perform a manual pass to re-add those to the cdef source.
184 with open(header, 'rb') as fh:
202 with open(header, "rb") as fh:
185 203 for line in fh:
186 204 line = line.strip()
187 205 m = DEFINE.match(line)
188 206 if not m:
189 207 continue
190 208
191 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
209 if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY":
192 210 continue
193 211
194 212 # The parser doesn't like some constants with complex values.
195 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
213 if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"):
196 214 continue
197 215
198 216 # The ... is magic syntax by the cdef parser to resolve the
199 217 # value at compile time.
200 sources.append(m.group(0) + b' ...')
218 sources.append(m.group(0) + b" ...")
201 219
202 cdeflines = b'\n'.join(sources).splitlines()
220 cdeflines = b"\n".join(sources).splitlines()
203 221 cdeflines = [l for l in cdeflines if l.strip()]
204 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
222 ffi.cdef(b"\n".join(cdeflines).decode("latin1"))
205 223
206 if __name__ == '__main__':
224 if __name__ == "__main__":
207 225 ffi.compile()
@@ -16,7 +16,7 from setuptools import setup
16 16 # (like memoryview).
17 17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
18 18 # garbage collection pitfalls.
19 MINIMUM_CFFI_VERSION = '1.11'
19 MINIMUM_CFFI_VERSION = "1.11"
20 20
21 21 try:
22 22 import cffi
@@ -26,9 +26,11 try:
26 26 # out the CFFI version here and reject CFFI if it is too old.
27 27 cffi_version = LooseVersion(cffi.__version__)
28 28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
29 print('CFFI 1.11 or newer required (%s found); '
30 'not building CFFI backend' % cffi_version,
31 file=sys.stderr)
29 print(
30 "CFFI 1.11 or newer required (%s found); "
31 "not building CFFI backend" % cffi_version,
32 file=sys.stderr,
33 )
32 34 cffi = None
33 35
34 36 except ImportError:
@@ -40,73 +42,77 SUPPORT_LEGACY = False
40 42 SYSTEM_ZSTD = False
41 43 WARNINGS_AS_ERRORS = False
42 44
43 if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''):
45 if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""):
44 46 WARNINGS_AS_ERRORS = True
45 47
46 if '--legacy' in sys.argv:
48 if "--legacy" in sys.argv:
47 49 SUPPORT_LEGACY = True
48 sys.argv.remove('--legacy')
50 sys.argv.remove("--legacy")
49 51
50 if '--system-zstd' in sys.argv:
52 if "--system-zstd" in sys.argv:
51 53 SYSTEM_ZSTD = True
52 sys.argv.remove('--system-zstd')
54 sys.argv.remove("--system-zstd")
53 55
54 if '--warnings-as-errors' in sys.argv:
56 if "--warnings-as-errors" in sys.argv:
55 57 WARNINGS_AS_ERRORS = True
56 sys.argv.remove('--warning-as-errors')
58 sys.argv.remove("--warnings-as-errors")
57 59
58 60 # Code for obtaining the Extension instance is in its own module to
59 61 # facilitate reuse in other projects.
60 62 extensions = [
61 setup_zstd.get_c_extension(name='zstd',
62 support_legacy=SUPPORT_LEGACY,
63 system_zstd=SYSTEM_ZSTD,
64 warnings_as_errors=WARNINGS_AS_ERRORS),
63 setup_zstd.get_c_extension(
64 name="zstd",
65 support_legacy=SUPPORT_LEGACY,
66 system_zstd=SYSTEM_ZSTD,
67 warnings_as_errors=WARNINGS_AS_ERRORS,
68 ),
65 69 ]
66 70
67 71 install_requires = []
68 72
69 73 if cffi:
70 74 import make_cffi
75
71 76 extensions.append(make_cffi.ffi.distutils_extension())
72 install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION)
77 install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION)
73 78
74 79 version = None
75 80
76 with open('c-ext/python-zstandard.h', 'r') as fh:
81 with open("c-ext/python-zstandard.h", "r") as fh:
77 82 for line in fh:
78 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
83 if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"):
79 84 continue
80 85
81 86 version = line.split()[2][1:-1]
82 87 break
83 88
84 89 if not version:
85 raise Exception('could not resolve package version; '
86 'this should never happen')
90 raise Exception("could not resolve package version; " "this should never happen")
87 91
88 92 setup(
89 name='zstandard',
93 name="zstandard",
90 94 version=version,
91 description='Zstandard bindings for Python',
92 long_description=open('README.rst', 'r').read(),
93 url='https://github.com/indygreg/python-zstandard',
94 author='Gregory Szorc',
95 author_email='gregory.szorc@gmail.com',
96 license='BSD',
95 description="Zstandard bindings for Python",
96 long_description=open("README.rst", "r").read(),
97 url="https://github.com/indygreg/python-zstandard",
98 author="Gregory Szorc",
99 author_email="gregory.szorc@gmail.com",
100 license="BSD",
97 101 classifiers=[
98 'Development Status :: 4 - Beta',
99 'Intended Audience :: Developers',
100 'License :: OSI Approved :: BSD License',
101 'Programming Language :: C',
102 'Programming Language :: Python :: 2.7',
103 'Programming Language :: Python :: 3.5',
104 'Programming Language :: Python :: 3.6',
105 'Programming Language :: Python :: 3.7',
102 "Development Status :: 4 - Beta",
103 "Intended Audience :: Developers",
104 "License :: OSI Approved :: BSD License",
105 "Programming Language :: C",
106 "Programming Language :: Python :: 2.7",
107 "Programming Language :: Python :: 3.5",
108 "Programming Language :: Python :: 3.6",
109 "Programming Language :: Python :: 3.7",
110 "Programming Language :: Python :: 3.8",
106 111 ],
107 keywords='zstandard zstd compression',
108 packages=['zstandard'],
112 keywords="zstandard zstd compression",
113 packages=["zstandard"],
109 114 ext_modules=extensions,
110 test_suite='tests',
115 test_suite="tests",
111 116 install_requires=install_requires,
117 tests_require=["hypothesis"],
112 118 )
@@ -10,97 +10,110 import os
10 10 from distutils.extension import Extension
11 11
12 12
13 zstd_sources = ['zstd/%s' % p for p in (
14 'common/debug.c',
15 'common/entropy_common.c',
16 'common/error_private.c',
17 'common/fse_decompress.c',
18 'common/pool.c',
19 'common/threading.c',
20 'common/xxhash.c',
21 'common/zstd_common.c',
22 'compress/fse_compress.c',
23 'compress/hist.c',
24 'compress/huf_compress.c',
25 'compress/zstd_compress_literals.c',
26 'compress/zstd_compress_sequences.c',
27 'compress/zstd_compress.c',
28 'compress/zstd_double_fast.c',
29 'compress/zstd_fast.c',
30 'compress/zstd_lazy.c',
31 'compress/zstd_ldm.c',
32 'compress/zstd_opt.c',
33 'compress/zstdmt_compress.c',
34 'decompress/huf_decompress.c',
35 'decompress/zstd_ddict.c',
36 'decompress/zstd_decompress.c',
37 'decompress/zstd_decompress_block.c',
38 'dictBuilder/cover.c',
39 'dictBuilder/divsufsort.c',
40 'dictBuilder/fastcover.c',
41 'dictBuilder/zdict.c',
42 )]
13 zstd_sources = [
14 "zstd/%s" % p
15 for p in (
16 "common/debug.c",
17 "common/entropy_common.c",
18 "common/error_private.c",
19 "common/fse_decompress.c",
20 "common/pool.c",
21 "common/threading.c",
22 "common/xxhash.c",
23 "common/zstd_common.c",
24 "compress/fse_compress.c",
25 "compress/hist.c",
26 "compress/huf_compress.c",
27 "compress/zstd_compress_literals.c",
28 "compress/zstd_compress_sequences.c",
29 "compress/zstd_compress.c",
30 "compress/zstd_double_fast.c",
31 "compress/zstd_fast.c",
32 "compress/zstd_lazy.c",
33 "compress/zstd_ldm.c",
34 "compress/zstd_opt.c",
35 "compress/zstdmt_compress.c",
36 "decompress/huf_decompress.c",
37 "decompress/zstd_ddict.c",
38 "decompress/zstd_decompress.c",
39 "decompress/zstd_decompress_block.c",
40 "dictBuilder/cover.c",
41 "dictBuilder/divsufsort.c",
42 "dictBuilder/fastcover.c",
43 "dictBuilder/zdict.c",
44 )
45 ]
43 46
44 zstd_sources_legacy = ['zstd/%s' % p for p in (
45 'deprecated/zbuff_common.c',
46 'deprecated/zbuff_compress.c',
47 'deprecated/zbuff_decompress.c',
48 'legacy/zstd_v01.c',
49 'legacy/zstd_v02.c',
50 'legacy/zstd_v03.c',
51 'legacy/zstd_v04.c',
52 'legacy/zstd_v05.c',
53 'legacy/zstd_v06.c',
54 'legacy/zstd_v07.c'
55 )]
47 zstd_sources_legacy = [
48 "zstd/%s" % p
49 for p in (
50 "deprecated/zbuff_common.c",
51 "deprecated/zbuff_compress.c",
52 "deprecated/zbuff_decompress.c",
53 "legacy/zstd_v01.c",
54 "legacy/zstd_v02.c",
55 "legacy/zstd_v03.c",
56 "legacy/zstd_v04.c",
57 "legacy/zstd_v05.c",
58 "legacy/zstd_v06.c",
59 "legacy/zstd_v07.c",
60 )
61 ]
56 62
57 63 zstd_includes = [
58 'zstd',
59 'zstd/common',
60 'zstd/compress',
61 'zstd/decompress',
62 'zstd/dictBuilder',
64 "zstd",
65 "zstd/common",
66 "zstd/compress",
67 "zstd/decompress",
68 "zstd/dictBuilder",
63 69 ]
64 70
65 71 zstd_includes_legacy = [
66 'zstd/deprecated',
67 'zstd/legacy',
72 "zstd/deprecated",
73 "zstd/legacy",
68 74 ]
69 75
70 76 ext_includes = [
71 'c-ext',
72 'zstd/common',
77 "c-ext",
78 "zstd/common",
73 79 ]
74 80
75 81 ext_sources = [
76 'zstd/common/pool.c',
77 'zstd/common/threading.c',
78 'zstd.c',
79 'c-ext/bufferutil.c',
80 'c-ext/compressiondict.c',
81 'c-ext/compressobj.c',
82 'c-ext/compressor.c',
83 'c-ext/compressoriterator.c',
84 'c-ext/compressionchunker.c',
85 'c-ext/compressionparams.c',
86 'c-ext/compressionreader.c',
87 'c-ext/compressionwriter.c',
88 'c-ext/constants.c',
89 'c-ext/decompressobj.c',
90 'c-ext/decompressor.c',
91 'c-ext/decompressoriterator.c',
92 'c-ext/decompressionreader.c',
93 'c-ext/decompressionwriter.c',
94 'c-ext/frameparams.c',
82 "zstd/common/error_private.c",
83 "zstd/common/pool.c",
84 "zstd/common/threading.c",
85 "zstd/common/zstd_common.c",
86 "zstd.c",
87 "c-ext/bufferutil.c",
88 "c-ext/compressiondict.c",
89 "c-ext/compressobj.c",
90 "c-ext/compressor.c",
91 "c-ext/compressoriterator.c",
92 "c-ext/compressionchunker.c",
93 "c-ext/compressionparams.c",
94 "c-ext/compressionreader.c",
95 "c-ext/compressionwriter.c",
96 "c-ext/constants.c",
97 "c-ext/decompressobj.c",
98 "c-ext/decompressor.c",
99 "c-ext/decompressoriterator.c",
100 "c-ext/decompressionreader.c",
101 "c-ext/decompressionwriter.c",
102 "c-ext/frameparams.c",
95 103 ]
96 104
97 105 zstd_depends = [
98 'c-ext/python-zstandard.h',
106 "c-ext/python-zstandard.h",
99 107 ]
100 108
101 109
102 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
103 warnings_as_errors=False, root=None):
110 def get_c_extension(
111 support_legacy=False,
112 system_zstd=False,
113 name="zstd",
114 warnings_as_errors=False,
115 root=None,
116 ):
104 117 """Obtain a distutils.extension.Extension for the C extension.
105 118
106 119 ``support_legacy`` controls whether to compile in legacy zstd format support.
@@ -125,17 +138,16 def get_c_extension(support_legacy=False
125 138 if not system_zstd:
126 139 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
127 140 if support_legacy:
128 sources.update([os.path.join(actual_root, p)
129 for p in zstd_sources_legacy])
141 sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy])
130 142 sources = list(sources)
131 143
132 144 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
133 145 if not system_zstd:
134 include_dirs.update([os.path.join(actual_root, d)
135 for d in zstd_includes])
146 include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes])
136 147 if support_legacy:
137 include_dirs.update([os.path.join(actual_root, d)
138 for d in zstd_includes_legacy])
148 include_dirs.update(
149 [os.path.join(actual_root, d) for d in zstd_includes_legacy]
150 )
139 151 include_dirs = list(include_dirs)
140 152
141 153 depends = [os.path.join(actual_root, p) for p in zstd_depends]
@@ -143,41 +155,40 def get_c_extension(support_legacy=False
143 155 compiler = distutils.ccompiler.new_compiler()
144 156
145 157 # Needed for MSVC.
146 if hasattr(compiler, 'initialize'):
158 if hasattr(compiler, "initialize"):
147 159 compiler.initialize()
148 160
149 if compiler.compiler_type == 'unix':
150 compiler_type = 'unix'
151 elif compiler.compiler_type == 'msvc':
152 compiler_type = 'msvc'
153 elif compiler.compiler_type == 'mingw32':
154 compiler_type = 'mingw32'
161 if compiler.compiler_type == "unix":
162 compiler_type = "unix"
163 elif compiler.compiler_type == "msvc":
164 compiler_type = "msvc"
165 elif compiler.compiler_type == "mingw32":
166 compiler_type = "mingw32"
155 167 else:
156 raise Exception('unhandled compiler type: %s' %
157 compiler.compiler_type)
168 raise Exception("unhandled compiler type: %s" % compiler.compiler_type)
158 169
159 extra_args = ['-DZSTD_MULTITHREAD']
170 extra_args = ["-DZSTD_MULTITHREAD"]
160 171
161 172 if not system_zstd:
162 extra_args.append('-DZSTDLIB_VISIBILITY=')
163 extra_args.append('-DZDICTLIB_VISIBILITY=')
164 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
173 extra_args.append("-DZSTDLIB_VISIBILITY=")
174 extra_args.append("-DZDICTLIB_VISIBILITY=")
175 extra_args.append("-DZSTDERRORLIB_VISIBILITY=")
165 176
166 if compiler_type == 'unix':
167 extra_args.append('-fvisibility=hidden')
177 if compiler_type == "unix":
178 extra_args.append("-fvisibility=hidden")
168 179
169 180 if not system_zstd and support_legacy:
170 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
181 extra_args.append("-DZSTD_LEGACY_SUPPORT=1")
171 182
172 183 if warnings_as_errors:
173 if compiler_type in ('unix', 'mingw32'):
174 extra_args.append('-Werror')
175 elif compiler_type == 'msvc':
176 extra_args.append('/WX')
184 if compiler_type in ("unix", "mingw32"):
185 extra_args.append("-Werror")
186 elif compiler_type == "msvc":
187 extra_args.append("/WX")
177 188 else:
178 189 assert False
179 190
180 libraries = ['zstd'] if system_zstd else []
191 libraries = ["zstd"] if system_zstd else []
181 192
182 193 # Python 3.7 doesn't like absolute paths. So normalize to relative.
183 194 sources = [os.path.relpath(p, root) for p in sources]
@@ -185,8 +196,11 def get_c_extension(support_legacy=False
185 196 depends = [os.path.relpath(p, root) for p in depends]
186 197
187 198 # TODO compile with optimizations.
188 return Extension(name, sources,
189 include_dirs=include_dirs,
190 depends=depends,
191 extra_compile_args=extra_args,
192 libraries=libraries)
199 return Extension(
200 name,
201 sources,
202 include_dirs=include_dirs,
203 depends=depends,
204 extra_compile_args=extra_args,
205 libraries=libraries,
206 )
@@ -3,6 +3,7 import inspect
3 3 import io
4 4 import os
5 5 import types
6 import unittest
6 7
7 8 try:
8 9 import hypothesis
@@ -10,39 +11,46 except ImportError:
10 11 hypothesis = None
11 12
12 13
14 class TestCase(unittest.TestCase):
15 if not getattr(unittest.TestCase, "assertRaisesRegex", False):
16 assertRaisesRegex = unittest.TestCase.assertRaisesRegexp
17
18
13 19 def make_cffi(cls):
14 20 """Decorator to add CFFI versions of each test method."""
15 21
16 22 # The module containing this class definition should
17 23 # `import zstandard as zstd`. Otherwise things may blow up.
18 24 mod = inspect.getmodule(cls)
19 if not hasattr(mod, 'zstd'):
25 if not hasattr(mod, "zstd"):
20 26 raise Exception('test module does not contain "zstd" symbol')
21 27
22 if not hasattr(mod.zstd, 'backend'):
23 raise Exception('zstd symbol does not have "backend" attribute; did '
24 'you `import zstandard as zstd`?')
28 if not hasattr(mod.zstd, "backend"):
29 raise Exception(
30 'zstd symbol does not have "backend" attribute; did '
31 "you `import zstandard as zstd`?"
32 )
25 33
26 34 # If `import zstandard` already chose the cffi backend, there is nothing
27 35 # for us to do: we only add the cffi variation if the default backend
28 36 # is the C extension.
29 if mod.zstd.backend == 'cffi':
37 if mod.zstd.backend == "cffi":
30 38 return cls
31 39
32 40 old_env = dict(os.environ)
33 os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'
41 os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi"
34 42 try:
35 43 try:
36 mod_info = imp.find_module('zstandard')
37 mod = imp.load_module('zstandard_cffi', *mod_info)
44 mod_info = imp.find_module("zstandard")
45 mod = imp.load_module("zstandard_cffi", *mod_info)
38 46 except ImportError:
39 47 return cls
40 48 finally:
41 49 os.environ.clear()
42 50 os.environ.update(old_env)
43 51
44 if mod.backend != 'cffi':
45 raise Exception('got the zstandard %s backend instead of cffi' % mod.backend)
52 if mod.backend != "cffi":
53 raise Exception("got the zstandard %s backend instead of cffi" % mod.backend)
46 54
47 55 # If CFFI version is available, dynamically construct test methods
48 56 # that use it.
@@ -52,27 +60,31 def make_cffi(cls):
52 60 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
53 61 continue
54 62
55 if not fn.__name__.startswith('test_'):
63 if not fn.__name__.startswith("test_"):
56 64 continue
57 65
58 name = '%s_cffi' % fn.__name__
66 name = "%s_cffi" % fn.__name__
59 67
60 68 # Replace the "zstd" symbol with the CFFI module instance. Then copy
61 69 # the function object and install it in a new attribute.
62 70 if isinstance(fn, types.FunctionType):
63 71 globs = dict(fn.__globals__)
64 globs['zstd'] = mod
65 new_fn = types.FunctionType(fn.__code__, globs, name,
66 fn.__defaults__, fn.__closure__)
72 globs["zstd"] = mod
73 new_fn = types.FunctionType(
74 fn.__code__, globs, name, fn.__defaults__, fn.__closure__
75 )
67 76 new_method = new_fn
68 77 else:
69 78 globs = dict(fn.__func__.func_globals)
70 globs['zstd'] = mod
71 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
72 fn.__func__.func_defaults,
73 fn.__func__.func_closure)
74 new_method = types.UnboundMethodType(new_fn, fn.im_self,
75 fn.im_class)
79 globs["zstd"] = mod
80 new_fn = types.FunctionType(
81 fn.__func__.func_code,
82 globs,
83 name,
84 fn.__func__.func_defaults,
85 fn.__func__.func_closure,
86 )
87 new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class)
76 88
77 89 setattr(cls, name, new_method)
78 90
@@ -84,6 +96,7 class NonClosingBytesIO(io.BytesIO):
84 96
85 97 This allows us to access written data after close().
86 98 """
99
87 100 def __init__(self, *args, **kwargs):
88 101 super(NonClosingBytesIO, self).__init__(*args, **kwargs)
89 102 self._saved_buffer = None
@@ -135,7 +148,7 def random_input_data():
135 148 dirs[:] = list(sorted(dirs))
136 149 for f in sorted(files):
137 150 try:
138 with open(os.path.join(root, f), 'rb') as fh:
151 with open(os.path.join(root, f), "rb") as fh:
139 152 data = fh.read()
140 153 if data:
141 154 _source_files.append(data)
@@ -154,11 +167,11 def random_input_data():
154 167
155 168 def generate_samples():
156 169 inputs = [
157 b'foo',
158 b'bar',
159 b'abcdef',
160 b'sometext',
161 b'baz',
170 b"foo",
171 b"bar",
172 b"abcdef",
173 b"sometext",
174 b"baz",
162 175 ]
163 176
164 177 samples = []
@@ -173,13 +186,12 def generate_samples():
173 186
174 187 if hypothesis:
175 188 default_settings = hypothesis.settings(deadline=10000)
176 hypothesis.settings.register_profile('default', default_settings)
189 hypothesis.settings.register_profile("default", default_settings)
177 190
178 191 ci_settings = hypothesis.settings(deadline=20000, max_examples=1000)
179 hypothesis.settings.register_profile('ci', ci_settings)
192 hypothesis.settings.register_profile("ci", ci_settings)
180 193
181 194 expensive_settings = hypothesis.settings(deadline=None, max_examples=10000)
182 hypothesis.settings.register_profile('expensive', expensive_settings)
195 hypothesis.settings.register_profile("expensive", expensive_settings)
183 196
184 hypothesis.settings.load_profile(
185 os.environ.get('HYPOTHESIS_PROFILE', 'default'))
197 hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default"))
@@ -3,104 +3,114 import unittest
3 3
4 4 import zstandard as zstd
5 5
6 ss = struct.Struct('=QQ')
6 from .common import TestCase
7
8 ss = struct.Struct("=QQ")
7 9
8 10
9 class TestBufferWithSegments(unittest.TestCase):
11 class TestBufferWithSegments(TestCase):
10 12 def test_arguments(self):
11 if not hasattr(zstd, 'BufferWithSegments'):
12 self.skipTest('BufferWithSegments not available')
13 if not hasattr(zstd, "BufferWithSegments"):
14 self.skipTest("BufferWithSegments not available")
13 15
14 16 with self.assertRaises(TypeError):
15 17 zstd.BufferWithSegments()
16 18
17 19 with self.assertRaises(TypeError):
18 zstd.BufferWithSegments(b'foo')
20 zstd.BufferWithSegments(b"foo")
19 21
20 22 # Segments data should be a multiple of 16.
21 with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
22 zstd.BufferWithSegments(b'foo', b'\x00\x00')
23 with self.assertRaisesRegex(
24 ValueError, "segments array size is not a multiple of 16"
25 ):
26 zstd.BufferWithSegments(b"foo", b"\x00\x00")
23 27
24 28 def test_invalid_offset(self):
25 if not hasattr(zstd, 'BufferWithSegments'):
26 self.skipTest('BufferWithSegments not available')
29 if not hasattr(zstd, "BufferWithSegments"):
30 self.skipTest("BufferWithSegments not available")
27 31
28 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
29 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
32 with self.assertRaisesRegex(
33 ValueError, "offset within segments array references memory"
34 ):
35 zstd.BufferWithSegments(b"foo", ss.pack(0, 4))
30 36
31 37 def test_invalid_getitem(self):
32 if not hasattr(zstd, 'BufferWithSegments'):
33 self.skipTest('BufferWithSegments not available')
38 if not hasattr(zstd, "BufferWithSegments"):
39 self.skipTest("BufferWithSegments not available")
34 40
35 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
41 b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
36 42
37 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
43 with self.assertRaisesRegex(IndexError, "offset must be non-negative"):
38 44 test = b[-10]
39 45
40 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
46 with self.assertRaisesRegex(IndexError, "offset must be less than 1"):
41 47 test = b[1]
42 48
43 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
49 with self.assertRaisesRegex(IndexError, "offset must be less than 1"):
44 50 test = b[2]
45 51
46 52 def test_single(self):
47 if not hasattr(zstd, 'BufferWithSegments'):
48 self.skipTest('BufferWithSegments not available')
53 if not hasattr(zstd, "BufferWithSegments"):
54 self.skipTest("BufferWithSegments not available")
49 55
50 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
56 b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
51 57 self.assertEqual(len(b), 1)
52 58 self.assertEqual(b.size, 3)
53 self.assertEqual(b.tobytes(), b'foo')
59 self.assertEqual(b.tobytes(), b"foo")
54 60
55 61 self.assertEqual(len(b[0]), 3)
56 62 self.assertEqual(b[0].offset, 0)
57 self.assertEqual(b[0].tobytes(), b'foo')
63 self.assertEqual(b[0].tobytes(), b"foo")
58 64
59 65 def test_multiple(self):
60 if not hasattr(zstd, 'BufferWithSegments'):
61 self.skipTest('BufferWithSegments not available')
66 if not hasattr(zstd, "BufferWithSegments"):
67 self.skipTest("BufferWithSegments not available")
62 68
63 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
64 ss.pack(3, 4),
65 ss.pack(7, 5)]))
69 b = zstd.BufferWithSegments(
70 b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)])
71 )
66 72 self.assertEqual(len(b), 3)
67 73 self.assertEqual(b.size, 12)
68 self.assertEqual(b.tobytes(), b'foofooxfooxy')
74 self.assertEqual(b.tobytes(), b"foofooxfooxy")
69 75
70 self.assertEqual(b[0].tobytes(), b'foo')
71 self.assertEqual(b[1].tobytes(), b'foox')
72 self.assertEqual(b[2].tobytes(), b'fooxy')
76 self.assertEqual(b[0].tobytes(), b"foo")
77 self.assertEqual(b[1].tobytes(), b"foox")
78 self.assertEqual(b[2].tobytes(), b"fooxy")
73 79
74 80
75 class TestBufferWithSegmentsCollection(unittest.TestCase):
81 class TestBufferWithSegmentsCollection(TestCase):
76 82 def test_empty_constructor(self):
77 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
78 self.skipTest('BufferWithSegmentsCollection not available')
83 if not hasattr(zstd, "BufferWithSegmentsCollection"):
84 self.skipTest("BufferWithSegmentsCollection not available")
79 85
80 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
86 with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"):
81 87 zstd.BufferWithSegmentsCollection()
82 88
83 89 def test_argument_validation(self):
84 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
85 self.skipTest('BufferWithSegmentsCollection not available')
90 if not hasattr(zstd, "BufferWithSegmentsCollection"):
91 self.skipTest("BufferWithSegmentsCollection not available")
86 92
87 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
93 with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"):
88 94 zstd.BufferWithSegmentsCollection(None)
89 95
90 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
91 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
92 None)
96 with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"):
97 zstd.BufferWithSegmentsCollection(
98 zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None
99 )
93 100
94 with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
95 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
101 with self.assertRaisesRegex(
102 ValueError, "ZstdBufferWithSegments cannot be empty"
103 ):
104 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b""))
96 105
97 106 def test_length(self):
98 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
99 self.skipTest('BufferWithSegmentsCollection not available')
107 if not hasattr(zstd, "BufferWithSegmentsCollection"):
108 self.skipTest("BufferWithSegmentsCollection not available")
100 109
101 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
102 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
103 ss.pack(3, 3)]))
110 b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
111 b2 = zstd.BufferWithSegments(
112 b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
113 )
104 114
105 115 c = zstd.BufferWithSegmentsCollection(b1)
106 116 self.assertEqual(len(c), 1)
@@ -115,21 +125,22 class TestBufferWithSegmentsCollection(u
115 125 self.assertEqual(c.size(), 9)
116 126
117 127 def test_getitem(self):
118 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
119 self.skipTest('BufferWithSegmentsCollection not available')
128 if not hasattr(zstd, "BufferWithSegmentsCollection"):
129 self.skipTest("BufferWithSegmentsCollection not available")
120 130
121 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
122 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
123 ss.pack(3, 3)]))
131 b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
132 b2 = zstd.BufferWithSegments(
133 b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
134 )
124 135
125 136 c = zstd.BufferWithSegmentsCollection(b1, b2)
126 137
127 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
138 with self.assertRaisesRegex(IndexError, "offset must be less than 3"):
128 139 c[3]
129 140
130 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
141 with self.assertRaisesRegex(IndexError, "offset must be less than 3"):
131 142 c[4]
132 143
133 self.assertEqual(c[0].tobytes(), b'foo')
134 self.assertEqual(c[1].tobytes(), b'bar')
135 self.assertEqual(c[2].tobytes(), b'baz')
144 self.assertEqual(c[0].tobytes(), b"foo")
145 self.assertEqual(c[1].tobytes(), b"bar")
146 self.assertEqual(c[2].tobytes(), b"baz")
This diff has been collapsed as it changes many lines, (811 lines changed) Show them Hide them
@@ -13,6 +13,7 from .common import (
13 13 make_cffi,
14 14 NonClosingBytesIO,
15 15 OpCountingBytesIO,
16 TestCase,
16 17 )
17 18
18 19
@@ -23,14 +24,13 else:
23 24
24 25
25 26 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.ZstdCompressionParameters.from_level(level,
27 source_size=source_size)
27 params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size)
28 28
29 29 return 1 << (params.window_log + 2)
30 30
31 31
32 32 @make_cffi
33 class TestCompressor(unittest.TestCase):
33 class TestCompressor(TestCase):
34 34 def test_level_bounds(self):
35 35 with self.assertRaises(ValueError):
36 36 zstd.ZstdCompressor(level=23)
@@ -41,11 +41,11 class TestCompressor(unittest.TestCase):
41 41
42 42
43 43 @make_cffi
44 class TestCompressor_compress(unittest.TestCase):
44 class TestCompressor_compress(TestCase):
45 45 def test_compress_empty(self):
46 46 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
47 result = cctx.compress(b'')
48 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
47 result = cctx.compress(b"")
48 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
49 49 params = zstd.get_frame_parameters(result)
50 50 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
51 51 self.assertEqual(params.window_size, 524288)
@@ -53,21 +53,21 class TestCompressor_compress(unittest.T
53 53 self.assertFalse(params.has_checksum, 0)
54 54
55 55 cctx = zstd.ZstdCompressor()
56 result = cctx.compress(b'')
57 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
56 result = cctx.compress(b"")
57 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00")
58 58 params = zstd.get_frame_parameters(result)
59 59 self.assertEqual(params.content_size, 0)
60 60
61 61 def test_input_types(self):
62 62 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
63 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
63 expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f"
64 64
65 65 mutable_array = bytearray(3)
66 mutable_array[:] = b'foo'
66 mutable_array[:] = b"foo"
67 67
68 68 sources = [
69 memoryview(b'foo'),
70 bytearray(b'foo'),
69 memoryview(b"foo"),
70 bytearray(b"foo"),
71 71 mutable_array,
72 72 ]
73 73
@@ -77,43 +77,46 class TestCompressor_compress(unittest.T
77 77 def test_compress_large(self):
78 78 chunks = []
79 79 for i in range(255):
80 chunks.append(struct.Struct('>B').pack(i) * 16384)
80 chunks.append(struct.Struct(">B").pack(i) * 16384)
81 81
82 82 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
83 result = cctx.compress(b''.join(chunks))
83 result = cctx.compress(b"".join(chunks))
84 84 self.assertEqual(len(result), 999)
85 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
85 self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd")
86 86
87 87 # This matches the test for read_to_iter() below.
88 88 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
89 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
90 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
91 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
92 b'\x02\x09\x00\x00\x6f')
89 result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o")
90 self.assertEqual(
91 result,
92 b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00"
93 b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0"
94 b"\x02\x09\x00\x00\x6f",
95 )
93 96
94 97 def test_negative_level(self):
95 98 cctx = zstd.ZstdCompressor(level=-4)
96 result = cctx.compress(b'foo' * 256)
99 result = cctx.compress(b"foo" * 256)
97 100
98 101 def test_no_magic(self):
99 params = zstd.ZstdCompressionParameters.from_level(
100 1, format=zstd.FORMAT_ZSTD1)
102 params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1)
101 103 cctx = zstd.ZstdCompressor(compression_params=params)
102 magic = cctx.compress(b'foobar')
104 magic = cctx.compress(b"foobar")
103 105
104 106 params = zstd.ZstdCompressionParameters.from_level(
105 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
107 1, format=zstd.FORMAT_ZSTD1_MAGICLESS
108 )
106 109 cctx = zstd.ZstdCompressor(compression_params=params)
107 no_magic = cctx.compress(b'foobar')
110 no_magic = cctx.compress(b"foobar")
108 111
109 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
112 self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd")
110 113 self.assertEqual(magic[4:], no_magic)
111 114
112 115 def test_write_checksum(self):
113 116 cctx = zstd.ZstdCompressor(level=1)
114 no_checksum = cctx.compress(b'foobar')
117 no_checksum = cctx.compress(b"foobar")
115 118 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
116 with_checksum = cctx.compress(b'foobar')
119 with_checksum = cctx.compress(b"foobar")
117 120
118 121 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
119 122
@@ -125,9 +128,9 class TestCompressor_compress(unittest.T
125 128
126 129 def test_write_content_size(self):
127 130 cctx = zstd.ZstdCompressor(level=1)
128 with_size = cctx.compress(b'foobar' * 256)
131 with_size = cctx.compress(b"foobar" * 256)
129 132 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
130 no_size = cctx.compress(b'foobar' * 256)
133 no_size = cctx.compress(b"foobar" * 256)
131 134
132 135 self.assertEqual(len(with_size), len(no_size) + 1)
133 136
@@ -139,17 +142,17 class TestCompressor_compress(unittest.T
139 142 def test_no_dict_id(self):
140 143 samples = []
141 144 for i in range(128):
142 samples.append(b'foo' * 64)
143 samples.append(b'bar' * 64)
144 samples.append(b'foobar' * 64)
145 samples.append(b"foo" * 64)
146 samples.append(b"bar" * 64)
147 samples.append(b"foobar" * 64)
145 148
146 149 d = zstd.train_dictionary(1024, samples)
147 150
148 151 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
149 with_dict_id = cctx.compress(b'foobarfoobar')
152 with_dict_id = cctx.compress(b"foobarfoobar")
150 153
151 154 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
152 no_dict_id = cctx.compress(b'foobarfoobar')
155 no_dict_id = cctx.compress(b"foobarfoobar")
153 156
154 157 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
155 158
@@ -161,23 +164,23 class TestCompressor_compress(unittest.T
161 164 def test_compress_dict_multiple(self):
162 165 samples = []
163 166 for i in range(128):
164 samples.append(b'foo' * 64)
165 samples.append(b'bar' * 64)
166 samples.append(b'foobar' * 64)
167 samples.append(b"foo" * 64)
168 samples.append(b"bar" * 64)
169 samples.append(b"foobar" * 64)
167 170
168 171 d = zstd.train_dictionary(8192, samples)
169 172
170 173 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
171 174
172 175 for i in range(32):
173 cctx.compress(b'foo bar foobar foo bar foobar')
176 cctx.compress(b"foo bar foobar foo bar foobar")
174 177
175 178 def test_dict_precompute(self):
176 179 samples = []
177 180 for i in range(128):
178 samples.append(b'foo' * 64)
179 samples.append(b'bar' * 64)
180 samples.append(b'foobar' * 64)
181 samples.append(b"foo" * 64)
182 samples.append(b"bar" * 64)
183 samples.append(b"foobar" * 64)
181 184
182 185 d = zstd.train_dictionary(8192, samples)
183 186 d.precompute_compress(level=1)
@@ -185,11 +188,11 class TestCompressor_compress(unittest.T
185 188 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
186 189
187 190 for i in range(32):
188 cctx.compress(b'foo bar foobar foo bar foobar')
191 cctx.compress(b"foo bar foobar foo bar foobar")
189 192
190 193 def test_multithreaded(self):
191 194 chunk_size = multithreaded_chunk_size(1)
192 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
195 source = b"".join([b"x" * chunk_size, b"y" * chunk_size])
193 196
194 197 cctx = zstd.ZstdCompressor(level=1, threads=2)
195 198 compressed = cctx.compress(source)
@@ -205,73 +208,72 class TestCompressor_compress(unittest.T
205 208 def test_multithreaded_dict(self):
206 209 samples = []
207 210 for i in range(128):
208 samples.append(b'foo' * 64)
209 samples.append(b'bar' * 64)
210 samples.append(b'foobar' * 64)
211 samples.append(b"foo" * 64)
212 samples.append(b"bar" * 64)
213 samples.append(b"foobar" * 64)
211 214
212 215 d = zstd.train_dictionary(1024, samples)
213 216
214 217 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
215 218
216 result = cctx.compress(b'foo')
217 params = zstd.get_frame_parameters(result);
218 self.assertEqual(params.content_size, 3);
219 result = cctx.compress(b"foo")
220 params = zstd.get_frame_parameters(result)
221 self.assertEqual(params.content_size, 3)
219 222 self.assertEqual(params.dict_id, d.dict_id())
220 223
221 self.assertEqual(result,
222 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
223 b'\x66\x6f\x6f')
224 self.assertEqual(
225 result,
226 b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f",
227 )
224 228
225 229 def test_multithreaded_compression_params(self):
226 230 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
227 231 cctx = zstd.ZstdCompressor(compression_params=params)
228 232
229 result = cctx.compress(b'foo')
230 params = zstd.get_frame_parameters(result);
231 self.assertEqual(params.content_size, 3);
233 result = cctx.compress(b"foo")
234 params = zstd.get_frame_parameters(result)
235 self.assertEqual(params.content_size, 3)
232 236
233 self.assertEqual(result,
234 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
237 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f")
235 238
236 239
237 240 @make_cffi
238 class TestCompressor_compressobj(unittest.TestCase):
241 class TestCompressor_compressobj(TestCase):
239 242 def test_compressobj_empty(self):
240 243 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
241 244 cobj = cctx.compressobj()
242 self.assertEqual(cobj.compress(b''), b'')
243 self.assertEqual(cobj.flush(),
244 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
245 self.assertEqual(cobj.compress(b""), b"")
246 self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
245 247
246 248 def test_input_types(self):
247 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
249 expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f"
248 250 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
249 251
250 252 mutable_array = bytearray(3)
251 mutable_array[:] = b'foo'
253 mutable_array[:] = b"foo"
252 254
253 255 sources = [
254 memoryview(b'foo'),
255 bytearray(b'foo'),
256 memoryview(b"foo"),
257 bytearray(b"foo"),
256 258 mutable_array,
257 259 ]
258 260
259 261 for source in sources:
260 262 cobj = cctx.compressobj()
261 self.assertEqual(cobj.compress(source), b'')
263 self.assertEqual(cobj.compress(source), b"")
262 264 self.assertEqual(cobj.flush(), expected)
263 265
264 266 def test_compressobj_large(self):
265 267 chunks = []
266 268 for i in range(255):
267 chunks.append(struct.Struct('>B').pack(i) * 16384)
269 chunks.append(struct.Struct(">B").pack(i) * 16384)
268 270
269 271 cctx = zstd.ZstdCompressor(level=3)
270 272 cobj = cctx.compressobj()
271 273
272 result = cobj.compress(b''.join(chunks)) + cobj.flush()
274 result = cobj.compress(b"".join(chunks)) + cobj.flush()
273 275 self.assertEqual(len(result), 999)
274 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
276 self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd")
275 277
276 278 params = zstd.get_frame_parameters(result)
277 279 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
@@ -282,10 +284,10 class TestCompressor_compressobj(unittes
282 284 def test_write_checksum(self):
283 285 cctx = zstd.ZstdCompressor(level=1)
284 286 cobj = cctx.compressobj()
285 no_checksum = cobj.compress(b'foobar') + cobj.flush()
287 no_checksum = cobj.compress(b"foobar") + cobj.flush()
286 288 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
287 289 cobj = cctx.compressobj()
288 with_checksum = cobj.compress(b'foobar') + cobj.flush()
290 with_checksum = cobj.compress(b"foobar") + cobj.flush()
289 291
290 292 no_params = zstd.get_frame_parameters(no_checksum)
291 293 with_params = zstd.get_frame_parameters(with_checksum)
@@ -300,11 +302,11 class TestCompressor_compressobj(unittes
300 302
301 303 def test_write_content_size(self):
302 304 cctx = zstd.ZstdCompressor(level=1)
303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
304 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
305 cobj = cctx.compressobj(size=len(b"foobar" * 256))
306 with_size = cobj.compress(b"foobar" * 256) + cobj.flush()
305 307 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
306 cobj = cctx.compressobj(size=len(b'foobar' * 256))
307 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
308 cobj = cctx.compressobj(size=len(b"foobar" * 256))
309 no_size = cobj.compress(b"foobar" * 256) + cobj.flush()
308 310
309 311 no_params = zstd.get_frame_parameters(no_size)
310 312 with_params = zstd.get_frame_parameters(with_size)
@@ -321,48 +323,53 class TestCompressor_compressobj(unittes
321 323 cctx = zstd.ZstdCompressor()
322 324 cobj = cctx.compressobj()
323 325
324 cobj.compress(b'foo')
326 cobj.compress(b"foo")
325 327 cobj.flush()
326 328
327 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
328 cobj.compress(b'foo')
329 with self.assertRaisesRegex(
330 zstd.ZstdError, r"cannot call compress\(\) after compressor"
331 ):
332 cobj.compress(b"foo")
329 333
330 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
334 with self.assertRaisesRegex(
335 zstd.ZstdError, "compressor object already finished"
336 ):
331 337 cobj.flush()
332 338
333 339 def test_flush_block_repeated(self):
334 340 cctx = zstd.ZstdCompressor(level=1)
335 341 cobj = cctx.compressobj()
336 342
337 self.assertEqual(cobj.compress(b'foo'), b'')
338 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
339 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
340 self.assertEqual(cobj.compress(b'bar'), b'')
343 self.assertEqual(cobj.compress(b"foo"), b"")
344 self.assertEqual(
345 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
346 b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo",
347 )
348 self.assertEqual(cobj.compress(b"bar"), b"")
341 349 # 3 byte header plus content.
342 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
343 b'\x18\x00\x00bar')
344 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
350 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar")
351 self.assertEqual(cobj.flush(), b"\x01\x00\x00")
345 352
346 353 def test_flush_empty_block(self):
347 354 cctx = zstd.ZstdCompressor(write_checksum=True)
348 355 cobj = cctx.compressobj()
349 356
350 cobj.compress(b'foobar')
357 cobj.compress(b"foobar")
351 358 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
352 359 # No-op if no block is active (this is internal to zstd).
353 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
360 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"")
354 361
355 362 trailing = cobj.flush()
356 363 # 3 bytes block header + 4 bytes frame checksum
357 364 self.assertEqual(len(trailing), 7)
358 365 header = trailing[0:3]
359 self.assertEqual(header, b'\x01\x00\x00')
366 self.assertEqual(header, b"\x01\x00\x00")
360 367
361 368 def test_multithreaded(self):
362 369 source = io.BytesIO()
363 source.write(b'a' * 1048576)
364 source.write(b'b' * 1048576)
365 source.write(b'c' * 1048576)
370 source.write(b"a" * 1048576)
371 source.write(b"b" * 1048576)
372 source.write(b"c" * 1048576)
366 373 source.seek(0)
367 374
368 375 cctx = zstd.ZstdCompressor(level=1, threads=2)
@@ -378,9 +385,9 class TestCompressor_compressobj(unittes
378 385
379 386 chunks.append(cobj.flush())
380 387
381 compressed = b''.join(chunks)
388 compressed = b"".join(chunks)
382 389
383 self.assertEqual(len(compressed), 295)
390 self.assertEqual(len(compressed), 119)
384 391
385 392 def test_frame_progression(self):
386 393 cctx = zstd.ZstdCompressor()
@@ -389,7 +396,7 class TestCompressor_compressobj(unittes
389 396
390 397 cobj = cctx.compressobj()
391 398
392 cobj.compress(b'foobar')
399 cobj.compress(b"foobar")
393 400 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
394 401
395 402 cobj.flush()
@@ -399,20 +406,20 class TestCompressor_compressobj(unittes
399 406 cctx = zstd.ZstdCompressor()
400 407
401 408 cobj = cctx.compressobj(size=2)
402 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
403 cobj.compress(b'foo')
409 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
410 cobj.compress(b"foo")
404 411
405 412 # Try another operation on this instance.
406 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
407 cobj.compress(b'aa')
413 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
414 cobj.compress(b"aa")
408 415
409 416 # Try another operation on the compressor.
410 417 cctx.compressobj(size=4)
411 cctx.compress(b'foobar')
418 cctx.compress(b"foobar")
412 419
413 420
414 421 @make_cffi
415 class TestCompressor_copy_stream(unittest.TestCase):
422 class TestCompressor_copy_stream(TestCase):
416 423 def test_no_read(self):
417 424 source = object()
418 425 dest = io.BytesIO()
@@ -438,13 +445,12 class TestCompressor_copy_stream(unittes
438 445 self.assertEqual(int(r), 0)
439 446 self.assertEqual(w, 9)
440 447
441 self.assertEqual(dest.getvalue(),
442 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
448 self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
443 449
444 450 def test_large_data(self):
445 451 source = io.BytesIO()
446 452 for i in range(255):
447 source.write(struct.Struct('>B').pack(i) * 16384)
453 source.write(struct.Struct(">B").pack(i) * 16384)
448 454 source.seek(0)
449 455
450 456 dest = io.BytesIO()
@@ -461,7 +467,7 class TestCompressor_copy_stream(unittes
461 467 self.assertFalse(params.has_checksum)
462 468
463 469 def test_write_checksum(self):
464 source = io.BytesIO(b'foobar')
470 source = io.BytesIO(b"foobar")
465 471 no_checksum = io.BytesIO()
466 472
467 473 cctx = zstd.ZstdCompressor(level=1)
@@ -472,8 +478,7 class TestCompressor_copy_stream(unittes
472 478 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
473 479 cctx.copy_stream(source, with_checksum)
474 480
475 self.assertEqual(len(with_checksum.getvalue()),
476 len(no_checksum.getvalue()) + 4)
481 self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4)
477 482
478 483 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
479 484 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
@@ -485,7 +490,7 class TestCompressor_copy_stream(unittes
485 490 self.assertTrue(with_params.has_checksum)
486 491
487 492 def test_write_content_size(self):
488 source = io.BytesIO(b'foobar' * 256)
493 source = io.BytesIO(b"foobar" * 256)
489 494 no_size = io.BytesIO()
490 495
491 496 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
@@ -497,16 +502,14 class TestCompressor_copy_stream(unittes
497 502 cctx.copy_stream(source, with_size)
498 503
499 504 # Source content size is unknown, so no content size written.
500 self.assertEqual(len(with_size.getvalue()),
501 len(no_size.getvalue()))
505 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()))
502 506
503 507 source.seek(0)
504 508 with_size = io.BytesIO()
505 509 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
506 510
507 511 # We specified source size, so content size header is present.
508 self.assertEqual(len(with_size.getvalue()),
509 len(no_size.getvalue()) + 1)
512 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1)
510 513
511 514 no_params = zstd.get_frame_parameters(no_size.getvalue())
512 515 with_params = zstd.get_frame_parameters(with_size.getvalue())
@@ -518,7 +521,7 class TestCompressor_copy_stream(unittes
518 521 self.assertFalse(with_params.has_checksum)
519 522
520 523 def test_read_write_size(self):
521 source = OpCountingBytesIO(b'foobarfoobar')
524 source = OpCountingBytesIO(b"foobarfoobar")
522 525 dest = OpCountingBytesIO()
523 526 cctx = zstd.ZstdCompressor()
524 527 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
@@ -530,16 +533,16 class TestCompressor_copy_stream(unittes
530 533
531 534 def test_multithreaded(self):
532 535 source = io.BytesIO()
533 source.write(b'a' * 1048576)
534 source.write(b'b' * 1048576)
535 source.write(b'c' * 1048576)
536 source.write(b"a" * 1048576)
537 source.write(b"b" * 1048576)
538 source.write(b"c" * 1048576)
536 539 source.seek(0)
537 540
538 541 dest = io.BytesIO()
539 542 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
540 543 r, w = cctx.copy_stream(source, dest)
541 544 self.assertEqual(r, 3145728)
542 self.assertEqual(w, 295)
545 self.assertEqual(w, 111)
543 546
544 547 params = zstd.get_frame_parameters(dest.getvalue())
545 548 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
@@ -559,15 +562,15 class TestCompressor_copy_stream(unittes
559 562
560 563 def test_bad_size(self):
561 564 source = io.BytesIO()
562 source.write(b'a' * 32768)
563 source.write(b'b' * 32768)
565 source.write(b"a" * 32768)
566 source.write(b"b" * 32768)
564 567 source.seek(0)
565 568
566 569 dest = io.BytesIO()
567 570
568 571 cctx = zstd.ZstdCompressor()
569 572
570 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
573 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
571 574 cctx.copy_stream(source, dest, size=42)
572 575
573 576 # Try another operation on this compressor.
@@ -577,31 +580,31 class TestCompressor_copy_stream(unittes
577 580
578 581
579 582 @make_cffi
580 class TestCompressor_stream_reader(unittest.TestCase):
583 class TestCompressor_stream_reader(TestCase):
581 584 def test_context_manager(self):
582 585 cctx = zstd.ZstdCompressor()
583 586
584 with cctx.stream_reader(b'foo') as reader:
585 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
587 with cctx.stream_reader(b"foo") as reader:
588 with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"):
586 589 with reader as reader2:
587 590 pass
588 591
589 592 def test_no_context_manager(self):
590 593 cctx = zstd.ZstdCompressor()
591 594
592 reader = cctx.stream_reader(b'foo')
595 reader = cctx.stream_reader(b"foo")
593 596 reader.read(4)
594 597 self.assertFalse(reader.closed)
595 598
596 599 reader.close()
597 600 self.assertTrue(reader.closed)
598 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
601 with self.assertRaisesRegex(ValueError, "stream is closed"):
599 602 reader.read(1)
600 603
601 604 def test_not_implemented(self):
602 605 cctx = zstd.ZstdCompressor()
603 606
604 with cctx.stream_reader(b'foo' * 60) as reader:
607 with cctx.stream_reader(b"foo" * 60) as reader:
605 608 with self.assertRaises(io.UnsupportedOperation):
606 609 reader.readline()
607 610
@@ -618,12 +621,12 class TestCompressor_stream_reader(unitt
618 621 reader.writelines([])
619 622
620 623 with self.assertRaises(OSError):
621 reader.write(b'foo')
624 reader.write(b"foo")
622 625
623 626 def test_constant_methods(self):
624 627 cctx = zstd.ZstdCompressor()
625 628
626 with cctx.stream_reader(b'boo') as reader:
629 with cctx.stream_reader(b"boo") as reader:
627 630 self.assertTrue(reader.readable())
628 631 self.assertFalse(reader.writable())
629 632 self.assertFalse(reader.seekable())
@@ -637,27 +640,29 class TestCompressor_stream_reader(unitt
637 640 def test_read_closed(self):
638 641 cctx = zstd.ZstdCompressor()
639 642
640 with cctx.stream_reader(b'foo' * 60) as reader:
643 with cctx.stream_reader(b"foo" * 60) as reader:
641 644 reader.close()
642 645 self.assertTrue(reader.closed)
643 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
646 with self.assertRaisesRegex(ValueError, "stream is closed"):
644 647 reader.read(10)
645 648
646 649 def test_read_sizes(self):
647 650 cctx = zstd.ZstdCompressor()
648 foo = cctx.compress(b'foo')
651 foo = cctx.compress(b"foo")
649 652
650 with cctx.stream_reader(b'foo') as reader:
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
653 with cctx.stream_reader(b"foo") as reader:
654 with self.assertRaisesRegex(
655 ValueError, "cannot read negative amounts less than -1"
656 ):
652 657 reader.read(-2)
653 658
654 self.assertEqual(reader.read(0), b'')
659 self.assertEqual(reader.read(0), b"")
655 660 self.assertEqual(reader.read(), foo)
656 661
657 662 def test_read_buffer(self):
658 663 cctx = zstd.ZstdCompressor()
659 664
660 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
665 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
661 666 frame = cctx.compress(source)
662 667
663 668 with cctx.stream_reader(source) as reader:
@@ -667,13 +672,13 class TestCompressor_stream_reader(unitt
667 672 result = reader.read(8192)
668 673 self.assertEqual(result, frame)
669 674 self.assertEqual(reader.tell(), len(result))
670 self.assertEqual(reader.read(), b'')
675 self.assertEqual(reader.read(), b"")
671 676 self.assertEqual(reader.tell(), len(result))
672 677
673 678 def test_read_buffer_small_chunks(self):
674 679 cctx = zstd.ZstdCompressor()
675 680
676 source = b'foo' * 60
681 source = b"foo" * 60
677 682 chunks = []
678 683
679 684 with cctx.stream_reader(source) as reader:
@@ -687,12 +692,12 class TestCompressor_stream_reader(unitt
687 692 chunks.append(chunk)
688 693 self.assertEqual(reader.tell(), sum(map(len, chunks)))
689 694
690 self.assertEqual(b''.join(chunks), cctx.compress(source))
695 self.assertEqual(b"".join(chunks), cctx.compress(source))
691 696
692 697 def test_read_stream(self):
693 698 cctx = zstd.ZstdCompressor()
694 699
695 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
700 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
696 701 frame = cctx.compress(source)
697 702
698 703 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
@@ -701,13 +706,13 class TestCompressor_stream_reader(unitt
701 706 chunk = reader.read(8192)
702 707 self.assertEqual(chunk, frame)
703 708 self.assertEqual(reader.tell(), len(chunk))
704 self.assertEqual(reader.read(), b'')
709 self.assertEqual(reader.read(), b"")
705 710 self.assertEqual(reader.tell(), len(chunk))
706 711
707 712 def test_read_stream_small_chunks(self):
708 713 cctx = zstd.ZstdCompressor()
709 714
710 source = b'foo' * 60
715 source = b"foo" * 60
711 716 chunks = []
712 717
713 718 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
@@ -721,25 +726,25 class TestCompressor_stream_reader(unitt
721 726 chunks.append(chunk)
722 727 self.assertEqual(reader.tell(), sum(map(len, chunks)))
723 728
724 self.assertEqual(b''.join(chunks), cctx.compress(source))
729 self.assertEqual(b"".join(chunks), cctx.compress(source))
725 730
726 731 def test_read_after_exit(self):
727 732 cctx = zstd.ZstdCompressor()
728 733
729 with cctx.stream_reader(b'foo' * 60) as reader:
734 with cctx.stream_reader(b"foo" * 60) as reader:
730 735 while reader.read(8192):
731 736 pass
732 737
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
738 with self.assertRaisesRegex(ValueError, "stream is closed"):
734 739 reader.read(10)
735 740
736 741 def test_bad_size(self):
737 742 cctx = zstd.ZstdCompressor()
738 743
739 source = io.BytesIO(b'foobar')
744 source = io.BytesIO(b"foobar")
740 745
741 746 with cctx.stream_reader(source, size=2) as reader:
742 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
747 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
743 748 reader.read(10)
744 749
745 750 # Try another compression operation.
@@ -748,36 +753,36 class TestCompressor_stream_reader(unitt
748 753
749 754 def test_readall(self):
750 755 cctx = zstd.ZstdCompressor()
751 frame = cctx.compress(b'foo' * 1024)
756 frame = cctx.compress(b"foo" * 1024)
752 757
753 reader = cctx.stream_reader(b'foo' * 1024)
758 reader = cctx.stream_reader(b"foo" * 1024)
754 759 self.assertEqual(reader.readall(), frame)
755 760
756 761 def test_readinto(self):
757 762 cctx = zstd.ZstdCompressor()
758 foo = cctx.compress(b'foo')
763 foo = cctx.compress(b"foo")
759 764
760 reader = cctx.stream_reader(b'foo')
765 reader = cctx.stream_reader(b"foo")
761 766 with self.assertRaises(Exception):
762 reader.readinto(b'foobar')
767 reader.readinto(b"foobar")
763 768
764 769 # readinto() with sufficiently large destination.
765 770 b = bytearray(1024)
766 reader = cctx.stream_reader(b'foo')
771 reader = cctx.stream_reader(b"foo")
767 772 self.assertEqual(reader.readinto(b), len(foo))
768 self.assertEqual(b[0:len(foo)], foo)
773 self.assertEqual(b[0 : len(foo)], foo)
769 774 self.assertEqual(reader.readinto(b), 0)
770 self.assertEqual(b[0:len(foo)], foo)
775 self.assertEqual(b[0 : len(foo)], foo)
771 776
772 777 # readinto() with small reads.
773 778 b = bytearray(1024)
774 reader = cctx.stream_reader(b'foo', read_size=1)
779 reader = cctx.stream_reader(b"foo", read_size=1)
775 780 self.assertEqual(reader.readinto(b), len(foo))
776 self.assertEqual(b[0:len(foo)], foo)
781 self.assertEqual(b[0 : len(foo)], foo)
777 782
778 783 # Too small destination buffer.
779 784 b = bytearray(2)
780 reader = cctx.stream_reader(b'foo')
785 reader = cctx.stream_reader(b"foo")
781 786 self.assertEqual(reader.readinto(b), 2)
782 787 self.assertEqual(b[:], foo[0:2])
783 788 self.assertEqual(reader.readinto(b), 2)
@@ -787,41 +792,41 class TestCompressor_stream_reader(unitt
787 792
788 793 def test_readinto1(self):
789 794 cctx = zstd.ZstdCompressor()
790 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
795 foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo")))
791 796
792 reader = cctx.stream_reader(b'foo')
797 reader = cctx.stream_reader(b"foo")
793 798 with self.assertRaises(Exception):
794 reader.readinto1(b'foobar')
799 reader.readinto1(b"foobar")
795 800
796 801 b = bytearray(1024)
797 source = OpCountingBytesIO(b'foo')
802 source = OpCountingBytesIO(b"foo")
798 803 reader = cctx.stream_reader(source)
799 804 self.assertEqual(reader.readinto1(b), len(foo))
800 self.assertEqual(b[0:len(foo)], foo)
805 self.assertEqual(b[0 : len(foo)], foo)
801 806 self.assertEqual(source._read_count, 2)
802 807
803 808 # readinto1() with small reads.
804 809 b = bytearray(1024)
805 source = OpCountingBytesIO(b'foo')
810 source = OpCountingBytesIO(b"foo")
806 811 reader = cctx.stream_reader(source, read_size=1)
807 812 self.assertEqual(reader.readinto1(b), len(foo))
808 self.assertEqual(b[0:len(foo)], foo)
813 self.assertEqual(b[0 : len(foo)], foo)
809 814 self.assertEqual(source._read_count, 4)
810 815
811 816 def test_read1(self):
812 817 cctx = zstd.ZstdCompressor()
813 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
818 foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo")))
814 819
815 b = OpCountingBytesIO(b'foo')
820 b = OpCountingBytesIO(b"foo")
816 821 reader = cctx.stream_reader(b)
817 822
818 823 self.assertEqual(reader.read1(), foo)
819 824 self.assertEqual(b._read_count, 2)
820 825
821 b = OpCountingBytesIO(b'foo')
826 b = OpCountingBytesIO(b"foo")
822 827 reader = cctx.stream_reader(b)
823 828
824 self.assertEqual(reader.read1(0), b'')
829 self.assertEqual(reader.read1(0), b"")
825 830 self.assertEqual(reader.read1(2), foo[0:2])
826 831 self.assertEqual(b._read_count, 2)
827 832 self.assertEqual(reader.read1(2), foo[2:4])
@@ -829,7 +834,7 class TestCompressor_stream_reader(unitt
829 834
830 835
831 836 @make_cffi
832 class TestCompressor_stream_writer(unittest.TestCase):
837 class TestCompressor_stream_writer(TestCase):
833 838 def test_io_api(self):
834 839 buffer = io.BytesIO()
835 840 cctx = zstd.ZstdCompressor()
@@ -899,7 +904,7 class TestCompressor_stream_writer(unitt
899 904 self.assertFalse(writer.closed)
900 905
901 906 def test_fileno_file(self):
902 with tempfile.TemporaryFile('wb') as tf:
907 with tempfile.TemporaryFile("wb") as tf:
903 908 cctx = zstd.ZstdCompressor()
904 909 writer = cctx.stream_writer(tf)
905 910
@@ -910,33 +915,35 class TestCompressor_stream_writer(unitt
910 915 cctx = zstd.ZstdCompressor(level=1)
911 916 writer = cctx.stream_writer(buffer)
912 917
913 writer.write(b'foo' * 1024)
918 writer.write(b"foo" * 1024)
914 919 self.assertFalse(writer.closed)
915 920 self.assertFalse(buffer.closed)
916 921 writer.close()
917 922 self.assertTrue(writer.closed)
918 923 self.assertTrue(buffer.closed)
919 924
920 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
921 writer.write(b'foo')
925 with self.assertRaisesRegex(ValueError, "stream is closed"):
926 writer.write(b"foo")
922 927
923 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
928 with self.assertRaisesRegex(ValueError, "stream is closed"):
924 929 writer.flush()
925 930
926 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
931 with self.assertRaisesRegex(ValueError, "stream is closed"):
927 932 with writer:
928 933 pass
929 934
930 self.assertEqual(buffer.getvalue(),
931 b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f'
932 b'\x6f\x01\x00\xfa\xd3\x77\x43')
935 self.assertEqual(
936 buffer.getvalue(),
937 b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f"
938 b"\x6f\x01\x00\xfa\xd3\x77\x43",
939 )
933 940
934 941 # Context manager exit should close stream.
935 942 buffer = io.BytesIO()
936 943 writer = cctx.stream_writer(buffer)
937 944
938 945 with writer:
939 writer.write(b'foo')
946 writer.write(b"foo")
940 947
941 948 self.assertTrue(writer.closed)
942 949
@@ -944,10 +951,10 class TestCompressor_stream_writer(unitt
944 951 buffer = NonClosingBytesIO()
945 952 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
946 953 with cctx.stream_writer(buffer) as compressor:
947 compressor.write(b'')
954 compressor.write(b"")
948 955
949 956 result = buffer.getvalue()
950 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
957 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
951 958
952 959 params = zstd.get_frame_parameters(result)
953 960 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
@@ -958,11 +965,11 class TestCompressor_stream_writer(unitt
958 965 # Test without context manager.
959 966 buffer = io.BytesIO()
960 967 compressor = cctx.stream_writer(buffer)
961 self.assertEqual(compressor.write(b''), 0)
962 self.assertEqual(buffer.getvalue(), b'')
968 self.assertEqual(compressor.write(b""), 0)
969 self.assertEqual(buffer.getvalue(), b"")
963 970 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
964 971 result = buffer.getvalue()
965 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
972 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
966 973
967 974 params = zstd.get_frame_parameters(result)
968 975 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
@@ -972,18 +979,18 class TestCompressor_stream_writer(unitt
972 979
973 980 # Test write_return_read=True
974 981 compressor = cctx.stream_writer(buffer, write_return_read=True)
975 self.assertEqual(compressor.write(b''), 0)
982 self.assertEqual(compressor.write(b""), 0)
976 983
977 984 def test_input_types(self):
978 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
985 expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f"
979 986 cctx = zstd.ZstdCompressor(level=1)
980 987
981 988 mutable_array = bytearray(3)
982 mutable_array[:] = b'foo'
989 mutable_array[:] = b"foo"
983 990
984 991 sources = [
985 memoryview(b'foo'),
986 bytearray(b'foo'),
992 memoryview(b"foo"),
993 bytearray(b"foo"),
987 994 mutable_array,
988 995 ]
989 996
@@ -1001,51 +1008,55 class TestCompressor_stream_writer(unitt
1001 1008 buffer = NonClosingBytesIO()
1002 1009 cctx = zstd.ZstdCompressor(level=5)
1003 1010 with cctx.stream_writer(buffer) as compressor:
1004 self.assertEqual(compressor.write(b'foo'), 0)
1005 self.assertEqual(compressor.write(b'bar'), 0)
1006 self.assertEqual(compressor.write(b'x' * 8192), 0)
1011 self.assertEqual(compressor.write(b"foo"), 0)
1012 self.assertEqual(compressor.write(b"bar"), 0)
1013 self.assertEqual(compressor.write(b"x" * 8192), 0)
1007 1014
1008 1015 result = buffer.getvalue()
1009 self.assertEqual(result,
1010 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1011 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1016 self.assertEqual(
1017 result,
1018 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f"
1019 b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23",
1020 )
1012 1021
1013 1022 # Test without context manager.
1014 1023 buffer = io.BytesIO()
1015 1024 compressor = cctx.stream_writer(buffer)
1016 self.assertEqual(compressor.write(b'foo'), 0)
1017 self.assertEqual(compressor.write(b'bar'), 0)
1018 self.assertEqual(compressor.write(b'x' * 8192), 0)
1025 self.assertEqual(compressor.write(b"foo"), 0)
1026 self.assertEqual(compressor.write(b"bar"), 0)
1027 self.assertEqual(compressor.write(b"x" * 8192), 0)
1019 1028 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1020 1029 result = buffer.getvalue()
1021 self.assertEqual(result,
1022 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1023 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1030 self.assertEqual(
1031 result,
1032 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f"
1033 b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23",
1034 )
1024 1035
1025 1036 # Test with write_return_read=True.
1026 1037 compressor = cctx.stream_writer(buffer, write_return_read=True)
1027 self.assertEqual(compressor.write(b'foo'), 3)
1028 self.assertEqual(compressor.write(b'barbiz'), 6)
1029 self.assertEqual(compressor.write(b'x' * 8192), 8192)
1038 self.assertEqual(compressor.write(b"foo"), 3)
1039 self.assertEqual(compressor.write(b"barbiz"), 6)
1040 self.assertEqual(compressor.write(b"x" * 8192), 8192)
1030 1041
1031 1042 def test_dictionary(self):
1032 1043 samples = []
1033 1044 for i in range(128):
1034 samples.append(b'foo' * 64)
1035 samples.append(b'bar' * 64)
1036 samples.append(b'foobar' * 64)
1045 samples.append(b"foo" * 64)
1046 samples.append(b"bar" * 64)
1047 samples.append(b"foobar" * 64)
1037 1048
1038 1049 d = zstd.train_dictionary(8192, samples)
1039 1050
1040 1051 h = hashlib.sha1(d.as_bytes()).hexdigest()
1041 self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e')
1052 self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e")
1042 1053
1043 1054 buffer = NonClosingBytesIO()
1044 1055 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1045 1056 with cctx.stream_writer(buffer) as compressor:
1046 self.assertEqual(compressor.write(b'foo'), 0)
1047 self.assertEqual(compressor.write(b'bar'), 0)
1048 self.assertEqual(compressor.write(b'foo' * 16384), 0)
1057 self.assertEqual(compressor.write(b"foo"), 0)
1058 self.assertEqual(compressor.write(b"bar"), 0)
1059 self.assertEqual(compressor.write(b"foo" * 16384), 0)
1049 1060
1050 1061 compressed = buffer.getvalue()
1051 1062
@@ -1056,14 +1067,15 class TestCompressor_stream_writer(unitt
1056 1067 self.assertFalse(params.has_checksum)
1057 1068
1058 1069 h = hashlib.sha1(compressed).hexdigest()
1059 self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06')
1070 self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06")
1060 1071
1061 source = b'foo' + b'bar' + (b'foo' * 16384)
1072 source = b"foo" + b"bar" + (b"foo" * 16384)
1062 1073
1063 1074 dctx = zstd.ZstdDecompressor(dict_data=d)
1064 1075
1065 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
1066 source)
1076 self.assertEqual(
1077 dctx.decompress(compressed, max_output_size=len(source)), source
1078 )
1067 1079
1068 1080 def test_compression_params(self):
1069 1081 params = zstd.ZstdCompressionParameters(
@@ -1073,14 +1085,15 class TestCompressor_stream_writer(unitt
1073 1085 min_match=5,
1074 1086 search_log=4,
1075 1087 target_length=10,
1076 strategy=zstd.STRATEGY_FAST)
1088 strategy=zstd.STRATEGY_FAST,
1089 )
1077 1090
1078 1091 buffer = NonClosingBytesIO()
1079 1092 cctx = zstd.ZstdCompressor(compression_params=params)
1080 1093 with cctx.stream_writer(buffer) as compressor:
1081 self.assertEqual(compressor.write(b'foo'), 0)
1082 self.assertEqual(compressor.write(b'bar'), 0)
1083 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
1094 self.assertEqual(compressor.write(b"foo"), 0)
1095 self.assertEqual(compressor.write(b"bar"), 0)
1096 self.assertEqual(compressor.write(b"foobar" * 16384), 0)
1084 1097
1085 1098 compressed = buffer.getvalue()
1086 1099
@@ -1091,18 +1104,18 class TestCompressor_stream_writer(unitt
1091 1104 self.assertFalse(params.has_checksum)
1092 1105
1093 1106 h = hashlib.sha1(compressed).hexdigest()
1094 self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b')
1107 self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b")
1095 1108
1096 1109 def test_write_checksum(self):
1097 1110 no_checksum = NonClosingBytesIO()
1098 1111 cctx = zstd.ZstdCompressor(level=1)
1099 1112 with cctx.stream_writer(no_checksum) as compressor:
1100 self.assertEqual(compressor.write(b'foobar'), 0)
1113 self.assertEqual(compressor.write(b"foobar"), 0)
1101 1114
1102 1115 with_checksum = NonClosingBytesIO()
1103 1116 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1104 1117 with cctx.stream_writer(with_checksum) as compressor:
1105 self.assertEqual(compressor.write(b'foobar'), 0)
1118 self.assertEqual(compressor.write(b"foobar"), 0)
1106 1119
1107 1120 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
1108 1121 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
@@ -1113,29 +1126,27 class TestCompressor_stream_writer(unitt
1113 1126 self.assertFalse(no_params.has_checksum)
1114 1127 self.assertTrue(with_params.has_checksum)
1115 1128
1116 self.assertEqual(len(with_checksum.getvalue()),
1117 len(no_checksum.getvalue()) + 4)
1129 self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4)
1118 1130
1119 1131 def test_write_content_size(self):
1120 1132 no_size = NonClosingBytesIO()
1121 1133 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1122 1134 with cctx.stream_writer(no_size) as compressor:
1123 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1135 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1124 1136
1125 1137 with_size = NonClosingBytesIO()
1126 1138 cctx = zstd.ZstdCompressor(level=1)
1127 1139 with cctx.stream_writer(with_size) as compressor:
1128 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1140 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1129 1141
1130 1142 # Source size is not known in streaming mode, so header not
1131 1143 # written.
1132 self.assertEqual(len(with_size.getvalue()),
1133 len(no_size.getvalue()))
1144 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()))
1134 1145
1135 1146 # Declaring size will write the header.
1136 1147 with_size = NonClosingBytesIO()
1137 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
1138 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1148 with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor:
1149 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1139 1150
1140 1151 no_params = zstd.get_frame_parameters(no_size.getvalue())
1141 1152 with_params = zstd.get_frame_parameters(with_size.getvalue())
@@ -1146,31 +1157,30 class TestCompressor_stream_writer(unitt
1146 1157 self.assertFalse(no_params.has_checksum)
1147 1158 self.assertFalse(with_params.has_checksum)
1148 1159
1149 self.assertEqual(len(with_size.getvalue()),
1150 len(no_size.getvalue()) + 1)
1160 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1)
1151 1161
1152 1162 def test_no_dict_id(self):
1153 1163 samples = []
1154 1164 for i in range(128):
1155 samples.append(b'foo' * 64)
1156 samples.append(b'bar' * 64)
1157 samples.append(b'foobar' * 64)
1165 samples.append(b"foo" * 64)
1166 samples.append(b"bar" * 64)
1167 samples.append(b"foobar" * 64)
1158 1168
1159 1169 d = zstd.train_dictionary(1024, samples)
1160 1170
1161 1171 with_dict_id = NonClosingBytesIO()
1162 1172 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1163 1173 with cctx.stream_writer(with_dict_id) as compressor:
1164 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1174 self.assertEqual(compressor.write(b"foobarfoobar"), 0)
1165 1175
1166 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
1176 self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03")
1167 1177
1168 1178 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1169 1179 no_dict_id = NonClosingBytesIO()
1170 1180 with cctx.stream_writer(no_dict_id) as compressor:
1171 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1181 self.assertEqual(compressor.write(b"foobarfoobar"), 0)
1172 1182
1173 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
1183 self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00")
1174 1184
1175 1185 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
1176 1186 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
@@ -1181,14 +1191,13 class TestCompressor_stream_writer(unitt
1181 1191 self.assertFalse(no_params.has_checksum)
1182 1192 self.assertFalse(with_params.has_checksum)
1183 1193
1184 self.assertEqual(len(with_dict_id.getvalue()),
1185 len(no_dict_id.getvalue()) + 4)
1194 self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4)
1186 1195
1187 1196 def test_memory_size(self):
1188 1197 cctx = zstd.ZstdCompressor(level=3)
1189 1198 buffer = io.BytesIO()
1190 1199 with cctx.stream_writer(buffer) as compressor:
1191 compressor.write(b'foo')
1200 compressor.write(b"foo")
1192 1201 size = compressor.memory_size()
1193 1202
1194 1203 self.assertGreater(size, 100000)
@@ -1197,9 +1206,9 class TestCompressor_stream_writer(unitt
1197 1206 cctx = zstd.ZstdCompressor(level=3)
1198 1207 dest = OpCountingBytesIO()
1199 1208 with cctx.stream_writer(dest, write_size=1) as compressor:
1200 self.assertEqual(compressor.write(b'foo'), 0)
1201 self.assertEqual(compressor.write(b'bar'), 0)
1202 self.assertEqual(compressor.write(b'foobar'), 0)
1209 self.assertEqual(compressor.write(b"foo"), 0)
1210 self.assertEqual(compressor.write(b"bar"), 0)
1211 self.assertEqual(compressor.write(b"foobar"), 0)
1203 1212
1204 1213 self.assertEqual(len(dest.getvalue()), dest._write_count)
1205 1214
@@ -1207,15 +1216,15 class TestCompressor_stream_writer(unitt
1207 1216 cctx = zstd.ZstdCompressor(level=3)
1208 1217 dest = OpCountingBytesIO()
1209 1218 with cctx.stream_writer(dest) as compressor:
1210 self.assertEqual(compressor.write(b'foo'), 0)
1219 self.assertEqual(compressor.write(b"foo"), 0)
1211 1220 self.assertEqual(dest._write_count, 0)
1212 1221 self.assertEqual(compressor.flush(), 12)
1213 1222 self.assertEqual(dest._write_count, 1)
1214 self.assertEqual(compressor.write(b'bar'), 0)
1223 self.assertEqual(compressor.write(b"bar"), 0)
1215 1224 self.assertEqual(dest._write_count, 1)
1216 1225 self.assertEqual(compressor.flush(), 6)
1217 1226 self.assertEqual(dest._write_count, 2)
1218 self.assertEqual(compressor.write(b'baz'), 0)
1227 self.assertEqual(compressor.write(b"baz"), 0)
1219 1228
1220 1229 self.assertEqual(dest._write_count, 3)
1221 1230
@@ -1223,7 +1232,7 class TestCompressor_stream_writer(unitt
1223 1232 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
1224 1233 dest = OpCountingBytesIO()
1225 1234 with cctx.stream_writer(dest) as compressor:
1226 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1235 self.assertEqual(compressor.write(b"foobar" * 8192), 0)
1227 1236 count = dest._write_count
1228 1237 offset = dest.tell()
1229 1238 self.assertEqual(compressor.flush(), 23)
@@ -1238,41 +1247,43 class TestCompressor_stream_writer(unitt
1238 1247 self.assertEqual(len(trailing), 7)
1239 1248
1240 1249 header = trailing[0:3]
1241 self.assertEqual(header, b'\x01\x00\x00')
1250 self.assertEqual(header, b"\x01\x00\x00")
1242 1251
1243 1252 def test_flush_frame(self):
1244 1253 cctx = zstd.ZstdCompressor(level=3)
1245 1254 dest = OpCountingBytesIO()
1246 1255
1247 1256 with cctx.stream_writer(dest) as compressor:
1248 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1257 self.assertEqual(compressor.write(b"foobar" * 8192), 0)
1249 1258 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1250 compressor.write(b'biz' * 16384)
1259 compressor.write(b"biz" * 16384)
1251 1260
1252 self.assertEqual(dest.getvalue(),
1253 # Frame 1.
1254 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f'
1255 b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08'
1256 # Frame 2.
1257 b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a'
1258 b'\x01\x00\xfa\x3f\x75\x37\x04')
1261 self.assertEqual(
1262 dest.getvalue(),
1263 # Frame 1.
1264 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f"
1265 b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08"
1266 # Frame 2.
1267 b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a"
1268 b"\x01\x00\xfa\x3f\x75\x37\x04",
1269 )
1259 1270
1260 1271 def test_bad_flush_mode(self):
1261 1272 cctx = zstd.ZstdCompressor()
1262 1273 dest = io.BytesIO()
1263 1274 with cctx.stream_writer(dest) as compressor:
1264 with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'):
1275 with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"):
1265 1276 compressor.flush(flush_mode=42)
1266 1277
1267 1278 def test_multithreaded(self):
1268 1279 dest = NonClosingBytesIO()
1269 1280 cctx = zstd.ZstdCompressor(threads=2)
1270 1281 with cctx.stream_writer(dest) as compressor:
1271 compressor.write(b'a' * 1048576)
1272 compressor.write(b'b' * 1048576)
1273 compressor.write(b'c' * 1048576)
1282 compressor.write(b"a" * 1048576)
1283 compressor.write(b"b" * 1048576)
1284 compressor.write(b"c" * 1048576)
1274 1285
1275 self.assertEqual(len(dest.getvalue()), 295)
1286 self.assertEqual(len(dest.getvalue()), 111)
1276 1287
1277 1288 def test_tell(self):
1278 1289 dest = io.BytesIO()
@@ -1281,7 +1292,7 class TestCompressor_stream_writer(unitt
1281 1292 self.assertEqual(compressor.tell(), 0)
1282 1293
1283 1294 for i in range(256):
1284 compressor.write(b'foo' * (i + 1))
1295 compressor.write(b"foo" * (i + 1))
1285 1296 self.assertEqual(compressor.tell(), dest.tell())
1286 1297
1287 1298 def test_bad_size(self):
@@ -1289,9 +1300,9 class TestCompressor_stream_writer(unitt
1289 1300
1290 1301 dest = io.BytesIO()
1291 1302
1292 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1303 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
1293 1304 with cctx.stream_writer(dest, size=2) as compressor:
1294 compressor.write(b'foo')
1305 compressor.write(b"foo")
1295 1306
1296 1307 # Test another operation.
1297 1308 with cctx.stream_writer(dest, size=42):
@@ -1301,20 +1312,20 class TestCompressor_stream_writer(unitt
1301 1312 dest = NonClosingBytesIO()
1302 1313 cctx = zstd.ZstdCompressor()
1303 1314 with cctx.stream_writer(dest) as compressor:
1304 with tarfile.open('tf', mode='w|', fileobj=compressor) as tf:
1305 tf.add(__file__, 'test_compressor.py')
1315 with tarfile.open("tf", mode="w|", fileobj=compressor) as tf:
1316 tf.add(__file__, "test_compressor.py")
1306 1317
1307 1318 dest = io.BytesIO(dest.getvalue())
1308 1319
1309 1320 dctx = zstd.ZstdDecompressor()
1310 1321 with dctx.stream_reader(dest) as reader:
1311 with tarfile.open(mode='r|', fileobj=reader) as tf:
1322 with tarfile.open(mode="r|", fileobj=reader) as tf:
1312 1323 for member in tf:
1313 self.assertEqual(member.name, 'test_compressor.py')
1324 self.assertEqual(member.name, "test_compressor.py")
1314 1325
1315 1326
1316 1327 @make_cffi
1317 class TestCompressor_read_to_iter(unittest.TestCase):
1328 class TestCompressor_read_to_iter(TestCase):
1318 1329 def test_type_validation(self):
1319 1330 cctx = zstd.ZstdCompressor()
1320 1331
@@ -1323,10 +1334,10 class TestCompressor_read_to_iter(unitte
1323 1334 pass
1324 1335
1325 1336 # Buffer protocol works.
1326 for chunk in cctx.read_to_iter(b'foobar'):
1337 for chunk in cctx.read_to_iter(b"foobar"):
1327 1338 pass
1328 1339
1329 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1340 with self.assertRaisesRegex(ValueError, "must pass an object with a read"):
1330 1341 for chunk in cctx.read_to_iter(True):
1331 1342 pass
1332 1343
@@ -1337,22 +1348,22 class TestCompressor_read_to_iter(unitte
1337 1348 it = cctx.read_to_iter(source)
1338 1349 chunks = list(it)
1339 1350 self.assertEqual(len(chunks), 1)
1340 compressed = b''.join(chunks)
1341 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1351 compressed = b"".join(chunks)
1352 self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
1342 1353
1343 1354 # And again with the buffer protocol.
1344 it = cctx.read_to_iter(b'')
1355 it = cctx.read_to_iter(b"")
1345 1356 chunks = list(it)
1346 1357 self.assertEqual(len(chunks), 1)
1347 compressed2 = b''.join(chunks)
1358 compressed2 = b"".join(chunks)
1348 1359 self.assertEqual(compressed2, compressed)
1349 1360
1350 1361 def test_read_large(self):
1351 1362 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1352 1363
1353 1364 source = io.BytesIO()
1354 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1355 source.write(b'o')
1365 source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1366 source.write(b"o")
1356 1367 source.seek(0)
1357 1368
1358 1369 # Creating an iterator should not perform any compression until
@@ -1380,9 +1391,9 class TestCompressor_read_to_iter(unitte
1380 1391 next(it)
1381 1392
1382 1393 # We should get the same output as the one-shot compression mechanism.
1383 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1394 self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue()))
1384 1395
1385 params = zstd.get_frame_parameters(b''.join(chunks))
1396 params = zstd.get_frame_parameters(b"".join(chunks))
1386 1397 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1387 1398 self.assertEqual(params.window_size, 262144)
1388 1399 self.assertEqual(params.dict_id, 0)
@@ -1393,16 +1404,16 class TestCompressor_read_to_iter(unitte
1393 1404 chunks = list(it)
1394 1405 self.assertEqual(len(chunks), 2)
1395 1406
1396 params = zstd.get_frame_parameters(b''.join(chunks))
1407 params = zstd.get_frame_parameters(b"".join(chunks))
1397 1408 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1398 #self.assertEqual(params.window_size, 262144)
1409 # self.assertEqual(params.window_size, 262144)
1399 1410 self.assertEqual(params.dict_id, 0)
1400 1411 self.assertFalse(params.has_checksum)
1401 1412
1402 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1413 self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue()))
1403 1414
1404 1415 def test_read_write_size(self):
1405 source = OpCountingBytesIO(b'foobarfoobar')
1416 source = OpCountingBytesIO(b"foobarfoobar")
1406 1417 cctx = zstd.ZstdCompressor(level=3)
1407 1418 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1408 1419 self.assertEqual(len(chunk), 1)
@@ -1411,42 +1422,42 class TestCompressor_read_to_iter(unitte
1411 1422
1412 1423 def test_multithreaded(self):
1413 1424 source = io.BytesIO()
1414 source.write(b'a' * 1048576)
1415 source.write(b'b' * 1048576)
1416 source.write(b'c' * 1048576)
1425 source.write(b"a" * 1048576)
1426 source.write(b"b" * 1048576)
1427 source.write(b"c" * 1048576)
1417 1428 source.seek(0)
1418 1429
1419 1430 cctx = zstd.ZstdCompressor(threads=2)
1420 1431
1421 compressed = b''.join(cctx.read_to_iter(source))
1422 self.assertEqual(len(compressed), 295)
1432 compressed = b"".join(cctx.read_to_iter(source))
1433 self.assertEqual(len(compressed), 111)
1423 1434
1424 1435 def test_bad_size(self):
1425 1436 cctx = zstd.ZstdCompressor()
1426 1437
1427 source = io.BytesIO(b'a' * 42)
1438 source = io.BytesIO(b"a" * 42)
1428 1439
1429 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1430 b''.join(cctx.read_to_iter(source, size=2))
1440 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
1441 b"".join(cctx.read_to_iter(source, size=2))
1431 1442
1432 1443 # Test another operation on errored compressor.
1433 b''.join(cctx.read_to_iter(source))
1444 b"".join(cctx.read_to_iter(source))
1434 1445
1435 1446
1436 1447 @make_cffi
1437 class TestCompressor_chunker(unittest.TestCase):
1448 class TestCompressor_chunker(TestCase):
1438 1449 def test_empty(self):
1439 1450 cctx = zstd.ZstdCompressor(write_content_size=False)
1440 1451 chunker = cctx.chunker()
1441 1452
1442 it = chunker.compress(b'')
1453 it = chunker.compress(b"")
1443 1454
1444 1455 with self.assertRaises(StopIteration):
1445 1456 next(it)
1446 1457
1447 1458 it = chunker.finish()
1448 1459
1449 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00')
1460 self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00")
1450 1461
1451 1462 with self.assertRaises(StopIteration):
1452 1463 next(it)
@@ -1455,21 +1466,23 class TestCompressor_chunker(unittest.Te
1455 1466 cctx = zstd.ZstdCompressor()
1456 1467 chunker = cctx.chunker()
1457 1468
1458 it = chunker.compress(b'foobar')
1469 it = chunker.compress(b"foobar")
1459 1470
1460 1471 with self.assertRaises(StopIteration):
1461 1472 next(it)
1462 1473
1463 it = chunker.compress(b'baz' * 30)
1474 it = chunker.compress(b"baz" * 30)
1464 1475
1465 1476 with self.assertRaises(StopIteration):
1466 1477 next(it)
1467 1478
1468 1479 it = chunker.finish()
1469 1480
1470 self.assertEqual(next(it),
1471 b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f'
1472 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1481 self.assertEqual(
1482 next(it),
1483 b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f"
1484 b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e",
1485 )
1473 1486
1474 1487 with self.assertRaises(StopIteration):
1475 1488 next(it)
@@ -1478,57 +1491,60 class TestCompressor_chunker(unittest.Te
1478 1491 cctx = zstd.ZstdCompressor()
1479 1492 chunker = cctx.chunker(size=1024)
1480 1493
1481 it = chunker.compress(b'x' * 1000)
1494 it = chunker.compress(b"x" * 1000)
1482 1495
1483 1496 with self.assertRaises(StopIteration):
1484 1497 next(it)
1485 1498
1486 it = chunker.compress(b'y' * 24)
1499 it = chunker.compress(b"y" * 24)
1487 1500
1488 1501 with self.assertRaises(StopIteration):
1489 1502 next(it)
1490 1503
1491 1504 chunks = list(chunker.finish())
1492 1505
1493 self.assertEqual(chunks, [
1494 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1495 b'\xa0\x16\xe3\x2b\x80\x05'
1496 ])
1506 self.assertEqual(
1507 chunks,
1508 [
1509 b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00"
1510 b"\xa0\x16\xe3\x2b\x80\x05"
1511 ],
1512 )
1497 1513
1498 1514 dctx = zstd.ZstdDecompressor()
1499 1515
1500 self.assertEqual(dctx.decompress(b''.join(chunks)),
1501 (b'x' * 1000) + (b'y' * 24))
1516 self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24))
1502 1517
1503 1518 def test_small_chunk_size(self):
1504 1519 cctx = zstd.ZstdCompressor()
1505 1520 chunker = cctx.chunker(chunk_size=1)
1506 1521
1507 chunks = list(chunker.compress(b'foo' * 1024))
1522 chunks = list(chunker.compress(b"foo" * 1024))
1508 1523 self.assertEqual(chunks, [])
1509 1524
1510 1525 chunks = list(chunker.finish())
1511 1526 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1512 1527
1513 1528 self.assertEqual(
1514 b''.join(chunks),
1515 b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1516 b'\xfa\xd3\x77\x43')
1529 b"".join(chunks),
1530 b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00"
1531 b"\xfa\xd3\x77\x43",
1532 )
1517 1533
1518 1534 dctx = zstd.ZstdDecompressor()
1519 self.assertEqual(dctx.decompress(b''.join(chunks),
1520 max_output_size=10000),
1521 b'foo' * 1024)
1535 self.assertEqual(
1536 dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024
1537 )
1522 1538
1523 1539 def test_input_types(self):
1524 1540 cctx = zstd.ZstdCompressor()
1525 1541
1526 1542 mutable_array = bytearray(3)
1527 mutable_array[:] = b'foo'
1543 mutable_array[:] = b"foo"
1528 1544
1529 1545 sources = [
1530 memoryview(b'foo'),
1531 bytearray(b'foo'),
1546 memoryview(b"foo"),
1547 bytearray(b"foo"),
1532 1548 mutable_array,
1533 1549 ]
1534 1550
@@ -1536,28 +1552,32 class TestCompressor_chunker(unittest.Te
1536 1552 chunker = cctx.chunker()
1537 1553
1538 1554 self.assertEqual(list(chunker.compress(source)), [])
1539 self.assertEqual(list(chunker.finish()), [
1540 b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f'
1541 ])
1555 self.assertEqual(
1556 list(chunker.finish()),
1557 [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"],
1558 )
1542 1559
1543 1560 def test_flush(self):
1544 1561 cctx = zstd.ZstdCompressor()
1545 1562 chunker = cctx.chunker()
1546 1563
1547 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1548 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1564 self.assertEqual(list(chunker.compress(b"foo" * 1024)), [])
1565 self.assertEqual(list(chunker.compress(b"bar" * 1024)), [])
1549 1566
1550 1567 chunks1 = list(chunker.flush())
1551 1568
1552 self.assertEqual(chunks1, [
1553 b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1554 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1555 ])
1569 self.assertEqual(
1570 chunks1,
1571 [
1572 b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72"
1573 b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02"
1574 ],
1575 )
1556 1576
1557 1577 self.assertEqual(list(chunker.flush()), [])
1558 1578 self.assertEqual(list(chunker.flush()), [])
1559 1579
1560 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1580 self.assertEqual(list(chunker.compress(b"baz" * 1024)), [])
1561 1581
1562 1582 chunks2 = list(chunker.flush())
1563 1583 self.assertEqual(len(chunks2), 1)
@@ -1567,53 +1587,56 class TestCompressor_chunker(unittest.Te
1567 1587
1568 1588 dctx = zstd.ZstdDecompressor()
1569 1589
1570 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1571 max_output_size=10000),
1572 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1590 self.assertEqual(
1591 dctx.decompress(
1592 b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000
1593 ),
1594 (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024),
1595 )
1573 1596
1574 1597 def test_compress_after_finish(self):
1575 1598 cctx = zstd.ZstdCompressor()
1576 1599 chunker = cctx.chunker()
1577 1600
1578 list(chunker.compress(b'foo'))
1601 list(chunker.compress(b"foo"))
1579 1602 list(chunker.finish())
1580 1603
1581 with self.assertRaisesRegexp(
1582 zstd.ZstdError,
1583 r'cannot call compress\(\) after compression finished'):
1584 list(chunker.compress(b'foo'))
1604 with self.assertRaisesRegex(
1605 zstd.ZstdError, r"cannot call compress\(\) after compression finished"
1606 ):
1607 list(chunker.compress(b"foo"))
1585 1608
1586 1609 def test_flush_after_finish(self):
1587 1610 cctx = zstd.ZstdCompressor()
1588 1611 chunker = cctx.chunker()
1589 1612
1590 list(chunker.compress(b'foo'))
1613 list(chunker.compress(b"foo"))
1591 1614 list(chunker.finish())
1592 1615
1593 with self.assertRaisesRegexp(
1594 zstd.ZstdError,
1595 r'cannot call flush\(\) after compression finished'):
1616 with self.assertRaisesRegex(
1617 zstd.ZstdError, r"cannot call flush\(\) after compression finished"
1618 ):
1596 1619 list(chunker.flush())
1597 1620
1598 1621 def test_finish_after_finish(self):
1599 1622 cctx = zstd.ZstdCompressor()
1600 1623 chunker = cctx.chunker()
1601 1624
1602 list(chunker.compress(b'foo'))
1625 list(chunker.compress(b"foo"))
1603 1626 list(chunker.finish())
1604 1627
1605 with self.assertRaisesRegexp(
1606 zstd.ZstdError,
1607 r'cannot call finish\(\) after compression finished'):
1628 with self.assertRaisesRegex(
1629 zstd.ZstdError, r"cannot call finish\(\) after compression finished"
1630 ):
1608 1631 list(chunker.finish())
1609 1632
1610 1633
1611 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1634 class TestCompressor_multi_compress_to_buffer(TestCase):
1612 1635 def test_invalid_inputs(self):
1613 1636 cctx = zstd.ZstdCompressor()
1614 1637
1615 if not hasattr(cctx, 'multi_compress_to_buffer'):
1616 self.skipTest('multi_compress_to_buffer not available')
1638 if not hasattr(cctx, "multi_compress_to_buffer"):
1639 self.skipTest("multi_compress_to_buffer not available")
1617 1640
1618 1641 with self.assertRaises(TypeError):
1619 1642 cctx.multi_compress_to_buffer(True)
@@ -1621,28 +1644,28 class TestCompressor_multi_compress_to_b
1621 1644 with self.assertRaises(TypeError):
1622 1645 cctx.multi_compress_to_buffer((1, 2))
1623 1646
1624 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1625 cctx.multi_compress_to_buffer([u'foo'])
1647 with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
1648 cctx.multi_compress_to_buffer([u"foo"])
1626 1649
1627 1650 def test_empty_input(self):
1628 1651 cctx = zstd.ZstdCompressor()
1629 1652
1630 if not hasattr(cctx, 'multi_compress_to_buffer'):
1631 self.skipTest('multi_compress_to_buffer not available')
1653 if not hasattr(cctx, "multi_compress_to_buffer"):
1654 self.skipTest("multi_compress_to_buffer not available")
1632 1655
1633 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1656 with self.assertRaisesRegex(ValueError, "no source elements found"):
1634 1657 cctx.multi_compress_to_buffer([])
1635 1658
1636 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
1637 cctx.multi_compress_to_buffer([b'', b'', b''])
1659 with self.assertRaisesRegex(ValueError, "source elements are empty"):
1660 cctx.multi_compress_to_buffer([b"", b"", b""])
1638 1661
1639 1662 def test_list_input(self):
1640 1663 cctx = zstd.ZstdCompressor(write_checksum=True)
1641 1664
1642 if not hasattr(cctx, 'multi_compress_to_buffer'):
1643 self.skipTest('multi_compress_to_buffer not available')
1665 if not hasattr(cctx, "multi_compress_to_buffer"):
1666 self.skipTest("multi_compress_to_buffer not available")
1644 1667
1645 original = [b'foo' * 12, b'bar' * 6]
1668 original = [b"foo" * 12, b"bar" * 6]
1646 1669 frames = [cctx.compress(c) for c in original]
1647 1670 b = cctx.multi_compress_to_buffer(original)
1648 1671
@@ -1657,15 +1680,16 class TestCompressor_multi_compress_to_b
1657 1680 def test_buffer_with_segments_input(self):
1658 1681 cctx = zstd.ZstdCompressor(write_checksum=True)
1659 1682
1660 if not hasattr(cctx, 'multi_compress_to_buffer'):
1661 self.skipTest('multi_compress_to_buffer not available')
1683 if not hasattr(cctx, "multi_compress_to_buffer"):
1684 self.skipTest("multi_compress_to_buffer not available")
1662 1685
1663 original = [b'foo' * 4, b'bar' * 6]
1686 original = [b"foo" * 4, b"bar" * 6]
1664 1687 frames = [cctx.compress(c) for c in original]
1665 1688
1666 offsets = struct.pack('=QQQQ', 0, len(original[0]),
1667 len(original[0]), len(original[1]))
1668 segments = zstd.BufferWithSegments(b''.join(original), offsets)
1689 offsets = struct.pack(
1690 "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1])
1691 )
1692 segments = zstd.BufferWithSegments(b"".join(original), offsets)
1669 1693
1670 1694 result = cctx.multi_compress_to_buffer(segments)
1671 1695
@@ -1678,28 +1702,39 class TestCompressor_multi_compress_to_b
1678 1702 def test_buffer_with_segments_collection_input(self):
1679 1703 cctx = zstd.ZstdCompressor(write_checksum=True)
1680 1704
1681 if not hasattr(cctx, 'multi_compress_to_buffer'):
1682 self.skipTest('multi_compress_to_buffer not available')
1705 if not hasattr(cctx, "multi_compress_to_buffer"):
1706 self.skipTest("multi_compress_to_buffer not available")
1683 1707
1684 1708 original = [
1685 b'foo1',
1686 b'foo2' * 2,
1687 b'foo3' * 3,
1688 b'foo4' * 4,
1689 b'foo5' * 5,
1709 b"foo1",
1710 b"foo2" * 2,
1711 b"foo3" * 3,
1712 b"foo4" * 4,
1713 b"foo5" * 5,
1690 1714 ]
1691 1715
1692 1716 frames = [cctx.compress(c) for c in original]
1693 1717
1694 b = b''.join([original[0], original[1]])
1695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1696 0, len(original[0]),
1697 len(original[0]), len(original[1])))
1698 b = b''.join([original[2], original[3], original[4]])
1699 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1700 0, len(original[2]),
1701 len(original[2]), len(original[3]),
1702 len(original[2]) + len(original[3]), len(original[4])))
1718 b = b"".join([original[0], original[1]])
1719 b1 = zstd.BufferWithSegments(
1720 b,
1721 struct.pack(
1722 "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1])
1723 ),
1724 )
1725 b = b"".join([original[2], original[3], original[4]])
1726 b2 = zstd.BufferWithSegments(
1727 b,
1728 struct.pack(
1729 "=QQQQQQ",
1730 0,
1731 len(original[2]),
1732 len(original[2]),
1733 len(original[3]),
1734 len(original[2]) + len(original[3]),
1735 len(original[4]),
1736 ),
1737 )
1703 1738
1704 1739 c = zstd.BufferWithSegmentsCollection(b1, b2)
1705 1740
@@ -1714,16 +1749,16 class TestCompressor_multi_compress_to_b
1714 1749 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1715 1750 # make output different.
1716 1751 refcctx = zstd.ZstdCompressor(write_checksum=True)
1717 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1752 reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)]
1718 1753
1719 1754 cctx = zstd.ZstdCompressor(write_checksum=True)
1720 1755
1721 if not hasattr(cctx, 'multi_compress_to_buffer'):
1722 self.skipTest('multi_compress_to_buffer not available')
1756 if not hasattr(cctx, "multi_compress_to_buffer"):
1757 self.skipTest("multi_compress_to_buffer not available")
1723 1758
1724 1759 frames = []
1725 frames.extend(b'x' * 64 for i in range(256))
1726 frames.extend(b'y' * 64 for i in range(256))
1760 frames.extend(b"x" * 64 for i in range(256))
1761 frames.extend(b"y" * 64 for i in range(256))
1727 1762
1728 1763 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1729 1764
This diff has been collapsed as it changes many lines, (631 lines changed) Show them Hide them
@@ -6,28 +6,31 try:
6 6 import hypothesis
7 7 import hypothesis.strategies as strategies
8 8 except ImportError:
9 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest("hypothesis not available")
10 10
11 11 import zstandard as zstd
12 12
13 from . common import (
13 from .common import (
14 14 make_cffi,
15 15 NonClosingBytesIO,
16 16 random_input_data,
17 TestCase,
17 18 )
18 19
19 20
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
21 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 22 @make_cffi
22 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
23 class TestCompressor_stream_reader_fuzzing(TestCase):
23 24 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 level=strategies.integers(min_value=1, max_value=5),
27 source_read_size=strategies.integers(1, 16384),
28 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
29 def test_stream_source_read(self, original, level, source_read_size,
30 read_size):
25 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
26 )
27 @hypothesis.given(
28 original=strategies.sampled_from(random_input_data()),
29 level=strategies.integers(min_value=1, max_value=5),
30 source_read_size=strategies.integers(1, 16384),
31 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
32 )
33 def test_stream_source_read(self, original, level, source_read_size, read_size):
31 34 if read_size == 0:
32 35 read_size = -1
33 36
@@ -35,8 +38,9 class TestCompressor_stream_reader_fuzzi
35 38 ref_frame = refctx.compress(original)
36 39
37 40 cctx = zstd.ZstdCompressor(level=level)
38 with cctx.stream_reader(io.BytesIO(original), size=len(original),
39 read_size=source_read_size) as reader:
41 with cctx.stream_reader(
42 io.BytesIO(original), size=len(original), read_size=source_read_size
43 ) as reader:
40 44 chunks = []
41 45 while True:
42 46 chunk = reader.read(read_size)
@@ -45,16 +49,18 class TestCompressor_stream_reader_fuzzi
45 49
46 50 chunks.append(chunk)
47 51
48 self.assertEqual(b''.join(chunks), ref_frame)
52 self.assertEqual(b"".join(chunks), ref_frame)
49 53
50 54 @hypothesis.settings(
51 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
52 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
53 level=strategies.integers(min_value=1, max_value=5),
54 source_read_size=strategies.integers(1, 16384),
55 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
56 def test_buffer_source_read(self, original, level, source_read_size,
57 read_size):
55 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
56 )
57 @hypothesis.given(
58 original=strategies.sampled_from(random_input_data()),
59 level=strategies.integers(min_value=1, max_value=5),
60 source_read_size=strategies.integers(1, 16384),
61 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
62 )
63 def test_buffer_source_read(self, original, level, source_read_size, read_size):
58 64 if read_size == 0:
59 65 read_size = -1
60 66
@@ -62,8 +68,9 class TestCompressor_stream_reader_fuzzi
62 68 ref_frame = refctx.compress(original)
63 69
64 70 cctx = zstd.ZstdCompressor(level=level)
65 with cctx.stream_reader(original, size=len(original),
66 read_size=source_read_size) as reader:
71 with cctx.stream_reader(
72 original, size=len(original), read_size=source_read_size
73 ) as reader:
67 74 chunks = []
68 75 while True:
69 76 chunk = reader.read(read_size)
@@ -72,22 +79,30 class TestCompressor_stream_reader_fuzzi
72 79
73 80 chunks.append(chunk)
74 81
75 self.assertEqual(b''.join(chunks), ref_frame)
82 self.assertEqual(b"".join(chunks), ref_frame)
76 83
77 84 @hypothesis.settings(
78 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
79 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
80 level=strategies.integers(min_value=1, max_value=5),
81 source_read_size=strategies.integers(1, 16384),
82 read_sizes=strategies.data())
83 def test_stream_source_read_variance(self, original, level, source_read_size,
84 read_sizes):
85 suppress_health_check=[
86 hypothesis.HealthCheck.large_base_example,
87 hypothesis.HealthCheck.too_slow,
88 ]
89 )
90 @hypothesis.given(
91 original=strategies.sampled_from(random_input_data()),
92 level=strategies.integers(min_value=1, max_value=5),
93 source_read_size=strategies.integers(1, 16384),
94 read_sizes=strategies.data(),
95 )
96 def test_stream_source_read_variance(
97 self, original, level, source_read_size, read_sizes
98 ):
85 99 refctx = zstd.ZstdCompressor(level=level)
86 100 ref_frame = refctx.compress(original)
87 101
88 102 cctx = zstd.ZstdCompressor(level=level)
89 with cctx.stream_reader(io.BytesIO(original), size=len(original),
90 read_size=source_read_size) as reader:
103 with cctx.stream_reader(
104 io.BytesIO(original), size=len(original), read_size=source_read_size
105 ) as reader:
91 106 chunks = []
92 107 while True:
93 108 read_size = read_sizes.draw(strategies.integers(-1, 16384))
@@ -97,23 +112,31 class TestCompressor_stream_reader_fuzzi
97 112
98 113 chunks.append(chunk)
99 114
100 self.assertEqual(b''.join(chunks), ref_frame)
115 self.assertEqual(b"".join(chunks), ref_frame)
101 116
102 117 @hypothesis.settings(
103 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
106 source_read_size=strategies.integers(1, 16384),
107 read_sizes=strategies.data())
108 def test_buffer_source_read_variance(self, original, level, source_read_size,
109 read_sizes):
118 suppress_health_check=[
119 hypothesis.HealthCheck.large_base_example,
120 hypothesis.HealthCheck.too_slow,
121 ]
122 )
123 @hypothesis.given(
124 original=strategies.sampled_from(random_input_data()),
125 level=strategies.integers(min_value=1, max_value=5),
126 source_read_size=strategies.integers(1, 16384),
127 read_sizes=strategies.data(),
128 )
129 def test_buffer_source_read_variance(
130 self, original, level, source_read_size, read_sizes
131 ):
110 132
111 133 refctx = zstd.ZstdCompressor(level=level)
112 134 ref_frame = refctx.compress(original)
113 135
114 136 cctx = zstd.ZstdCompressor(level=level)
115 with cctx.stream_reader(original, size=len(original),
116 read_size=source_read_size) as reader:
137 with cctx.stream_reader(
138 original, size=len(original), read_size=source_read_size
139 ) as reader:
117 140 chunks = []
118 141 while True:
119 142 read_size = read_sizes.draw(strategies.integers(-1, 16384))
@@ -123,22 +146,25 class TestCompressor_stream_reader_fuzzi
123 146
124 147 chunks.append(chunk)
125 148
126 self.assertEqual(b''.join(chunks), ref_frame)
149 self.assertEqual(b"".join(chunks), ref_frame)
127 150
128 151 @hypothesis.settings(
129 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
130 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
131 level=strategies.integers(min_value=1, max_value=5),
132 source_read_size=strategies.integers(1, 16384),
133 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
134 def test_stream_source_readinto(self, original, level,
135 source_read_size, read_size):
152 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
153 )
154 @hypothesis.given(
155 original=strategies.sampled_from(random_input_data()),
156 level=strategies.integers(min_value=1, max_value=5),
157 source_read_size=strategies.integers(1, 16384),
158 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
159 )
160 def test_stream_source_readinto(self, original, level, source_read_size, read_size):
136 161 refctx = zstd.ZstdCompressor(level=level)
137 162 ref_frame = refctx.compress(original)
138 163
139 164 cctx = zstd.ZstdCompressor(level=level)
140 with cctx.stream_reader(io.BytesIO(original), size=len(original),
141 read_size=source_read_size) as reader:
165 with cctx.stream_reader(
166 io.BytesIO(original), size=len(original), read_size=source_read_size
167 ) as reader:
142 168 chunks = []
143 169 while True:
144 170 b = bytearray(read_size)
@@ -149,23 +175,26 class TestCompressor_stream_reader_fuzzi
149 175
150 176 chunks.append(bytes(b[0:count]))
151 177
152 self.assertEqual(b''.join(chunks), ref_frame)
178 self.assertEqual(b"".join(chunks), ref_frame)
153 179
154 180 @hypothesis.settings(
155 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
156 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
157 level=strategies.integers(min_value=1, max_value=5),
158 source_read_size=strategies.integers(1, 16384),
159 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
160 def test_buffer_source_readinto(self, original, level,
161 source_read_size, read_size):
181 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
182 )
183 @hypothesis.given(
184 original=strategies.sampled_from(random_input_data()),
185 level=strategies.integers(min_value=1, max_value=5),
186 source_read_size=strategies.integers(1, 16384),
187 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
188 )
189 def test_buffer_source_readinto(self, original, level, source_read_size, read_size):
162 190
163 191 refctx = zstd.ZstdCompressor(level=level)
164 192 ref_frame = refctx.compress(original)
165 193
166 194 cctx = zstd.ZstdCompressor(level=level)
167 with cctx.stream_reader(original, size=len(original),
168 read_size=source_read_size) as reader:
195 with cctx.stream_reader(
196 original, size=len(original), read_size=source_read_size
197 ) as reader:
169 198 chunks = []
170 199 while True:
171 200 b = bytearray(read_size)
@@ -176,22 +205,30 class TestCompressor_stream_reader_fuzzi
176 205
177 206 chunks.append(bytes(b[0:count]))
178 207
179 self.assertEqual(b''.join(chunks), ref_frame)
208 self.assertEqual(b"".join(chunks), ref_frame)
180 209
181 210 @hypothesis.settings(
182 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
183 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
184 level=strategies.integers(min_value=1, max_value=5),
185 source_read_size=strategies.integers(1, 16384),
186 read_sizes=strategies.data())
187 def test_stream_source_readinto_variance(self, original, level,
188 source_read_size, read_sizes):
211 suppress_health_check=[
212 hypothesis.HealthCheck.large_base_example,
213 hypothesis.HealthCheck.too_slow,
214 ]
215 )
216 @hypothesis.given(
217 original=strategies.sampled_from(random_input_data()),
218 level=strategies.integers(min_value=1, max_value=5),
219 source_read_size=strategies.integers(1, 16384),
220 read_sizes=strategies.data(),
221 )
222 def test_stream_source_readinto_variance(
223 self, original, level, source_read_size, read_sizes
224 ):
189 225 refctx = zstd.ZstdCompressor(level=level)
190 226 ref_frame = refctx.compress(original)
191 227
192 228 cctx = zstd.ZstdCompressor(level=level)
193 with cctx.stream_reader(io.BytesIO(original), size=len(original),
194 read_size=source_read_size) as reader:
229 with cctx.stream_reader(
230 io.BytesIO(original), size=len(original), read_size=source_read_size
231 ) as reader:
195 232 chunks = []
196 233 while True:
197 234 read_size = read_sizes.draw(strategies.integers(1, 16384))
@@ -203,23 +240,31 class TestCompressor_stream_reader_fuzzi
203 240
204 241 chunks.append(bytes(b[0:count]))
205 242
206 self.assertEqual(b''.join(chunks), ref_frame)
243 self.assertEqual(b"".join(chunks), ref_frame)
207 244
208 245 @hypothesis.settings(
209 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
210 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
211 level=strategies.integers(min_value=1, max_value=5),
212 source_read_size=strategies.integers(1, 16384),
213 read_sizes=strategies.data())
214 def test_buffer_source_readinto_variance(self, original, level,
215 source_read_size, read_sizes):
246 suppress_health_check=[
247 hypothesis.HealthCheck.large_base_example,
248 hypothesis.HealthCheck.too_slow,
249 ]
250 )
251 @hypothesis.given(
252 original=strategies.sampled_from(random_input_data()),
253 level=strategies.integers(min_value=1, max_value=5),
254 source_read_size=strategies.integers(1, 16384),
255 read_sizes=strategies.data(),
256 )
257 def test_buffer_source_readinto_variance(
258 self, original, level, source_read_size, read_sizes
259 ):
216 260
217 261 refctx = zstd.ZstdCompressor(level=level)
218 262 ref_frame = refctx.compress(original)
219 263
220 264 cctx = zstd.ZstdCompressor(level=level)
221 with cctx.stream_reader(original, size=len(original),
222 read_size=source_read_size) as reader:
265 with cctx.stream_reader(
266 original, size=len(original), read_size=source_read_size
267 ) as reader:
223 268 chunks = []
224 269 while True:
225 270 read_size = read_sizes.draw(strategies.integers(1, 16384))
@@ -231,16 +276,18 class TestCompressor_stream_reader_fuzzi
231 276
232 277 chunks.append(bytes(b[0:count]))
233 278
234 self.assertEqual(b''.join(chunks), ref_frame)
279 self.assertEqual(b"".join(chunks), ref_frame)
235 280
236 281 @hypothesis.settings(
237 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
238 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
239 level=strategies.integers(min_value=1, max_value=5),
240 source_read_size=strategies.integers(1, 16384),
241 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
242 def test_stream_source_read1(self, original, level, source_read_size,
243 read_size):
282 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
283 )
284 @hypothesis.given(
285 original=strategies.sampled_from(random_input_data()),
286 level=strategies.integers(min_value=1, max_value=5),
287 source_read_size=strategies.integers(1, 16384),
288 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
289 )
290 def test_stream_source_read1(self, original, level, source_read_size, read_size):
244 291 if read_size == 0:
245 292 read_size = -1
246 293
@@ -248,8 +295,9 class TestCompressor_stream_reader_fuzzi
248 295 ref_frame = refctx.compress(original)
249 296
250 297 cctx = zstd.ZstdCompressor(level=level)
251 with cctx.stream_reader(io.BytesIO(original), size=len(original),
252 read_size=source_read_size) as reader:
298 with cctx.stream_reader(
299 io.BytesIO(original), size=len(original), read_size=source_read_size
300 ) as reader:
253 301 chunks = []
254 302 while True:
255 303 chunk = reader.read1(read_size)
@@ -258,16 +306,18 class TestCompressor_stream_reader_fuzzi
258 306
259 307 chunks.append(chunk)
260 308
261 self.assertEqual(b''.join(chunks), ref_frame)
309 self.assertEqual(b"".join(chunks), ref_frame)
262 310
263 311 @hypothesis.settings(
264 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
265 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
266 level=strategies.integers(min_value=1, max_value=5),
267 source_read_size=strategies.integers(1, 16384),
268 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
269 def test_buffer_source_read1(self, original, level, source_read_size,
270 read_size):
312 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
313 )
314 @hypothesis.given(
315 original=strategies.sampled_from(random_input_data()),
316 level=strategies.integers(min_value=1, max_value=5),
317 source_read_size=strategies.integers(1, 16384),
318 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
319 )
320 def test_buffer_source_read1(self, original, level, source_read_size, read_size):
271 321 if read_size == 0:
272 322 read_size = -1
273 323
@@ -275,8 +325,9 class TestCompressor_stream_reader_fuzzi
275 325 ref_frame = refctx.compress(original)
276 326
277 327 cctx = zstd.ZstdCompressor(level=level)
278 with cctx.stream_reader(original, size=len(original),
279 read_size=source_read_size) as reader:
328 with cctx.stream_reader(
329 original, size=len(original), read_size=source_read_size
330 ) as reader:
280 331 chunks = []
281 332 while True:
282 333 chunk = reader.read1(read_size)
@@ -285,22 +336,30 class TestCompressor_stream_reader_fuzzi
285 336
286 337 chunks.append(chunk)
287 338
288 self.assertEqual(b''.join(chunks), ref_frame)
339 self.assertEqual(b"".join(chunks), ref_frame)
289 340
290 341 @hypothesis.settings(
291 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
292 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
293 level=strategies.integers(min_value=1, max_value=5),
294 source_read_size=strategies.integers(1, 16384),
295 read_sizes=strategies.data())
296 def test_stream_source_read1_variance(self, original, level, source_read_size,
297 read_sizes):
342 suppress_health_check=[
343 hypothesis.HealthCheck.large_base_example,
344 hypothesis.HealthCheck.too_slow,
345 ]
346 )
347 @hypothesis.given(
348 original=strategies.sampled_from(random_input_data()),
349 level=strategies.integers(min_value=1, max_value=5),
350 source_read_size=strategies.integers(1, 16384),
351 read_sizes=strategies.data(),
352 )
353 def test_stream_source_read1_variance(
354 self, original, level, source_read_size, read_sizes
355 ):
298 356 refctx = zstd.ZstdCompressor(level=level)
299 357 ref_frame = refctx.compress(original)
300 358
301 359 cctx = zstd.ZstdCompressor(level=level)
302 with cctx.stream_reader(io.BytesIO(original), size=len(original),
303 read_size=source_read_size) as reader:
360 with cctx.stream_reader(
361 io.BytesIO(original), size=len(original), read_size=source_read_size
362 ) as reader:
304 363 chunks = []
305 364 while True:
306 365 read_size = read_sizes.draw(strategies.integers(-1, 16384))
@@ -310,23 +369,31 class TestCompressor_stream_reader_fuzzi
310 369
311 370 chunks.append(chunk)
312 371
313 self.assertEqual(b''.join(chunks), ref_frame)
372 self.assertEqual(b"".join(chunks), ref_frame)
314 373
315 374 @hypothesis.settings(
316 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
317 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
318 level=strategies.integers(min_value=1, max_value=5),
319 source_read_size=strategies.integers(1, 16384),
320 read_sizes=strategies.data())
321 def test_buffer_source_read1_variance(self, original, level, source_read_size,
322 read_sizes):
375 suppress_health_check=[
376 hypothesis.HealthCheck.large_base_example,
377 hypothesis.HealthCheck.too_slow,
378 ]
379 )
380 @hypothesis.given(
381 original=strategies.sampled_from(random_input_data()),
382 level=strategies.integers(min_value=1, max_value=5),
383 source_read_size=strategies.integers(1, 16384),
384 read_sizes=strategies.data(),
385 )
386 def test_buffer_source_read1_variance(
387 self, original, level, source_read_size, read_sizes
388 ):
323 389
324 390 refctx = zstd.ZstdCompressor(level=level)
325 391 ref_frame = refctx.compress(original)
326 392
327 393 cctx = zstd.ZstdCompressor(level=level)
328 with cctx.stream_reader(original, size=len(original),
329 read_size=source_read_size) as reader:
394 with cctx.stream_reader(
395 original, size=len(original), read_size=source_read_size
396 ) as reader:
330 397 chunks = []
331 398 while True:
332 399 read_size = read_sizes.draw(strategies.integers(-1, 16384))
@@ -336,17 +403,20 class TestCompressor_stream_reader_fuzzi
336 403
337 404 chunks.append(chunk)
338 405
339 self.assertEqual(b''.join(chunks), ref_frame)
340
406 self.assertEqual(b"".join(chunks), ref_frame)
341 407
342 408 @hypothesis.settings(
343 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
344 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
345 level=strategies.integers(min_value=1, max_value=5),
346 source_read_size=strategies.integers(1, 16384),
347 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
348 def test_stream_source_readinto1(self, original, level, source_read_size,
349 read_size):
409 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
410 )
411 @hypothesis.given(
412 original=strategies.sampled_from(random_input_data()),
413 level=strategies.integers(min_value=1, max_value=5),
414 source_read_size=strategies.integers(1, 16384),
415 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
416 )
417 def test_stream_source_readinto1(
418 self, original, level, source_read_size, read_size
419 ):
350 420 if read_size == 0:
351 421 read_size = -1
352 422
@@ -354,8 +424,9 class TestCompressor_stream_reader_fuzzi
354 424 ref_frame = refctx.compress(original)
355 425
356 426 cctx = zstd.ZstdCompressor(level=level)
357 with cctx.stream_reader(io.BytesIO(original), size=len(original),
358 read_size=source_read_size) as reader:
427 with cctx.stream_reader(
428 io.BytesIO(original), size=len(original), read_size=source_read_size
429 ) as reader:
359 430 chunks = []
360 431 while True:
361 432 b = bytearray(read_size)
@@ -366,16 +437,20 class TestCompressor_stream_reader_fuzzi
366 437
367 438 chunks.append(bytes(b[0:count]))
368 439
369 self.assertEqual(b''.join(chunks), ref_frame)
440 self.assertEqual(b"".join(chunks), ref_frame)
370 441
371 442 @hypothesis.settings(
372 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
373 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
374 level=strategies.integers(min_value=1, max_value=5),
375 source_read_size=strategies.integers(1, 16384),
376 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
377 def test_buffer_source_readinto1(self, original, level, source_read_size,
378 read_size):
443 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
444 )
445 @hypothesis.given(
446 original=strategies.sampled_from(random_input_data()),
447 level=strategies.integers(min_value=1, max_value=5),
448 source_read_size=strategies.integers(1, 16384),
449 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
450 )
451 def test_buffer_source_readinto1(
452 self, original, level, source_read_size, read_size
453 ):
379 454 if read_size == 0:
380 455 read_size = -1
381 456
@@ -383,8 +458,9 class TestCompressor_stream_reader_fuzzi
383 458 ref_frame = refctx.compress(original)
384 459
385 460 cctx = zstd.ZstdCompressor(level=level)
386 with cctx.stream_reader(original, size=len(original),
387 read_size=source_read_size) as reader:
461 with cctx.stream_reader(
462 original, size=len(original), read_size=source_read_size
463 ) as reader:
388 464 chunks = []
389 465 while True:
390 466 b = bytearray(read_size)
@@ -395,22 +471,30 class TestCompressor_stream_reader_fuzzi
395 471
396 472 chunks.append(bytes(b[0:count]))
397 473
398 self.assertEqual(b''.join(chunks), ref_frame)
474 self.assertEqual(b"".join(chunks), ref_frame)
399 475
400 476 @hypothesis.settings(
401 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
402 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
403 level=strategies.integers(min_value=1, max_value=5),
404 source_read_size=strategies.integers(1, 16384),
405 read_sizes=strategies.data())
406 def test_stream_source_readinto1_variance(self, original, level, source_read_size,
407 read_sizes):
477 suppress_health_check=[
478 hypothesis.HealthCheck.large_base_example,
479 hypothesis.HealthCheck.too_slow,
480 ]
481 )
482 @hypothesis.given(
483 original=strategies.sampled_from(random_input_data()),
484 level=strategies.integers(min_value=1, max_value=5),
485 source_read_size=strategies.integers(1, 16384),
486 read_sizes=strategies.data(),
487 )
488 def test_stream_source_readinto1_variance(
489 self, original, level, source_read_size, read_sizes
490 ):
408 491 refctx = zstd.ZstdCompressor(level=level)
409 492 ref_frame = refctx.compress(original)
410 493
411 494 cctx = zstd.ZstdCompressor(level=level)
412 with cctx.stream_reader(io.BytesIO(original), size=len(original),
413 read_size=source_read_size) as reader:
495 with cctx.stream_reader(
496 io.BytesIO(original), size=len(original), read_size=source_read_size
497 ) as reader:
414 498 chunks = []
415 499 while True:
416 500 read_size = read_sizes.draw(strategies.integers(1, 16384))
@@ -422,23 +506,31 class TestCompressor_stream_reader_fuzzi
422 506
423 507 chunks.append(bytes(b[0:count]))
424 508
425 self.assertEqual(b''.join(chunks), ref_frame)
509 self.assertEqual(b"".join(chunks), ref_frame)
426 510
427 511 @hypothesis.settings(
428 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
429 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
430 level=strategies.integers(min_value=1, max_value=5),
431 source_read_size=strategies.integers(1, 16384),
432 read_sizes=strategies.data())
433 def test_buffer_source_readinto1_variance(self, original, level, source_read_size,
434 read_sizes):
512 suppress_health_check=[
513 hypothesis.HealthCheck.large_base_example,
514 hypothesis.HealthCheck.too_slow,
515 ]
516 )
517 @hypothesis.given(
518 original=strategies.sampled_from(random_input_data()),
519 level=strategies.integers(min_value=1, max_value=5),
520 source_read_size=strategies.integers(1, 16384),
521 read_sizes=strategies.data(),
522 )
523 def test_buffer_source_readinto1_variance(
524 self, original, level, source_read_size, read_sizes
525 ):
435 526
436 527 refctx = zstd.ZstdCompressor(level=level)
437 528 ref_frame = refctx.compress(original)
438 529
439 530 cctx = zstd.ZstdCompressor(level=level)
440 with cctx.stream_reader(original, size=len(original),
441 read_size=source_read_size) as reader:
531 with cctx.stream_reader(
532 original, size=len(original), read_size=source_read_size
533 ) as reader:
442 534 chunks = []
443 535 while True:
444 536 read_size = read_sizes.draw(strategies.integers(1, 16384))
@@ -450,35 +542,40 class TestCompressor_stream_reader_fuzzi
450 542
451 543 chunks.append(bytes(b[0:count]))
452 544
453 self.assertEqual(b''.join(chunks), ref_frame)
454
545 self.assertEqual(b"".join(chunks), ref_frame)
455 546
456 547
457 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
548 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
458 549 @make_cffi
459 class TestCompressor_stream_writer_fuzzing(unittest.TestCase):
460 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
461 level=strategies.integers(min_value=1, max_value=5),
462 write_size=strategies.integers(min_value=1, max_value=1048576))
550 class TestCompressor_stream_writer_fuzzing(TestCase):
551 @hypothesis.given(
552 original=strategies.sampled_from(random_input_data()),
553 level=strategies.integers(min_value=1, max_value=5),
554 write_size=strategies.integers(min_value=1, max_value=1048576),
555 )
463 556 def test_write_size_variance(self, original, level, write_size):
464 557 refctx = zstd.ZstdCompressor(level=level)
465 558 ref_frame = refctx.compress(original)
466 559
467 560 cctx = zstd.ZstdCompressor(level=level)
468 561 b = NonClosingBytesIO()
469 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
562 with cctx.stream_writer(
563 b, size=len(original), write_size=write_size
564 ) as compressor:
470 565 compressor.write(original)
471 566
472 567 self.assertEqual(b.getvalue(), ref_frame)
473 568
474 569
475 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
570 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
476 571 @make_cffi
477 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
478 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
479 level=strategies.integers(min_value=1, max_value=5),
480 read_size=strategies.integers(min_value=1, max_value=1048576),
481 write_size=strategies.integers(min_value=1, max_value=1048576))
572 class TestCompressor_copy_stream_fuzzing(TestCase):
573 @hypothesis.given(
574 original=strategies.sampled_from(random_input_data()),
575 level=strategies.integers(min_value=1, max_value=5),
576 read_size=strategies.integers(min_value=1, max_value=1048576),
577 write_size=strategies.integers(min_value=1, max_value=1048576),
578 )
482 579 def test_read_write_size_variance(self, original, level, read_size, write_size):
483 580 refctx = zstd.ZstdCompressor(level=level)
484 581 ref_frame = refctx.compress(original)
@@ -487,20 +584,27 class TestCompressor_copy_stream_fuzzing
487 584 source = io.BytesIO(original)
488 585 dest = io.BytesIO()
489 586
490 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
491 write_size=write_size)
587 cctx.copy_stream(
588 source, dest, size=len(original), read_size=read_size, write_size=write_size
589 )
492 590
493 591 self.assertEqual(dest.getvalue(), ref_frame)
494 592
495 593
496 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
594 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
497 595 @make_cffi
498 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
596 class TestCompressor_compressobj_fuzzing(TestCase):
499 597 @hypothesis.settings(
500 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
501 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
502 level=strategies.integers(min_value=1, max_value=5),
503 chunk_sizes=strategies.data())
598 suppress_health_check=[
599 hypothesis.HealthCheck.large_base_example,
600 hypothesis.HealthCheck.too_slow,
601 ]
602 )
603 @hypothesis.given(
604 original=strategies.sampled_from(random_input_data()),
605 level=strategies.integers(min_value=1, max_value=5),
606 chunk_sizes=strategies.data(),
607 )
504 608 def test_random_input_sizes(self, original, level, chunk_sizes):
505 609 refctx = zstd.ZstdCompressor(level=level)
506 610 ref_frame = refctx.compress(original)
@@ -512,7 +616,7 class TestCompressor_compressobj_fuzzing
512 616 i = 0
513 617 while True:
514 618 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
515 source = original[i:i + chunk_size]
619 source = original[i : i + chunk_size]
516 620 if not source:
517 621 break
518 622
@@ -521,14 +625,20 class TestCompressor_compressobj_fuzzing
521 625
522 626 chunks.append(cobj.flush())
523 627
524 self.assertEqual(b''.join(chunks), ref_frame)
628 self.assertEqual(b"".join(chunks), ref_frame)
525 629
526 630 @hypothesis.settings(
527 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
528 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
529 level=strategies.integers(min_value=1, max_value=5),
530 chunk_sizes=strategies.data(),
531 flushes=strategies.data())
631 suppress_health_check=[
632 hypothesis.HealthCheck.large_base_example,
633 hypothesis.HealthCheck.too_slow,
634 ]
635 )
636 @hypothesis.given(
637 original=strategies.sampled_from(random_input_data()),
638 level=strategies.integers(min_value=1, max_value=5),
639 chunk_sizes=strategies.data(),
640 flushes=strategies.data(),
641 )
532 642 def test_flush_block(self, original, level, chunk_sizes, flushes):
533 643 cctx = zstd.ZstdCompressor(level=level)
534 644 cobj = cctx.compressobj()
@@ -541,7 +651,7 class TestCompressor_compressobj_fuzzing
541 651 i = 0
542 652 while True:
543 653 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
544 source = original[i:i + input_size]
654 source = original[i : i + input_size]
545 655 if not source:
546 656 break
547 657
@@ -558,24 +668,28 class TestCompressor_compressobj_fuzzing
558 668 compressed_chunks.append(chunk)
559 669 decompressed_chunks.append(dobj.decompress(chunk))
560 670
561 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
671 self.assertEqual(b"".join(decompressed_chunks), original[0:i])
562 672
563 673 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
564 674 compressed_chunks.append(chunk)
565 675 decompressed_chunks.append(dobj.decompress(chunk))
566 676
567 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
568 max_output_size=len(original)),
569 original)
570 self.assertEqual(b''.join(decompressed_chunks), original)
677 self.assertEqual(
678 dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)),
679 original,
680 )
681 self.assertEqual(b"".join(decompressed_chunks), original)
682
571 683
572 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
684 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
573 685 @make_cffi
574 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase):
575 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
576 level=strategies.integers(min_value=1, max_value=5),
577 read_size=strategies.integers(min_value=1, max_value=4096),
578 write_size=strategies.integers(min_value=1, max_value=4096))
686 class TestCompressor_read_to_iter_fuzzing(TestCase):
687 @hypothesis.given(
688 original=strategies.sampled_from(random_input_data()),
689 level=strategies.integers(min_value=1, max_value=5),
690 read_size=strategies.integers(min_value=1, max_value=4096),
691 write_size=strategies.integers(min_value=1, max_value=4096),
692 )
579 693 def test_read_write_size_variance(self, original, level, read_size, write_size):
580 694 refcctx = zstd.ZstdCompressor(level=level)
581 695 ref_frame = refcctx.compress(original)
@@ -583,32 +697,35 class TestCompressor_read_to_iter_fuzzin
583 697 source = io.BytesIO(original)
584 698
585 699 cctx = zstd.ZstdCompressor(level=level)
586 chunks = list(cctx.read_to_iter(source, size=len(original),
587 read_size=read_size,
588 write_size=write_size))
700 chunks = list(
701 cctx.read_to_iter(
702 source, size=len(original), read_size=read_size, write_size=write_size
703 )
704 )
589 705
590 self.assertEqual(b''.join(chunks), ref_frame)
706 self.assertEqual(b"".join(chunks), ref_frame)
591 707
592 708
593 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
594 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
595 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
596 min_size=1, max_size=1024),
597 threads=strategies.integers(min_value=1, max_value=8),
598 use_dict=strategies.booleans())
709 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
710 class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase):
711 @hypothesis.given(
712 original=strategies.lists(
713 strategies.sampled_from(random_input_data()), min_size=1, max_size=1024
714 ),
715 threads=strategies.integers(min_value=1, max_value=8),
716 use_dict=strategies.booleans(),
717 )
599 718 def test_data_equivalence(self, original, threads, use_dict):
600 719 kwargs = {}
601 720
602 721 # Use a content dictionary because it is cheap to create.
603 722 if use_dict:
604 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
723 kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])
605 724
606 cctx = zstd.ZstdCompressor(level=1,
607 write_checksum=True,
608 **kwargs)
725 cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs)
609 726
610 if not hasattr(cctx, 'multi_compress_to_buffer'):
611 self.skipTest('multi_compress_to_buffer not available')
727 if not hasattr(cctx, "multi_compress_to_buffer"):
728 self.skipTest("multi_compress_to_buffer not available")
612 729
613 730 result = cctx.multi_compress_to_buffer(original, threads=-1)
614 731
@@ -624,17 +741,21 class TestCompressor_multi_compress_to_b
624 741 self.assertEqual(dctx.decompress(frame), original[i])
625 742
626 743
627 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
744 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
628 745 @make_cffi
629 class TestCompressor_chunker_fuzzing(unittest.TestCase):
746 class TestCompressor_chunker_fuzzing(TestCase):
630 747 @hypothesis.settings(
631 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
632 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
633 level=strategies.integers(min_value=1, max_value=5),
634 chunk_size=strategies.integers(
635 min_value=1,
636 max_value=32 * 1048576),
637 input_sizes=strategies.data())
748 suppress_health_check=[
749 hypothesis.HealthCheck.large_base_example,
750 hypothesis.HealthCheck.too_slow,
751 ]
752 )
753 @hypothesis.given(
754 original=strategies.sampled_from(random_input_data()),
755 level=strategies.integers(min_value=1, max_value=5),
756 chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576),
757 input_sizes=strategies.data(),
758 )
638 759 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
639 760 cctx = zstd.ZstdCompressor(level=level)
640 761 chunker = cctx.chunker(chunk_size=chunk_size)
@@ -643,7 +764,7 class TestCompressor_chunker_fuzzing(uni
643 764 i = 0
644 765 while True:
645 766 input_size = input_sizes.draw(strategies.integers(1, 4096))
646 source = original[i:i + input_size]
767 source = original[i : i + input_size]
647 768 if not source:
648 769 break
649 770
@@ -654,23 +775,26 class TestCompressor_chunker_fuzzing(uni
654 775
655 776 dctx = zstd.ZstdDecompressor()
656 777
657 self.assertEqual(dctx.decompress(b''.join(chunks),
658 max_output_size=len(original)),
659 original)
778 self.assertEqual(
779 dctx.decompress(b"".join(chunks), max_output_size=len(original)), original
780 )
660 781
661 782 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
662 783
663 784 @hypothesis.settings(
664 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
665 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
666 level=strategies.integers(min_value=1, max_value=5),
667 chunk_size=strategies.integers(
668 min_value=1,
669 max_value=32 * 1048576),
670 input_sizes=strategies.data(),
671 flushes=strategies.data())
672 def test_flush_block(self, original, level, chunk_size, input_sizes,
673 flushes):
785 suppress_health_check=[
786 hypothesis.HealthCheck.large_base_example,
787 hypothesis.HealthCheck.too_slow,
788 ]
789 )
790 @hypothesis.given(
791 original=strategies.sampled_from(random_input_data()),
792 level=strategies.integers(min_value=1, max_value=5),
793 chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576),
794 input_sizes=strategies.data(),
795 flushes=strategies.data(),
796 )
797 def test_flush_block(self, original, level, chunk_size, input_sizes, flushes):
674 798 cctx = zstd.ZstdCompressor(level=level)
675 799 chunker = cctx.chunker(chunk_size=chunk_size)
676 800
@@ -682,7 +806,7 class TestCompressor_chunker_fuzzing(uni
682 806 i = 0
683 807 while True:
684 808 input_size = input_sizes.draw(strategies.integers(1, 4096))
685 source = original[i:i + input_size]
809 source = original[i : i + input_size]
686 810 if not source:
687 811 break
688 812
@@ -690,22 +814,23 class TestCompressor_chunker_fuzzing(uni
690 814
691 815 chunks = list(chunker.compress(source))
692 816 compressed_chunks.extend(chunks)
693 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
817 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
694 818
695 819 if not flushes.draw(strategies.booleans()):
696 820 continue
697 821
698 822 chunks = list(chunker.flush())
699 823 compressed_chunks.extend(chunks)
700 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
824 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
701 825
702 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
826 self.assertEqual(b"".join(decompressed_chunks), original[0:i])
703 827
704 828 chunks = list(chunker.finish())
705 829 compressed_chunks.extend(chunks)
706 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
830 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
707 831
708 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
709 max_output_size=len(original)),
710 original)
711 self.assertEqual(b''.join(decompressed_chunks), original) No newline at end of file
832 self.assertEqual(
833 dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)),
834 original,
835 )
836 self.assertEqual(b"".join(decompressed_chunks), original)
@@ -3,29 +3,34 import unittest
3 3
4 4 import zstandard as zstd
5 5
6 from . common import (
6 from .common import (
7 7 make_cffi,
8 TestCase,
8 9 )
9 10
10 11
11 12 @make_cffi
12 class TestCompressionParameters(unittest.TestCase):
13 class TestCompressionParameters(TestCase):
13 14 def test_bounds(self):
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
15 chain_log=zstd.CHAINLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
18 min_match=zstd.MINMATCH_MIN + 1,
19 target_length=zstd.TARGETLENGTH_MIN,
20 strategy=zstd.STRATEGY_FAST)
15 zstd.ZstdCompressionParameters(
16 window_log=zstd.WINDOWLOG_MIN,
17 chain_log=zstd.CHAINLOG_MIN,
18 hash_log=zstd.HASHLOG_MIN,
19 search_log=zstd.SEARCHLOG_MIN,
20 min_match=zstd.MINMATCH_MIN + 1,
21 target_length=zstd.TARGETLENGTH_MIN,
22 strategy=zstd.STRATEGY_FAST,
23 )
21 24
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
23 chain_log=zstd.CHAINLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
26 min_match=zstd.MINMATCH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
28 strategy=zstd.STRATEGY_BTULTRA2)
25 zstd.ZstdCompressionParameters(
26 window_log=zstd.WINDOWLOG_MAX,
27 chain_log=zstd.CHAINLOG_MAX,
28 hash_log=zstd.HASHLOG_MAX,
29 search_log=zstd.SEARCHLOG_MAX,
30 min_match=zstd.MINMATCH_MAX - 1,
31 target_length=zstd.TARGETLENGTH_MAX,
32 strategy=zstd.STRATEGY_BTULTRA2,
33 )
29 34
30 35 def test_from_level(self):
31 36 p = zstd.ZstdCompressionParameters.from_level(1)
@@ -37,13 +42,15 class TestCompressionParameters(unittest
37 42 self.assertEqual(p.window_log, 19)
38 43
39 44 def test_members(self):
40 p = zstd.ZstdCompressionParameters(window_log=10,
41 chain_log=6,
42 hash_log=7,
43 search_log=4,
44 min_match=5,
45 target_length=8,
46 strategy=1)
45 p = zstd.ZstdCompressionParameters(
46 window_log=10,
47 chain_log=6,
48 hash_log=7,
49 search_log=4,
50 min_match=5,
51 target_length=8,
52 strategy=1,
53 )
47 54 self.assertEqual(p.window_log, 10)
48 55 self.assertEqual(p.chain_log, 6)
49 56 self.assertEqual(p.hash_log, 7)
@@ -58,8 +65,7 class TestCompressionParameters(unittest
58 65 p = zstd.ZstdCompressionParameters(threads=4)
59 66 self.assertEqual(p.threads, 4)
60 67
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
62 overlap_log=6)
68 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6)
63 69 self.assertEqual(p.threads, 2)
64 70 self.assertEqual(p.job_size, 1048576)
65 71 self.assertEqual(p.overlap_log, 6)
@@ -91,20 +97,25 class TestCompressionParameters(unittest
91 97 self.assertEqual(p.ldm_hash_rate_log, 8)
92 98
93 99 def test_estimated_compression_context_size(self):
94 p = zstd.ZstdCompressionParameters(window_log=20,
95 chain_log=16,
96 hash_log=17,
97 search_log=1,
98 min_match=5,
99 target_length=16,
100 strategy=zstd.STRATEGY_DFAST)
100 p = zstd.ZstdCompressionParameters(
101 window_log=20,
102 chain_log=16,
103 hash_log=17,
104 search_log=1,
105 min_match=5,
106 target_length=16,
107 strategy=zstd.STRATEGY_DFAST,
108 )
101 109
102 110 # 32-bit has slightly different values from 64-bit.
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144,
104 delta=250)
111 self.assertAlmostEqual(
112 p.estimated_compression_context_size(), 1294464, delta=400
113 )
105 114
106 115 def test_strategy(self):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'):
116 with self.assertRaisesRegex(
117 ValueError, "cannot specify both compression_strategy"
118 ):
108 119 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
109 120
110 121 p = zstd.ZstdCompressionParameters(strategy=2)
@@ -114,7 +125,9 class TestCompressionParameters(unittest
114 125 self.assertEqual(p.compression_strategy, 3)
115 126
116 127 def test_ldm_hash_rate_log(self):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'):
128 with self.assertRaisesRegex(
129 ValueError, "cannot specify both ldm_hash_rate_log"
130 ):
118 131 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
119 132
120 133 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
@@ -124,7 +137,7 class TestCompressionParameters(unittest
124 137 self.assertEqual(p.ldm_hash_every_log, 16)
125 138
126 139 def test_overlap_log(self):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'):
140 with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"):
128 141 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
129 142
130 143 p = zstd.ZstdCompressionParameters(overlap_log=2)
@@ -137,7 +150,7 class TestCompressionParameters(unittest
137 150
138 151
139 152 @make_cffi
140 class TestFrameParameters(unittest.TestCase):
153 class TestFrameParameters(TestCase):
141 154 def test_invalid_type(self):
142 155 with self.assertRaises(TypeError):
143 156 zstd.get_frame_parameters(None)
@@ -145,71 +158,71 class TestFrameParameters(unittest.TestC
145 158 # Python 3 doesn't appear to convert unicode to Py_buffer.
146 159 if sys.version_info[0] >= 3:
147 160 with self.assertRaises(TypeError):
148 zstd.get_frame_parameters(u'foobarbaz')
161 zstd.get_frame_parameters(u"foobarbaz")
149 162 else:
150 163 # CPython will convert unicode to Py_buffer. But CFFI won't.
151 if zstd.backend == 'cffi':
164 if zstd.backend == "cffi":
152 165 with self.assertRaises(TypeError):
153 zstd.get_frame_parameters(u'foobarbaz')
166 zstd.get_frame_parameters(u"foobarbaz")
154 167 else:
155 168 with self.assertRaises(zstd.ZstdError):
156 zstd.get_frame_parameters(u'foobarbaz')
169 zstd.get_frame_parameters(u"foobarbaz")
157 170
158 171 def test_invalid_input_sizes(self):
159 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
160 zstd.get_frame_parameters(b'')
172 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
173 zstd.get_frame_parameters(b"")
161 174
162 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
175 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
163 176 zstd.get_frame_parameters(zstd.FRAME_HEADER)
164 177
165 178 def test_invalid_frame(self):
166 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
167 zstd.get_frame_parameters(b'foobarbaz')
179 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
180 zstd.get_frame_parameters(b"foobarbaz")
168 181
169 182 def test_attributes(self):
170 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
183 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00")
171 184 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
172 185 self.assertEqual(params.window_size, 1024)
173 186 self.assertEqual(params.dict_id, 0)
174 187 self.assertFalse(params.has_checksum)
175 188
176 189 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
177 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
190 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff")
178 191 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
179 192 self.assertEqual(params.window_size, 1024)
180 193 self.assertEqual(params.dict_id, 255)
181 194 self.assertFalse(params.has_checksum)
182 195
183 196 # Lowest 3rd bit indicates if checksum is present.
184 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
197 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00")
185 198 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
186 199 self.assertEqual(params.window_size, 1024)
187 200 self.assertEqual(params.dict_id, 0)
188 201 self.assertTrue(params.has_checksum)
189 202
190 203 # Upper 2 bits indicate content size.
191 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
204 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00")
192 205 self.assertEqual(params.content_size, 511)
193 206 self.assertEqual(params.window_size, 1024)
194 207 self.assertEqual(params.dict_id, 0)
195 208 self.assertFalse(params.has_checksum)
196 209
197 210 # Window descriptor is 2nd byte after frame header.
198 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
211 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40")
199 212 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 213 self.assertEqual(params.window_size, 262144)
201 214 self.assertEqual(params.dict_id, 0)
202 215 self.assertFalse(params.has_checksum)
203 216
204 217 # Set multiple things.
205 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
218 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00")
206 219 self.assertEqual(params.content_size, 272)
207 220 self.assertEqual(params.window_size, 262144)
208 221 self.assertEqual(params.dict_id, 15)
209 222 self.assertTrue(params.has_checksum)
210 223
211 224 def test_input_types(self):
212 v = zstd.FRAME_HEADER + b'\x00\x00'
225 v = zstd.FRAME_HEADER + b"\x00\x00"
213 226
214 227 mutable_array = bytearray(len(v))
215 228 mutable_array[:] = v
@@ -7,70 +7,99 try:
7 7 import hypothesis
8 8 import hypothesis.strategies as strategies
9 9 except ImportError:
10 raise unittest.SkipTest('hypothesis not available')
10 raise unittest.SkipTest("hypothesis not available")
11 11
12 12 import zstandard as zstd
13 13
14 14 from .common import (
15 15 make_cffi,
16 TestCase,
17 )
18
19
20 s_windowlog = strategies.integers(
21 min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX
22 )
23 s_chainlog = strategies.integers(
24 min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX
25 )
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX)
27 s_searchlog = strategies.integers(
28 min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX
29 )
30 s_minmatch = strategies.integers(
31 min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX
32 )
33 s_targetlength = strategies.integers(
34 min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX
35 )
36 s_strategy = strategies.sampled_from(
37 (
38 zstd.STRATEGY_FAST,
39 zstd.STRATEGY_DFAST,
40 zstd.STRATEGY_GREEDY,
41 zstd.STRATEGY_LAZY,
42 zstd.STRATEGY_LAZY2,
43 zstd.STRATEGY_BTLAZY2,
44 zstd.STRATEGY_BTOPT,
45 zstd.STRATEGY_BTULTRA,
46 zstd.STRATEGY_BTULTRA2,
47 )
16 48 )
17 49
18 50
19 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
20 max_value=zstd.WINDOWLOG_MAX)
21 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
22 max_value=zstd.CHAINLOG_MAX)
23 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
24 max_value=zstd.HASHLOG_MAX)
25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
26 max_value=zstd.SEARCHLOG_MAX)
27 s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN,
28 max_value=zstd.MINMATCH_MAX)
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
30 max_value=zstd.TARGETLENGTH_MAX)
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
32 zstd.STRATEGY_DFAST,
33 zstd.STRATEGY_GREEDY,
34 zstd.STRATEGY_LAZY,
35 zstd.STRATEGY_LAZY2,
36 zstd.STRATEGY_BTLAZY2,
37 zstd.STRATEGY_BTOPT,
38 zstd.STRATEGY_BTULTRA,
39 zstd.STRATEGY_BTULTRA2))
40
51 @make_cffi
52 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
53 class TestCompressionParametersHypothesis(TestCase):
54 @hypothesis.given(
55 s_windowlog,
56 s_chainlog,
57 s_hashlog,
58 s_searchlog,
59 s_minmatch,
60 s_targetlength,
61 s_strategy,
62 )
63 def test_valid_init(
64 self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy
65 ):
66 zstd.ZstdCompressionParameters(
67 window_log=windowlog,
68 chain_log=chainlog,
69 hash_log=hashlog,
70 search_log=searchlog,
71 min_match=minmatch,
72 target_length=targetlength,
73 strategy=strategy,
74 )
41 75
42 @make_cffi
43 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
44 class TestCompressionParametersHypothesis(unittest.TestCase):
45 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
46 s_minmatch, s_targetlength, s_strategy)
47 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
48 minmatch, targetlength, strategy):
49 zstd.ZstdCompressionParameters(window_log=windowlog,
50 chain_log=chainlog,
51 hash_log=hashlog,
52 search_log=searchlog,
53 min_match=minmatch,
54 target_length=targetlength,
55 strategy=strategy)
56
57 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
58 s_minmatch, s_targetlength, s_strategy)
59 def test_estimated_compression_context_size(self, windowlog, chainlog,
60 hashlog, searchlog,
61 minmatch, targetlength,
62 strategy):
63 if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
76 @hypothesis.given(
77 s_windowlog,
78 s_chainlog,
79 s_hashlog,
80 s_searchlog,
81 s_minmatch,
82 s_targetlength,
83 s_strategy,
84 )
85 def test_estimated_compression_context_size(
86 self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy
87 ):
88 if minmatch == zstd.MINMATCH_MIN and strategy in (
89 zstd.STRATEGY_FAST,
90 zstd.STRATEGY_GREEDY,
91 ):
64 92 minmatch += 1
65 93 elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
66 94 minmatch -= 1
67 95
68 p = zstd.ZstdCompressionParameters(window_log=windowlog,
69 chain_log=chainlog,
70 hash_log=hashlog,
71 search_log=searchlog,
72 min_match=minmatch,
73 target_length=targetlength,
74 strategy=strategy)
96 p = zstd.ZstdCompressionParameters(
97 window_log=windowlog,
98 chain_log=chainlog,
99 hash_log=hashlog,
100 search_log=searchlog,
101 min_match=minmatch,
102 target_length=targetlength,
103 strategy=strategy,
104 )
75 105 size = p.estimated_compression_context_size()
76
This diff has been collapsed as it changes many lines, (729 lines changed) Show them Hide them
@@ -13,6 +13,7 from .common import (
13 13 make_cffi,
14 14 NonClosingBytesIO,
15 15 OpCountingBytesIO,
16 TestCase,
16 17 )
17 18
18 19
@@ -23,62 +24,67 else:
23 24
24 25
25 26 @make_cffi
26 class TestFrameHeaderSize(unittest.TestCase):
27 class TestFrameHeaderSize(TestCase):
27 28 def test_empty(self):
28 with self.assertRaisesRegexp(
29 zstd.ZstdError, 'could not determine frame header size: Src size '
30 'is incorrect'):
31 zstd.frame_header_size(b'')
29 with self.assertRaisesRegex(
30 zstd.ZstdError,
31 "could not determine frame header size: Src size " "is incorrect",
32 ):
33 zstd.frame_header_size(b"")
32 34
33 35 def test_too_small(self):
34 with self.assertRaisesRegexp(
35 zstd.ZstdError, 'could not determine frame header size: Src size '
36 'is incorrect'):
37 zstd.frame_header_size(b'foob')
36 with self.assertRaisesRegex(
37 zstd.ZstdError,
38 "could not determine frame header size: Src size " "is incorrect",
39 ):
40 zstd.frame_header_size(b"foob")
38 41
39 42 def test_basic(self):
40 43 # It doesn't matter that it isn't a valid frame.
41 self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6)
44 self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6)
42 45
43 46
44 47 @make_cffi
45 class TestFrameContentSize(unittest.TestCase):
48 class TestFrameContentSize(TestCase):
46 49 def test_empty(self):
47 with self.assertRaisesRegexp(zstd.ZstdError,
48 'error when determining content size'):
49 zstd.frame_content_size(b'')
50 with self.assertRaisesRegex(
51 zstd.ZstdError, "error when determining content size"
52 ):
53 zstd.frame_content_size(b"")
50 54
51 55 def test_too_small(self):
52 with self.assertRaisesRegexp(zstd.ZstdError,
53 'error when determining content size'):
54 zstd.frame_content_size(b'foob')
56 with self.assertRaisesRegex(
57 zstd.ZstdError, "error when determining content size"
58 ):
59 zstd.frame_content_size(b"foob")
55 60
56 61 def test_bad_frame(self):
57 with self.assertRaisesRegexp(zstd.ZstdError,
58 'error when determining content size'):
59 zstd.frame_content_size(b'invalid frame header')
62 with self.assertRaisesRegex(
63 zstd.ZstdError, "error when determining content size"
64 ):
65 zstd.frame_content_size(b"invalid frame header")
60 66
61 67 def test_unknown(self):
62 68 cctx = zstd.ZstdCompressor(write_content_size=False)
63 frame = cctx.compress(b'foobar')
69 frame = cctx.compress(b"foobar")
64 70
65 71 self.assertEqual(zstd.frame_content_size(frame), -1)
66 72
67 73 def test_empty(self):
68 74 cctx = zstd.ZstdCompressor()
69 frame = cctx.compress(b'')
75 frame = cctx.compress(b"")
70 76
71 77 self.assertEqual(zstd.frame_content_size(frame), 0)
72 78
73 79 def test_basic(self):
74 80 cctx = zstd.ZstdCompressor()
75 frame = cctx.compress(b'foobar')
81 frame = cctx.compress(b"foobar")
76 82
77 83 self.assertEqual(zstd.frame_content_size(frame), 6)
78 84
79 85
80 86 @make_cffi
81 class TestDecompressor(unittest.TestCase):
87 class TestDecompressor(TestCase):
82 88 def test_memory_size(self):
83 89 dctx = zstd.ZstdDecompressor()
84 90
@@ -86,22 +92,26 class TestDecompressor(unittest.TestCase
86 92
87 93
88 94 @make_cffi
89 class TestDecompressor_decompress(unittest.TestCase):
95 class TestDecompressor_decompress(TestCase):
90 96 def test_empty_input(self):
91 97 dctx = zstd.ZstdDecompressor()
92 98
93 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
94 dctx.decompress(b'')
99 with self.assertRaisesRegex(
100 zstd.ZstdError, "error determining content size from frame header"
101 ):
102 dctx.decompress(b"")
95 103
96 104 def test_invalid_input(self):
97 105 dctx = zstd.ZstdDecompressor()
98 106
99 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
100 dctx.decompress(b'foobar')
107 with self.assertRaisesRegex(
108 zstd.ZstdError, "error determining content size from frame header"
109 ):
110 dctx.decompress(b"foobar")
101 111
102 112 def test_input_types(self):
103 113 cctx = zstd.ZstdCompressor(level=1)
104 compressed = cctx.compress(b'foo')
114 compressed = cctx.compress(b"foo")
105 115
106 116 mutable_array = bytearray(len(compressed))
107 117 mutable_array[:] = compressed
@@ -114,36 +124,38 class TestDecompressor_decompress(unitte
114 124
115 125 dctx = zstd.ZstdDecompressor()
116 126 for source in sources:
117 self.assertEqual(dctx.decompress(source), b'foo')
127 self.assertEqual(dctx.decompress(source), b"foo")
118 128
119 129 def test_no_content_size_in_frame(self):
120 130 cctx = zstd.ZstdCompressor(write_content_size=False)
121 compressed = cctx.compress(b'foobar')
131 compressed = cctx.compress(b"foobar")
122 132
123 133 dctx = zstd.ZstdDecompressor()
124 with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'):
134 with self.assertRaisesRegex(
135 zstd.ZstdError, "could not determine content size in frame header"
136 ):
125 137 dctx.decompress(compressed)
126 138
127 139 def test_content_size_present(self):
128 140 cctx = zstd.ZstdCompressor()
129 compressed = cctx.compress(b'foobar')
141 compressed = cctx.compress(b"foobar")
130 142
131 143 dctx = zstd.ZstdDecompressor()
132 144 decompressed = dctx.decompress(compressed)
133 self.assertEqual(decompressed, b'foobar')
145 self.assertEqual(decompressed, b"foobar")
134 146
135 147 def test_empty_roundtrip(self):
136 148 cctx = zstd.ZstdCompressor()
137 compressed = cctx.compress(b'')
149 compressed = cctx.compress(b"")
138 150
139 151 dctx = zstd.ZstdDecompressor()
140 152 decompressed = dctx.decompress(compressed)
141 153
142 self.assertEqual(decompressed, b'')
154 self.assertEqual(decompressed, b"")
143 155
144 156 def test_max_output_size(self):
145 157 cctx = zstd.ZstdCompressor(write_content_size=False)
146 source = b'foobar' * 256
158 source = b"foobar" * 256
147 159 compressed = cctx.compress(source)
148 160
149 161 dctx = zstd.ZstdDecompressor()
@@ -152,8 +164,9 class TestDecompressor_decompress(unitte
152 164 self.assertEqual(decompressed, source)
153 165
154 166 # Input size - 1 fails
155 with self.assertRaisesRegexp(zstd.ZstdError,
156 'decompression error: did not decompress full frame'):
167 with self.assertRaisesRegex(
168 zstd.ZstdError, "decompression error: did not decompress full frame"
169 ):
157 170 dctx.decompress(compressed, max_output_size=len(source) - 1)
158 171
159 172 # Input size + 1 works
@@ -166,24 +179,24 class TestDecompressor_decompress(unitte
166 179
167 180 def test_stupidly_large_output_buffer(self):
168 181 cctx = zstd.ZstdCompressor(write_content_size=False)
169 compressed = cctx.compress(b'foobar' * 256)
182 compressed = cctx.compress(b"foobar" * 256)
170 183 dctx = zstd.ZstdDecompressor()
171 184
172 185 # Will get OverflowError on some Python distributions that can't
173 186 # handle really large integers.
174 187 with self.assertRaises((MemoryError, OverflowError)):
175 dctx.decompress(compressed, max_output_size=2**62)
188 dctx.decompress(compressed, max_output_size=2 ** 62)
176 189
177 190 def test_dictionary(self):
178 191 samples = []
179 192 for i in range(128):
180 samples.append(b'foo' * 64)
181 samples.append(b'bar' * 64)
182 samples.append(b'foobar' * 64)
193 samples.append(b"foo" * 64)
194 samples.append(b"bar" * 64)
195 samples.append(b"foobar" * 64)
183 196
184 197 d = zstd.train_dictionary(8192, samples)
185 198
186 orig = b'foobar' * 16384
199 orig = b"foobar" * 16384
187 200 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
188 201 compressed = cctx.compress(orig)
189 202
@@ -195,13 +208,13 class TestDecompressor_decompress(unitte
195 208 def test_dictionary_multiple(self):
196 209 samples = []
197 210 for i in range(128):
198 samples.append(b'foo' * 64)
199 samples.append(b'bar' * 64)
200 samples.append(b'foobar' * 64)
211 samples.append(b"foo" * 64)
212 samples.append(b"bar" * 64)
213 samples.append(b"foobar" * 64)
201 214
202 215 d = zstd.train_dictionary(8192, samples)
203 216
204 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
217 sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192)
205 218 compressed = []
206 219 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
207 220 for source in sources:
@@ -213,7 +226,7 class TestDecompressor_decompress(unitte
213 226 self.assertEqual(decompressed, sources[i])
214 227
215 228 def test_max_window_size(self):
216 with open(__file__, 'rb') as fh:
229 with open(__file__, "rb") as fh:
217 230 source = fh.read()
218 231
219 232 # If we write a content size, the decompressor engages single pass
@@ -221,15 +234,16 class TestDecompressor_decompress(unitte
221 234 cctx = zstd.ZstdCompressor(write_content_size=False)
222 235 frame = cctx.compress(source)
223 236
224 dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN)
237 dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN)
225 238
226 with self.assertRaisesRegexp(
227 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
239 with self.assertRaisesRegex(
240 zstd.ZstdError, "decompression error: Frame requires too much memory"
241 ):
228 242 dctx.decompress(frame, max_output_size=len(source))
229 243
230 244
231 245 @make_cffi
232 class TestDecompressor_copy_stream(unittest.TestCase):
246 class TestDecompressor_copy_stream(TestCase):
233 247 def test_no_read(self):
234 248 source = object()
235 249 dest = io.BytesIO()
@@ -256,12 +270,12 class TestDecompressor_copy_stream(unitt
256 270
257 271 self.assertEqual(r, 0)
258 272 self.assertEqual(w, 0)
259 self.assertEqual(dest.getvalue(), b'')
273 self.assertEqual(dest.getvalue(), b"")
260 274
261 275 def test_large_data(self):
262 276 source = io.BytesIO()
263 277 for i in range(255):
264 source.write(struct.Struct('>B').pack(i) * 16384)
278 source.write(struct.Struct(">B").pack(i) * 16384)
265 279 source.seek(0)
266 280
267 281 compressed = io.BytesIO()
@@ -277,33 +291,32 class TestDecompressor_copy_stream(unitt
277 291 self.assertEqual(w, len(source.getvalue()))
278 292
279 293 def test_read_write_size(self):
280 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
281 b'foobarfoobar'))
294 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar"))
282 295
283 296 dest = OpCountingBytesIO()
284 297 dctx = zstd.ZstdDecompressor()
285 298 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
286 299
287 300 self.assertEqual(r, len(source.getvalue()))
288 self.assertEqual(w, len(b'foobarfoobar'))
301 self.assertEqual(w, len(b"foobarfoobar"))
289 302 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
290 303 self.assertEqual(dest._write_count, len(dest.getvalue()))
291 304
292 305
293 306 @make_cffi
294 class TestDecompressor_stream_reader(unittest.TestCase):
307 class TestDecompressor_stream_reader(TestCase):
295 308 def test_context_manager(self):
296 309 dctx = zstd.ZstdDecompressor()
297 310
298 with dctx.stream_reader(b'foo') as reader:
299 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
311 with dctx.stream_reader(b"foo") as reader:
312 with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"):
300 313 with reader as reader2:
301 314 pass
302 315
303 316 def test_not_implemented(self):
304 317 dctx = zstd.ZstdDecompressor()
305 318
306 with dctx.stream_reader(b'foo') as reader:
319 with dctx.stream_reader(b"foo") as reader:
307 320 with self.assertRaises(io.UnsupportedOperation):
308 321 reader.readline()
309 322
@@ -317,7 +330,7 class TestDecompressor_stream_reader(uni
317 330 next(reader)
318 331
319 332 with self.assertRaises(io.UnsupportedOperation):
320 reader.write(b'foo')
333 reader.write(b"foo")
321 334
322 335 with self.assertRaises(io.UnsupportedOperation):
323 336 reader.writelines([])
@@ -325,7 +338,7 class TestDecompressor_stream_reader(uni
325 338 def test_constant_methods(self):
326 339 dctx = zstd.ZstdDecompressor()
327 340
328 with dctx.stream_reader(b'foo') as reader:
341 with dctx.stream_reader(b"foo") as reader:
329 342 self.assertFalse(reader.closed)
330 343 self.assertTrue(reader.readable())
331 344 self.assertFalse(reader.writable())
@@ -340,29 +353,31 class TestDecompressor_stream_reader(uni
340 353 def test_read_closed(self):
341 354 dctx = zstd.ZstdDecompressor()
342 355
343 with dctx.stream_reader(b'foo') as reader:
356 with dctx.stream_reader(b"foo") as reader:
344 357 reader.close()
345 358 self.assertTrue(reader.closed)
346 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
359 with self.assertRaisesRegex(ValueError, "stream is closed"):
347 360 reader.read(1)
348 361
349 362 def test_read_sizes(self):
350 363 cctx = zstd.ZstdCompressor()
351 foo = cctx.compress(b'foo')
364 foo = cctx.compress(b"foo")
352 365
353 366 dctx = zstd.ZstdDecompressor()
354 367
355 368 with dctx.stream_reader(foo) as reader:
356 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
369 with self.assertRaisesRegex(
370 ValueError, "cannot read negative amounts less than -1"
371 ):
357 372 reader.read(-2)
358 373
359 self.assertEqual(reader.read(0), b'')
360 self.assertEqual(reader.read(), b'foo')
374 self.assertEqual(reader.read(0), b"")
375 self.assertEqual(reader.read(), b"foo")
361 376
362 377 def test_read_buffer(self):
363 378 cctx = zstd.ZstdCompressor()
364 379
365 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
380 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
366 381 frame = cctx.compress(source)
367 382
368 383 dctx = zstd.ZstdDecompressor()
@@ -376,14 +391,14 class TestDecompressor_stream_reader(uni
376 391 self.assertEqual(reader.tell(), len(source))
377 392
378 393 # Read after EOF should return empty bytes.
379 self.assertEqual(reader.read(1), b'')
394 self.assertEqual(reader.read(1), b"")
380 395 self.assertEqual(reader.tell(), len(result))
381 396
382 397 self.assertTrue(reader.closed)
383 398
384 399 def test_read_buffer_small_chunks(self):
385 400 cctx = zstd.ZstdCompressor()
386 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
401 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
387 402 frame = cctx.compress(source)
388 403
389 404 dctx = zstd.ZstdDecompressor()
@@ -398,11 +413,11 class TestDecompressor_stream_reader(uni
398 413 chunks.append(chunk)
399 414 self.assertEqual(reader.tell(), sum(map(len, chunks)))
400 415
401 self.assertEqual(b''.join(chunks), source)
416 self.assertEqual(b"".join(chunks), source)
402 417
403 418 def test_read_stream(self):
404 419 cctx = zstd.ZstdCompressor()
405 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
420 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
406 421 frame = cctx.compress(source)
407 422
408 423 dctx = zstd.ZstdDecompressor()
@@ -412,7 +427,7 class TestDecompressor_stream_reader(uni
412 427 chunk = reader.read(8192)
413 428 self.assertEqual(chunk, source)
414 429 self.assertEqual(reader.tell(), len(source))
415 self.assertEqual(reader.read(1), b'')
430 self.assertEqual(reader.read(1), b"")
416 431 self.assertEqual(reader.tell(), len(source))
417 432 self.assertFalse(reader.closed)
418 433
@@ -420,7 +435,7 class TestDecompressor_stream_reader(uni
420 435
421 436 def test_read_stream_small_chunks(self):
422 437 cctx = zstd.ZstdCompressor()
423 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
438 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
424 439 frame = cctx.compress(source)
425 440
426 441 dctx = zstd.ZstdDecompressor()
@@ -435,11 +450,11 class TestDecompressor_stream_reader(uni
435 450 chunks.append(chunk)
436 451 self.assertEqual(reader.tell(), sum(map(len, chunks)))
437 452
438 self.assertEqual(b''.join(chunks), source)
453 self.assertEqual(b"".join(chunks), source)
439 454
440 455 def test_read_after_exit(self):
441 456 cctx = zstd.ZstdCompressor()
442 frame = cctx.compress(b'foo' * 60)
457 frame = cctx.compress(b"foo" * 60)
443 458
444 459 dctx = zstd.ZstdDecompressor()
445 460
@@ -449,45 +464,46 class TestDecompressor_stream_reader(uni
449 464
450 465 self.assertTrue(reader.closed)
451 466
452 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
467 with self.assertRaisesRegex(ValueError, "stream is closed"):
453 468 reader.read(10)
454 469
455 470 def test_illegal_seeks(self):
456 471 cctx = zstd.ZstdCompressor()
457 frame = cctx.compress(b'foo' * 60)
472 frame = cctx.compress(b"foo" * 60)
458 473
459 474 dctx = zstd.ZstdDecompressor()
460 475
461 476 with dctx.stream_reader(frame) as reader:
462 with self.assertRaisesRegexp(ValueError,
463 'cannot seek to negative position'):
477 with self.assertRaisesRegex(ValueError, "cannot seek to negative position"):
464 478 reader.seek(-1, os.SEEK_SET)
465 479
466 480 reader.read(1)
467 481
468 with self.assertRaisesRegexp(
469 ValueError, 'cannot seek zstd decompression stream backwards'):
482 with self.assertRaisesRegex(
483 ValueError, "cannot seek zstd decompression stream backwards"
484 ):
470 485 reader.seek(0, os.SEEK_SET)
471 486
472 with self.assertRaisesRegexp(
473 ValueError, 'cannot seek zstd decompression stream backwards'):
487 with self.assertRaisesRegex(
488 ValueError, "cannot seek zstd decompression stream backwards"
489 ):
474 490 reader.seek(-1, os.SEEK_CUR)
475 491
476 with self.assertRaisesRegexp(
477 ValueError,
478 'zstd decompression streams cannot be seeked with SEEK_END'):
492 with self.assertRaisesRegex(
493 ValueError, "zstd decompression streams cannot be seeked with SEEK_END"
494 ):
479 495 reader.seek(0, os.SEEK_END)
480 496
481 497 reader.close()
482 498
483 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
499 with self.assertRaisesRegex(ValueError, "stream is closed"):
484 500 reader.seek(4, os.SEEK_SET)
485 501
486 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
502 with self.assertRaisesRegex(ValueError, "stream is closed"):
487 503 reader.seek(0)
488 504
489 505 def test_seek(self):
490 source = b'foobar' * 60
506 source = b"foobar" * 60
491 507 cctx = zstd.ZstdCompressor()
492 508 frame = cctx.compress(source)
493 509
@@ -495,32 +511,32 class TestDecompressor_stream_reader(uni
495 511
496 512 with dctx.stream_reader(frame) as reader:
497 513 reader.seek(3)
498 self.assertEqual(reader.read(3), b'bar')
514 self.assertEqual(reader.read(3), b"bar")
499 515
500 516 reader.seek(4, os.SEEK_CUR)
501 self.assertEqual(reader.read(2), b'ar')
517 self.assertEqual(reader.read(2), b"ar")
502 518
503 519 def test_no_context_manager(self):
504 source = b'foobar' * 60
520 source = b"foobar" * 60
505 521 cctx = zstd.ZstdCompressor()
506 522 frame = cctx.compress(source)
507 523
508 524 dctx = zstd.ZstdDecompressor()
509 525 reader = dctx.stream_reader(frame)
510 526
511 self.assertEqual(reader.read(6), b'foobar')
512 self.assertEqual(reader.read(18), b'foobar' * 3)
527 self.assertEqual(reader.read(6), b"foobar")
528 self.assertEqual(reader.read(18), b"foobar" * 3)
513 529 self.assertFalse(reader.closed)
514 530
515 531 # Calling close prevents subsequent use.
516 532 reader.close()
517 533 self.assertTrue(reader.closed)
518 534
519 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
535 with self.assertRaisesRegex(ValueError, "stream is closed"):
520 536 reader.read(6)
521 537
522 538 def test_read_after_error(self):
523 source = io.BytesIO(b'')
539 source = io.BytesIO(b"")
524 540 dctx = zstd.ZstdDecompressor()
525 541
526 542 reader = dctx.stream_reader(source)
@@ -529,7 +545,7 class TestDecompressor_stream_reader(uni
529 545 reader.read(0)
530 546
531 547 with reader:
532 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
548 with self.assertRaisesRegex(ValueError, "stream is closed"):
533 549 reader.read(100)
534 550
535 551 def test_partial_read(self):
@@ -553,87 +569,87 class TestDecompressor_stream_reader(uni
553 569 cctx = zstd.ZstdCompressor()
554 570 source = io.BytesIO()
555 571 writer = cctx.stream_writer(source)
556 writer.write(b'foo')
572 writer.write(b"foo")
557 573 writer.flush(zstd.FLUSH_FRAME)
558 writer.write(b'bar')
574 writer.write(b"bar")
559 575 writer.flush(zstd.FLUSH_FRAME)
560 576
561 577 dctx = zstd.ZstdDecompressor()
562 578
563 579 reader = dctx.stream_reader(source.getvalue())
564 self.assertEqual(reader.read(2), b'fo')
565 self.assertEqual(reader.read(2), b'o')
566 self.assertEqual(reader.read(2), b'ba')
567 self.assertEqual(reader.read(2), b'r')
580 self.assertEqual(reader.read(2), b"fo")
581 self.assertEqual(reader.read(2), b"o")
582 self.assertEqual(reader.read(2), b"ba")
583 self.assertEqual(reader.read(2), b"r")
568 584
569 585 source.seek(0)
570 586 reader = dctx.stream_reader(source)
571 self.assertEqual(reader.read(2), b'fo')
572 self.assertEqual(reader.read(2), b'o')
573 self.assertEqual(reader.read(2), b'ba')
574 self.assertEqual(reader.read(2), b'r')
587 self.assertEqual(reader.read(2), b"fo")
588 self.assertEqual(reader.read(2), b"o")
589 self.assertEqual(reader.read(2), b"ba")
590 self.assertEqual(reader.read(2), b"r")
575 591
576 592 reader = dctx.stream_reader(source.getvalue())
577 self.assertEqual(reader.read(3), b'foo')
578 self.assertEqual(reader.read(3), b'bar')
593 self.assertEqual(reader.read(3), b"foo")
594 self.assertEqual(reader.read(3), b"bar")
579 595
580 596 source.seek(0)
581 597 reader = dctx.stream_reader(source)
582 self.assertEqual(reader.read(3), b'foo')
583 self.assertEqual(reader.read(3), b'bar')
598 self.assertEqual(reader.read(3), b"foo")
599 self.assertEqual(reader.read(3), b"bar")
584 600
585 601 reader = dctx.stream_reader(source.getvalue())
586 self.assertEqual(reader.read(4), b'foo')
587 self.assertEqual(reader.read(4), b'bar')
602 self.assertEqual(reader.read(4), b"foo")
603 self.assertEqual(reader.read(4), b"bar")
588 604
589 605 source.seek(0)
590 606 reader = dctx.stream_reader(source)
591 self.assertEqual(reader.read(4), b'foo')
592 self.assertEqual(reader.read(4), b'bar')
607 self.assertEqual(reader.read(4), b"foo")
608 self.assertEqual(reader.read(4), b"bar")
593 609
594 610 reader = dctx.stream_reader(source.getvalue())
595 self.assertEqual(reader.read(128), b'foo')
596 self.assertEqual(reader.read(128), b'bar')
611 self.assertEqual(reader.read(128), b"foo")
612 self.assertEqual(reader.read(128), b"bar")
597 613
598 614 source.seek(0)
599 615 reader = dctx.stream_reader(source)
600 self.assertEqual(reader.read(128), b'foo')
601 self.assertEqual(reader.read(128), b'bar')
616 self.assertEqual(reader.read(128), b"foo")
617 self.assertEqual(reader.read(128), b"bar")
602 618
603 619 # Now tests for reads spanning frames.
604 620 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
605 self.assertEqual(reader.read(3), b'foo')
606 self.assertEqual(reader.read(3), b'bar')
621 self.assertEqual(reader.read(3), b"foo")
622 self.assertEqual(reader.read(3), b"bar")
607 623
608 624 source.seek(0)
609 625 reader = dctx.stream_reader(source, read_across_frames=True)
610 self.assertEqual(reader.read(3), b'foo')
611 self.assertEqual(reader.read(3), b'bar')
626 self.assertEqual(reader.read(3), b"foo")
627 self.assertEqual(reader.read(3), b"bar")
612 628
613 629 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
614 self.assertEqual(reader.read(6), b'foobar')
630 self.assertEqual(reader.read(6), b"foobar")
615 631
616 632 source.seek(0)
617 633 reader = dctx.stream_reader(source, read_across_frames=True)
618 self.assertEqual(reader.read(6), b'foobar')
634 self.assertEqual(reader.read(6), b"foobar")
619 635
620 636 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
621 self.assertEqual(reader.read(7), b'foobar')
637 self.assertEqual(reader.read(7), b"foobar")
622 638
623 639 source.seek(0)
624 640 reader = dctx.stream_reader(source, read_across_frames=True)
625 self.assertEqual(reader.read(7), b'foobar')
641 self.assertEqual(reader.read(7), b"foobar")
626 642
627 643 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
628 self.assertEqual(reader.read(128), b'foobar')
644 self.assertEqual(reader.read(128), b"foobar")
629 645
630 646 source.seek(0)
631 647 reader = dctx.stream_reader(source, read_across_frames=True)
632 self.assertEqual(reader.read(128), b'foobar')
648 self.assertEqual(reader.read(128), b"foobar")
633 649
634 650 def test_readinto(self):
635 651 cctx = zstd.ZstdCompressor()
636 foo = cctx.compress(b'foo')
652 foo = cctx.compress(b"foo")
637 653
638 654 dctx = zstd.ZstdDecompressor()
639 655
@@ -641,116 +657,116 class TestDecompressor_stream_reader(uni
641 657 # The exact exception varies based on the backend.
642 658 reader = dctx.stream_reader(foo)
643 659 with self.assertRaises(Exception):
644 reader.readinto(b'foobar')
660 reader.readinto(b"foobar")
645 661
646 662 # readinto() with sufficiently large destination.
647 663 b = bytearray(1024)
648 664 reader = dctx.stream_reader(foo)
649 665 self.assertEqual(reader.readinto(b), 3)
650 self.assertEqual(b[0:3], b'foo')
666 self.assertEqual(b[0:3], b"foo")
651 667 self.assertEqual(reader.readinto(b), 0)
652 self.assertEqual(b[0:3], b'foo')
668 self.assertEqual(b[0:3], b"foo")
653 669
654 670 # readinto() with small reads.
655 671 b = bytearray(1024)
656 672 reader = dctx.stream_reader(foo, read_size=1)
657 673 self.assertEqual(reader.readinto(b), 3)
658 self.assertEqual(b[0:3], b'foo')
674 self.assertEqual(b[0:3], b"foo")
659 675
660 676 # Too small destination buffer.
661 677 b = bytearray(2)
662 678 reader = dctx.stream_reader(foo)
663 679 self.assertEqual(reader.readinto(b), 2)
664 self.assertEqual(b[:], b'fo')
680 self.assertEqual(b[:], b"fo")
665 681
666 682 def test_readinto1(self):
667 683 cctx = zstd.ZstdCompressor()
668 foo = cctx.compress(b'foo')
684 foo = cctx.compress(b"foo")
669 685
670 686 dctx = zstd.ZstdDecompressor()
671 687
672 688 reader = dctx.stream_reader(foo)
673 689 with self.assertRaises(Exception):
674 reader.readinto1(b'foobar')
690 reader.readinto1(b"foobar")
675 691
676 692 # Sufficiently large destination.
677 693 b = bytearray(1024)
678 694 reader = dctx.stream_reader(foo)
679 695 self.assertEqual(reader.readinto1(b), 3)
680 self.assertEqual(b[0:3], b'foo')
696 self.assertEqual(b[0:3], b"foo")
681 697 self.assertEqual(reader.readinto1(b), 0)
682 self.assertEqual(b[0:3], b'foo')
698 self.assertEqual(b[0:3], b"foo")
683 699
684 700 # readinto() with small reads.
685 701 b = bytearray(1024)
686 702 reader = dctx.stream_reader(foo, read_size=1)
687 703 self.assertEqual(reader.readinto1(b), 3)
688 self.assertEqual(b[0:3], b'foo')
704 self.assertEqual(b[0:3], b"foo")
689 705
690 706 # Too small destination buffer.
691 707 b = bytearray(2)
692 708 reader = dctx.stream_reader(foo)
693 709 self.assertEqual(reader.readinto1(b), 2)
694 self.assertEqual(b[:], b'fo')
710 self.assertEqual(b[:], b"fo")
695 711
696 712 def test_readall(self):
697 713 cctx = zstd.ZstdCompressor()
698 foo = cctx.compress(b'foo')
714 foo = cctx.compress(b"foo")
699 715
700 716 dctx = zstd.ZstdDecompressor()
701 717 reader = dctx.stream_reader(foo)
702 718
703 self.assertEqual(reader.readall(), b'foo')
719 self.assertEqual(reader.readall(), b"foo")
704 720
705 721 def test_read1(self):
706 722 cctx = zstd.ZstdCompressor()
707 foo = cctx.compress(b'foo')
723 foo = cctx.compress(b"foo")
708 724
709 725 dctx = zstd.ZstdDecompressor()
710 726
711 727 b = OpCountingBytesIO(foo)
712 728 reader = dctx.stream_reader(b)
713 729
714 self.assertEqual(reader.read1(), b'foo')
730 self.assertEqual(reader.read1(), b"foo")
715 731 self.assertEqual(b._read_count, 1)
716 732
717 733 b = OpCountingBytesIO(foo)
718 734 reader = dctx.stream_reader(b)
719 735
720 self.assertEqual(reader.read1(0), b'')
721 self.assertEqual(reader.read1(2), b'fo')
736 self.assertEqual(reader.read1(0), b"")
737 self.assertEqual(reader.read1(2), b"fo")
722 738 self.assertEqual(b._read_count, 1)
723 self.assertEqual(reader.read1(1), b'o')
739 self.assertEqual(reader.read1(1), b"o")
724 740 self.assertEqual(b._read_count, 1)
725 self.assertEqual(reader.read1(1), b'')
741 self.assertEqual(reader.read1(1), b"")
726 742 self.assertEqual(b._read_count, 2)
727 743
728 744 def test_read_lines(self):
729 745 cctx = zstd.ZstdCompressor()
730 source = b'\n'.join(('line %d' % i).encode('ascii') for i in range(1024))
746 source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024))
731 747
732 748 frame = cctx.compress(source)
733 749
734 750 dctx = zstd.ZstdDecompressor()
735 751 reader = dctx.stream_reader(frame)
736 tr = io.TextIOWrapper(reader, encoding='utf-8')
752 tr = io.TextIOWrapper(reader, encoding="utf-8")
737 753
738 754 lines = []
739 755 for line in tr:
740 lines.append(line.encode('utf-8'))
756 lines.append(line.encode("utf-8"))
741 757
742 758 self.assertEqual(len(lines), 1024)
743 self.assertEqual(b''.join(lines), source)
759 self.assertEqual(b"".join(lines), source)
744 760
745 761 reader = dctx.stream_reader(frame)
746 tr = io.TextIOWrapper(reader, encoding='utf-8')
762 tr = io.TextIOWrapper(reader, encoding="utf-8")
747 763
748 764 lines = tr.readlines()
749 765 self.assertEqual(len(lines), 1024)
750 self.assertEqual(''.join(lines).encode('utf-8'), source)
766 self.assertEqual("".join(lines).encode("utf-8"), source)
751 767
752 768 reader = dctx.stream_reader(frame)
753 tr = io.TextIOWrapper(reader, encoding='utf-8')
769 tr = io.TextIOWrapper(reader, encoding="utf-8")
754 770
755 771 lines = []
756 772 while True:
@@ -758,26 +774,26 class TestDecompressor_stream_reader(uni
758 774 if not line:
759 775 break
760 776
761 lines.append(line.encode('utf-8'))
777 lines.append(line.encode("utf-8"))
762 778
763 779 self.assertEqual(len(lines), 1024)
764 self.assertEqual(b''.join(lines), source)
780 self.assertEqual(b"".join(lines), source)
765 781
766 782
767 783 @make_cffi
768 class TestDecompressor_decompressobj(unittest.TestCase):
784 class TestDecompressor_decompressobj(TestCase):
769 785 def test_simple(self):
770 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
786 data = zstd.ZstdCompressor(level=1).compress(b"foobar")
771 787
772 788 dctx = zstd.ZstdDecompressor()
773 789 dobj = dctx.decompressobj()
774 self.assertEqual(dobj.decompress(data), b'foobar')
790 self.assertEqual(dobj.decompress(data), b"foobar")
775 791 self.assertIsNone(dobj.flush())
776 792 self.assertIsNone(dobj.flush(10))
777 793 self.assertIsNone(dobj.flush(length=100))
778 794
779 795 def test_input_types(self):
780 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
796 compressed = zstd.ZstdCompressor(level=1).compress(b"foo")
781 797
782 798 dctx = zstd.ZstdDecompressor()
783 799
@@ -795,28 +811,28 class TestDecompressor_decompressobj(uni
795 811 self.assertIsNone(dobj.flush())
796 812 self.assertIsNone(dobj.flush(10))
797 813 self.assertIsNone(dobj.flush(length=100))
798 self.assertEqual(dobj.decompress(source), b'foo')
814 self.assertEqual(dobj.decompress(source), b"foo")
799 815 self.assertIsNone(dobj.flush())
800 816
801 817 def test_reuse(self):
802 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
818 data = zstd.ZstdCompressor(level=1).compress(b"foobar")
803 819
804 820 dctx = zstd.ZstdDecompressor()
805 821 dobj = dctx.decompressobj()
806 822 dobj.decompress(data)
807 823
808 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
824 with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"):
809 825 dobj.decompress(data)
810 826 self.assertIsNone(dobj.flush())
811 827
812 828 def test_bad_write_size(self):
813 829 dctx = zstd.ZstdDecompressor()
814 830
815 with self.assertRaisesRegexp(ValueError, 'write_size must be positive'):
831 with self.assertRaisesRegex(ValueError, "write_size must be positive"):
816 832 dctx.decompressobj(write_size=0)
817 833
818 834 def test_write_size(self):
819 source = b'foo' * 64 + b'bar' * 128
835 source = b"foo" * 64 + b"bar" * 128
820 836 data = zstd.ZstdCompressor(level=1).compress(source)
821 837
822 838 dctx = zstd.ZstdDecompressor()
@@ -836,7 +852,7 def decompress_via_writer(data):
836 852
837 853
838 854 @make_cffi
839 class TestDecompressor_stream_writer(unittest.TestCase):
855 class TestDecompressor_stream_writer(TestCase):
840 856 def test_io_api(self):
841 857 buffer = io.BytesIO()
842 858 dctx = zstd.ZstdDecompressor()
@@ -908,14 +924,14 class TestDecompressor_stream_writer(uni
908 924 writer.fileno()
909 925
910 926 def test_fileno_file(self):
911 with tempfile.TemporaryFile('wb') as tf:
927 with tempfile.TemporaryFile("wb") as tf:
912 928 dctx = zstd.ZstdDecompressor()
913 929 writer = dctx.stream_writer(tf)
914 930
915 931 self.assertEqual(writer.fileno(), tf.fileno())
916 932
917 933 def test_close(self):
918 foo = zstd.ZstdCompressor().compress(b'foo')
934 foo = zstd.ZstdCompressor().compress(b"foo")
919 935
920 936 buffer = NonClosingBytesIO()
921 937 dctx = zstd.ZstdDecompressor()
@@ -928,17 +944,17 class TestDecompressor_stream_writer(uni
928 944 self.assertTrue(writer.closed)
929 945 self.assertTrue(buffer.closed)
930 946
931 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
932 writer.write(b'')
947 with self.assertRaisesRegex(ValueError, "stream is closed"):
948 writer.write(b"")
933 949
934 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
950 with self.assertRaisesRegex(ValueError, "stream is closed"):
935 951 writer.flush()
936 952
937 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
953 with self.assertRaisesRegex(ValueError, "stream is closed"):
938 954 with writer:
939 955 pass
940 956
941 self.assertEqual(buffer.getvalue(), b'foo')
957 self.assertEqual(buffer.getvalue(), b"foo")
942 958
943 959 # Context manager exit should close stream.
944 960 buffer = NonClosingBytesIO()
@@ -948,7 +964,7 class TestDecompressor_stream_writer(uni
948 964 writer.write(foo)
949 965
950 966 self.assertTrue(writer.closed)
951 self.assertEqual(buffer.getvalue(), b'foo')
967 self.assertEqual(buffer.getvalue(), b"foo")
952 968
953 969 def test_flush(self):
954 970 buffer = OpCountingBytesIO()
@@ -962,12 +978,12 class TestDecompressor_stream_writer(uni
962 978
963 979 def test_empty_roundtrip(self):
964 980 cctx = zstd.ZstdCompressor()
965 empty = cctx.compress(b'')
966 self.assertEqual(decompress_via_writer(empty), b'')
981 empty = cctx.compress(b"")
982 self.assertEqual(decompress_via_writer(empty), b"")
967 983
968 984 def test_input_types(self):
969 985 cctx = zstd.ZstdCompressor(level=1)
970 compressed = cctx.compress(b'foo')
986 compressed = cctx.compress(b"foo")
971 987
972 988 mutable_array = bytearray(len(compressed))
973 989 mutable_array[:] = compressed
@@ -984,25 +1000,25 class TestDecompressor_stream_writer(uni
984 1000
985 1001 decompressor = dctx.stream_writer(buffer)
986 1002 decompressor.write(source)
987 self.assertEqual(buffer.getvalue(), b'foo')
1003 self.assertEqual(buffer.getvalue(), b"foo")
988 1004
989 1005 buffer = NonClosingBytesIO()
990 1006
991 1007 with dctx.stream_writer(buffer) as decompressor:
992 1008 self.assertEqual(decompressor.write(source), 3)
993 1009
994 self.assertEqual(buffer.getvalue(), b'foo')
1010 self.assertEqual(buffer.getvalue(), b"foo")
995 1011
996 1012 buffer = io.BytesIO()
997 1013 writer = dctx.stream_writer(buffer, write_return_read=True)
998 1014 self.assertEqual(writer.write(source), len(source))
999 self.assertEqual(buffer.getvalue(), b'foo')
1015 self.assertEqual(buffer.getvalue(), b"foo")
1000 1016
1001 1017 def test_large_roundtrip(self):
1002 1018 chunks = []
1003 1019 for i in range(255):
1004 chunks.append(struct.Struct('>B').pack(i) * 16384)
1005 orig = b''.join(chunks)
1020 chunks.append(struct.Struct(">B").pack(i) * 16384)
1021 orig = b"".join(chunks)
1006 1022 cctx = zstd.ZstdCompressor()
1007 1023 compressed = cctx.compress(orig)
1008 1024
@@ -1012,9 +1028,9 class TestDecompressor_stream_writer(uni
1012 1028 chunks = []
1013 1029 for i in range(255):
1014 1030 for j in range(255):
1015 chunks.append(struct.Struct('>B').pack(j) * i)
1031 chunks.append(struct.Struct(">B").pack(j) * i)
1016 1032
1017 orig = b''.join(chunks)
1033 orig = b"".join(chunks)
1018 1034 cctx = zstd.ZstdCompressor()
1019 1035 compressed = cctx.compress(orig)
1020 1036
@@ -1042,13 +1058,13 class TestDecompressor_stream_writer(uni
1042 1058 def test_dictionary(self):
1043 1059 samples = []
1044 1060 for i in range(128):
1045 samples.append(b'foo' * 64)
1046 samples.append(b'bar' * 64)
1047 samples.append(b'foobar' * 64)
1061 samples.append(b"foo" * 64)
1062 samples.append(b"bar" * 64)
1063 samples.append(b"foobar" * 64)
1048 1064
1049 1065 d = zstd.train_dictionary(8192, samples)
1050 1066
1051 orig = b'foobar' * 16384
1067 orig = b"foobar" * 16384
1052 1068 buffer = NonClosingBytesIO()
1053 1069 cctx = zstd.ZstdCompressor(dict_data=d)
1054 1070 with cctx.stream_writer(buffer) as compressor:
@@ -1083,22 +1099,22 class TestDecompressor_stream_writer(uni
1083 1099 self.assertGreater(size, 100000)
1084 1100
1085 1101 def test_write_size(self):
1086 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
1102 source = zstd.ZstdCompressor().compress(b"foobarfoobar")
1087 1103 dest = OpCountingBytesIO()
1088 1104 dctx = zstd.ZstdDecompressor()
1089 1105 with dctx.stream_writer(dest, write_size=1) as decompressor:
1090 s = struct.Struct('>B')
1106 s = struct.Struct(">B")
1091 1107 for c in source:
1092 1108 if not isinstance(c, str):
1093 1109 c = s.pack(c)
1094 1110 decompressor.write(c)
1095 1111
1096 self.assertEqual(dest.getvalue(), b'foobarfoobar')
1112 self.assertEqual(dest.getvalue(), b"foobarfoobar")
1097 1113 self.assertEqual(dest._write_count, len(dest.getvalue()))
1098 1114
1099 1115
1100 1116 @make_cffi
1101 class TestDecompressor_read_to_iter(unittest.TestCase):
1117 class TestDecompressor_read_to_iter(TestCase):
1102 1118 def test_type_validation(self):
1103 1119 dctx = zstd.ZstdDecompressor()
1104 1120
@@ -1106,10 +1122,10 class TestDecompressor_read_to_iter(unit
1106 1122 dctx.read_to_iter(io.BytesIO())
1107 1123
1108 1124 # Buffer protocol works.
1109 dctx.read_to_iter(b'foobar')
1125 dctx.read_to_iter(b"foobar")
1110 1126
1111 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1112 b''.join(dctx.read_to_iter(True))
1127 with self.assertRaisesRegex(ValueError, "must pass an object with a read"):
1128 b"".join(dctx.read_to_iter(True))
1113 1129
1114 1130 def test_empty_input(self):
1115 1131 dctx = zstd.ZstdDecompressor()
@@ -1120,25 +1136,25 class TestDecompressor_read_to_iter(unit
1120 1136 with self.assertRaises(StopIteration):
1121 1137 next(it)
1122 1138
1123 it = dctx.read_to_iter(b'')
1139 it = dctx.read_to_iter(b"")
1124 1140 with self.assertRaises(StopIteration):
1125 1141 next(it)
1126 1142
1127 1143 def test_invalid_input(self):
1128 1144 dctx = zstd.ZstdDecompressor()
1129 1145
1130 source = io.BytesIO(b'foobar')
1146 source = io.BytesIO(b"foobar")
1131 1147 it = dctx.read_to_iter(source)
1132 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
1148 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
1133 1149 next(it)
1134 1150
1135 it = dctx.read_to_iter(b'foobar')
1136 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
1151 it = dctx.read_to_iter(b"foobar")
1152 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
1137 1153 next(it)
1138 1154
1139 1155 def test_empty_roundtrip(self):
1140 1156 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1141 empty = cctx.compress(b'')
1157 empty = cctx.compress(b"")
1142 1158
1143 1159 source = io.BytesIO(empty)
1144 1160 source.seek(0)
@@ -1157,24 +1173,28 class TestDecompressor_read_to_iter(unit
1157 1173 def test_skip_bytes_too_large(self):
1158 1174 dctx = zstd.ZstdDecompressor()
1159 1175
1160 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
1161 b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1))
1176 with self.assertRaisesRegex(
1177 ValueError, "skip_bytes must be smaller than read_size"
1178 ):
1179 b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1))
1162 1180
1163 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
1164 b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10))
1181 with self.assertRaisesRegex(
1182 ValueError, "skip_bytes larger than first input chunk"
1183 ):
1184 b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10))
1165 1185
1166 1186 def test_skip_bytes(self):
1167 1187 cctx = zstd.ZstdCompressor(write_content_size=False)
1168 compressed = cctx.compress(b'foobar')
1188 compressed = cctx.compress(b"foobar")
1169 1189
1170 1190 dctx = zstd.ZstdDecompressor()
1171 output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3))
1172 self.assertEqual(output, b'foobar')
1191 output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3))
1192 self.assertEqual(output, b"foobar")
1173 1193
1174 1194 def test_large_output(self):
1175 1195 source = io.BytesIO()
1176 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1177 source.write(b'o')
1196 source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1197 source.write(b"o")
1178 1198 source.seek(0)
1179 1199
1180 1200 cctx = zstd.ZstdCompressor(level=1)
@@ -1191,7 +1211,7 class TestDecompressor_read_to_iter(unit
1191 1211 with self.assertRaises(StopIteration):
1192 1212 next(it)
1193 1213
1194 decompressed = b''.join(chunks)
1214 decompressed = b"".join(chunks)
1195 1215 self.assertEqual(decompressed, source.getvalue())
1196 1216
1197 1217 # And again with buffer protocol.
@@ -1203,12 +1223,12 class TestDecompressor_read_to_iter(unit
1203 1223 with self.assertRaises(StopIteration):
1204 1224 next(it)
1205 1225
1206 decompressed = b''.join(chunks)
1226 decompressed = b"".join(chunks)
1207 1227 self.assertEqual(decompressed, source.getvalue())
1208 1228
1209 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
1229 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
1210 1230 def test_large_input(self):
1211 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
1231 bytes = list(struct.Struct(">B").pack(i) for i in range(256))
1212 1232 compressed = NonClosingBytesIO()
1213 1233 input_size = 0
1214 1234 cctx = zstd.ZstdCompressor(level=1)
@@ -1217,14 +1237,18 class TestDecompressor_read_to_iter(unit
1217 1237 compressor.write(random.choice(bytes))
1218 1238 input_size += 1
1219 1239
1220 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1240 have_compressed = (
1241 len(compressed.getvalue())
1242 > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1243 )
1221 1244 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
1222 1245 if have_compressed and have_raw:
1223 1246 break
1224 1247
1225 1248 compressed = io.BytesIO(compressed.getvalue())
1226 self.assertGreater(len(compressed.getvalue()),
1227 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
1249 self.assertGreater(
1250 len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1251 )
1228 1252
1229 1253 dctx = zstd.ZstdDecompressor()
1230 1254 it = dctx.read_to_iter(compressed)
@@ -1237,7 +1261,7 class TestDecompressor_read_to_iter(unit
1237 1261 with self.assertRaises(StopIteration):
1238 1262 next(it)
1239 1263
1240 decompressed = b''.join(chunks)
1264 decompressed = b"".join(chunks)
1241 1265 self.assertEqual(len(decompressed), input_size)
1242 1266
1243 1267 # And again with buffer protocol.
@@ -1251,7 +1275,7 class TestDecompressor_read_to_iter(unit
1251 1275 with self.assertRaises(StopIteration):
1252 1276 next(it)
1253 1277
1254 decompressed = b''.join(chunks)
1278 decompressed = b"".join(chunks)
1255 1279 self.assertEqual(len(decompressed), input_size)
1256 1280
1257 1281 def test_interesting(self):
@@ -1263,22 +1287,23 class TestDecompressor_read_to_iter(unit
1263 1287 compressed = NonClosingBytesIO()
1264 1288 with cctx.stream_writer(compressed) as compressor:
1265 1289 for i in range(256):
1266 chunk = b'\0' * 1024
1290 chunk = b"\0" * 1024
1267 1291 compressor.write(chunk)
1268 1292 source.write(chunk)
1269 1293
1270 1294 dctx = zstd.ZstdDecompressor()
1271 1295
1272 simple = dctx.decompress(compressed.getvalue(),
1273 max_output_size=len(source.getvalue()))
1296 simple = dctx.decompress(
1297 compressed.getvalue(), max_output_size=len(source.getvalue())
1298 )
1274 1299 self.assertEqual(simple, source.getvalue())
1275 1300
1276 1301 compressed = io.BytesIO(compressed.getvalue())
1277 streamed = b''.join(dctx.read_to_iter(compressed))
1302 streamed = b"".join(dctx.read_to_iter(compressed))
1278 1303 self.assertEqual(streamed, source.getvalue())
1279 1304
1280 1305 def test_read_write_size(self):
1281 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
1306 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar"))
1282 1307 dctx = zstd.ZstdDecompressor()
1283 1308 for chunk in dctx.read_to_iter(source, read_size=1, write_size=1):
1284 1309 self.assertEqual(len(chunk), 1)
@@ -1287,97 +1312,110 class TestDecompressor_read_to_iter(unit
1287 1312
1288 1313 def test_magic_less(self):
1289 1314 params = zstd.CompressionParameters.from_level(
1290 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
1315 1, format=zstd.FORMAT_ZSTD1_MAGICLESS
1316 )
1291 1317 cctx = zstd.ZstdCompressor(compression_params=params)
1292 frame = cctx.compress(b'foobar')
1318 frame = cctx.compress(b"foobar")
1293 1319
1294 self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd')
1320 self.assertNotEqual(frame[0:4], b"\x28\xb5\x2f\xfd")
1295 1321
1296 1322 dctx = zstd.ZstdDecompressor()
1297 with self.assertRaisesRegexp(
1298 zstd.ZstdError, 'error determining content size from frame header'):
1323 with self.assertRaisesRegex(
1324 zstd.ZstdError, "error determining content size from frame header"
1325 ):
1299 1326 dctx.decompress(frame)
1300 1327
1301 1328 dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
1302 res = b''.join(dctx.read_to_iter(frame))
1303 self.assertEqual(res, b'foobar')
1329 res = b"".join(dctx.read_to_iter(frame))
1330 self.assertEqual(res, b"foobar")
1304 1331
1305 1332
1306 1333 @make_cffi
1307 class TestDecompressor_content_dict_chain(unittest.TestCase):
1334 class TestDecompressor_content_dict_chain(TestCase):
1308 1335 def test_bad_inputs_simple(self):
1309 1336 dctx = zstd.ZstdDecompressor()
1310 1337
1311 1338 with self.assertRaises(TypeError):
1312 dctx.decompress_content_dict_chain(b'foo')
1339 dctx.decompress_content_dict_chain(b"foo")
1313 1340
1314 1341 with self.assertRaises(TypeError):
1315 dctx.decompress_content_dict_chain((b'foo', b'bar'))
1342 dctx.decompress_content_dict_chain((b"foo", b"bar"))
1316 1343
1317 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
1344 with self.assertRaisesRegex(ValueError, "empty input chain"):
1318 1345 dctx.decompress_content_dict_chain([])
1319 1346
1320 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
1321 dctx.decompress_content_dict_chain([u'foo'])
1347 with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"):
1348 dctx.decompress_content_dict_chain([u"foo"])
1322 1349
1323 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
1350 with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"):
1324 1351 dctx.decompress_content_dict_chain([True])
1325 1352
1326 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
1353 with self.assertRaisesRegex(
1354 ValueError, "chunk 0 is too small to contain a zstd frame"
1355 ):
1327 1356 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
1328 1357
1329 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
1330 dctx.decompress_content_dict_chain([b'foo' * 8])
1358 with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"):
1359 dctx.decompress_content_dict_chain([b"foo" * 8])
1331 1360
1332 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
1361 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
1333 1362
1334 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
1363 with self.assertRaisesRegex(
1364 ValueError, "chunk 0 missing content size in frame"
1365 ):
1335 1366 dctx.decompress_content_dict_chain([no_size])
1336 1367
1337 1368 # Corrupt first frame.
1338 frame = zstd.ZstdCompressor().compress(b'foo' * 64)
1369 frame = zstd.ZstdCompressor().compress(b"foo" * 64)
1339 1370 frame = frame[0:12] + frame[15:]
1340 with self.assertRaisesRegexp(zstd.ZstdError,
1341 'chunk 0 did not decompress full frame'):
1371 with self.assertRaisesRegex(
1372 zstd.ZstdError, "chunk 0 did not decompress full frame"
1373 ):
1342 1374 dctx.decompress_content_dict_chain([frame])
1343 1375
1344 1376 def test_bad_subsequent_input(self):
1345 initial = zstd.ZstdCompressor().compress(b'foo' * 64)
1377 initial = zstd.ZstdCompressor().compress(b"foo" * 64)
1346 1378
1347 1379 dctx = zstd.ZstdDecompressor()
1348 1380
1349 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
1350 dctx.decompress_content_dict_chain([initial, u'foo'])
1381 with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"):
1382 dctx.decompress_content_dict_chain([initial, u"foo"])
1351 1383
1352 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
1384 with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"):
1353 1385 dctx.decompress_content_dict_chain([initial, None])
1354 1386
1355 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
1387 with self.assertRaisesRegex(
1388 ValueError, "chunk 1 is too small to contain a zstd frame"
1389 ):
1356 1390 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
1357 1391
1358 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
1359 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
1392 with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"):
1393 dctx.decompress_content_dict_chain([initial, b"foo" * 8])
1360 1394
1361 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
1395 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
1362 1396
1363 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
1397 with self.assertRaisesRegex(
1398 ValueError, "chunk 1 missing content size in frame"
1399 ):
1364 1400 dctx.decompress_content_dict_chain([initial, no_size])
1365 1401
1366 1402 # Corrupt second frame.
1367 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
1368 frame = cctx.compress(b'bar' * 64)
1403 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64))
1404 frame = cctx.compress(b"bar" * 64)
1369 1405 frame = frame[0:12] + frame[15:]
1370 1406
1371 with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'):
1407 with self.assertRaisesRegex(
1408 zstd.ZstdError, "chunk 1 did not decompress full frame"
1409 ):
1372 1410 dctx.decompress_content_dict_chain([initial, frame])
1373 1411
1374 1412 def test_simple(self):
1375 1413 original = [
1376 b'foo' * 64,
1377 b'foobar' * 64,
1378 b'baz' * 64,
1379 b'foobaz' * 64,
1380 b'foobarbaz' * 64,
1414 b"foo" * 64,
1415 b"foobar" * 64,
1416 b"baz" * 64,
1417 b"foobaz" * 64,
1418 b"foobarbaz" * 64,
1381 1419 ]
1382 1420
1383 1421 chunks = []
@@ -1396,12 +1434,12 class TestDecompressor_content_dict_chai
1396 1434
1397 1435
1398 1436 # TODO enable for CFFI
1399 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
1437 class TestDecompressor_multi_decompress_to_buffer(TestCase):
1400 1438 def test_invalid_inputs(self):
1401 1439 dctx = zstd.ZstdDecompressor()
1402 1440
1403 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1404 self.skipTest('multi_decompress_to_buffer not available')
1441 if not hasattr(dctx, "multi_decompress_to_buffer"):
1442 self.skipTest("multi_decompress_to_buffer not available")
1405 1443
1406 1444 with self.assertRaises(TypeError):
1407 1445 dctx.multi_decompress_to_buffer(True)
@@ -1409,22 +1447,24 class TestDecompressor_multi_decompress_
1409 1447 with self.assertRaises(TypeError):
1410 1448 dctx.multi_decompress_to_buffer((1, 2))
1411 1449
1412 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1413 dctx.multi_decompress_to_buffer([u'foo'])
1450 with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
1451 dctx.multi_decompress_to_buffer([u"foo"])
1414 1452
1415 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
1416 dctx.multi_decompress_to_buffer([b'foobarbaz'])
1453 with self.assertRaisesRegex(
1454 ValueError, "could not determine decompressed size of item 0"
1455 ):
1456 dctx.multi_decompress_to_buffer([b"foobarbaz"])
1417 1457
1418 1458 def test_list_input(self):
1419 1459 cctx = zstd.ZstdCompressor()
1420 1460
1421 original = [b'foo' * 4, b'bar' * 6]
1461 original = [b"foo" * 4, b"bar" * 6]
1422 1462 frames = [cctx.compress(d) for d in original]
1423 1463
1424 1464 dctx = zstd.ZstdDecompressor()
1425 1465
1426 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1427 self.skipTest('multi_decompress_to_buffer not available')
1466 if not hasattr(dctx, "multi_decompress_to_buffer"):
1467 self.skipTest("multi_decompress_to_buffer not available")
1428 1468
1429 1469 result = dctx.multi_decompress_to_buffer(frames)
1430 1470
@@ -1442,14 +1482,14 class TestDecompressor_multi_decompress_
1442 1482 def test_list_input_frame_sizes(self):
1443 1483 cctx = zstd.ZstdCompressor()
1444 1484
1445 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1485 original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
1446 1486 frames = [cctx.compress(d) for d in original]
1447 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1487 sizes = struct.pack("=" + "Q" * len(original), *map(len, original))
1448 1488
1449 1489 dctx = zstd.ZstdDecompressor()
1450 1490
1451 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1452 self.skipTest('multi_decompress_to_buffer not available')
1491 if not hasattr(dctx, "multi_decompress_to_buffer"):
1492 self.skipTest("multi_decompress_to_buffer not available")
1453 1493
1454 1494 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1455 1495
@@ -1462,16 +1502,18 class TestDecompressor_multi_decompress_
1462 1502 def test_buffer_with_segments_input(self):
1463 1503 cctx = zstd.ZstdCompressor()
1464 1504
1465 original = [b'foo' * 4, b'bar' * 6]
1505 original = [b"foo" * 4, b"bar" * 6]
1466 1506 frames = [cctx.compress(d) for d in original]
1467 1507
1468 1508 dctx = zstd.ZstdDecompressor()
1469 1509
1470 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1471 self.skipTest('multi_decompress_to_buffer not available')
1510 if not hasattr(dctx, "multi_decompress_to_buffer"):
1511 self.skipTest("multi_decompress_to_buffer not available")
1472 1512
1473 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
1474 b = zstd.BufferWithSegments(b''.join(frames), segments)
1513 segments = struct.pack(
1514 "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])
1515 )
1516 b = zstd.BufferWithSegments(b"".join(frames), segments)
1475 1517
1476 1518 result = dctx.multi_decompress_to_buffer(b)
1477 1519
@@ -1483,19 +1525,25 class TestDecompressor_multi_decompress_
1483 1525
1484 1526 def test_buffer_with_segments_sizes(self):
1485 1527 cctx = zstd.ZstdCompressor(write_content_size=False)
1486 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1528 original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
1487 1529 frames = [cctx.compress(d) for d in original]
1488 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1530 sizes = struct.pack("=" + "Q" * len(original), *map(len, original))
1489 1531
1490 1532 dctx = zstd.ZstdDecompressor()
1491 1533
1492 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1493 self.skipTest('multi_decompress_to_buffer not available')
1534 if not hasattr(dctx, "multi_decompress_to_buffer"):
1535 self.skipTest("multi_decompress_to_buffer not available")
1494 1536
1495 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
1496 len(frames[0]), len(frames[1]),
1497 len(frames[0]) + len(frames[1]), len(frames[2]))
1498 b = zstd.BufferWithSegments(b''.join(frames), segments)
1537 segments = struct.pack(
1538 "=QQQQQQ",
1539 0,
1540 len(frames[0]),
1541 len(frames[0]),
1542 len(frames[1]),
1543 len(frames[0]) + len(frames[1]),
1544 len(frames[2]),
1545 )
1546 b = zstd.BufferWithSegments(b"".join(frames), segments)
1499 1547
1500 1548 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1501 1549
@@ -1509,15 +1557,15 class TestDecompressor_multi_decompress_
1509 1557 cctx = zstd.ZstdCompressor()
1510 1558
1511 1559 original = [
1512 b'foo0' * 2,
1513 b'foo1' * 3,
1514 b'foo2' * 4,
1515 b'foo3' * 5,
1516 b'foo4' * 6,
1560 b"foo0" * 2,
1561 b"foo1" * 3,
1562 b"foo2" * 4,
1563 b"foo3" * 5,
1564 b"foo4" * 6,
1517 1565 ]
1518 1566
1519 if not hasattr(cctx, 'multi_compress_to_buffer'):
1520 self.skipTest('multi_compress_to_buffer not available')
1567 if not hasattr(cctx, "multi_compress_to_buffer"):
1568 self.skipTest("multi_compress_to_buffer not available")
1521 1569
1522 1570 frames = cctx.multi_compress_to_buffer(original)
1523 1571
@@ -1532,16 +1580,24 class TestDecompressor_multi_decompress_
1532 1580 self.assertEqual(data, decompressed[i].tobytes())
1533 1581
1534 1582 # And a manual mode.
1535 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
1536 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1537 0, len(frames[0]),
1538 len(frames[0]), len(frames[1])))
1583 b = b"".join([frames[0].tobytes(), frames[1].tobytes()])
1584 b1 = zstd.BufferWithSegments(
1585 b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]))
1586 )
1539 1587
1540 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1541 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1542 0, len(frames[2]),
1543 len(frames[2]), len(frames[3]),
1544 len(frames[2]) + len(frames[3]), len(frames[4])))
1588 b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1589 b2 = zstd.BufferWithSegments(
1590 b,
1591 struct.pack(
1592 "=QQQQQQ",
1593 0,
1594 len(frames[2]),
1595 len(frames[2]),
1596 len(frames[3]),
1597 len(frames[2]) + len(frames[3]),
1598 len(frames[4]),
1599 ),
1600 )
1545 1601
1546 1602 c = zstd.BufferWithSegmentsCollection(b1, b2)
1547 1603
@@ -1560,8 +1616,8 class TestDecompressor_multi_decompress_
1560 1616
1561 1617 dctx = zstd.ZstdDecompressor(dict_data=d)
1562 1618
1563 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1564 self.skipTest('multi_decompress_to_buffer not available')
1619 if not hasattr(dctx, "multi_decompress_to_buffer"):
1620 self.skipTest("multi_decompress_to_buffer not available")
1565 1621
1566 1622 result = dctx.multi_decompress_to_buffer(frames)
1567 1623
@@ -1571,41 +1627,44 class TestDecompressor_multi_decompress_
1571 1627 cctx = zstd.ZstdCompressor()
1572 1628
1573 1629 frames = []
1574 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
1575 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
1630 frames.extend(cctx.compress(b"x" * 64) for i in range(256))
1631 frames.extend(cctx.compress(b"y" * 64) for i in range(256))
1576 1632
1577 1633 dctx = zstd.ZstdDecompressor()
1578 1634
1579 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1580 self.skipTest('multi_decompress_to_buffer not available')
1635 if not hasattr(dctx, "multi_decompress_to_buffer"):
1636 self.skipTest("multi_decompress_to_buffer not available")
1581 1637
1582 1638 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1583 1639
1584 1640 self.assertEqual(len(result), len(frames))
1585 1641 self.assertEqual(result.size(), 2 * 64 * 256)
1586 self.assertEqual(result[0].tobytes(), b'x' * 64)
1587 self.assertEqual(result[256].tobytes(), b'y' * 64)
1642 self.assertEqual(result[0].tobytes(), b"x" * 64)
1643 self.assertEqual(result[256].tobytes(), b"y" * 64)
1588 1644
1589 1645 def test_item_failure(self):
1590 1646 cctx = zstd.ZstdCompressor()
1591 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
1647 frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)]
1592 1648
1593 frames[1] = frames[1][0:15] + b'extra' + frames[1][15:]
1649 frames[1] = frames[1][0:15] + b"extra" + frames[1][15:]
1594 1650
1595 1651 dctx = zstd.ZstdDecompressor()
1596 1652
1597 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1598 self.skipTest('multi_decompress_to_buffer not available')
1653 if not hasattr(dctx, "multi_decompress_to_buffer"):
1654 self.skipTest("multi_decompress_to_buffer not available")
1599 1655
1600 with self.assertRaisesRegexp(zstd.ZstdError,
1601 'error decompressing item 1: ('
1602 'Corrupted block|'
1603 'Destination buffer is too small)'):
1656 with self.assertRaisesRegex(
1657 zstd.ZstdError,
1658 "error decompressing item 1: ("
1659 "Corrupted block|"
1660 "Destination buffer is too small)",
1661 ):
1604 1662 dctx.multi_decompress_to_buffer(frames)
1605 1663
1606 with self.assertRaisesRegexp(zstd.ZstdError,
1607 'error decompressing item 1: ('
1608 'Corrupted block|'
1609 'Destination buffer is too small)'):
1664 with self.assertRaisesRegex(
1665 zstd.ZstdError,
1666 "error decompressing item 1: ("
1667 "Corrupted block|"
1668 "Destination buffer is too small)",
1669 ):
1610 1670 dctx.multi_decompress_to_buffer(frames, threads=2)
1611
@@ -6,29 +6,37 try:
6 6 import hypothesis
7 7 import hypothesis.strategies as strategies
8 8 except ImportError:
9 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest("hypothesis not available")
10 10
11 11 import zstandard as zstd
12 12
13 from . common import (
13 from .common import (
14 14 make_cffi,
15 15 NonClosingBytesIO,
16 16 random_input_data,
17 TestCase,
17 18 )
18 19
19 20
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
21 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 22 @make_cffi
22 class TestDecompressor_stream_reader_fuzzing(unittest.TestCase):
23 class TestDecompressor_stream_reader_fuzzing(TestCase):
23 24 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 level=strategies.integers(min_value=1, max_value=5),
27 streaming=strategies.booleans(),
28 source_read_size=strategies.integers(1, 1048576),
29 read_sizes=strategies.data())
30 def test_stream_source_read_variance(self, original, level, streaming,
31 source_read_size, read_sizes):
25 suppress_health_check=[
26 hypothesis.HealthCheck.large_base_example,
27 hypothesis.HealthCheck.too_slow,
28 ]
29 )
30 @hypothesis.given(
31 original=strategies.sampled_from(random_input_data()),
32 level=strategies.integers(min_value=1, max_value=5),
33 streaming=strategies.booleans(),
34 source_read_size=strategies.integers(1, 1048576),
35 read_sizes=strategies.data(),
36 )
37 def test_stream_source_read_variance(
38 self, original, level, streaming, source_read_size, read_sizes
39 ):
32 40 cctx = zstd.ZstdCompressor(level=level)
33 41
34 42 if streaming:
@@ -53,18 +61,22 class TestDecompressor_stream_reader_fuz
53 61
54 62 chunks.append(chunk)
55 63
56 self.assertEqual(b''.join(chunks), original)
64 self.assertEqual(b"".join(chunks), original)
57 65
58 66 # Similar to above except we have a constant read() size.
59 67 @hypothesis.settings(
60 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
61 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
62 level=strategies.integers(min_value=1, max_value=5),
63 streaming=strategies.booleans(),
64 source_read_size=strategies.integers(1, 1048576),
65 read_size=strategies.integers(-1, 131072))
66 def test_stream_source_read_size(self, original, level, streaming,
67 source_read_size, read_size):
68 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
69 )
70 @hypothesis.given(
71 original=strategies.sampled_from(random_input_data()),
72 level=strategies.integers(min_value=1, max_value=5),
73 streaming=strategies.booleans(),
74 source_read_size=strategies.integers(1, 1048576),
75 read_size=strategies.integers(-1, 131072),
76 )
77 def test_stream_source_read_size(
78 self, original, level, streaming, source_read_size, read_size
79 ):
68 80 if read_size == 0:
69 81 read_size = 1
70 82
@@ -91,17 +103,24 class TestDecompressor_stream_reader_fuz
91 103
92 104 chunks.append(chunk)
93 105
94 self.assertEqual(b''.join(chunks), original)
106 self.assertEqual(b"".join(chunks), original)
95 107
96 108 @hypothesis.settings(
97 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
98 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
99 level=strategies.integers(min_value=1, max_value=5),
100 streaming=strategies.booleans(),
101 source_read_size=strategies.integers(1, 1048576),
102 read_sizes=strategies.data())
103 def test_buffer_source_read_variance(self, original, level, streaming,
104 source_read_size, read_sizes):
109 suppress_health_check=[
110 hypothesis.HealthCheck.large_base_example,
111 hypothesis.HealthCheck.too_slow,
112 ]
113 )
114 @hypothesis.given(
115 original=strategies.sampled_from(random_input_data()),
116 level=strategies.integers(min_value=1, max_value=5),
117 streaming=strategies.booleans(),
118 source_read_size=strategies.integers(1, 1048576),
119 read_sizes=strategies.data(),
120 )
121 def test_buffer_source_read_variance(
122 self, original, level, streaming, source_read_size, read_sizes
123 ):
105 124 cctx = zstd.ZstdCompressor(level=level)
106 125
107 126 if streaming:
@@ -125,18 +144,22 class TestDecompressor_stream_reader_fuz
125 144
126 145 chunks.append(chunk)
127 146
128 self.assertEqual(b''.join(chunks), original)
147 self.assertEqual(b"".join(chunks), original)
129 148
130 149 # Similar to above except we have a constant read() size.
131 150 @hypothesis.settings(
132 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
133 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
134 level=strategies.integers(min_value=1, max_value=5),
135 streaming=strategies.booleans(),
136 source_read_size=strategies.integers(1, 1048576),
137 read_size=strategies.integers(-1, 131072))
138 def test_buffer_source_constant_read_size(self, original, level, streaming,
139 source_read_size, read_size):
151 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
152 )
153 @hypothesis.given(
154 original=strategies.sampled_from(random_input_data()),
155 level=strategies.integers(min_value=1, max_value=5),
156 streaming=strategies.booleans(),
157 source_read_size=strategies.integers(1, 1048576),
158 read_size=strategies.integers(-1, 131072),
159 )
160 def test_buffer_source_constant_read_size(
161 self, original, level, streaming, source_read_size, read_size
162 ):
140 163 if read_size == 0:
141 164 read_size = -1
142 165
@@ -162,16 +185,18 class TestDecompressor_stream_reader_fuz
162 185
163 186 chunks.append(chunk)
164 187
165 self.assertEqual(b''.join(chunks), original)
188 self.assertEqual(b"".join(chunks), original)
166 189
167 190 @hypothesis.settings(
168 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
169 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
170 level=strategies.integers(min_value=1, max_value=5),
171 streaming=strategies.booleans(),
172 source_read_size=strategies.integers(1, 1048576))
173 def test_stream_source_readall(self, original, level, streaming,
174 source_read_size):
191 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
192 )
193 @hypothesis.given(
194 original=strategies.sampled_from(random_input_data()),
195 level=strategies.integers(min_value=1, max_value=5),
196 streaming=strategies.booleans(),
197 source_read_size=strategies.integers(1, 1048576),
198 )
199 def test_stream_source_readall(self, original, level, streaming, source_read_size):
175 200 cctx = zstd.ZstdCompressor(level=level)
176 201
177 202 if streaming:
@@ -190,14 +215,21 class TestDecompressor_stream_reader_fuz
190 215 self.assertEqual(data, original)
191 216
192 217 @hypothesis.settings(
193 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
194 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
195 level=strategies.integers(min_value=1, max_value=5),
196 streaming=strategies.booleans(),
197 source_read_size=strategies.integers(1, 1048576),
198 read_sizes=strategies.data())
199 def test_stream_source_read1_variance(self, original, level, streaming,
200 source_read_size, read_sizes):
218 suppress_health_check=[
219 hypothesis.HealthCheck.large_base_example,
220 hypothesis.HealthCheck.too_slow,
221 ]
222 )
223 @hypothesis.given(
224 original=strategies.sampled_from(random_input_data()),
225 level=strategies.integers(min_value=1, max_value=5),
226 streaming=strategies.booleans(),
227 source_read_size=strategies.integers(1, 1048576),
228 read_sizes=strategies.data(),
229 )
230 def test_stream_source_read1_variance(
231 self, original, level, streaming, source_read_size, read_sizes
232 ):
201 233 cctx = zstd.ZstdCompressor(level=level)
202 234
203 235 if streaming:
@@ -222,17 +254,24 class TestDecompressor_stream_reader_fuz
222 254
223 255 chunks.append(chunk)
224 256
225 self.assertEqual(b''.join(chunks), original)
257 self.assertEqual(b"".join(chunks), original)
226 258
227 259 @hypothesis.settings(
228 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
229 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
230 level=strategies.integers(min_value=1, max_value=5),
231 streaming=strategies.booleans(),
232 source_read_size=strategies.integers(1, 1048576),
233 read_sizes=strategies.data())
234 def test_stream_source_readinto1_variance(self, original, level, streaming,
235 source_read_size, read_sizes):
260 suppress_health_check=[
261 hypothesis.HealthCheck.large_base_example,
262 hypothesis.HealthCheck.too_slow,
263 ]
264 )
265 @hypothesis.given(
266 original=strategies.sampled_from(random_input_data()),
267 level=strategies.integers(min_value=1, max_value=5),
268 streaming=strategies.booleans(),
269 source_read_size=strategies.integers(1, 1048576),
270 read_sizes=strategies.data(),
271 )
272 def test_stream_source_readinto1_variance(
273 self, original, level, streaming, source_read_size, read_sizes
274 ):
236 275 cctx = zstd.ZstdCompressor(level=level)
237 276
238 277 if streaming:
@@ -259,18 +298,24 class TestDecompressor_stream_reader_fuz
259 298
260 299 chunks.append(bytes(b[0:count]))
261 300
262 self.assertEqual(b''.join(chunks), original)
301 self.assertEqual(b"".join(chunks), original)
263 302
264 303 @hypothesis.settings(
265 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
304 suppress_health_check=[
305 hypothesis.HealthCheck.large_base_example,
306 hypothesis.HealthCheck.too_slow,
307 ]
308 )
266 309 @hypothesis.given(
267 310 original=strategies.sampled_from(random_input_data()),
268 311 level=strategies.integers(min_value=1, max_value=5),
269 312 source_read_size=strategies.integers(1, 1048576),
270 313 seek_amounts=strategies.data(),
271 read_sizes=strategies.data())
272 def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
273 read_sizes):
314 read_sizes=strategies.data(),
315 )
316 def test_relative_seeks(
317 self, original, level, source_read_size, seek_amounts, read_sizes
318 ):
274 319 cctx = zstd.ZstdCompressor(level=level)
275 320 frame = cctx.compress(original)
276 321
@@ -288,18 +333,24 class TestDecompressor_stream_reader_fuz
288 333 if not chunk:
289 334 break
290 335
291 self.assertEqual(original[offset:offset + len(chunk)], chunk)
336 self.assertEqual(original[offset : offset + len(chunk)], chunk)
292 337
293 338 @hypothesis.settings(
294 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
339 suppress_health_check=[
340 hypothesis.HealthCheck.large_base_example,
341 hypothesis.HealthCheck.too_slow,
342 ]
343 )
295 344 @hypothesis.given(
296 345 originals=strategies.data(),
297 346 frame_count=strategies.integers(min_value=2, max_value=10),
298 347 level=strategies.integers(min_value=1, max_value=5),
299 348 source_read_size=strategies.integers(1, 1048576),
300 read_sizes=strategies.data())
301 def test_multiple_frames(self, originals, frame_count, level,
302 source_read_size, read_sizes):
349 read_sizes=strategies.data(),
350 )
351 def test_multiple_frames(
352 self, originals, frame_count, level, source_read_size, read_sizes
353 ):
303 354
304 355 cctx = zstd.ZstdCompressor(level=level)
305 356 source = io.BytesIO()
@@ -314,8 +365,9 class TestDecompressor_stream_reader_fuz
314 365
315 366 dctx = zstd.ZstdDecompressor()
316 367 buffer.seek(0)
317 reader = dctx.stream_reader(buffer, read_size=source_read_size,
318 read_across_frames=True)
368 reader = dctx.stream_reader(
369 buffer, read_size=source_read_size, read_across_frames=True
370 )
319 371
320 372 chunks = []
321 373
@@ -328,16 +380,24 class TestDecompressor_stream_reader_fuz
328 380
329 381 chunks.append(chunk)
330 382
331 self.assertEqual(source.getvalue(), b''.join(chunks))
383 self.assertEqual(source.getvalue(), b"".join(chunks))
332 384
333 385
334 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
386 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
335 387 @make_cffi
336 class TestDecompressor_stream_writer_fuzzing(unittest.TestCase):
337 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
338 level=strategies.integers(min_value=1, max_value=5),
339 write_size=strategies.integers(min_value=1, max_value=8192),
340 input_sizes=strategies.data())
388 class TestDecompressor_stream_writer_fuzzing(TestCase):
389 @hypothesis.settings(
390 suppress_health_check=[
391 hypothesis.HealthCheck.large_base_example,
392 hypothesis.HealthCheck.too_slow,
393 ]
394 )
395 @hypothesis.given(
396 original=strategies.sampled_from(random_input_data()),
397 level=strategies.integers(min_value=1, max_value=5),
398 write_size=strategies.integers(min_value=1, max_value=8192),
399 input_sizes=strategies.data(),
400 )
341 401 def test_write_size_variance(self, original, level, write_size, input_sizes):
342 402 cctx = zstd.ZstdCompressor(level=level)
343 403 frame = cctx.compress(original)
@@ -358,13 +418,21 class TestDecompressor_stream_writer_fuz
358 418 self.assertEqual(dest.getvalue(), original)
359 419
360 420
361 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
421 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
362 422 @make_cffi
363 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
364 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
365 level=strategies.integers(min_value=1, max_value=5),
366 read_size=strategies.integers(min_value=1, max_value=8192),
367 write_size=strategies.integers(min_value=1, max_value=8192))
423 class TestDecompressor_copy_stream_fuzzing(TestCase):
424 @hypothesis.settings(
425 suppress_health_check=[
426 hypothesis.HealthCheck.large_base_example,
427 hypothesis.HealthCheck.too_slow,
428 ]
429 )
430 @hypothesis.given(
431 original=strategies.sampled_from(random_input_data()),
432 level=strategies.integers(min_value=1, max_value=5),
433 read_size=strategies.integers(min_value=1, max_value=8192),
434 write_size=strategies.integers(min_value=1, max_value=8192),
435 )
368 436 def test_read_write_size_variance(self, original, level, read_size, write_size):
369 437 cctx = zstd.ZstdCompressor(level=level)
370 438 frame = cctx.compress(original)
@@ -378,12 +446,20 class TestDecompressor_copy_stream_fuzzi
378 446 self.assertEqual(dest.getvalue(), original)
379 447
380 448
381 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
449 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
382 450 @make_cffi
383 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
384 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
385 level=strategies.integers(min_value=1, max_value=5),
386 chunk_sizes=strategies.data())
451 class TestDecompressor_decompressobj_fuzzing(TestCase):
452 @hypothesis.settings(
453 suppress_health_check=[
454 hypothesis.HealthCheck.large_base_example,
455 hypothesis.HealthCheck.too_slow,
456 ]
457 )
458 @hypothesis.given(
459 original=strategies.sampled_from(random_input_data()),
460 level=strategies.integers(min_value=1, max_value=5),
461 chunk_sizes=strategies.data(),
462 )
387 463 def test_random_input_sizes(self, original, level, chunk_sizes):
388 464 cctx = zstd.ZstdCompressor(level=level)
389 465 frame = cctx.compress(original)
@@ -402,13 +478,22 class TestDecompressor_decompressobj_fuz
402 478
403 479 chunks.append(dobj.decompress(chunk))
404 480
405 self.assertEqual(b''.join(chunks), original)
481 self.assertEqual(b"".join(chunks), original)
406 482
407 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
408 level=strategies.integers(min_value=1, max_value=5),
409 write_size=strategies.integers(min_value=1,
410 max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE),
411 chunk_sizes=strategies.data())
483 @hypothesis.settings(
484 suppress_health_check=[
485 hypothesis.HealthCheck.large_base_example,
486 hypothesis.HealthCheck.too_slow,
487 ]
488 )
489 @hypothesis.given(
490 original=strategies.sampled_from(random_input_data()),
491 level=strategies.integers(min_value=1, max_value=5),
492 write_size=strategies.integers(
493 min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
494 ),
495 chunk_sizes=strategies.data(),
496 )
412 497 def test_random_output_sizes(self, original, level, write_size, chunk_sizes):
413 498 cctx = zstd.ZstdCompressor(level=level)
414 499 frame = cctx.compress(original)
@@ -427,16 +512,18 class TestDecompressor_decompressobj_fuz
427 512
428 513 chunks.append(dobj.decompress(chunk))
429 514
430 self.assertEqual(b''.join(chunks), original)
515 self.assertEqual(b"".join(chunks), original)
431 516
432 517
433 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
518 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
434 519 @make_cffi
435 class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase):
436 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
437 level=strategies.integers(min_value=1, max_value=5),
438 read_size=strategies.integers(min_value=1, max_value=4096),
439 write_size=strategies.integers(min_value=1, max_value=4096))
520 class TestDecompressor_read_to_iter_fuzzing(TestCase):
521 @hypothesis.given(
522 original=strategies.sampled_from(random_input_data()),
523 level=strategies.integers(min_value=1, max_value=5),
524 read_size=strategies.integers(min_value=1, max_value=4096),
525 write_size=strategies.integers(min_value=1, max_value=4096),
526 )
440 527 def test_read_write_size_variance(self, original, level, read_size, write_size):
441 528 cctx = zstd.ZstdCompressor(level=level)
442 529 frame = cctx.compress(original)
@@ -444,29 +531,33 class TestDecompressor_read_to_iter_fuzz
444 531 source = io.BytesIO(frame)
445 532
446 533 dctx = zstd.ZstdDecompressor()
447 chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size))
534 chunks = list(
535 dctx.read_to_iter(source, read_size=read_size, write_size=write_size)
536 )
448 537
449 self.assertEqual(b''.join(chunks), original)
538 self.assertEqual(b"".join(chunks), original)
450 539
451 540
452 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
453 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
454 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
455 min_size=1, max_size=1024),
456 threads=strategies.integers(min_value=1, max_value=8),
457 use_dict=strategies.booleans())
541 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
542 class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase):
543 @hypothesis.given(
544 original=strategies.lists(
545 strategies.sampled_from(random_input_data()), min_size=1, max_size=1024
546 ),
547 threads=strategies.integers(min_value=1, max_value=8),
548 use_dict=strategies.booleans(),
549 )
458 550 def test_data_equivalence(self, original, threads, use_dict):
459 551 kwargs = {}
460 552 if use_dict:
461 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
553 kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])
462 554
463 cctx = zstd.ZstdCompressor(level=1,
464 write_content_size=True,
465 write_checksum=True,
466 **kwargs)
555 cctx = zstd.ZstdCompressor(
556 level=1, write_content_size=True, write_checksum=True, **kwargs
557 )
467 558
468 if not hasattr(cctx, 'multi_compress_to_buffer'):
469 self.skipTest('multi_compress_to_buffer not available')
559 if not hasattr(cctx, "multi_compress_to_buffer"):
560 self.skipTest("multi_compress_to_buffer not available")
470 561
471 562 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
472 563
@@ -2,14 +2,14 import unittest
2 2
3 3 import zstandard as zstd
4 4
5 from . common import (
5 from .common import (
6 6 make_cffi,
7 TestCase,
7 8 )
8 9
9 10
10 11 @make_cffi
11 class TestSizes(unittest.TestCase):
12 class TestSizes(TestCase):
12 13 def test_decompression_size(self):
13 14 size = zstd.estimate_decompression_context_size()
14 15 self.assertGreater(size, 100000)
15
@@ -4,65 +4,66 import unittest
4 4
5 5 import zstandard as zstd
6 6
7 from . common import (
7 from .common import (
8 8 make_cffi,
9 TestCase,
9 10 )
10 11
11 12
12 13 @make_cffi
13 class TestModuleAttributes(unittest.TestCase):
14 class TestModuleAttributes(TestCase):
14 15 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3))
16 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4))
16 17
17 self.assertEqual(zstd.__version__, '0.12.0')
18 self.assertEqual(zstd.__version__, "0.13.0")
18 19
19 20 def test_constants(self):
20 21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
22 self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd")
22 23
23 24 def test_hasattr(self):
24 25 attrs = (
25 'CONTENTSIZE_UNKNOWN',
26 'CONTENTSIZE_ERROR',
27 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
28 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
31 'MAGIC_NUMBER',
32 'FLUSH_BLOCK',
33 'FLUSH_FRAME',
34 'BLOCKSIZELOG_MAX',
35 'BLOCKSIZE_MAX',
36 'WINDOWLOG_MIN',
37 'WINDOWLOG_MAX',
38 'CHAINLOG_MIN',
39 'CHAINLOG_MAX',
40 'HASHLOG_MIN',
41 'HASHLOG_MAX',
42 'HASHLOG3_MAX',
43 'MINMATCH_MIN',
44 'MINMATCH_MAX',
45 'SEARCHLOG_MIN',
46 'SEARCHLOG_MAX',
47 'SEARCHLENGTH_MIN',
48 'SEARCHLENGTH_MAX',
49 'TARGETLENGTH_MIN',
50 'TARGETLENGTH_MAX',
51 'LDM_MINMATCH_MIN',
52 'LDM_MINMATCH_MAX',
53 'LDM_BUCKETSIZELOG_MAX',
54 'STRATEGY_FAST',
55 'STRATEGY_DFAST',
56 'STRATEGY_GREEDY',
57 'STRATEGY_LAZY',
58 'STRATEGY_LAZY2',
59 'STRATEGY_BTLAZY2',
60 'STRATEGY_BTOPT',
61 'STRATEGY_BTULTRA',
62 'STRATEGY_BTULTRA2',
63 'DICT_TYPE_AUTO',
64 'DICT_TYPE_RAWCONTENT',
65 'DICT_TYPE_FULLDICT',
26 "CONTENTSIZE_UNKNOWN",
27 "CONTENTSIZE_ERROR",
28 "COMPRESSION_RECOMMENDED_INPUT_SIZE",
29 "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
30 "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
31 "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
32 "MAGIC_NUMBER",
33 "FLUSH_BLOCK",
34 "FLUSH_FRAME",
35 "BLOCKSIZELOG_MAX",
36 "BLOCKSIZE_MAX",
37 "WINDOWLOG_MIN",
38 "WINDOWLOG_MAX",
39 "CHAINLOG_MIN",
40 "CHAINLOG_MAX",
41 "HASHLOG_MIN",
42 "HASHLOG_MAX",
43 "HASHLOG3_MAX",
44 "MINMATCH_MIN",
45 "MINMATCH_MAX",
46 "SEARCHLOG_MIN",
47 "SEARCHLOG_MAX",
48 "SEARCHLENGTH_MIN",
49 "SEARCHLENGTH_MAX",
50 "TARGETLENGTH_MIN",
51 "TARGETLENGTH_MAX",
52 "LDM_MINMATCH_MIN",
53 "LDM_MINMATCH_MAX",
54 "LDM_BUCKETSIZELOG_MAX",
55 "STRATEGY_FAST",
56 "STRATEGY_DFAST",
57 "STRATEGY_GREEDY",
58 "STRATEGY_LAZY",
59 "STRATEGY_LAZY2",
60 "STRATEGY_BTLAZY2",
61 "STRATEGY_BTOPT",
62 "STRATEGY_BTULTRA",
63 "STRATEGY_BTULTRA2",
64 "DICT_TYPE_AUTO",
65 "DICT_TYPE_RAWCONTENT",
66 "DICT_TYPE_FULLDICT",
66 67 )
67 68
68 69 for a in attrs:
@@ -4,10 +4,11 import unittest
4 4
5 5 import zstandard as zstd
6 6
7 from . common import (
7 from .common import (
8 8 generate_samples,
9 9 make_cffi,
10 10 random_input_data,
11 TestCase,
11 12 )
12 13
13 14 if sys.version_info[0] >= 3:
@@ -17,24 +18,24 else:
17 18
18 19
19 20 @make_cffi
20 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(TestCase):
21 22 def test_no_args(self):
22 23 with self.assertRaises(TypeError):
23 24 zstd.train_dictionary()
24 25
25 26 def test_bad_args(self):
26 27 with self.assertRaises(TypeError):
27 zstd.train_dictionary(8192, u'foo')
28 zstd.train_dictionary(8192, u"foo")
28 29
29 30 with self.assertRaises(ValueError):
30 zstd.train_dictionary(8192, [u'foo'])
31 zstd.train_dictionary(8192, [u"foo"])
31 32
32 33 def test_no_params(self):
33 34 d = zstd.train_dictionary(8192, random_input_data())
34 35 self.assertIsInstance(d.dict_id(), int_type)
35 36
36 37 # The dictionary ID may be different across platforms.
37 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
38 expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())
38 39
39 40 data = d.as_bytes()
40 41 self.assertEqual(data[0:8], expected)
@@ -44,46 +45,48 class TestTrainDictionary(unittest.TestC
44 45 self.assertIsInstance(d.dict_id(), int_type)
45 46
46 47 data = d.as_bytes()
47 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
48 self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")
48 49
49 50 self.assertEqual(d.k, 64)
50 51 self.assertEqual(d.d, 16)
51 52
52 53 def test_set_dict_id(self):
53 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
54 dict_id=42)
54 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42)
55 55 self.assertEqual(d.dict_id(), 42)
56 56
57 57 def test_optimize(self):
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
59 d=16)
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16)
60 59
61 60 # This varies by platform.
62 61 self.assertIn(d.k, (50, 2000))
63 62 self.assertEqual(d.d, 16)
64 63
64
65 65 @make_cffi
66 class TestCompressionDict(unittest.TestCase):
66 class TestCompressionDict(TestCase):
67 67 def test_bad_mode(self):
68 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
69 zstd.ZstdCompressionDict(b'foo', dict_type=42)
68 with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
69 zstd.ZstdCompressionDict(b"foo", dict_type=42)
70 70
71 71 def test_bad_precompute_compress(self):
72 72 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
73 73
74 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
74 with self.assertRaisesRegex(ValueError, "must specify one of level or "):
75 75 d.precompute_compress()
76 76
77 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
78 d.precompute_compress(level=3,
79 compression_params=zstd.CompressionParameters())
77 with self.assertRaisesRegex(ValueError, "must only specify one of level or "):
78 d.precompute_compress(
79 level=3, compression_params=zstd.CompressionParameters()
80 )
80 81
81 82 def test_precompute_compress_rawcontent(self):
82 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
83 dict_type=zstd.DICT_TYPE_RAWCONTENT)
83 d = zstd.ZstdCompressionDict(
84 b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
85 )
84 86 d.precompute_compress(level=1)
85 87
86 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
87 dict_type=zstd.DICT_TYPE_FULLDICT)
88 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
88 d = zstd.ZstdCompressionDict(
89 b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
90 )
91 with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"):
89 92 d.precompute_compress(level=1)
@@ -28,38 +28,48 import platform
28 28 # defining a variable and `setup.py` could write the file with whatever
29 29 # policy was specified at build time. Until someone needs it, we go with
30 30 # the hacky but simple environment variable approach.
31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
31 _module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default")
32 32
33 if _module_policy == 'default':
34 if platform.python_implementation() in ('CPython',):
33 if _module_policy == "default":
34 if platform.python_implementation() in ("CPython",):
35 35 from zstd import *
36 backend = 'cext'
37 elif platform.python_implementation() in ('PyPy',):
36
37 backend = "cext"
38 elif platform.python_implementation() in ("PyPy",):
38 39 from .cffi import *
39 backend = 'cffi'
40
41 backend = "cffi"
40 42 else:
41 43 try:
42 44 from zstd import *
43 backend = 'cext'
45
46 backend = "cext"
44 47 except ImportError:
45 48 from .cffi import *
46 backend = 'cffi'
47 elif _module_policy == 'cffi_fallback':
49
50 backend = "cffi"
51 elif _module_policy == "cffi_fallback":
48 52 try:
49 53 from zstd import *
50 backend = 'cext'
54
55 backend = "cext"
51 56 except ImportError:
52 57 from .cffi import *
53 backend = 'cffi'
54 elif _module_policy == 'cext':
58
59 backend = "cffi"
60 elif _module_policy == "cext":
55 61 from zstd import *
56 backend = 'cext'
57 elif _module_policy == 'cffi':
62
63 backend = "cext"
64 elif _module_policy == "cffi":
58 65 from .cffi import *
59 backend = 'cffi'
66
67 backend = "cffi"
60 68 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 'cext, or cffi' % _module_policy)
69 raise ImportError(
70 "unknown module import policy: %s; use default, cffi_fallback, "
71 "cext, or cffi" % _module_policy
72 )
63 73
64 74 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.12.0'
75 __version__ = "0.13.0"
This diff has been collapsed as it changes many lines, (1196 lines changed) Show them Hide them
@@ -14,68 +14,67 from __future__ import absolute_import,
14 14 #'BufferSegments',
15 15 #'BufferWithSegments',
16 16 #'BufferWithSegmentsCollection',
17 'CompressionParameters',
18 'ZstdCompressionDict',
19 'ZstdCompressionParameters',
20 'ZstdCompressor',
21 'ZstdError',
22 'ZstdDecompressor',
23 'FrameParameters',
24 'estimate_decompression_context_size',
25 'frame_content_size',
26 'frame_header_size',
27 'get_frame_parameters',
28 'train_dictionary',
29
17 "CompressionParameters",
18 "ZstdCompressionDict",
19 "ZstdCompressionParameters",
20 "ZstdCompressor",
21 "ZstdError",
22 "ZstdDecompressor",
23 "FrameParameters",
24 "estimate_decompression_context_size",
25 "frame_content_size",
26 "frame_header_size",
27 "get_frame_parameters",
28 "train_dictionary",
30 29 # Constants.
31 'FLUSH_BLOCK',
32 'FLUSH_FRAME',
33 'COMPRESSOBJ_FLUSH_FINISH',
34 'COMPRESSOBJ_FLUSH_BLOCK',
35 'ZSTD_VERSION',
36 'FRAME_HEADER',
37 'CONTENTSIZE_UNKNOWN',
38 'CONTENTSIZE_ERROR',
39 'MAX_COMPRESSION_LEVEL',
40 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
41 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
42 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
43 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
44 'MAGIC_NUMBER',
45 'BLOCKSIZELOG_MAX',
46 'BLOCKSIZE_MAX',
47 'WINDOWLOG_MIN',
48 'WINDOWLOG_MAX',
49 'CHAINLOG_MIN',
50 'CHAINLOG_MAX',
51 'HASHLOG_MIN',
52 'HASHLOG_MAX',
53 'HASHLOG3_MAX',
54 'MINMATCH_MIN',
55 'MINMATCH_MAX',
56 'SEARCHLOG_MIN',
57 'SEARCHLOG_MAX',
58 'SEARCHLENGTH_MIN',
59 'SEARCHLENGTH_MAX',
60 'TARGETLENGTH_MIN',
61 'TARGETLENGTH_MAX',
62 'LDM_MINMATCH_MIN',
63 'LDM_MINMATCH_MAX',
64 'LDM_BUCKETSIZELOG_MAX',
65 'STRATEGY_FAST',
66 'STRATEGY_DFAST',
67 'STRATEGY_GREEDY',
68 'STRATEGY_LAZY',
69 'STRATEGY_LAZY2',
70 'STRATEGY_BTLAZY2',
71 'STRATEGY_BTOPT',
72 'STRATEGY_BTULTRA',
73 'STRATEGY_BTULTRA2',
74 'DICT_TYPE_AUTO',
75 'DICT_TYPE_RAWCONTENT',
76 'DICT_TYPE_FULLDICT',
77 'FORMAT_ZSTD1',
78 'FORMAT_ZSTD1_MAGICLESS',
30 "FLUSH_BLOCK",
31 "FLUSH_FRAME",
32 "COMPRESSOBJ_FLUSH_FINISH",
33 "COMPRESSOBJ_FLUSH_BLOCK",
34 "ZSTD_VERSION",
35 "FRAME_HEADER",
36 "CONTENTSIZE_UNKNOWN",
37 "CONTENTSIZE_ERROR",
38 "MAX_COMPRESSION_LEVEL",
39 "COMPRESSION_RECOMMENDED_INPUT_SIZE",
40 "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
41 "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
42 "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
43 "MAGIC_NUMBER",
44 "BLOCKSIZELOG_MAX",
45 "BLOCKSIZE_MAX",
46 "WINDOWLOG_MIN",
47 "WINDOWLOG_MAX",
48 "CHAINLOG_MIN",
49 "CHAINLOG_MAX",
50 "HASHLOG_MIN",
51 "HASHLOG_MAX",
52 "HASHLOG3_MAX",
53 "MINMATCH_MIN",
54 "MINMATCH_MAX",
55 "SEARCHLOG_MIN",
56 "SEARCHLOG_MAX",
57 "SEARCHLENGTH_MIN",
58 "SEARCHLENGTH_MAX",
59 "TARGETLENGTH_MIN",
60 "TARGETLENGTH_MAX",
61 "LDM_MINMATCH_MIN",
62 "LDM_MINMATCH_MAX",
63 "LDM_BUCKETSIZELOG_MAX",
64 "STRATEGY_FAST",
65 "STRATEGY_DFAST",
66 "STRATEGY_GREEDY",
67 "STRATEGY_LAZY",
68 "STRATEGY_LAZY2",
69 "STRATEGY_BTLAZY2",
70 "STRATEGY_BTOPT",
71 "STRATEGY_BTULTRA",
72 "STRATEGY_BTULTRA2",
73 "DICT_TYPE_AUTO",
74 "DICT_TYPE_RAWCONTENT",
75 "DICT_TYPE_FULLDICT",
76 "FORMAT_ZSTD1",
77 "FORMAT_ZSTD1_MAGICLESS",
79 78 ]
80 79
81 80 import io
@@ -105,10 +104,14 new_nonzero = ffi.new_allocator(should_c
105 104
106 105 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
107 106 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
108 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
107 FRAME_HEADER = b"\x28\xb5\x2f\xfd"
109 108 CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSIZE_UNKNOWN
110 109 CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
111 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
110 ZSTD_VERSION = (
111 lib.ZSTD_VERSION_MAJOR,
112 lib.ZSTD_VERSION_MINOR,
113 lib.ZSTD_VERSION_RELEASE,
114 )
112 115
113 116 BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX
114 117 BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX
@@ -165,9 +168,9 def _cpu_count():
165 168 # Linux.
166 169 try:
167 170 if sys.version_info[0] == 2:
168 return os.sysconf(b'SC_NPROCESSORS_ONLN')
171 return os.sysconf(b"SC_NPROCESSORS_ONLN")
169 172 else:
170 return os.sysconf(u'SC_NPROCESSORS_ONLN')
173 return os.sysconf("SC_NPROCESSORS_ONLN")
171 174 except (AttributeError, ValueError):
172 175 pass
173 176
@@ -183,7 +186,8 def _zstd_error(zresult):
183 186 # Resolves to bytes on Python 2 and 3. We use the string for formatting
184 187 # into error messages, which will be literal unicode. So convert it to
185 188 # unicode.
186 return ffi.string(lib.ZSTD_getErrorName(zresult)).decode('utf-8')
189 return ffi.string(lib.ZSTD_getErrorName(zresult)).decode("utf-8")
190
187 191
188 192 def _make_cctx_params(params):
189 193 res = lib.ZSTD_createCCtxParams()
@@ -221,19 +225,20 def _make_cctx_params(params):
221 225
222 226 return res
223 227
228
224 229 class ZstdCompressionParameters(object):
225 230 @staticmethod
226 231 def from_level(level, source_size=0, dict_size=0, **kwargs):
227 232 params = lib.ZSTD_getCParams(level, source_size, dict_size)
228 233
229 234 args = {
230 'window_log': 'windowLog',
231 'chain_log': 'chainLog',
232 'hash_log': 'hashLog',
233 'search_log': 'searchLog',
234 'min_match': 'minMatch',
235 'target_length': 'targetLength',
236 'compression_strategy': 'strategy',
235 "window_log": "windowLog",
236 "chain_log": "chainLog",
237 "hash_log": "hashLog",
238 "search_log": "searchLog",
239 "min_match": "minMatch",
240 "target_length": "targetLength",
241 "compression_strategy": "strategy",
237 242 }
238 243
239 244 for arg, attr in args.items():
@@ -242,14 +247,33 class ZstdCompressionParameters(object):
242 247
243 248 return ZstdCompressionParameters(**kwargs)
244 249
245 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
246 chain_log=0, search_log=0, min_match=0, target_length=0,
247 strategy=-1, compression_strategy=-1,
248 write_content_size=1, write_checksum=0,
249 write_dict_id=0, job_size=0, overlap_log=-1,
250 overlap_size_log=-1, force_max_window=0, enable_ldm=0,
251 ldm_hash_log=0, ldm_min_match=0, ldm_bucket_size_log=0,
252 ldm_hash_rate_log=-1, ldm_hash_every_log=-1, threads=0):
250 def __init__(
251 self,
252 format=0,
253 compression_level=0,
254 window_log=0,
255 hash_log=0,
256 chain_log=0,
257 search_log=0,
258 min_match=0,
259 target_length=0,
260 strategy=-1,
261 compression_strategy=-1,
262 write_content_size=1,
263 write_checksum=0,
264 write_dict_id=0,
265 job_size=0,
266 overlap_log=-1,
267 overlap_size_log=-1,
268 force_max_window=0,
269 enable_ldm=0,
270 ldm_hash_log=0,
271 ldm_min_match=0,
272 ldm_bucket_size_log=0,
273 ldm_hash_rate_log=-1,
274 ldm_hash_every_log=-1,
275 threads=0,
276 ):
253 277
254 278 params = lib.ZSTD_createCCtxParams()
255 279 if params == ffi.NULL:
@@ -267,7 +291,9 class ZstdCompressionParameters(object):
267 291 _set_compression_parameter(params, lib.ZSTD_c_nbWorkers, threads)
268 292
269 293 _set_compression_parameter(params, lib.ZSTD_c_format, format)
270 _set_compression_parameter(params, lib.ZSTD_c_compressionLevel, compression_level)
294 _set_compression_parameter(
295 params, lib.ZSTD_c_compressionLevel, compression_level
296 )
271 297 _set_compression_parameter(params, lib.ZSTD_c_windowLog, window_log)
272 298 _set_compression_parameter(params, lib.ZSTD_c_hashLog, hash_log)
273 299 _set_compression_parameter(params, lib.ZSTD_c_chainLog, chain_log)
@@ -276,7 +302,7 class ZstdCompressionParameters(object):
276 302 _set_compression_parameter(params, lib.ZSTD_c_targetLength, target_length)
277 303
278 304 if strategy != -1 and compression_strategy != -1:
279 raise ValueError('cannot specify both compression_strategy and strategy')
305 raise ValueError("cannot specify both compression_strategy and strategy")
280 306
281 307 if compression_strategy != -1:
282 308 strategy = compression_strategy
@@ -284,13 +310,15 class ZstdCompressionParameters(object):
284 310 strategy = 0
285 311
286 312 _set_compression_parameter(params, lib.ZSTD_c_strategy, strategy)
287 _set_compression_parameter(params, lib.ZSTD_c_contentSizeFlag, write_content_size)
313 _set_compression_parameter(
314 params, lib.ZSTD_c_contentSizeFlag, write_content_size
315 )
288 316 _set_compression_parameter(params, lib.ZSTD_c_checksumFlag, write_checksum)
289 317 _set_compression_parameter(params, lib.ZSTD_c_dictIDFlag, write_dict_id)
290 318 _set_compression_parameter(params, lib.ZSTD_c_jobSize, job_size)
291 319
292 320 if overlap_log != -1 and overlap_size_log != -1:
293 raise ValueError('cannot specify both overlap_log and overlap_size_log')
321 raise ValueError("cannot specify both overlap_log and overlap_size_log")
294 322
295 323 if overlap_size_log != -1:
296 324 overlap_log = overlap_size_log
@@ -299,13 +327,19 class ZstdCompressionParameters(object):
299 327
300 328 _set_compression_parameter(params, lib.ZSTD_c_overlapLog, overlap_log)
301 329 _set_compression_parameter(params, lib.ZSTD_c_forceMaxWindow, force_max_window)
302 _set_compression_parameter(params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm)
330 _set_compression_parameter(
331 params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm
332 )
303 333 _set_compression_parameter(params, lib.ZSTD_c_ldmHashLog, ldm_hash_log)
304 334 _set_compression_parameter(params, lib.ZSTD_c_ldmMinMatch, ldm_min_match)
305 _set_compression_parameter(params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log)
335 _set_compression_parameter(
336 params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log
337 )
306 338
307 339 if ldm_hash_rate_log != -1 and ldm_hash_every_log != -1:
308 raise ValueError('cannot specify both ldm_hash_rate_log and ldm_hash_every_log')
340 raise ValueError(
341 "cannot specify both ldm_hash_rate_log and ldm_hash_every_log"
342 )
309 343
310 344 if ldm_hash_every_log != -1:
311 345 ldm_hash_rate_log = ldm_hash_every_log
@@ -380,7 +414,9 class ZstdCompressionParameters(object):
380 414
381 415 @property
382 416 def enable_ldm(self):
383 return _get_compression_parameter(self._params, lib.ZSTD_c_enableLongDistanceMatching)
417 return _get_compression_parameter(
418 self._params, lib.ZSTD_c_enableLongDistanceMatching
419 )
384 420
385 421 @property
386 422 def ldm_hash_log(self):
@@ -409,8 +445,10 class ZstdCompressionParameters(object):
409 445 def estimated_compression_context_size(self):
410 446 return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self._params)
411 447
448
412 449 CompressionParameters = ZstdCompressionParameters
413 450
451
414 452 def estimate_decompression_context_size():
415 453 return lib.ZSTD_estimateDCtxSize()
416 454
@@ -418,24 +456,25 def estimate_decompression_context_size(
418 456 def _set_compression_parameter(params, param, value):
419 457 zresult = lib.ZSTD_CCtxParams_setParameter(params, param, value)
420 458 if lib.ZSTD_isError(zresult):
421 raise ZstdError('unable to set compression context parameter: %s' %
422 _zstd_error(zresult))
459 raise ZstdError(
460 "unable to set compression context parameter: %s" % _zstd_error(zresult)
461 )
423 462
424 463
425 464 def _get_compression_parameter(params, param):
426 result = ffi.new('int *')
465 result = ffi.new("int *")
427 466
428 467 zresult = lib.ZSTD_CCtxParams_getParameter(params, param, result)
429 468 if lib.ZSTD_isError(zresult):
430 raise ZstdError('unable to get compression context parameter: %s' %
431 _zstd_error(zresult))
469 raise ZstdError(
470 "unable to get compression context parameter: %s" % _zstd_error(zresult)
471 )
432 472
433 473 return result[0]
434 474
435 475
436 476 class ZstdCompressionWriter(object):
437 def __init__(self, compressor, writer, source_size, write_size,
438 write_return_read):
477 def __init__(self, compressor, writer, source_size, write_size, write_return_read):
439 478 self._compressor = compressor
440 479 self._writer = writer
441 480 self._write_size = write_size
@@ -444,24 +483,22 class ZstdCompressionWriter(object):
444 483 self._closed = False
445 484 self._bytes_compressed = 0
446 485
447 self._dst_buffer = ffi.new('char[]', write_size)
448 self._out_buffer = ffi.new('ZSTD_outBuffer *')
486 self._dst_buffer = ffi.new("char[]", write_size)
487 self._out_buffer = ffi.new("ZSTD_outBuffer *")
449 488 self._out_buffer.dst = self._dst_buffer
450 489 self._out_buffer.size = len(self._dst_buffer)
451 490 self._out_buffer.pos = 0
452 491
453 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx,
454 source_size)
492 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx, source_size)
455 493 if lib.ZSTD_isError(zresult):
456 raise ZstdError('error setting source size: %s' %
457 _zstd_error(zresult))
494 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
458 495
459 496 def __enter__(self):
460 497 if self._closed:
461 raise ValueError('stream is closed')
498 raise ValueError("stream is closed")
462 499
463 500 if self._entered:
464 raise ZstdError('cannot __enter__ multiple times')
501 raise ZstdError("cannot __enter__ multiple times")
465 502
466 503 self._entered = True
467 504 return self
@@ -480,11 +517,11 class ZstdCompressionWriter(object):
480 517 return lib.ZSTD_sizeof_CCtx(self._compressor._cctx)
481 518
482 519 def fileno(self):
483 f = getattr(self._writer, 'fileno', None)
520 f = getattr(self._writer, "fileno", None)
484 521 if f:
485 522 return f()
486 523 else:
487 raise OSError('fileno not available on underlying writer')
524 raise OSError("fileno not available on underlying writer")
488 525
489 526 def close(self):
490 527 if self._closed:
@@ -496,7 +533,7 class ZstdCompressionWriter(object):
496 533 self._closed = True
497 534
498 535 # Call close() on underlying stream as well.
499 f = getattr(self._writer, 'close', None)
536 f = getattr(self._writer, "close", None)
500 537 if f:
501 538 f()
502 539
@@ -529,7 +566,7 class ZstdCompressionWriter(object):
529 566 return True
530 567
531 568 def writelines(self, lines):
532 raise NotImplementedError('writelines() is not yet implemented')
569 raise NotImplementedError("writelines() is not yet implemented")
533 570
534 571 def read(self, size=-1):
535 572 raise io.UnsupportedOperation()
@@ -542,13 +579,13 class ZstdCompressionWriter(object):
542 579
543 580 def write(self, data):
544 581 if self._closed:
545 raise ValueError('stream is closed')
582 raise ValueError("stream is closed")
546 583
547 584 total_write = 0
548 585
549 586 data_buffer = ffi.from_buffer(data)
550 587
551 in_buffer = ffi.new('ZSTD_inBuffer *')
588 in_buffer = ffi.new("ZSTD_inBuffer *")
552 589 in_buffer.src = data_buffer
553 590 in_buffer.size = len(data_buffer)
554 591 in_buffer.pos = 0
@@ -557,12 +594,11 class ZstdCompressionWriter(object):
557 594 out_buffer.pos = 0
558 595
559 596 while in_buffer.pos < in_buffer.size:
560 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
561 out_buffer, in_buffer,
562 lib.ZSTD_e_continue)
597 zresult = lib.ZSTD_compressStream2(
598 self._compressor._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue
599 )
563 600 if lib.ZSTD_isError(zresult):
564 raise ZstdError('zstd compress error: %s' %
565 _zstd_error(zresult))
601 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
566 602
567 603 if out_buffer.pos:
568 604 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
@@ -581,28 +617,27 class ZstdCompressionWriter(object):
581 617 elif flush_mode == FLUSH_FRAME:
582 618 flush = lib.ZSTD_e_end
583 619 else:
584 raise ValueError('unknown flush_mode: %r' % flush_mode)
620 raise ValueError("unknown flush_mode: %r" % flush_mode)
585 621
586 622 if self._closed:
587 raise ValueError('stream is closed')
623 raise ValueError("stream is closed")
588 624
589 625 total_write = 0
590 626
591 627 out_buffer = self._out_buffer
592 628 out_buffer.pos = 0
593 629
594 in_buffer = ffi.new('ZSTD_inBuffer *')
630 in_buffer = ffi.new("ZSTD_inBuffer *")
595 631 in_buffer.src = ffi.NULL
596 632 in_buffer.size = 0
597 633 in_buffer.pos = 0
598 634
599 635 while True:
600 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
601 out_buffer, in_buffer,
602 flush)
636 zresult = lib.ZSTD_compressStream2(
637 self._compressor._cctx, out_buffer, in_buffer, flush
638 )
603 639 if lib.ZSTD_isError(zresult):
604 raise ZstdError('zstd compress error: %s' %
605 _zstd_error(zresult))
640 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
606 641
607 642 if out_buffer.pos:
608 643 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
@@ -622,10 +657,10 class ZstdCompressionWriter(object):
622 657 class ZstdCompressionObj(object):
623 658 def compress(self, data):
624 659 if self._finished:
625 raise ZstdError('cannot call compress() after compressor finished')
660 raise ZstdError("cannot call compress() after compressor finished")
626 661
627 662 data_buffer = ffi.from_buffer(data)
628 source = ffi.new('ZSTD_inBuffer *')
663 source = ffi.new("ZSTD_inBuffer *")
629 664 source.src = data_buffer
630 665 source.size = len(data_buffer)
631 666 source.pos = 0
@@ -633,26 +668,24 class ZstdCompressionObj(object):
633 668 chunks = []
634 669
635 670 while source.pos < len(data):
636 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
637 self._out,
638 source,
639 lib.ZSTD_e_continue)
671 zresult = lib.ZSTD_compressStream2(
672 self._compressor._cctx, self._out, source, lib.ZSTD_e_continue
673 )
640 674 if lib.ZSTD_isError(zresult):
641 raise ZstdError('zstd compress error: %s' %
642 _zstd_error(zresult))
675 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
643 676
644 677 if self._out.pos:
645 678 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
646 679 self._out.pos = 0
647 680
648 return b''.join(chunks)
681 return b"".join(chunks)
649 682
650 683 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
651 684 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
652 raise ValueError('flush mode not recognized')
685 raise ValueError("flush mode not recognized")
653 686
654 687 if self._finished:
655 raise ZstdError('compressor object already finished')
688 raise ZstdError("compressor object already finished")
656 689
657 690 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
658 691 z_flush_mode = lib.ZSTD_e_flush
@@ -660,11 +693,11 class ZstdCompressionObj(object):
660 693 z_flush_mode = lib.ZSTD_e_end
661 694 self._finished = True
662 695 else:
663 raise ZstdError('unhandled flush mode')
696 raise ZstdError("unhandled flush mode")
664 697
665 698 assert self._out.pos == 0
666 699
667 in_buffer = ffi.new('ZSTD_inBuffer *')
700 in_buffer = ffi.new("ZSTD_inBuffer *")
668 701 in_buffer.src = ffi.NULL
669 702 in_buffer.size = 0
670 703 in_buffer.pos = 0
@@ -672,13 +705,13 class ZstdCompressionObj(object):
672 705 chunks = []
673 706
674 707 while True:
675 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
676 self._out,
677 in_buffer,
678 z_flush_mode)
708 zresult = lib.ZSTD_compressStream2(
709 self._compressor._cctx, self._out, in_buffer, z_flush_mode
710 )
679 711 if lib.ZSTD_isError(zresult):
680 raise ZstdError('error ending compression stream: %s' %
681 _zstd_error(zresult))
712 raise ZstdError(
713 "error ending compression stream: %s" % _zstd_error(zresult)
714 )
682 715
683 716 if self._out.pos:
684 717 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
@@ -687,19 +720,19 class ZstdCompressionObj(object):
687 720 if not zresult:
688 721 break
689 722
690 return b''.join(chunks)
723 return b"".join(chunks)
691 724
692 725
693 726 class ZstdCompressionChunker(object):
694 727 def __init__(self, compressor, chunk_size):
695 728 self._compressor = compressor
696 self._out = ffi.new('ZSTD_outBuffer *')
697 self._dst_buffer = ffi.new('char[]', chunk_size)
729 self._out = ffi.new("ZSTD_outBuffer *")
730 self._dst_buffer = ffi.new("char[]", chunk_size)
698 731 self._out.dst = self._dst_buffer
699 732 self._out.size = chunk_size
700 733 self._out.pos = 0
701 734
702 self._in = ffi.new('ZSTD_inBuffer *')
735 self._in = ffi.new("ZSTD_inBuffer *")
703 736 self._in.src = ffi.NULL
704 737 self._in.size = 0
705 738 self._in.pos = 0
@@ -707,11 +740,13 class ZstdCompressionChunker(object):
707 740
708 741 def compress(self, data):
709 742 if self._finished:
710 raise ZstdError('cannot call compress() after compression finished')
743 raise ZstdError("cannot call compress() after compression finished")
711 744
712 745 if self._in.src != ffi.NULL:
713 raise ZstdError('cannot perform operation before consuming output '
714 'from previous operation')
746 raise ZstdError(
747 "cannot perform operation before consuming output "
748 "from previous operation"
749 )
715 750
716 751 data_buffer = ffi.from_buffer(data)
717 752
@@ -723,10 +758,9 class ZstdCompressionChunker(object):
723 758 self._in.pos = 0
724 759
725 760 while self._in.pos < self._in.size:
726 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
727 self._out,
728 self._in,
729 lib.ZSTD_e_continue)
761 zresult = lib.ZSTD_compressStream2(
762 self._compressor._cctx, self._out, self._in, lib.ZSTD_e_continue
763 )
730 764
731 765 if self._in.pos == self._in.size:
732 766 self._in.src = ffi.NULL
@@ -734,8 +768,7 class ZstdCompressionChunker(object):
734 768 self._in.pos = 0
735 769
736 770 if lib.ZSTD_isError(zresult):
737 raise ZstdError('zstd compress error: %s' %
738 _zstd_error(zresult))
771 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
739 772
740 773 if self._out.pos == self._out.size:
741 774 yield ffi.buffer(self._out.dst, self._out.pos)[:]
@@ -743,18 +776,19 class ZstdCompressionChunker(object):
743 776
744 777 def flush(self):
745 778 if self._finished:
746 raise ZstdError('cannot call flush() after compression finished')
779 raise ZstdError("cannot call flush() after compression finished")
747 780
748 781 if self._in.src != ffi.NULL:
749 raise ZstdError('cannot call flush() before consuming output from '
750 'previous operation')
782 raise ZstdError(
783 "cannot call flush() before consuming output from " "previous operation"
784 )
751 785
752 786 while True:
753 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
754 self._out, self._in,
755 lib.ZSTD_e_flush)
787 zresult = lib.ZSTD_compressStream2(
788 self._compressor._cctx, self._out, self._in, lib.ZSTD_e_flush
789 )
756 790 if lib.ZSTD_isError(zresult):
757 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
791 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
758 792
759 793 if self._out.pos:
760 794 yield ffi.buffer(self._out.dst, self._out.pos)[:]
@@ -765,18 +799,20 class ZstdCompressionChunker(object):
765 799
766 800 def finish(self):
767 801 if self._finished:
768 raise ZstdError('cannot call finish() after compression finished')
802 raise ZstdError("cannot call finish() after compression finished")
769 803
770 804 if self._in.src != ffi.NULL:
771 raise ZstdError('cannot call finish() before consuming output from '
772 'previous operation')
805 raise ZstdError(
806 "cannot call finish() before consuming output from "
807 "previous operation"
808 )
773 809
774 810 while True:
775 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
776 self._out, self._in,
777 lib.ZSTD_e_end)
811 zresult = lib.ZSTD_compressStream2(
812 self._compressor._cctx, self._out, self._in, lib.ZSTD_e_end
813 )
778 814 if lib.ZSTD_isError(zresult):
779 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
815 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
780 816
781 817 if self._out.pos:
782 818 yield ffi.buffer(self._out.dst, self._out.pos)[:]
@@ -798,13 +834,13 class ZstdCompressionReader(object):
798 834 self._finished_input = False
799 835 self._finished_output = False
800 836
801 self._in_buffer = ffi.new('ZSTD_inBuffer *')
837 self._in_buffer = ffi.new("ZSTD_inBuffer *")
802 838 # Holds a ref so backing bytes in self._in_buffer stay alive.
803 839 self._source_buffer = None
804 840
805 841 def __enter__(self):
806 842 if self._entered:
807 raise ValueError('cannot __enter__ multiple times')
843 raise ValueError("cannot __enter__ multiple times")
808 844
809 845 self._entered = True
810 846 return self
@@ -833,10 +869,10 class ZstdCompressionReader(object):
833 869 raise io.UnsupportedOperation()
834 870
835 871 def write(self, data):
836 raise OSError('stream is not writable')
872 raise OSError("stream is not writable")
837 873
838 874 def writelines(self, ignored):
839 raise OSError('stream is not writable')
875 raise OSError("stream is not writable")
840 876
841 877 def isatty(self):
842 878 return False
@@ -865,7 +901,7 class ZstdCompressionReader(object):
865 901
866 902 chunks.append(chunk)
867 903
868 return b''.join(chunks)
904 return b"".join(chunks)
869 905
870 906 def __iter__(self):
871 907 raise io.UnsupportedOperation()
@@ -879,7 +915,7 class ZstdCompressionReader(object):
879 915 if self._finished_input:
880 916 return
881 917
882 if hasattr(self._source, 'read'):
918 if hasattr(self._source, "read"):
883 919 data = self._source.read(self._read_size)
884 920
885 921 if not data:
@@ -902,9 +938,9 class ZstdCompressionReader(object):
902 938
903 939 old_pos = out_buffer.pos
904 940
905 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
906 out_buffer, self._in_buffer,
907 lib.ZSTD_e_continue)
941 zresult = lib.ZSTD_compressStream2(
942 self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_continue
943 )
908 944
909 945 self._bytes_compressed += out_buffer.pos - old_pos
910 946
@@ -914,31 +950,30 class ZstdCompressionReader(object):
914 950 self._in_buffer.size = 0
915 951 self._source_buffer = None
916 952
917 if not hasattr(self._source, 'read'):
953 if not hasattr(self._source, "read"):
918 954 self._finished_input = True
919 955
920 956 if lib.ZSTD_isError(zresult):
921 raise ZstdError('zstd compress error: %s',
922 _zstd_error(zresult))
957 raise ZstdError("zstd compress error: %s", _zstd_error(zresult))
923 958
924 959 return out_buffer.pos and out_buffer.pos == out_buffer.size
925 960
926 961 def read(self, size=-1):
927 962 if self._closed:
928 raise ValueError('stream is closed')
963 raise ValueError("stream is closed")
929 964
930 965 if size < -1:
931 raise ValueError('cannot read negative amounts less than -1')
966 raise ValueError("cannot read negative amounts less than -1")
932 967
933 968 if size == -1:
934 969 return self.readall()
935 970
936 971 if self._finished_output or size == 0:
937 return b''
972 return b""
938 973
939 974 # Need a dedicated ref to dest buffer otherwise it gets collected.
940 dst_buffer = ffi.new('char[]', size)
941 out_buffer = ffi.new('ZSTD_outBuffer *')
975 dst_buffer = ffi.new("char[]", size)
976 out_buffer = ffi.new("ZSTD_outBuffer *")
942 977 out_buffer.dst = dst_buffer
943 978 out_buffer.size = size
944 979 out_buffer.pos = 0
@@ -955,15 +990,14 class ZstdCompressionReader(object):
955 990 # EOF
956 991 old_pos = out_buffer.pos
957 992
958 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
959 out_buffer, self._in_buffer,
960 lib.ZSTD_e_end)
993 zresult = lib.ZSTD_compressStream2(
994 self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
995 )
961 996
962 997 self._bytes_compressed += out_buffer.pos - old_pos
963 998
964 999 if lib.ZSTD_isError(zresult):
965 raise ZstdError('error ending compression stream: %s',
966 _zstd_error(zresult))
1000 raise ZstdError("error ending compression stream: %s", _zstd_error(zresult))
967 1001
968 1002 if zresult == 0:
969 1003 self._finished_output = True
@@ -972,20 +1006,20 class ZstdCompressionReader(object):
972 1006
973 1007 def read1(self, size=-1):
974 1008 if self._closed:
975 raise ValueError('stream is closed')
1009 raise ValueError("stream is closed")
976 1010
977 1011 if size < -1:
978 raise ValueError('cannot read negative amounts less than -1')
1012 raise ValueError("cannot read negative amounts less than -1")
979 1013
980 1014 if self._finished_output or size == 0:
981 return b''
1015 return b""
982 1016
983 1017 # -1 returns arbitrary number of bytes.
984 1018 if size == -1:
985 1019 size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
986 1020
987 dst_buffer = ffi.new('char[]', size)
988 out_buffer = ffi.new('ZSTD_outBuffer *')
1021 dst_buffer = ffi.new("char[]", size)
1022 out_buffer = ffi.new("ZSTD_outBuffer *")
989 1023 out_buffer.dst = dst_buffer
990 1024 out_buffer.size = size
991 1025 out_buffer.pos = 0
@@ -1020,15 +1054,16 class ZstdCompressionReader(object):
1020 1054 # EOF.
1021 1055 old_pos = out_buffer.pos
1022 1056
1023 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1024 out_buffer, self._in_buffer,
1025 lib.ZSTD_e_end)
1057 zresult = lib.ZSTD_compressStream2(
1058 self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
1059 )
1026 1060
1027 1061 self._bytes_compressed += out_buffer.pos - old_pos
1028 1062
1029 1063 if lib.ZSTD_isError(zresult):
1030 raise ZstdError('error ending compression stream: %s' %
1031 _zstd_error(zresult))
1064 raise ZstdError(
1065 "error ending compression stream: %s" % _zstd_error(zresult)
1066 )
1032 1067
1033 1068 if zresult == 0:
1034 1069 self._finished_output = True
@@ -1037,15 +1072,15 class ZstdCompressionReader(object):
1037 1072
1038 1073 def readinto(self, b):
1039 1074 if self._closed:
1040 raise ValueError('stream is closed')
1075 raise ValueError("stream is closed")
1041 1076
1042 1077 if self._finished_output:
1043 1078 return 0
1044 1079
1045 1080 # TODO use writable=True once we require CFFI >= 1.12.
1046 1081 dest_buffer = ffi.from_buffer(b)
1047 ffi.memmove(b, b'', 0)
1048 out_buffer = ffi.new('ZSTD_outBuffer *')
1082 ffi.memmove(b, b"", 0)
1083 out_buffer = ffi.new("ZSTD_outBuffer *")
1049 1084 out_buffer.dst = dest_buffer
1050 1085 out_buffer.size = len(dest_buffer)
1051 1086 out_buffer.pos = 0
@@ -1060,15 +1095,14 class ZstdCompressionReader(object):
1060 1095
1061 1096 # EOF.
1062 1097 old_pos = out_buffer.pos
1063 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1064 out_buffer, self._in_buffer,
1065 lib.ZSTD_e_end)
1098 zresult = lib.ZSTD_compressStream2(
1099 self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
1100 )
1066 1101
1067 1102 self._bytes_compressed += out_buffer.pos - old_pos
1068 1103
1069 1104 if lib.ZSTD_isError(zresult):
1070 raise ZstdError('error ending compression stream: %s',
1071 _zstd_error(zresult))
1105 raise ZstdError("error ending compression stream: %s", _zstd_error(zresult))
1072 1106
1073 1107 if zresult == 0:
1074 1108 self._finished_output = True
@@ -1077,16 +1111,16 class ZstdCompressionReader(object):
1077 1111
1078 1112 def readinto1(self, b):
1079 1113 if self._closed:
1080 raise ValueError('stream is closed')
1114 raise ValueError("stream is closed")
1081 1115
1082 1116 if self._finished_output:
1083 1117 return 0
1084 1118
1085 1119 # TODO use writable=True once we require CFFI >= 1.12.
1086 1120 dest_buffer = ffi.from_buffer(b)
1087 ffi.memmove(b, b'', 0)
1088
1089 out_buffer = ffi.new('ZSTD_outBuffer *')
1121 ffi.memmove(b, b"", 0)
1122
1123 out_buffer = ffi.new("ZSTD_outBuffer *")
1090 1124 out_buffer.dst = dest_buffer
1091 1125 out_buffer.size = len(dest_buffer)
1092 1126 out_buffer.pos = 0
@@ -1107,15 +1141,16 class ZstdCompressionReader(object):
1107 1141 # EOF.
1108 1142 old_pos = out_buffer.pos
1109 1143
1110 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1111 out_buffer, self._in_buffer,
1112 lib.ZSTD_e_end)
1144 zresult = lib.ZSTD_compressStream2(
1145 self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
1146 )
1113 1147
1114 1148 self._bytes_compressed += out_buffer.pos - old_pos
1115 1149
1116 1150 if lib.ZSTD_isError(zresult):
1117 raise ZstdError('error ending compression stream: %s' %
1118 _zstd_error(zresult))
1151 raise ZstdError(
1152 "error ending compression stream: %s" % _zstd_error(zresult)
1153 )
1119 1154
1120 1155 if zresult == 0:
1121 1156 self._finished_output = True
@@ -1124,29 +1159,35 class ZstdCompressionReader(object):
1124 1159
1125 1160
1126 1161 class ZstdCompressor(object):
1127 def __init__(self, level=3, dict_data=None, compression_params=None,
1128 write_checksum=None, write_content_size=None,
1129 write_dict_id=None, threads=0):
1162 def __init__(
1163 self,
1164 level=3,
1165 dict_data=None,
1166 compression_params=None,
1167 write_checksum=None,
1168 write_content_size=None,
1169 write_dict_id=None,
1170 threads=0,
1171 ):
1130 1172 if level > lib.ZSTD_maxCLevel():
1131 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
1173 raise ValueError("level must be less than %d" % lib.ZSTD_maxCLevel())
1132 1174
1133 1175 if threads < 0:
1134 1176 threads = _cpu_count()
1135 1177
1136 1178 if compression_params and write_checksum is not None:
1137 raise ValueError('cannot define compression_params and '
1138 'write_checksum')
1179 raise ValueError("cannot define compression_params and " "write_checksum")
1139 1180
1140 1181 if compression_params and write_content_size is not None:
1141 raise ValueError('cannot define compression_params and '
1142 'write_content_size')
1182 raise ValueError(
1183 "cannot define compression_params and " "write_content_size"
1184 )
1143 1185
1144 1186 if compression_params and write_dict_id is not None:
1145 raise ValueError('cannot define compression_params and '
1146 'write_dict_id')
1187 raise ValueError("cannot define compression_params and " "write_dict_id")
1147 1188
1148 1189 if compression_params and threads:
1149 raise ValueError('cannot define compression_params and threads')
1190 raise ValueError("cannot define compression_params and threads")
1150 1191
1151 1192 if compression_params:
1152 1193 self._params = _make_cctx_params(compression_params)
@@ -1160,27 +1201,24 class ZstdCompressor(object):
1160 1201
1161 1202 self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
1162 1203
1163 _set_compression_parameter(self._params,
1164 lib.ZSTD_c_compressionLevel,
1165 level)
1204 _set_compression_parameter(self._params, lib.ZSTD_c_compressionLevel, level)
1166 1205
1167 1206 _set_compression_parameter(
1168 1207 self._params,
1169 1208 lib.ZSTD_c_contentSizeFlag,
1170 write_content_size if write_content_size is not None else 1)
1171
1172 _set_compression_parameter(self._params,
1173 lib.ZSTD_c_checksumFlag,
1174 1 if write_checksum else 0)
1175
1176 _set_compression_parameter(self._params,
1177 lib.ZSTD_c_dictIDFlag,
1178 1 if write_dict_id else 0)
1209 write_content_size if write_content_size is not None else 1,
1210 )
1211
1212 _set_compression_parameter(
1213 self._params, lib.ZSTD_c_checksumFlag, 1 if write_checksum else 0
1214 )
1215
1216 _set_compression_parameter(
1217 self._params, lib.ZSTD_c_dictIDFlag, 1 if write_dict_id else 0
1218 )
1179 1219
1180 1220 if threads:
1181 _set_compression_parameter(self._params,
1182 lib.ZSTD_c_nbWorkers,
1183 threads)
1221 _set_compression_parameter(self._params, lib.ZSTD_c_nbWorkers, threads)
1184 1222
1185 1223 cctx = lib.ZSTD_createCCtx()
1186 1224 if cctx == ffi.NULL:
@@ -1194,15 +1232,16 class ZstdCompressor(object):
1194 1232 try:
1195 1233 self._setup_cctx()
1196 1234 finally:
1197 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx,
1198 size=lib.ZSTD_sizeof_CCtx(cctx))
1235 self._cctx = ffi.gc(
1236 cctx, lib.ZSTD_freeCCtx, size=lib.ZSTD_sizeof_CCtx(cctx)
1237 )
1199 1238
1200 1239 def _setup_cctx(self):
1201 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx,
1202 self._params)
1240 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx, self._params)
1203 1241 if lib.ZSTD_isError(zresult):
1204 raise ZstdError('could not set compression parameters: %s' %
1205 _zstd_error(zresult))
1242 raise ZstdError(
1243 "could not set compression parameters: %s" % _zstd_error(zresult)
1244 )
1206 1245
1207 1246 dict_data = self._dict_data
1208 1247
@@ -1211,12 +1250,17 class ZstdCompressor(object):
1211 1250 zresult = lib.ZSTD_CCtx_refCDict(self._cctx, dict_data._cdict)
1212 1251 else:
1213 1252 zresult = lib.ZSTD_CCtx_loadDictionary_advanced(
1214 self._cctx, dict_data.as_bytes(), len(dict_data),
1215 lib.ZSTD_dlm_byRef, dict_data._dict_type)
1253 self._cctx,
1254 dict_data.as_bytes(),
1255 len(dict_data),
1256 lib.ZSTD_dlm_byRef,
1257 dict_data._dict_type,
1258 )
1216 1259
1217 1260 if lib.ZSTD_isError(zresult):
1218 raise ZstdError('could not load compression dictionary: %s' %
1219 _zstd_error(zresult))
1261 raise ZstdError(
1262 "could not load compression dictionary: %s" % _zstd_error(zresult)
1263 )
1220 1264
1221 1265 def memory_size(self):
1222 1266 return lib.ZSTD_sizeof_CCtx(self._cctx)
@@ -1227,15 +1271,14 class ZstdCompressor(object):
1227 1271 data_buffer = ffi.from_buffer(data)
1228 1272
1229 1273 dest_size = lib.ZSTD_compressBound(len(data_buffer))
1230 out = new_nonzero('char[]', dest_size)
1274 out = new_nonzero("char[]", dest_size)
1231 1275
1232 1276 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, len(data_buffer))
1233 1277 if lib.ZSTD_isError(zresult):
1234 raise ZstdError('error setting source size: %s' %
1235 _zstd_error(zresult))
1236
1237 out_buffer = ffi.new('ZSTD_outBuffer *')
1238 in_buffer = ffi.new('ZSTD_inBuffer *')
1278 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
1279
1280 out_buffer = ffi.new("ZSTD_outBuffer *")
1281 in_buffer = ffi.new("ZSTD_inBuffer *")
1239 1282
1240 1283 out_buffer.dst = out
1241 1284 out_buffer.size = dest_size
@@ -1245,16 +1288,14 class ZstdCompressor(object):
1245 1288 in_buffer.size = len(data_buffer)
1246 1289 in_buffer.pos = 0
1247 1290
1248 zresult = lib.ZSTD_compressStream2(self._cctx,
1249 out_buffer,
1250 in_buffer,
1251 lib.ZSTD_e_end)
1291 zresult = lib.ZSTD_compressStream2(
1292 self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end
1293 )
1252 1294
1253 1295 if lib.ZSTD_isError(zresult):
1254 raise ZstdError('cannot compress: %s' %
1255 _zstd_error(zresult))
1296 raise ZstdError("cannot compress: %s" % _zstd_error(zresult))
1256 1297 elif zresult:
1257 raise ZstdError('unexpected partial frame flush')
1298 raise ZstdError("unexpected partial frame flush")
1258 1299
1259 1300 return ffi.buffer(out, out_buffer.pos)[:]
1260 1301
@@ -1266,12 +1307,11 class ZstdCompressor(object):
1266 1307
1267 1308 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1268 1309 if lib.ZSTD_isError(zresult):
1269 raise ZstdError('error setting source size: %s' %
1270 _zstd_error(zresult))
1310 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
1271 1311
1272 1312 cobj = ZstdCompressionObj()
1273 cobj._out = ffi.new('ZSTD_outBuffer *')
1274 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1313 cobj._out = ffi.new("ZSTD_outBuffer *")
1314 cobj._dst_buffer = ffi.new("char[]", COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1275 1315 cobj._out.dst = cobj._dst_buffer
1276 1316 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1277 1317 cobj._out.pos = 0
@@ -1288,19 +1328,23 class ZstdCompressor(object):
1288 1328
1289 1329 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1290 1330 if lib.ZSTD_isError(zresult):
1291 raise ZstdError('error setting source size: %s' %
1292 _zstd_error(zresult))
1331 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
1293 1332
1294 1333 return ZstdCompressionChunker(self, chunk_size=chunk_size)
1295 1334
1296 def copy_stream(self, ifh, ofh, size=-1,
1297 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1298 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1299
1300 if not hasattr(ifh, 'read'):
1301 raise ValueError('first argument must have a read() method')
1302 if not hasattr(ofh, 'write'):
1303 raise ValueError('second argument must have a write() method')
1335 def copy_stream(
1336 self,
1337 ifh,
1338 ofh,
1339 size=-1,
1340 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1341 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1342 ):
1343
1344 if not hasattr(ifh, "read"):
1345 raise ValueError("first argument must have a read() method")
1346 if not hasattr(ofh, "write"):
1347 raise ValueError("second argument must have a write() method")
1304 1348
1305 1349 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1306 1350
@@ -1309,13 +1353,12 class ZstdCompressor(object):
1309 1353
1310 1354 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1311 1355 if lib.ZSTD_isError(zresult):
1312 raise ZstdError('error setting source size: %s' %
1313 _zstd_error(zresult))
1314
1315 in_buffer = ffi.new('ZSTD_inBuffer *')
1316 out_buffer = ffi.new('ZSTD_outBuffer *')
1317
1318 dst_buffer = ffi.new('char[]', write_size)
1356 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
1357
1358 in_buffer = ffi.new("ZSTD_inBuffer *")
1359 out_buffer = ffi.new("ZSTD_outBuffer *")
1360
1361 dst_buffer = ffi.new("char[]", write_size)
1319 1362 out_buffer.dst = dst_buffer
1320 1363 out_buffer.size = write_size
1321 1364 out_buffer.pos = 0
@@ -1334,13 +1377,11 class ZstdCompressor(object):
1334 1377 in_buffer.pos = 0
1335 1378
1336 1379 while in_buffer.pos < in_buffer.size:
1337 zresult = lib.ZSTD_compressStream2(self._cctx,
1338 out_buffer,
1339 in_buffer,
1340 lib.ZSTD_e_continue)
1380 zresult = lib.ZSTD_compressStream2(
1381 self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue
1382 )
1341 1383 if lib.ZSTD_isError(zresult):
1342 raise ZstdError('zstd compress error: %s' %
1343 _zstd_error(zresult))
1384 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
1344 1385
1345 1386 if out_buffer.pos:
1346 1387 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
@@ -1349,13 +1390,13 class ZstdCompressor(object):
1349 1390
1350 1391 # We've finished reading. Flush the compressor.
1351 1392 while True:
1352 zresult = lib.ZSTD_compressStream2(self._cctx,
1353 out_buffer,
1354 in_buffer,
1355 lib.ZSTD_e_end)
1393 zresult = lib.ZSTD_compressStream2(
1394 self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end
1395 )
1356 1396 if lib.ZSTD_isError(zresult):
1357 raise ZstdError('error ending compression stream: %s' %
1358 _zstd_error(zresult))
1397 raise ZstdError(
1398 "error ending compression stream: %s" % _zstd_error(zresult)
1399 )
1359 1400
1360 1401 if out_buffer.pos:
1361 1402 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
@@ -1367,8 +1408,9 class ZstdCompressor(object):
1367 1408
1368 1409 return total_read, total_write
1369 1410
1370 def stream_reader(self, source, size=-1,
1371 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
1411 def stream_reader(
1412 self, source, size=-1, read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE
1413 ):
1372 1414 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1373 1415
1374 1416 try:
@@ -1381,40 +1423,48 class ZstdCompressor(object):
1381 1423
1382 1424 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1383 1425 if lib.ZSTD_isError(zresult):
1384 raise ZstdError('error setting source size: %s' %
1385 _zstd_error(zresult))
1426 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
1386 1427
1387 1428 return ZstdCompressionReader(self, source, read_size)
1388 1429
1389 def stream_writer(self, writer, size=-1,
1390 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1391 write_return_read=False):
1392
1393 if not hasattr(writer, 'write'):
1394 raise ValueError('must pass an object with a write() method')
1430 def stream_writer(
1431 self,
1432 writer,
1433 size=-1,
1434 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1435 write_return_read=False,
1436 ):
1437
1438 if not hasattr(writer, "write"):
1439 raise ValueError("must pass an object with a write() method")
1395 1440
1396 1441 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1397 1442
1398 1443 if size < 0:
1399 1444 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1400 1445
1401 return ZstdCompressionWriter(self, writer, size, write_size,
1402 write_return_read)
1446 return ZstdCompressionWriter(self, writer, size, write_size, write_return_read)
1403 1447
1404 1448 write_to = stream_writer
1405 1449
1406 def read_to_iter(self, reader, size=-1,
1407 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1408 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1409 if hasattr(reader, 'read'):
1450 def read_to_iter(
1451 self,
1452 reader,
1453 size=-1,
1454 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1455 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1456 ):
1457 if hasattr(reader, "read"):
1410 1458 have_read = True
1411 elif hasattr(reader, '__getitem__'):
1459 elif hasattr(reader, "__getitem__"):
1412 1460 have_read = False
1413 1461 buffer_offset = 0
1414 1462 size = len(reader)
1415 1463 else:
1416 raise ValueError('must pass an object with a read() method or '
1417 'conforms to buffer protocol')
1464 raise ValueError(
1465 "must pass an object with a read() method or "
1466 "conforms to buffer protocol"
1467 )
1418 1468
1419 1469 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1420 1470
@@ -1423,17 +1473,16 class ZstdCompressor(object):
1423 1473
1424 1474 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1425 1475 if lib.ZSTD_isError(zresult):
1426 raise ZstdError('error setting source size: %s' %
1427 _zstd_error(zresult))
1428
1429 in_buffer = ffi.new('ZSTD_inBuffer *')
1430 out_buffer = ffi.new('ZSTD_outBuffer *')
1476 raise ZstdError("error setting source size: %s" % _zstd_error(zresult))
1477
1478 in_buffer = ffi.new("ZSTD_inBuffer *")
1479 out_buffer = ffi.new("ZSTD_outBuffer *")
1431 1480
1432 1481 in_buffer.src = ffi.NULL
1433 1482 in_buffer.size = 0
1434 1483 in_buffer.pos = 0
1435 1484
1436 dst_buffer = ffi.new('char[]', write_size)
1485 dst_buffer = ffi.new("char[]", write_size)
1437 1486 out_buffer.dst = dst_buffer
1438 1487 out_buffer.size = write_size
1439 1488 out_buffer.pos = 0
@@ -1449,7 +1498,7 class ZstdCompressor(object):
1449 1498 else:
1450 1499 remaining = len(reader) - buffer_offset
1451 1500 slice_size = min(remaining, read_size)
1452 read_result = reader[buffer_offset:buffer_offset + slice_size]
1501 read_result = reader[buffer_offset : buffer_offset + slice_size]
1453 1502 buffer_offset += slice_size
1454 1503
1455 1504 # No new input data. Break out of the read loop.
@@ -1464,11 +1513,11 class ZstdCompressor(object):
1464 1513 in_buffer.pos = 0
1465 1514
1466 1515 while in_buffer.pos < in_buffer.size:
1467 zresult = lib.ZSTD_compressStream2(self._cctx, out_buffer, in_buffer,
1468 lib.ZSTD_e_continue)
1516 zresult = lib.ZSTD_compressStream2(
1517 self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue
1518 )
1469 1519 if lib.ZSTD_isError(zresult):
1470 raise ZstdError('zstd compress error: %s' %
1471 _zstd_error(zresult))
1520 raise ZstdError("zstd compress error: %s" % _zstd_error(zresult))
1472 1521
1473 1522 if out_buffer.pos:
1474 1523 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
@@ -1484,13 +1533,13 class ZstdCompressor(object):
1484 1533 # remains.
1485 1534 while True:
1486 1535 assert out_buffer.pos == 0
1487 zresult = lib.ZSTD_compressStream2(self._cctx,
1488 out_buffer,
1489 in_buffer,
1490 lib.ZSTD_e_end)
1536 zresult = lib.ZSTD_compressStream2(
1537 self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end
1538 )
1491 1539 if lib.ZSTD_isError(zresult):
1492 raise ZstdError('error ending compression stream: %s' %
1493 _zstd_error(zresult))
1540 raise ZstdError(
1541 "error ending compression stream: %s" % _zstd_error(zresult)
1542 )
1494 1543
1495 1544 if out_buffer.pos:
1496 1545 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
@@ -1522,7 +1571,7 def frame_content_size(data):
1522 1571 size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
1523 1572
1524 1573 if size == lib.ZSTD_CONTENTSIZE_ERROR:
1525 raise ZstdError('error when determining content size')
1574 raise ZstdError("error when determining content size")
1526 1575 elif size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
1527 1576 return -1
1528 1577 else:
@@ -1534,24 +1583,23 def frame_header_size(data):
1534 1583
1535 1584 zresult = lib.ZSTD_frameHeaderSize(data_buffer, len(data_buffer))
1536 1585 if lib.ZSTD_isError(zresult):
1537 raise ZstdError('could not determine frame header size: %s' %
1538 _zstd_error(zresult))
1586 raise ZstdError(
1587 "could not determine frame header size: %s" % _zstd_error(zresult)
1588 )
1539 1589
1540 1590 return zresult
1541 1591
1542 1592
1543 1593 def get_frame_parameters(data):
1544 params = ffi.new('ZSTD_frameHeader *')
1594 params = ffi.new("ZSTD_frameHeader *")
1545 1595
1546 1596 data_buffer = ffi.from_buffer(data)
1547 1597 zresult = lib.ZSTD_getFrameHeader(params, data_buffer, len(data_buffer))
1548 1598 if lib.ZSTD_isError(zresult):
1549 raise ZstdError('cannot get frame parameters: %s' %
1550 _zstd_error(zresult))
1599 raise ZstdError("cannot get frame parameters: %s" % _zstd_error(zresult))
1551 1600
1552 1601 if zresult:
1553 raise ZstdError('not enough data for frame parameters; need %d bytes' %
1554 zresult)
1602 raise ZstdError("not enough data for frame parameters; need %d bytes" % zresult)
1555 1603
1556 1604 return FrameParameters(params[0])
1557 1605
@@ -1563,10 +1611,10 class ZstdCompressionDict(object):
1563 1611 self.k = k
1564 1612 self.d = d
1565 1613
1566 if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT,
1567 DICT_TYPE_FULLDICT):
1568 raise ValueError('invalid dictionary load mode: %d; must use '
1569 'DICT_TYPE_* constants')
1614 if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT, DICT_TYPE_FULLDICT):
1615 raise ValueError(
1616 "invalid dictionary load mode: %d; must use " "DICT_TYPE_* constants"
1617 )
1570 1618
1571 1619 self._dict_type = dict_type
1572 1620 self._cdict = None
@@ -1582,16 +1630,15 class ZstdCompressionDict(object):
1582 1630
1583 1631 def precompute_compress(self, level=0, compression_params=None):
1584 1632 if level and compression_params:
1585 raise ValueError('must only specify one of level or '
1586 'compression_params')
1633 raise ValueError("must only specify one of level or " "compression_params")
1587 1634
1588 1635 if not level and not compression_params:
1589 raise ValueError('must specify one of level or compression_params')
1636 raise ValueError("must specify one of level or compression_params")
1590 1637
1591 1638 if level:
1592 1639 cparams = lib.ZSTD_getCParams(level, 0, len(self._data))
1593 1640 else:
1594 cparams = ffi.new('ZSTD_compressionParameters')
1641 cparams = ffi.new("ZSTD_compressionParameters")
1595 1642 cparams.chainLog = compression_params.chain_log
1596 1643 cparams.hashLog = compression_params.hash_log
1597 1644 cparams.minMatch = compression_params.min_match
@@ -1600,59 +1647,75 class ZstdCompressionDict(object):
1600 1647 cparams.targetLength = compression_params.target_length
1601 1648 cparams.windowLog = compression_params.window_log
1602 1649
1603 cdict = lib.ZSTD_createCDict_advanced(self._data, len(self._data),
1604 lib.ZSTD_dlm_byRef,
1605 self._dict_type,
1606 cparams,
1607 lib.ZSTD_defaultCMem)
1650 cdict = lib.ZSTD_createCDict_advanced(
1651 self._data,
1652 len(self._data),
1653 lib.ZSTD_dlm_byRef,
1654 self._dict_type,
1655 cparams,
1656 lib.ZSTD_defaultCMem,
1657 )
1608 1658 if cdict == ffi.NULL:
1609 raise ZstdError('unable to precompute dictionary')
1610
1611 self._cdict = ffi.gc(cdict, lib.ZSTD_freeCDict,
1612 size=lib.ZSTD_sizeof_CDict(cdict))
1659 raise ZstdError("unable to precompute dictionary")
1660
1661 self._cdict = ffi.gc(
1662 cdict, lib.ZSTD_freeCDict, size=lib.ZSTD_sizeof_CDict(cdict)
1663 )
1613 1664
1614 1665 @property
1615 1666 def _ddict(self):
1616 ddict = lib.ZSTD_createDDict_advanced(self._data, len(self._data),
1617 lib.ZSTD_dlm_byRef,
1618 self._dict_type,
1619 lib.ZSTD_defaultCMem)
1667 ddict = lib.ZSTD_createDDict_advanced(
1668 self._data,
1669 len(self._data),
1670 lib.ZSTD_dlm_byRef,
1671 self._dict_type,
1672 lib.ZSTD_defaultCMem,
1673 )
1620 1674
1621 1675 if ddict == ffi.NULL:
1622 raise ZstdError('could not create decompression dict')
1623
1624 ddict = ffi.gc(ddict, lib.ZSTD_freeDDict,
1625 size=lib.ZSTD_sizeof_DDict(ddict))
1626 self.__dict__['_ddict'] = ddict
1676 raise ZstdError("could not create decompression dict")
1677
1678 ddict = ffi.gc(ddict, lib.ZSTD_freeDDict, size=lib.ZSTD_sizeof_DDict(ddict))
1679 self.__dict__["_ddict"] = ddict
1627 1680
1628 1681 return ddict
1629 1682
1630 def train_dictionary(dict_size, samples, k=0, d=0, notifications=0, dict_id=0,
1631 level=0, steps=0, threads=0):
1683
1684 def train_dictionary(
1685 dict_size,
1686 samples,
1687 k=0,
1688 d=0,
1689 notifications=0,
1690 dict_id=0,
1691 level=0,
1692 steps=0,
1693 threads=0,
1694 ):
1632 1695 if not isinstance(samples, list):
1633 raise TypeError('samples must be a list')
1696 raise TypeError("samples must be a list")
1634 1697
1635 1698 if threads < 0:
1636 1699 threads = _cpu_count()
1637 1700
1638 1701 total_size = sum(map(len, samples))
1639 1702
1640 samples_buffer = new_nonzero('char[]', total_size)
1641 sample_sizes = new_nonzero('size_t[]', len(samples))
1703 samples_buffer = new_nonzero("char[]", total_size)
1704 sample_sizes = new_nonzero("size_t[]", len(samples))
1642 1705
1643 1706 offset = 0
1644 1707 for i, sample in enumerate(samples):
1645 1708 if not isinstance(sample, bytes_type):
1646 raise ValueError('samples must be bytes')
1709 raise ValueError("samples must be bytes")
1647 1710
1648 1711 l = len(sample)
1649 1712 ffi.memmove(samples_buffer + offset, sample, l)
1650 1713 offset += l
1651 1714 sample_sizes[i] = l
1652 1715
1653 dict_data = new_nonzero('char[]', dict_size)
1654
1655 dparams = ffi.new('ZDICT_cover_params_t *')[0]
1716 dict_data = new_nonzero("char[]", dict_size)
1717
1718 dparams = ffi.new("ZDICT_cover_params_t *")[0]
1656 1719 dparams.k = k
1657 1720 dparams.d = d
1658 1721 dparams.steps = steps
@@ -1661,34 +1724,51 def train_dictionary(dict_size, samples,
1661 1724 dparams.zParams.dictID = dict_id
1662 1725 dparams.zParams.compressionLevel = level
1663 1726
1664 if (not dparams.k and not dparams.d and not dparams.steps
1665 and not dparams.nbThreads and not dparams.zParams.notificationLevel
1727 if (
1728 not dparams.k
1729 and not dparams.d
1730 and not dparams.steps
1731 and not dparams.nbThreads
1732 and not dparams.zParams.notificationLevel
1666 1733 and not dparams.zParams.dictID
1667 and not dparams.zParams.compressionLevel):
1734 and not dparams.zParams.compressionLevel
1735 ):
1668 1736 zresult = lib.ZDICT_trainFromBuffer(
1669 ffi.addressof(dict_data), dict_size,
1737 ffi.addressof(dict_data),
1738 dict_size,
1670 1739 ffi.addressof(samples_buffer),
1671 ffi.addressof(sample_sizes, 0), len(samples))
1740 ffi.addressof(sample_sizes, 0),
1741 len(samples),
1742 )
1672 1743 elif dparams.steps or dparams.nbThreads:
1673 1744 zresult = lib.ZDICT_optimizeTrainFromBuffer_cover(
1674 ffi.addressof(dict_data), dict_size,
1745 ffi.addressof(dict_data),
1746 dict_size,
1675 1747 ffi.addressof(samples_buffer),
1676 ffi.addressof(sample_sizes, 0), len(samples),
1677 ffi.addressof(dparams))
1748 ffi.addressof(sample_sizes, 0),
1749 len(samples),
1750 ffi.addressof(dparams),
1751 )
1678 1752 else:
1679 1753 zresult = lib.ZDICT_trainFromBuffer_cover(
1680 ffi.addressof(dict_data), dict_size,
1754 ffi.addressof(dict_data),
1755 dict_size,
1681 1756 ffi.addressof(samples_buffer),
1682 ffi.addressof(sample_sizes, 0), len(samples),
1683 dparams)
1757 ffi.addressof(sample_sizes, 0),
1758 len(samples),
1759 dparams,
1760 )
1684 1761
1685 1762 if lib.ZDICT_isError(zresult):
1686 msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode('utf-8')
1687 raise ZstdError('cannot train dict: %s' % msg)
1688
1689 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
1690 dict_type=DICT_TYPE_FULLDICT,
1691 k=dparams.k, d=dparams.d)
1763 msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode("utf-8")
1764 raise ZstdError("cannot train dict: %s" % msg)
1765
1766 return ZstdCompressionDict(
1767 ffi.buffer(dict_data, zresult)[:],
1768 dict_type=DICT_TYPE_FULLDICT,
1769 k=dparams.k,
1770 d=dparams.d,
1771 )
1692 1772
1693 1773
1694 1774 class ZstdDecompressionObj(object):
@@ -1699,21 +1779,21 class ZstdDecompressionObj(object):
1699 1779
1700 1780 def decompress(self, data):
1701 1781 if self._finished:
1702 raise ZstdError('cannot use a decompressobj multiple times')
1703
1704 in_buffer = ffi.new('ZSTD_inBuffer *')
1705 out_buffer = ffi.new('ZSTD_outBuffer *')
1782 raise ZstdError("cannot use a decompressobj multiple times")
1783
1784 in_buffer = ffi.new("ZSTD_inBuffer *")
1785 out_buffer = ffi.new("ZSTD_outBuffer *")
1706 1786
1707 1787 data_buffer = ffi.from_buffer(data)
1708 1788
1709 1789 if len(data_buffer) == 0:
1710 return b''
1790 return b""
1711 1791
1712 1792 in_buffer.src = data_buffer
1713 1793 in_buffer.size = len(data_buffer)
1714 1794 in_buffer.pos = 0
1715 1795
1716 dst_buffer = ffi.new('char[]', self._write_size)
1796 dst_buffer = ffi.new("char[]", self._write_size)
1717 1797 out_buffer.dst = dst_buffer
1718 1798 out_buffer.size = len(dst_buffer)
1719 1799 out_buffer.pos = 0
@@ -1721,11 +1801,11 class ZstdDecompressionObj(object):
1721 1801 chunks = []
1722 1802
1723 1803 while True:
1724 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1725 out_buffer, in_buffer)
1804 zresult = lib.ZSTD_decompressStream(
1805 self._decompressor._dctx, out_buffer, in_buffer
1806 )
1726 1807 if lib.ZSTD_isError(zresult):
1727 raise ZstdError('zstd decompressor error: %s' %
1728 _zstd_error(zresult))
1808 raise ZstdError("zstd decompressor error: %s" % _zstd_error(zresult))
1729 1809
1730 1810 if zresult == 0:
1731 1811 self._finished = True
@@ -1734,13 +1814,14 class ZstdDecompressionObj(object):
1734 1814 if out_buffer.pos:
1735 1815 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
1736 1816
1737 if (zresult == 0 or
1738 (in_buffer.pos == in_buffer.size and out_buffer.pos == 0)):
1817 if zresult == 0 or (
1818 in_buffer.pos == in_buffer.size and out_buffer.pos == 0
1819 ):
1739 1820 break
1740 1821
1741 1822 out_buffer.pos = 0
1742 1823
1743 return b''.join(chunks)
1824 return b"".join(chunks)
1744 1825
1745 1826 def flush(self, length=0):
1746 1827 pass
@@ -1757,13 +1838,13 class ZstdDecompressionReader(object):
1757 1838 self._bytes_decompressed = 0
1758 1839 self._finished_input = False
1759 1840 self._finished_output = False
1760 self._in_buffer = ffi.new('ZSTD_inBuffer *')
1841 self._in_buffer = ffi.new("ZSTD_inBuffer *")
1761 1842 # Holds a ref to self._in_buffer.src.
1762 1843 self._source_buffer = None
1763 1844
1764 1845 def __enter__(self):
1765 1846 if self._entered:
1766 raise ValueError('cannot __enter__ multiple times')
1847 raise ValueError("cannot __enter__ multiple times")
1767 1848
1768 1849 self._entered = True
1769 1850 return self
@@ -1824,7 +1905,7 class ZstdDecompressionReader(object):
1824 1905
1825 1906 chunks.append(chunk)
1826 1907
1827 return b''.join(chunks)
1908 return b"".join(chunks)
1828 1909
1829 1910 def __iter__(self):
1830 1911 raise io.UnsupportedOperation()
@@ -1844,7 +1925,7 class ZstdDecompressionReader(object):
1844 1925 return
1845 1926
1846 1927 # Else populate the input buffer from our source.
1847 if hasattr(self._source, 'read'):
1928 if hasattr(self._source, "read"):
1848 1929 data = self._source.read(self._read_size)
1849 1930
1850 1931 if not data:
@@ -1866,8 +1947,9 class ZstdDecompressionReader(object):
1866 1947
1867 1948 Returns True if data in output buffer should be emitted.
1868 1949 """
1869 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1870 out_buffer, self._in_buffer)
1950 zresult = lib.ZSTD_decompressStream(
1951 self._decompressor._dctx, out_buffer, self._in_buffer
1952 )
1871 1953
1872 1954 if self._in_buffer.pos == self._in_buffer.size:
1873 1955 self._in_buffer.src = ffi.NULL
@@ -1875,38 +1957,39 class ZstdDecompressionReader(object):
1875 1957 self._in_buffer.size = 0
1876 1958 self._source_buffer = None
1877 1959
1878 if not hasattr(self._source, 'read'):
1960 if not hasattr(self._source, "read"):
1879 1961 self._finished_input = True
1880 1962
1881 1963 if lib.ZSTD_isError(zresult):
1882 raise ZstdError('zstd decompress error: %s' %
1883 _zstd_error(zresult))
1964 raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult))
1884 1965
1885 1966 # Emit data if there is data AND either:
1886 1967 # a) output buffer is full (read amount is satisfied)
1887 1968 # b) we're at end of a frame and not in frame spanning mode
1888 return (out_buffer.pos and
1889 (out_buffer.pos == out_buffer.size or
1890 zresult == 0 and not self._read_across_frames))
1969 return out_buffer.pos and (
1970 out_buffer.pos == out_buffer.size
1971 or zresult == 0
1972 and not self._read_across_frames
1973 )
1891 1974
1892 1975 def read(self, size=-1):
1893 1976 if self._closed:
1894 raise ValueError('stream is closed')
1977 raise ValueError("stream is closed")
1895 1978
1896 1979 if size < -1:
1897 raise ValueError('cannot read negative amounts less than -1')
1980 raise ValueError("cannot read negative amounts less than -1")
1898 1981
1899 1982 if size == -1:
1900 1983 # This is recursive. But it gets the job done.
1901 1984 return self.readall()
1902 1985
1903 1986 if self._finished_output or size == 0:
1904 return b''
1987 return b""
1905 1988
1906 1989 # We /could/ call into readinto() here. But that introduces more
1907 1990 # overhead.
1908 dst_buffer = ffi.new('char[]', size)
1909 out_buffer = ffi.new('ZSTD_outBuffer *')
1991 dst_buffer = ffi.new("char[]", size)
1992 out_buffer = ffi.new("ZSTD_outBuffer *")
1910 1993 out_buffer.dst = dst_buffer
1911 1994 out_buffer.size = size
1912 1995 out_buffer.pos = 0
@@ -1927,15 +2010,15 class ZstdDecompressionReader(object):
1927 2010
1928 2011 def readinto(self, b):
1929 2012 if self._closed:
1930 raise ValueError('stream is closed')
2013 raise ValueError("stream is closed")
1931 2014
1932 2015 if self._finished_output:
1933 2016 return 0
1934 2017
1935 2018 # TODO use writable=True once we require CFFI >= 1.12.
1936 2019 dest_buffer = ffi.from_buffer(b)
1937 ffi.memmove(b, b'', 0)
1938 out_buffer = ffi.new('ZSTD_outBuffer *')
2020 ffi.memmove(b, b"", 0)
2021 out_buffer = ffi.new("ZSTD_outBuffer *")
1939 2022 out_buffer.dst = dest_buffer
1940 2023 out_buffer.size = len(dest_buffer)
1941 2024 out_buffer.pos = 0
@@ -1956,20 +2039,20 class ZstdDecompressionReader(object):
1956 2039
1957 2040 def read1(self, size=-1):
1958 2041 if self._closed:
1959 raise ValueError('stream is closed')
2042 raise ValueError("stream is closed")
1960 2043
1961 2044 if size < -1:
1962 raise ValueError('cannot read negative amounts less than -1')
2045 raise ValueError("cannot read negative amounts less than -1")
1963 2046
1964 2047 if self._finished_output or size == 0:
1965 return b''
2048 return b""
1966 2049
1967 2050 # -1 returns arbitrary number of bytes.
1968 2051 if size == -1:
1969 2052 size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1970 2053
1971 dst_buffer = ffi.new('char[]', size)
1972 out_buffer = ffi.new('ZSTD_outBuffer *')
2054 dst_buffer = ffi.new("char[]", size)
2055 out_buffer = ffi.new("ZSTD_outBuffer *")
1973 2056 out_buffer.dst = dst_buffer
1974 2057 out_buffer.size = size
1975 2058 out_buffer.pos = 0
@@ -1990,16 +2073,16 class ZstdDecompressionReader(object):
1990 2073
1991 2074 def readinto1(self, b):
1992 2075 if self._closed:
1993 raise ValueError('stream is closed')
2076 raise ValueError("stream is closed")
1994 2077
1995 2078 if self._finished_output:
1996 2079 return 0
1997 2080
1998 2081 # TODO use writable=True once we require CFFI >= 1.12.
1999 2082 dest_buffer = ffi.from_buffer(b)
2000 ffi.memmove(b, b'', 0)
2001
2002 out_buffer = ffi.new('ZSTD_outBuffer *')
2083 ffi.memmove(b, b"", 0)
2084
2085 out_buffer = ffi.new("ZSTD_outBuffer *")
2003 2086 out_buffer.dst = dest_buffer
2004 2087 out_buffer.size = len(dest_buffer)
2005 2088 out_buffer.pos = 0
@@ -2016,33 +2099,31 class ZstdDecompressionReader(object):
2016 2099
2017 2100 def seek(self, pos, whence=os.SEEK_SET):
2018 2101 if self._closed:
2019 raise ValueError('stream is closed')
2102 raise ValueError("stream is closed")
2020 2103
2021 2104 read_amount = 0
2022 2105
2023 2106 if whence == os.SEEK_SET:
2024 2107 if pos < 0:
2025 raise ValueError('cannot seek to negative position with SEEK_SET')
2108 raise ValueError("cannot seek to negative position with SEEK_SET")
2026 2109
2027 2110 if pos < self._bytes_decompressed:
2028 raise ValueError('cannot seek zstd decompression stream '
2029 'backwards')
2111 raise ValueError("cannot seek zstd decompression stream " "backwards")
2030 2112
2031 2113 read_amount = pos - self._bytes_decompressed
2032 2114
2033 2115 elif whence == os.SEEK_CUR:
2034 2116 if pos < 0:
2035 raise ValueError('cannot seek zstd decompression stream '
2036 'backwards')
2117 raise ValueError("cannot seek zstd decompression stream " "backwards")
2037 2118
2038 2119 read_amount = pos
2039 2120 elif whence == os.SEEK_END:
2040 raise ValueError('zstd decompression streams cannot be seeked '
2041 'with SEEK_END')
2121 raise ValueError(
2122 "zstd decompression streams cannot be seeked " "with SEEK_END"
2123 )
2042 2124
2043 2125 while read_amount:
2044 result = self.read(min(read_amount,
2045 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE))
2126 result = self.read(min(read_amount, DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE))
2046 2127
2047 2128 if not result:
2048 2129 break
@@ -2051,6 +2132,7 class ZstdDecompressionReader(object):
2051 2132
2052 2133 return self._bytes_decompressed
2053 2134
2135
2054 2136 class ZstdDecompressionWriter(object):
2055 2137 def __init__(self, decompressor, writer, write_size, write_return_read):
2056 2138 decompressor._ensure_dctx()
@@ -2064,10 +2146,10 class ZstdDecompressionWriter(object):
2064 2146
2065 2147 def __enter__(self):
2066 2148 if self._closed:
2067 raise ValueError('stream is closed')
2149 raise ValueError("stream is closed")
2068 2150
2069 2151 if self._entered:
2070 raise ZstdError('cannot __enter__ multiple times')
2152 raise ZstdError("cannot __enter__ multiple times")
2071 2153
2072 2154 self._entered = True
2073 2155
@@ -2089,7 +2171,7 class ZstdDecompressionWriter(object):
2089 2171 finally:
2090 2172 self._closed = True
2091 2173
2092 f = getattr(self._writer, 'close', None)
2174 f = getattr(self._writer, "close", None)
2093 2175 if f:
2094 2176 f()
2095 2177
@@ -2098,17 +2180,17 class ZstdDecompressionWriter(object):
2098 2180 return self._closed
2099 2181
2100 2182 def fileno(self):
2101 f = getattr(self._writer, 'fileno', None)
2183 f = getattr(self._writer, "fileno", None)
2102 2184 if f:
2103 2185 return f()
2104 2186 else:
2105 raise OSError('fileno not available on underlying writer')
2187 raise OSError("fileno not available on underlying writer")
2106 2188
2107 2189 def flush(self):
2108 2190 if self._closed:
2109 raise ValueError('stream is closed')
2110
2111 f = getattr(self._writer, 'flush', None)
2191 raise ValueError("stream is closed")
2192
2193 f = getattr(self._writer, "flush", None)
2112 2194 if f:
2113 2195 return f()
2114 2196
@@ -2153,19 +2235,19 class ZstdDecompressionWriter(object):
2153 2235
2154 2236 def write(self, data):
2155 2237 if self._closed:
2156 raise ValueError('stream is closed')
2238 raise ValueError("stream is closed")
2157 2239
2158 2240 total_write = 0
2159 2241
2160 in_buffer = ffi.new('ZSTD_inBuffer *')
2161 out_buffer = ffi.new('ZSTD_outBuffer *')
2242 in_buffer = ffi.new("ZSTD_inBuffer *")
2243 out_buffer = ffi.new("ZSTD_outBuffer *")
2162 2244
2163 2245 data_buffer = ffi.from_buffer(data)
2164 2246 in_buffer.src = data_buffer
2165 2247 in_buffer.size = len(data_buffer)
2166 2248 in_buffer.pos = 0
2167 2249
2168 dst_buffer = ffi.new('char[]', self._write_size)
2250 dst_buffer = ffi.new("char[]", self._write_size)
2169 2251 out_buffer.dst = dst_buffer
2170 2252 out_buffer.size = len(dst_buffer)
2171 2253 out_buffer.pos = 0
@@ -2175,8 +2257,7 class ZstdDecompressionWriter(object):
2175 2257 while in_buffer.pos < in_buffer.size:
2176 2258 zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer)
2177 2259 if lib.ZSTD_isError(zresult):
2178 raise ZstdError('zstd decompress error: %s' %
2179 _zstd_error(zresult))
2260 raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult))
2180 2261
2181 2262 if out_buffer.pos:
2182 2263 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
@@ -2206,8 +2287,9 class ZstdDecompressor(object):
2206 2287 try:
2207 2288 self._ensure_dctx()
2208 2289 finally:
2209 self._dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx,
2210 size=lib.ZSTD_sizeof_DCtx(dctx))
2290 self._dctx = ffi.gc(
2291 dctx, lib.ZSTD_freeDCtx, size=lib.ZSTD_sizeof_DCtx(dctx)
2292 )
2211 2293
2212 2294 def memory_size(self):
2213 2295 return lib.ZSTD_sizeof_DCtx(self._dctx)
@@ -2220,85 +2302,96 class ZstdDecompressor(object):
2220 2302 output_size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
2221 2303
2222 2304 if output_size == lib.ZSTD_CONTENTSIZE_ERROR:
2223 raise ZstdError('error determining content size from frame header')
2305 raise ZstdError("error determining content size from frame header")
2224 2306 elif output_size == 0:
2225 return b''
2307 return b""
2226 2308 elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2227 2309 if not max_output_size:
2228 raise ZstdError('could not determine content size in frame header')
2229
2230 result_buffer = ffi.new('char[]', max_output_size)
2310 raise ZstdError("could not determine content size in frame header")
2311
2312 result_buffer = ffi.new("char[]", max_output_size)
2231 2313 result_size = max_output_size
2232 2314 output_size = 0
2233 2315 else:
2234 result_buffer = ffi.new('char[]', output_size)
2316 result_buffer = ffi.new("char[]", output_size)
2235 2317 result_size = output_size
2236 2318
2237 out_buffer = ffi.new('ZSTD_outBuffer *')
2319 out_buffer = ffi.new("ZSTD_outBuffer *")
2238 2320 out_buffer.dst = result_buffer
2239 2321 out_buffer.size = result_size
2240 2322 out_buffer.pos = 0
2241 2323
2242 in_buffer = ffi.new('ZSTD_inBuffer *')
2324 in_buffer = ffi.new("ZSTD_inBuffer *")
2243 2325 in_buffer.src = data_buffer
2244 2326 in_buffer.size = len(data_buffer)
2245 2327 in_buffer.pos = 0
2246 2328
2247 2329 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2248 2330 if lib.ZSTD_isError(zresult):
2249 raise ZstdError('decompression error: %s' %
2250 _zstd_error(zresult))
2331 raise ZstdError("decompression error: %s" % _zstd_error(zresult))
2251 2332 elif zresult:
2252 raise ZstdError('decompression error: did not decompress full frame')
2333 raise ZstdError("decompression error: did not decompress full frame")
2253 2334 elif output_size and out_buffer.pos != output_size:
2254 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
2255 (zresult, output_size))
2335 raise ZstdError(
2336 "decompression error: decompressed %d bytes; expected %d"
2337 % (zresult, output_size)
2338 )
2256 2339
2257 2340 return ffi.buffer(result_buffer, out_buffer.pos)[:]
2258 2341
2259 def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2260 read_across_frames=False):
2342 def stream_reader(
2343 self,
2344 source,
2345 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2346 read_across_frames=False,
2347 ):
2261 2348 self._ensure_dctx()
2262 2349 return ZstdDecompressionReader(self, source, read_size, read_across_frames)
2263 2350
2264 2351 def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2265 2352 if write_size < 1:
2266 raise ValueError('write_size must be positive')
2353 raise ValueError("write_size must be positive")
2267 2354
2268 2355 self._ensure_dctx()
2269 2356 return ZstdDecompressionObj(self, write_size=write_size)
2270 2357
2271 def read_to_iter(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2272 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2273 skip_bytes=0):
2358 def read_to_iter(
2359 self,
2360 reader,
2361 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2362 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2363 skip_bytes=0,
2364 ):
2274 2365 if skip_bytes >= read_size:
2275 raise ValueError('skip_bytes must be smaller than read_size')
2276
2277 if hasattr(reader, 'read'):
2366 raise ValueError("skip_bytes must be smaller than read_size")
2367
2368 if hasattr(reader, "read"):
2278 2369 have_read = True
2279 elif hasattr(reader, '__getitem__'):
2370 elif hasattr(reader, "__getitem__"):
2280 2371 have_read = False
2281 2372 buffer_offset = 0
2282 2373 size = len(reader)
2283 2374 else:
2284 raise ValueError('must pass an object with a read() method or '
2285 'conforms to buffer protocol')
2375 raise ValueError(
2376 "must pass an object with a read() method or "
2377 "conforms to buffer protocol"
2378 )
2286 2379
2287 2380 if skip_bytes:
2288 2381 if have_read:
2289 2382 reader.read(skip_bytes)
2290 2383 else:
2291 2384 if skip_bytes > size:
2292 raise ValueError('skip_bytes larger than first input chunk')
2385 raise ValueError("skip_bytes larger than first input chunk")
2293 2386
2294 2387 buffer_offset = skip_bytes
2295 2388
2296 2389 self._ensure_dctx()
2297 2390
2298 in_buffer = ffi.new('ZSTD_inBuffer *')
2299 out_buffer = ffi.new('ZSTD_outBuffer *')
2300
2301 dst_buffer = ffi.new('char[]', write_size)
2391 in_buffer = ffi.new("ZSTD_inBuffer *")
2392 out_buffer = ffi.new("ZSTD_outBuffer *")
2393
2394 dst_buffer = ffi.new("char[]", write_size)
2302 2395 out_buffer.dst = dst_buffer
2303 2396 out_buffer.size = len(dst_buffer)
2304 2397 out_buffer.pos = 0
@@ -2311,7 +2404,7 class ZstdDecompressor(object):
2311 2404 else:
2312 2405 remaining = size - buffer_offset
2313 2406 slice_size = min(remaining, read_size)
2314 read_result = reader[buffer_offset:buffer_offset + slice_size]
2407 read_result = reader[buffer_offset : buffer_offset + slice_size]
2315 2408 buffer_offset += slice_size
2316 2409
2317 2410 # No new input. Break out of read loop.
@@ -2330,8 +2423,7 class ZstdDecompressor(object):
2330 2423
2331 2424 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2332 2425 if lib.ZSTD_isError(zresult):
2333 raise ZstdError('zstd decompress error: %s' %
2334 _zstd_error(zresult))
2426 raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult))
2335 2427
2336 2428 if out_buffer.pos:
2337 2429 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
@@ -2348,30 +2440,37 class ZstdDecompressor(object):
2348 2440
2349 2441 read_from = read_to_iter
2350 2442
2351 def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2352 write_return_read=False):
2353 if not hasattr(writer, 'write'):
2354 raise ValueError('must pass an object with a write() method')
2355
2356 return ZstdDecompressionWriter(self, writer, write_size,
2357 write_return_read)
2443 def stream_writer(
2444 self,
2445 writer,
2446 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2447 write_return_read=False,
2448 ):
2449 if not hasattr(writer, "write"):
2450 raise ValueError("must pass an object with a write() method")
2451
2452 return ZstdDecompressionWriter(self, writer, write_size, write_return_read)
2358 2453
2359 2454 write_to = stream_writer
2360 2455
2361 def copy_stream(self, ifh, ofh,
2362 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2363 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2364 if not hasattr(ifh, 'read'):
2365 raise ValueError('first argument must have a read() method')
2366 if not hasattr(ofh, 'write'):
2367 raise ValueError('second argument must have a write() method')
2456 def copy_stream(
2457 self,
2458 ifh,
2459 ofh,
2460 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2461 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2462 ):
2463 if not hasattr(ifh, "read"):
2464 raise ValueError("first argument must have a read() method")
2465 if not hasattr(ofh, "write"):
2466 raise ValueError("second argument must have a write() method")
2368 2467
2369 2468 self._ensure_dctx()
2370 2469
2371 in_buffer = ffi.new('ZSTD_inBuffer *')
2372 out_buffer = ffi.new('ZSTD_outBuffer *')
2373
2374 dst_buffer = ffi.new('char[]', write_size)
2470 in_buffer = ffi.new("ZSTD_inBuffer *")
2471 out_buffer = ffi.new("ZSTD_outBuffer *")
2472
2473 dst_buffer = ffi.new("char[]", write_size)
2375 2474 out_buffer.dst = dst_buffer
2376 2475 out_buffer.size = write_size
2377 2476 out_buffer.pos = 0
@@ -2394,8 +2493,9 class ZstdDecompressor(object):
2394 2493 while in_buffer.pos < in_buffer.size:
2395 2494 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2396 2495 if lib.ZSTD_isError(zresult):
2397 raise ZstdError('zstd decompressor error: %s' %
2398 _zstd_error(zresult))
2496 raise ZstdError(
2497 "zstd decompressor error: %s" % _zstd_error(zresult)
2498 )
2399 2499
2400 2500 if out_buffer.pos:
2401 2501 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
@@ -2408,48 +2508,47 class ZstdDecompressor(object):
2408 2508
2409 2509 def decompress_content_dict_chain(self, frames):
2410 2510 if not isinstance(frames, list):
2411 raise TypeError('argument must be a list')
2511 raise TypeError("argument must be a list")
2412 2512
2413 2513 if not frames:
2414 raise ValueError('empty input chain')
2514 raise ValueError("empty input chain")
2415 2515
2416 2516 # First chunk should not be using a dictionary. We handle it specially.
2417 2517 chunk = frames[0]
2418 2518 if not isinstance(chunk, bytes_type):
2419 raise ValueError('chunk 0 must be bytes')
2519 raise ValueError("chunk 0 must be bytes")
2420 2520
2421 2521 # All chunks should be zstd frames and should have content size set.
2422 2522 chunk_buffer = ffi.from_buffer(chunk)
2423 params = ffi.new('ZSTD_frameHeader *')
2523 params = ffi.new("ZSTD_frameHeader *")
2424 2524 zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer))
2425 2525 if lib.ZSTD_isError(zresult):
2426 raise ValueError('chunk 0 is not a valid zstd frame')
2526 raise ValueError("chunk 0 is not a valid zstd frame")
2427 2527 elif zresult:
2428 raise ValueError('chunk 0 is too small to contain a zstd frame')
2528 raise ValueError("chunk 0 is too small to contain a zstd frame")
2429 2529
2430 2530 if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2431 raise ValueError('chunk 0 missing content size in frame')
2531 raise ValueError("chunk 0 missing content size in frame")
2432 2532
2433 2533 self._ensure_dctx(load_dict=False)
2434 2534
2435 last_buffer = ffi.new('char[]', params.frameContentSize)
2436
2437 out_buffer = ffi.new('ZSTD_outBuffer *')
2535 last_buffer = ffi.new("char[]", params.frameContentSize)
2536
2537 out_buffer = ffi.new("ZSTD_outBuffer *")
2438 2538 out_buffer.dst = last_buffer
2439 2539 out_buffer.size = len(last_buffer)
2440 2540 out_buffer.pos = 0
2441 2541
2442 in_buffer = ffi.new('ZSTD_inBuffer *')
2542 in_buffer = ffi.new("ZSTD_inBuffer *")
2443 2543 in_buffer.src = chunk_buffer
2444 2544 in_buffer.size = len(chunk_buffer)
2445 2545 in_buffer.pos = 0
2446 2546
2447 2547 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2448 2548 if lib.ZSTD_isError(zresult):
2449 raise ZstdError('could not decompress chunk 0: %s' %
2450 _zstd_error(zresult))
2549 raise ZstdError("could not decompress chunk 0: %s" % _zstd_error(zresult))
2451 2550 elif zresult:
2452 raise ZstdError('chunk 0 did not decompress full frame')
2551 raise ZstdError("chunk 0 did not decompress full frame")
2453 2552
2454 2553 # Special case of chain length of 1
2455 2554 if len(frames) == 1:
@@ -2459,19 +2558,19 class ZstdDecompressor(object):
2459 2558 while i < len(frames):
2460 2559 chunk = frames[i]
2461 2560 if not isinstance(chunk, bytes_type):
2462 raise ValueError('chunk %d must be bytes' % i)
2561 raise ValueError("chunk %d must be bytes" % i)
2463 2562
2464 2563 chunk_buffer = ffi.from_buffer(chunk)
2465 2564 zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer))
2466 2565 if lib.ZSTD_isError(zresult):
2467 raise ValueError('chunk %d is not a valid zstd frame' % i)
2566 raise ValueError("chunk %d is not a valid zstd frame" % i)
2468 2567 elif zresult:
2469 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
2568 raise ValueError("chunk %d is too small to contain a zstd frame" % i)
2470 2569
2471 2570 if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2472 raise ValueError('chunk %d missing content size in frame' % i)
2473
2474 dest_buffer = ffi.new('char[]', params.frameContentSize)
2571 raise ValueError("chunk %d missing content size in frame" % i)
2572
2573 dest_buffer = ffi.new("char[]", params.frameContentSize)
2475 2574
2476 2575 out_buffer.dst = dest_buffer
2477 2576 out_buffer.size = len(dest_buffer)
@@ -2483,10 +2582,11 class ZstdDecompressor(object):
2483 2582
2484 2583 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2485 2584 if lib.ZSTD_isError(zresult):
2486 raise ZstdError('could not decompress chunk %d: %s' %
2487 _zstd_error(zresult))
2585 raise ZstdError(
2586 "could not decompress chunk %d: %s" % _zstd_error(zresult)
2587 )
2488 2588 elif zresult:
2489 raise ZstdError('chunk %d did not decompress full frame' % i)
2589 raise ZstdError("chunk %d did not decompress full frame" % i)
2490 2590
2491 2591 last_buffer = dest_buffer
2492 2592 i += 1
@@ -2497,19 +2597,19 class ZstdDecompressor(object):
2497 2597 lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only)
2498 2598
2499 2599 if self._max_window_size:
2500 zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx,
2501 self._max_window_size)
2600 zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx, self._max_window_size)
2502 2601 if lib.ZSTD_isError(zresult):
2503 raise ZstdError('unable to set max window size: %s' %
2504 _zstd_error(zresult))
2602 raise ZstdError(
2603 "unable to set max window size: %s" % _zstd_error(zresult)
2604 )
2505 2605
2506 2606 zresult = lib.ZSTD_DCtx_setFormat(self._dctx, self._format)
2507 2607 if lib.ZSTD_isError(zresult):
2508 raise ZstdError('unable to set decoding format: %s' %
2509 _zstd_error(zresult))
2608 raise ZstdError("unable to set decoding format: %s" % _zstd_error(zresult))
2510 2609
2511 2610 if self._dict_data and load_dict:
2512 2611 zresult = lib.ZSTD_DCtx_refDDict(self._dctx, self._dict_data._ddict)
2513 2612 if lib.ZSTD_isError(zresult):
2514 raise ZstdError('unable to reference prepared dictionary: %s' %
2515 _zstd_error(zresult))
2613 raise ZstdError(
2614 "unable to reference prepared dictionary: %s" % _zstd_error(zresult)
2615 )
@@ -210,7 +210,7 void zstd_module_init(PyObject* m) {
210 210 We detect this mismatch here and refuse to load the module if this
211 211 scenario is detected.
212 212 */
213 if (ZSTD_VERSION_NUMBER != 10403 || ZSTD_versionNumber() != 10403) {
213 if (ZSTD_VERSION_NUMBER != 10404 || ZSTD_versionNumber() != 10404) {
214 214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
215 215 return;
216 216 }
@@ -164,7 +164,7 MEM_STATIC unsigned BIT_highbit32 (U32 v
164 164 _BitScanReverse ( &r, val );
165 165 return (unsigned) r;
166 166 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
167 return 31 - __builtin_clz (val);
167 return __builtin_clz (val) ^ 31;
168 168 # elif defined(__ICCARM__) /* IAR Intrinsic */
169 169 return 31 - __CLZ(val);
170 170 # else /* Software version */
@@ -244,9 +244,9 MEM_STATIC void BIT_flushBitsFast(BIT_CS
244 244 {
245 245 size_t const nbBytes = bitC->bitPos >> 3;
246 246 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
247 assert(bitC->ptr <= bitC->endPtr);
247 248 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
248 249 bitC->ptr += nbBytes;
249 assert(bitC->ptr <= bitC->endPtr);
250 250 bitC->bitPos &= 7;
251 251 bitC->bitContainer >>= nbBytes*8;
252 252 }
@@ -260,6 +260,7 MEM_STATIC void BIT_flushBits(BIT_CStrea
260 260 {
261 261 size_t const nbBytes = bitC->bitPos >> 3;
262 262 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
263 assert(bitC->ptr <= bitC->endPtr);
263 264 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
264 265 bitC->ptr += nbBytes;
265 266 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -61,6 +61,13
61 61 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
62 62 #endif
63 63
64 /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
65 #if defined(__GNUC__)
66 # define UNUSED_ATTR __attribute__((unused))
67 #else
68 # define UNUSED_ATTR
69 #endif
70
64 71 /* force no inlining */
65 72 #ifdef _MSC_VER
66 73 # define FORCE_NOINLINE static __declspec(noinline)
@@ -127,9 +134,14
127 134 } \
128 135 }
129 136
130 /* vectorization */
137 /* vectorization
138 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
131 139 #if !defined(__clang__) && defined(__GNUC__)
132 # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
140 # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
141 # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
142 # else
143 # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
144 # endif
133 145 #else
134 146 # define DONT_VECTORIZE
135 147 #endif
@@ -308,7 +308,7 If there is an error, the function will
308 308 *******************************************/
309 309 /* FSE buffer bounds */
310 310 #define FSE_NCOUNTBOUND 512
311 #define FSE_BLOCKBOUND(size) (size + (size>>7))
311 #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
312 312 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
313 313
314 314 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
@@ -52,7 +52,9
52 52 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
53 53
54 54 /* check and forward error code */
55 #ifndef CHECK_F
55 56 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
57 #endif
56 58
57 59
58 60 /* **************************************************************
@@ -47,6 +47,79 extern "C" {
47 47 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
48 48 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
49 49
50 /* detects whether we are being compiled under msan */
51 #if defined (__has_feature)
52 # if __has_feature(memory_sanitizer)
53 # define MEMORY_SANITIZER 1
54 # endif
55 #endif
56
57 #if defined (MEMORY_SANITIZER)
58 /* Not all platforms that support msan provide sanitizers/msan_interface.h.
59 * We therefore declare the functions we need ourselves, rather than trying to
60 * include the header file... */
61
62 #include <stdint.h> /* intptr_t */
63
64 /* Make memory region fully initialized (without changing its contents). */
65 void __msan_unpoison(const volatile void *a, size_t size);
66
67 /* Make memory region fully uninitialized (without changing its contents).
68 This is a legacy interface that does not update origin information. Use
69 __msan_allocated_memory() instead. */
70 void __msan_poison(const volatile void *a, size_t size);
71
72 /* Returns the offset of the first (at least partially) poisoned byte in the
73 memory range, or -1 if the whole range is good. */
74 intptr_t __msan_test_shadow(const volatile void *x, size_t size);
75 #endif
76
77 /* detects whether we are being compiled under asan */
78 #if defined (__has_feature)
79 # if __has_feature(address_sanitizer)
80 # define ADDRESS_SANITIZER 1
81 # endif
82 #elif defined(__SANITIZE_ADDRESS__)
83 # define ADDRESS_SANITIZER 1
84 #endif
85
86 #if defined (ADDRESS_SANITIZER)
87 /* Not all platforms that support asan provide sanitizers/asan_interface.h.
88 * We therefore declare the functions we need ourselves, rather than trying to
89 * include the header file... */
90
91 /**
92 * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
93 *
94 * This memory must be previously allocated by your program. Instrumented
95 * code is forbidden from accessing addresses in this region until it is
96 * unpoisoned. This function is not guaranteed to poison the entire region -
97 * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
98 * alignment restrictions.
99 *
100 * \note This function is not thread-safe because no two threads can poison or
101 * unpoison memory in the same memory region simultaneously.
102 *
103 * \param addr Start of memory region.
104 * \param size Size of memory region. */
105 void __asan_poison_memory_region(void const volatile *addr, size_t size);
106
107 /**
108 * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
109 *
110 * This memory must be previously allocated by your program. Accessing
111 * addresses in this region is allowed until this region is poisoned again.
112 * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
113 * to ASan alignment restrictions.
114 *
115 * \note This function is not thread-safe because no two threads can
116 * poison or unpoison memory in the same memory region simultaneously.
117 *
118 * \param addr Start of memory region.
119 * \param size Size of memory region. */
120 void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
121 #endif
122
50 123
51 124 /*-**************************************************************
52 125 * Basic Types
@@ -127,9 +127,13 POOL_ctx* POOL_create_advanced(size_t nu
127 127 ctx->queueTail = 0;
128 128 ctx->numThreadsBusy = 0;
129 129 ctx->queueEmpty = 1;
130 (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
131 (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
132 (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
130 {
131 int error = 0;
132 error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
133 error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
134 error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
135 if (error) { POOL_free(ctx); return NULL; }
136 }
133 137 ctx->shutdown = 0;
134 138 /* Allocate space for the thread handles */
135 139 ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
@@ -14,6 +14,8
14 14 * This file will hold wrapper for systems, which do not support pthreads
15 15 */
16 16
17 #include "threading.h"
18
17 19 /* create fake symbol to avoid empty translation unit warning */
18 20 int g_ZSTD_threading_useless_symbol;
19 21
@@ -28,7 +30,6 int g_ZSTD_threading_useless_symbol;
28 30 /* === Dependencies === */
29 31 #include <process.h>
30 32 #include <errno.h>
31 #include "threading.h"
32 33
33 34
34 35 /* === Implementation === */
@@ -73,3 +74,47 int ZSTD_pthread_join(ZSTD_pthread_t thr
73 74 }
74 75
75 76 #endif /* ZSTD_MULTITHREAD */
77
78 #if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
79
80 #include <stdlib.h>
81
82 int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
83 {
84 *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
85 if (!*mutex)
86 return 1;
87 return pthread_mutex_init(*mutex, attr);
88 }
89
90 int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
91 {
92 if (!*mutex)
93 return 0;
94 {
95 int const ret = pthread_mutex_destroy(*mutex);
96 free(*mutex);
97 return ret;
98 }
99 }
100
101 int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
102 {
103 *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
104 if (!*cond)
105 return 1;
106 return pthread_cond_init(*cond, attr);
107 }
108
109 int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
110 {
111 if (!*cond)
112 return 0;
113 {
114 int const ret = pthread_cond_destroy(*cond);
115 free(*cond);
116 return ret;
117 }
118 }
119
120 #endif
@@ -13,6 +13,8
13 13 #ifndef THREADING_H_938743
14 14 #define THREADING_H_938743
15 15
16 #include "debug.h"
17
16 18 #if defined (__cplusplus)
17 19 extern "C" {
18 20 #endif
@@ -75,10 +77,12 int ZSTD_pthread_join(ZSTD_pthread_t thr
75 77 */
76 78
77 79
78 #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
80 #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
79 81 /* === POSIX Systems === */
80 82 # include <pthread.h>
81 83
84 #if DEBUGLEVEL < 1
85
82 86 #define ZSTD_pthread_mutex_t pthread_mutex_t
83 87 #define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
84 88 #define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
@@ -96,6 +100,33 int ZSTD_pthread_join(ZSTD_pthread_t thr
96 100 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
97 101 #define ZSTD_pthread_join(a, b) pthread_join((a),(b))
98 102
103 #else /* DEBUGLEVEL >= 1 */
104
105 /* Debug implementation of threading.
106 * In this implementation we use pointers for mutexes and condition variables.
107 * This way, if we forget to init/destroy them the program will crash or ASAN
108 * will report leaks.
109 */
110
111 #define ZSTD_pthread_mutex_t pthread_mutex_t*
112 int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
113 int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
114 #define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
115 #define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
116
117 #define ZSTD_pthread_cond_t pthread_cond_t*
118 int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
119 int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
120 #define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
121 #define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
122 #define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
123
124 #define ZSTD_pthread_t pthread_t
125 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
126 #define ZSTD_pthread_join(a, b) pthread_join((a),(b))
127
128 #endif
129
99 130 #else /* ZSTD_MULTITHREAD not defined */
100 131 /* No multithreading support */
101 132
@@ -197,79 +197,56 static void ZSTD_copy8(void* dst, const
197 197 static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
198 198 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
199 199
200 #define WILDCOPY_OVERLENGTH 8
201 #define VECLEN 16
200 #define WILDCOPY_OVERLENGTH 32
201 #define WILDCOPY_VECLEN 16
202 202
203 203 typedef enum {
204 204 ZSTD_no_overlap,
205 ZSTD_overlap_src_before_dst,
205 ZSTD_overlap_src_before_dst
206 206 /* ZSTD_overlap_dst_before_src, */
207 207 } ZSTD_overlap_e;
208 208
209 209 /*! ZSTD_wildcopy() :
210 * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
210 * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
211 * @param ovtype controls the overlap detection
212 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
213 * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
214 * The src buffer must be before the dst buffer.
215 */
211 216 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
212 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
217 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
213 218 {
214 219 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
215 220 const BYTE* ip = (const BYTE*)src;
216 221 BYTE* op = (BYTE*)dst;
217 222 BYTE* const oend = op + length;
218 223
219 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
220 if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
221 do
222 COPY8(op, ip)
223 while (op < oend);
224 }
225 else {
226 if ((length & 8) == 0)
227 COPY8(op, ip);
228 do {
229 COPY16(op, ip);
230 }
231 while (op < oend);
232 }
233 }
234
235 /*! ZSTD_wildcopy_16min() :
236 * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
237 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
238 void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
239 {
240 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
241 const BYTE* ip = (const BYTE*)src;
242 BYTE* op = (BYTE*)dst;
243 BYTE* const oend = op + length;
224 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
244 225
245 assert(length >= 8);
246 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
247
248 if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
249 do
250 COPY8(op, ip)
251 while (op < oend);
252 }
253 else {
254 if ((length & 8) == 0)
255 COPY8(op, ip);
256 do {
226 if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
227 /* Handle short offset copies. */
228 do {
229 COPY8(op, ip)
230 } while (op < oend);
231 } else {
232 assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
233 /* Separate out the first two COPY16() calls because the copy length is
234 * almost certain to be short, so the branches have different
235 * probabilities.
236 * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
237 * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
238 */
257 239 COPY16(op, ip);
258 }
259 while (op < oend);
240 COPY16(op, ip);
241 if (op >= oend) return;
242 do {
243 COPY16(op, ip);
244 COPY16(op, ip);
245 }
246 while (op < oend);
260 247 }
261 248 }
262 249
263 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
264 {
265 const BYTE* ip = (const BYTE*)src;
266 BYTE* op = (BYTE*)dst;
267 BYTE* const oend = (BYTE*)dstEnd;
268 do
269 COPY8(op, ip)
270 while (op < oend);
271 }
272
273 250
274 251 /*-*******************************************
275 252 * Private declarations
@@ -323,7 +300,7 MEM_STATIC U32 ZSTD_highbit32(U32 val)
323 300 _BitScanReverse(&r, val);
324 301 return (unsigned)r;
325 302 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
326 return 31 - __builtin_clz(val);
303 return __builtin_clz (val) ^ 31;
327 304 # elif defined(__ICCARM__) /* IAR Intrinsic */
328 305 return 31 - __CLZ(val);
329 306 # else /* Software version */
This diff has been collapsed as it changes many lines, (1099 lines changed) Show them Hide them
@@ -42,15 +42,15 size_t ZSTD_compressBound(size_t srcSize
42 42 * Context memory management
43 43 ***************************************/
44 44 struct ZSTD_CDict_s {
45 void* dictBuffer;
46 45 const void* dictContent;
47 46 size_t dictContentSize;
48 void* workspace;
49 size_t workspaceSize;
47 U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
48 ZSTD_cwksp workspace;
50 49 ZSTD_matchState_t matchState;
51 50 ZSTD_compressedBlockState_t cBlockState;
52 51 ZSTD_customMem customMem;
53 52 U32 dictID;
53 int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
54 54 }; /* typedef'd to ZSTD_CDict within "zstd.h" */
55 55
56 56 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -84,23 +84,26 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD
84 84
85 85 ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
86 86 {
87 ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace;
87 ZSTD_cwksp ws;
88 ZSTD_CCtx* cctx;
88 89 if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
89 90 if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
90 memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */
91 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
92
93 cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
94 if (cctx == NULL) {
95 return NULL;
96 }
97 memset(cctx, 0, sizeof(ZSTD_CCtx));
98 ZSTD_cwksp_move(&cctx->workspace, &ws);
91 99 cctx->staticSize = workspaceSize;
92 cctx->workSpace = (void*)(cctx+1);
93 cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
94 100
95 101 /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
96 if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
97 assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
98 cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
99 cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
100 {
101 void* const ptr = cctx->blockState.nextCBlock + 1;
102 cctx->entropyWorkspace = (U32*)ptr;
103 }
102 if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
103 cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
104 cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
105 cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(
106 &cctx->workspace, HUF_WORKSPACE_SIZE);
104 107 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
105 108 return cctx;
106 109 }
@@ -128,11 +131,11 static void ZSTD_freeCCtxContent(ZSTD_CC
128 131 {
129 132 assert(cctx != NULL);
130 133 assert(cctx->staticSize == 0);
131 ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
132 134 ZSTD_clearAllDicts(cctx);
133 135 #ifdef ZSTD_MULTITHREAD
134 136 ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
135 137 #endif
138 ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
136 139 }
137 140
138 141 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
@@ -140,8 +143,13 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
140 143 if (cctx==NULL) return 0; /* support free on NULL */
141 144 RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
142 145 "not compatible with static CCtx");
143 ZSTD_freeCCtxContent(cctx);
144 ZSTD_free(cctx, cctx->customMem);
146 {
147 int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
148 ZSTD_freeCCtxContent(cctx);
149 if (!cctxInWorkspace) {
150 ZSTD_free(cctx, cctx->customMem);
151 }
152 }
145 153 return 0;
146 154 }
147 155
@@ -160,7 +168,9 static size_t ZSTD_sizeof_mtctx(const ZS
160 168 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
161 169 {
162 170 if (cctx==NULL) return 0; /* support sizeof on NULL */
163 return sizeof(*cctx) + cctx->workSpaceSize
171 /* cctx may be in the workspace */
172 return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
173 + ZSTD_cwksp_sizeof(&cctx->workspace)
164 174 + ZSTD_sizeof_localDict(cctx->localDict)
165 175 + ZSTD_sizeof_mtctx(cctx);
166 176 }
@@ -229,23 +239,23 size_t ZSTD_CCtxParams_init_advanced(ZST
229 239 RETURN_ERROR_IF(!cctxParams, GENERIC);
230 240 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
231 241 memset(cctxParams, 0, sizeof(*cctxParams));
242 assert(!ZSTD_checkCParams(params.cParams));
232 243 cctxParams->cParams = params.cParams;
233 244 cctxParams->fParams = params.fParams;
234 245 cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
235 assert(!ZSTD_checkCParams(params.cParams));
236 246 return 0;
237 247 }
238 248
239 249 /* ZSTD_assignParamsToCCtxParams() :
240 250 * params is presumed valid at this stage */
241 251 static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
242 ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
252 const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
243 253 {
244 ZSTD_CCtx_params ret = cctxParams;
254 ZSTD_CCtx_params ret = *cctxParams;
255 assert(!ZSTD_checkCParams(params.cParams));
245 256 ret.cParams = params.cParams;
246 257 ret.fParams = params.fParams;
247 258 ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
248 assert(!ZSTD_checkCParams(params.cParams));
249 259 return ret;
250 260 }
251 261
@@ -378,7 +388,7 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c
378 388 case ZSTD_c_forceAttachDict:
379 389 ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
380 390 bounds.lowerBound = ZSTD_dictDefaultAttach;
381 bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */
391 bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
382 392 return bounds;
383 393
384 394 case ZSTD_c_literalCompressionMode:
@@ -392,6 +402,11 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c
392 402 bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
393 403 return bounds;
394 404
405 case ZSTD_c_srcSizeHint:
406 bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
407 bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
408 return bounds;
409
395 410 default:
396 411 { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
397 412 return boundError;
@@ -448,6 +463,7 static int ZSTD_isUpdateAuthorized(ZSTD_
448 463 case ZSTD_c_forceAttachDict:
449 464 case ZSTD_c_literalCompressionMode:
450 465 case ZSTD_c_targetCBlockSize:
466 case ZSTD_c_srcSizeHint:
451 467 default:
452 468 return 0;
453 469 }
@@ -494,6 +510,7 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*
494 510 case ZSTD_c_ldmMinMatch:
495 511 case ZSTD_c_ldmBucketSizeLog:
496 512 case ZSTD_c_targetCBlockSize:
513 case ZSTD_c_srcSizeHint:
497 514 break;
498 515
499 516 default: RETURN_ERROR(parameter_unsupported);
@@ -517,33 +534,33 size_t ZSTD_CCtxParams_setParameter(ZSTD
517 534 if (value) { /* 0 : does not change current level */
518 535 CCtxParams->compressionLevel = value;
519 536 }
520 if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
537 if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
521 538 return 0; /* return type (size_t) cannot represent negative values */
522 539 }
523 540
524 541 case ZSTD_c_windowLog :
525 542 if (value!=0) /* 0 => use default */
526 543 BOUNDCHECK(ZSTD_c_windowLog, value);
527 CCtxParams->cParams.windowLog = value;
544 CCtxParams->cParams.windowLog = (U32)value;
528 545 return CCtxParams->cParams.windowLog;
529 546
530 547 case ZSTD_c_hashLog :
531 548 if (value!=0) /* 0 => use default */
532 549 BOUNDCHECK(ZSTD_c_hashLog, value);
533 CCtxParams->cParams.hashLog = value;
550 CCtxParams->cParams.hashLog = (U32)value;
534 551 return CCtxParams->cParams.hashLog;
535 552
536 553 case ZSTD_c_chainLog :
537 554 if (value!=0) /* 0 => use default */
538 555 BOUNDCHECK(ZSTD_c_chainLog, value);
539 CCtxParams->cParams.chainLog = value;
556 CCtxParams->cParams.chainLog = (U32)value;
540 557 return CCtxParams->cParams.chainLog;
541 558
542 559 case ZSTD_c_searchLog :
543 560 if (value!=0) /* 0 => use default */
544 561 BOUNDCHECK(ZSTD_c_searchLog, value);
545 CCtxParams->cParams.searchLog = value;
546 return value;
562 CCtxParams->cParams.searchLog = (U32)value;
563 return (size_t)value;
547 564
548 565 case ZSTD_c_minMatch :
549 566 if (value!=0) /* 0 => use default */
@@ -674,6 +691,12 size_t ZSTD_CCtxParams_setParameter(ZSTD
674 691 CCtxParams->targetCBlockSize = value;
675 692 return CCtxParams->targetCBlockSize;
676 693
694 case ZSTD_c_srcSizeHint :
695 if (value!=0) /* 0 ==> default */
696 BOUNDCHECK(ZSTD_c_srcSizeHint, value);
697 CCtxParams->srcSizeHint = value;
698 return CCtxParams->srcSizeHint;
699
677 700 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
678 701 }
679 702 }
@@ -779,6 +802,9 size_t ZSTD_CCtxParams_getParameter(
779 802 case ZSTD_c_targetCBlockSize :
780 803 *value = (int)CCtxParams->targetCBlockSize;
781 804 break;
805 case ZSTD_c_srcSizeHint :
806 *value = (int)CCtxParams->srcSizeHint;
807 break;
782 808 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
783 809 }
784 810 return 0;
@@ -1029,7 +1055,11 ZSTD_adjustCParams(ZSTD_compressionParam
1029 1055 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1030 1056 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
1031 1057 {
1032 ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
1058 ZSTD_compressionParameters cParams;
1059 if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
1060 srcSizeHint = CCtxParams->srcSizeHint;
1061 }
1062 cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
1033 1063 if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
1034 1064 if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
1035 1065 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
@@ -1049,10 +1079,19 ZSTD_sizeof_matchState(const ZSTD_compre
1049 1079 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
1050 1080 size_t const hSize = ((size_t)1) << cParams->hashLog;
1051 1081 U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1052 size_t const h3Size = ((size_t)1) << hashLog3;
1053 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
1054 size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
1055 + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
1082 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
1083 /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
1084 * surrounded by redzones in ASAN. */
1085 size_t const tableSpace = chainSize * sizeof(U32)
1086 + hSize * sizeof(U32)
1087 + h3Size * sizeof(U32);
1088 size_t const optPotentialSpace =
1089 ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
1090 + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
1091 + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
1092 + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
1093 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
1094 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
1056 1095 size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
1057 1096 ? optPotentialSpace
1058 1097 : 0;
@@ -1069,20 +1108,23 size_t ZSTD_estimateCCtxSize_usingCCtxPa
1069 1108 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
1070 1109 U32 const divider = (cParams.minMatch==3) ? 3 : 4;
1071 1110 size_t const maxNbSeq = blockSize / divider;
1072 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
1073 size_t const entropySpace = HUF_WORKSPACE_SIZE;
1074 size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
1111 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
1112 + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
1113 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
1114 size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
1115 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
1075 1116 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
1076 1117
1077 1118 size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
1078 size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);
1119 size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));
1079 1120
1080 1121 size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
1081 1122 matchStateSize + ldmSpace + ldmSeqSpace;
1082
1083 DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
1084 DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
1085 return sizeof(ZSTD_CCtx) + neededSpace;
1123 size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));
1124
1125 DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace);
1126 DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
1127 return cctxSpace + neededSpace;
1086 1128 }
1087 1129 }
1088 1130
@@ -1118,7 +1160,8 size_t ZSTD_estimateCStreamSize_usingCCt
1118 1160 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
1119 1161 size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
1120 1162 size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
1121 size_t const streamingSize = inBuffSize + outBuffSize;
1163 size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize)
1164 + ZSTD_cwksp_alloc_size(outBuffSize);
1122 1165
1123 1166 return CCtxSize + streamingSize;
1124 1167 }
@@ -1186,17 +1229,6 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
1186 1229 return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
1187 1230 }
1188 1231
1189
1190
1191 static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
1192 ZSTD_compressionParameters cParams2)
1193 {
1194 return (cParams1.hashLog == cParams2.hashLog)
1195 & (cParams1.chainLog == cParams2.chainLog)
1196 & (cParams1.strategy == cParams2.strategy) /* opt parser space */
1197 & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */
1198 }
1199
1200 1232 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
1201 1233 ZSTD_compressionParameters cParams2)
1202 1234 {
@@ -1211,71 +1243,6 static void ZSTD_assertEqualCParams(ZSTD
1211 1243 assert(cParams1.strategy == cParams2.strategy);
1212 1244 }
1213 1245
1214 /** The parameters are equivalent if ldm is not enabled in both sets or
1215 * all the parameters are equivalent. */
1216 static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
1217 ldmParams_t ldmParams2)
1218 {
1219 return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) ||
1220 (ldmParams1.enableLdm == ldmParams2.enableLdm &&
1221 ldmParams1.hashLog == ldmParams2.hashLog &&
1222 ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
1223 ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
1224 ldmParams1.hashRateLog == ldmParams2.hashRateLog);
1225 }
1226
1227 typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
1228
1229 /* ZSTD_sufficientBuff() :
1230 * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
1231 * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
1232 static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
1233 size_t maxNbLit1,
1234 ZSTD_buffered_policy_e buffPol2,
1235 ZSTD_compressionParameters cParams2,
1236 U64 pledgedSrcSize)
1237 {
1238 size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
1239 size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
1240 size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4);
1241 size_t const maxNbLit2 = blockSize2;
1242 size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
1243 DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
1244 (U32)neededBufferSize2, (U32)bufferSize1);
1245 DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
1246 (U32)maxNbSeq2, (U32)maxNbSeq1);
1247 DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
1248 (U32)maxNbLit2, (U32)maxNbLit1);
1249 return (maxNbLit2 <= maxNbLit1)
1250 & (maxNbSeq2 <= maxNbSeq1)
1251 & (neededBufferSize2 <= bufferSize1);
1252 }
1253
1254 /** Equivalence for resetCCtx purposes */
1255 static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
1256 ZSTD_CCtx_params params2,
1257 size_t buffSize1,
1258 size_t maxNbSeq1, size_t maxNbLit1,
1259 ZSTD_buffered_policy_e buffPol2,
1260 U64 pledgedSrcSize)
1261 {
1262 DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
1263 if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
1264 DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
1265 return 0;
1266 }
1267 if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
1268 DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
1269 return 0;
1270 }
1271 if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
1272 params2.cParams, pledgedSrcSize)) {
1273 DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
1274 return 0;
1275 }
1276 return 1;
1277 }
1278
1279 1246 static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
1280 1247 {
1281 1248 int i;
@@ -1301,87 +1268,104 static void ZSTD_invalidateMatchState(ZS
1301 1268 ms->dictMatchState = NULL;
1302 1269 }
1303 1270
1304 /*! ZSTD_continueCCtx() :
1305 * reuse CCtx without reset (note : requires no dictionary) */
1306 static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
1307 {
1308 size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
1309 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1310 DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");
1311
1312 cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
1313 cctx->appliedParams = params;
1314 cctx->blockState.matchState.cParams = params.cParams;
1315 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1316 cctx->consumedSrcSize = 0;
1317 cctx->producedCSize = 0;
1318 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1319 cctx->appliedParams.fParams.contentSizeFlag = 0;
1320 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1321 (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
1322 cctx->stage = ZSTDcs_init;
1323 cctx->dictID = 0;
1324 if (params.ldmParams.enableLdm)
1325 ZSTD_window_clear(&cctx->ldmState.window);
1326 ZSTD_referenceExternalSequences(cctx, NULL, 0);
1327 ZSTD_invalidateMatchState(&cctx->blockState.matchState);
1328 ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
1329 XXH64_reset(&cctx->xxhState, 0);
1330 return 0;
1331 }
1332
1333 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
1334
1335 typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
1336
1337 static void*
1271 /**
1272 * Indicates whether this compression proceeds directly from user-provided
1273 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
1274 * whether the context needs to buffer the input/output (ZSTDb_buffered).
1275 */
1276 typedef enum {
1277 ZSTDb_not_buffered,
1278 ZSTDb_buffered
1279 } ZSTD_buffered_policy_e;
1280
1281 /**
1282 * Controls, for this matchState reset, whether the tables need to be cleared /
1283 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
1284 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
1285 * subsequent operation will overwrite the table space anyway (e.g., copying
1286 * the matchState contents in from a CDict).
1287 */
1288 typedef enum {
1289 ZSTDcrp_makeClean,
1290 ZSTDcrp_leaveDirty
1291 } ZSTD_compResetPolicy_e;
1292
1293 /**
1294 * Controls, for this matchState reset, whether indexing can continue where it
1295 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
1296 * (ZSTDirp_reset).
1297 */
1298 typedef enum {
1299 ZSTDirp_continue,
1300 ZSTDirp_reset
1301 } ZSTD_indexResetPolicy_e;
1302
1303 typedef enum {
1304 ZSTD_resetTarget_CDict,
1305 ZSTD_resetTarget_CCtx
1306 } ZSTD_resetTarget_e;
1307
1308 static size_t
1338 1309 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
1339 void* ptr,
1310 ZSTD_cwksp* ws,
1340 1311 const ZSTD_compressionParameters* cParams,
1341 ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
1312 const ZSTD_compResetPolicy_e crp,
1313 const ZSTD_indexResetPolicy_e forceResetIndex,
1314 const ZSTD_resetTarget_e forWho)
1342 1315 {
1343 1316 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
1344 1317 size_t const hSize = ((size_t)1) << cParams->hashLog;
1345 1318 U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1346 size_t const h3Size = ((size_t)1) << hashLog3;
1347 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
1348
1349 assert(((size_t)ptr & 3) == 0);
1319 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
1320
1321 DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
1322 if (forceResetIndex == ZSTDirp_reset) {
1323 memset(&ms->window, 0, sizeof(ms->window));
1324 ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
1325 ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
1326 ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
1327 ZSTD_cwksp_mark_tables_dirty(ws);
1328 }
1350 1329
1351 1330 ms->hashLog3 = hashLog3;
1352 memset(&ms->window, 0, sizeof(ms->window));
1353 ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
1354 ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
1355 ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
1331
1356 1332 ZSTD_invalidateMatchState(ms);
1357 1333
1334 assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
1335
1336 ZSTD_cwksp_clear_tables(ws);
1337
1338 DEBUGLOG(5, "reserving table space");
1339 /* table Space */
1340 ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
1341 ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
1342 ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
1343 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
1344 "failed a workspace allocation in ZSTD_reset_matchState");
1345
1346 DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
1347 if (crp!=ZSTDcrp_leaveDirty) {
1348 /* reset tables only */
1349 ZSTD_cwksp_clean_tables(ws);
1350 }
1351
1358 1352 /* opt parser space */
1359 1353 if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
1360 1354 DEBUGLOG(4, "reserving optimal parser space");
1361 ms->opt.litFreq = (unsigned*)ptr;
1362 ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
1363 ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
1364 ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
1365 ptr = ms->opt.offCodeFreq + (MaxOff+1);
1366 ms->opt.matchTable = (ZSTD_match_t*)ptr;
1367 ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
1368 ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
1369 ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
1355 ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
1356 ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
1357 ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
1358 ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
1359 ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
1360 ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
1370 1361 }
1371 1362
1372 /* table Space */
1373 DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
1374 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
1375 if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
1376 ms->hashTable = (U32*)(ptr);
1377 ms->chainTable = ms->hashTable + hSize;
1378 ms->hashTable3 = ms->chainTable + chainSize;
1379 ptr = ms->hashTable3 + h3Size;
1380
1381 1363 ms->cParams = *cParams;
1382 1364
1383 assert(((size_t)ptr & 3) == 0);
1384 return ptr;
1365 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
1366 "failed a workspace allocation in ZSTD_reset_matchState");
1367
1368 return 0;
1385 1369 }
1386 1370
1387 1371 /* ZSTD_indexTooCloseToMax() :
@@ -1397,13 +1381,6 static int ZSTD_indexTooCloseToMax(ZSTD_
1397 1381 return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
1398 1382 }
1399 1383
1400 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
1401 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large
1402 * during at least this number of times,
1403 * context's memory usage is considered wasteful,
1404 * because it's sized to handle a worst case scenario which rarely happens.
1405 * In which case, resize it down to free some memory */
1406
1407 1384 /*! ZSTD_resetCCtx_internal() :
1408 1385 note : `params` are assumed fully validated at this stage */
1409 1386 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
@@ -1412,30 +1389,12 static size_t ZSTD_resetCCtx_internal(ZS
1412 1389 ZSTD_compResetPolicy_e const crp,
1413 1390 ZSTD_buffered_policy_e const zbuff)
1414 1391 {
1392 ZSTD_cwksp* const ws = &zc->workspace;
1415 1393 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
1416 1394 (U32)pledgedSrcSize, params.cParams.windowLog);
1417 1395 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1418 1396
1419 if (crp == ZSTDcrp_continue) {
1420 if (ZSTD_equivalentParams(zc->appliedParams, params,
1421 zc->inBuffSize,
1422 zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
1423 zbuff, pledgedSrcSize) ) {
1424 DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
1425 zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */
1426 if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
1427 DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
1428 zc->appliedParams.cParams.windowLog, zc->blockSize);
1429 if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
1430 /* prefer a reset, faster than a rescale */
1431 ZSTD_reset_matchState(&zc->blockState.matchState,
1432 zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
1433 &params.cParams,
1434 crp, ZSTD_resetTarget_CCtx);
1435 }
1436 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
1437 } } }
1438 DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
1397 zc->isFirstBlock = 1;
1439 1398
1440 1399 if (params.ldmParams.enableLdm) {
1441 1400 /* Adjust long distance matching parameters */
@@ -1449,58 +1408,74 static size_t ZSTD_resetCCtx_internal(ZS
1449 1408 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1450 1409 U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
1451 1410 size_t const maxNbSeq = blockSize / divider;
1452 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
1411 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
1412 + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
1413 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
1453 1414 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
1454 1415 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
1455 1416 size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
1456 1417 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
1457 void* ptr; /* used to partition workSpace */
1458
1459 /* Check if workSpace is large enough, alloc a new one if needed */
1460 { size_t const entropySpace = HUF_WORKSPACE_SIZE;
1461 size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
1462 size_t const bufferSpace = buffInSize + buffOutSize;
1418
1419 ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue;
1420
1421 if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
1422 needsIndexReset = ZSTDirp_reset;
1423 }
1424
1425 ZSTD_cwksp_bump_oversized_duration(ws, 0);
1426
1427 /* Check if workspace is large enough, alloc a new one if needed */
1428 { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
1429 size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
1430 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
1431 size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
1463 1432 size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
1464 size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
1465
1466 size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
1467 ldmSeqSpace + matchStateSize + tokenSpace +
1468 bufferSpace;
1469
1470 int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
1471 int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
1472 int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
1473 zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;
1433 size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
1434
1435 size_t const neededSpace =
1436 cctxSpace +
1437 entropySpace +
1438 blockStateSpace +
1439 ldmSpace +
1440 ldmSeqSpace +
1441 matchStateSize +
1442 tokenSpace +
1443 bufferSpace;
1444
1445 int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
1446 int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
1474 1447
1475 1448 DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
1476 1449 neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
1477 1450 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1478 1451
1479 if (workSpaceTooSmall || workSpaceWasteful) {
1480 DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
1481 zc->workSpaceSize >> 10,
1452 if (workspaceTooSmall || workspaceWasteful) {
1453 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
1454 ZSTD_cwksp_sizeof(ws) >> 10,
1482 1455 neededSpace >> 10);
1483 1456
1484 1457 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
1485 1458
1486 zc->workSpaceSize = 0;
1487 ZSTD_free(zc->workSpace, zc->customMem);
1488 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
1489 RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation);
1490 zc->workSpaceSize = neededSpace;
1491 zc->workSpaceOversizedDuration = 0;
1492
1459 needsIndexReset = ZSTDirp_reset;
1460
1461 ZSTD_cwksp_free(ws, zc->customMem);
1462 FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem));
1463
1464 DEBUGLOG(5, "reserving object space");
1493 1465 /* Statically sized space.
1494 1466 * entropyWorkspace never moves,
1495 1467 * though prev/next block swap places */
1496 assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
1497 assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
1498 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
1499 zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
1500 ptr = zc->blockState.nextCBlock + 1;
1501 zc->entropyWorkspace = (U32*)ptr;
1468 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
1469 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1470 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
1471 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1472 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
1473 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
1474 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
1502 1475 } }
1503 1476
1477 ZSTD_cwksp_clear(ws);
1478
1504 1479 /* init params */
1505 1480 zc->appliedParams = params;
1506 1481 zc->blockState.matchState.cParams = params.cParams;
@@ -1519,58 +1494,58 static size_t ZSTD_resetCCtx_internal(ZS
1519 1494
1520 1495 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
1521 1496
1522 ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
1523 zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
1524 &params.cParams,
1525 crp, ZSTD_resetTarget_CCtx);
1497 /* ZSTD_wildcopy() is used to copy into the literals buffer,
1498 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1499 */
1500 zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
1501 zc->seqStore.maxNbLit = blockSize;
1502
1503 /* buffers */
1504 zc->inBuffSize = buffInSize;
1505 zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
1506 zc->outBuffSize = buffOutSize;
1507 zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
1508
1509 /* ldm bucketOffsets table */
1510 if (params.ldmParams.enableLdm) {
1511 /* TODO: avoid memset? */
1512 size_t const ldmBucketSize =
1513 ((size_t)1) << (params.ldmParams.hashLog -
1514 params.ldmParams.bucketSizeLog);
1515 zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
1516 memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
1517 }
1518
1519 /* sequences storage */
1520 ZSTD_referenceExternalSequences(zc, NULL, 0);
1521 zc->seqStore.maxNbSeq = maxNbSeq;
1522 zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1523 zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1524 zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1525 zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
1526
1527 FORWARD_IF_ERROR(ZSTD_reset_matchState(
1528 &zc->blockState.matchState,
1529 ws,
1530 &params.cParams,
1531 crp,
1532 needsIndexReset,
1533 ZSTD_resetTarget_CCtx));
1526 1534
1527 1535 /* ldm hash table */
1528 /* initialize bucketOffsets table later for pointer alignment */
1529 1536 if (params.ldmParams.enableLdm) {
1537 /* TODO: avoid memset? */
1530 1538 size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
1531 memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t));
1532 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
1533 zc->ldmState.hashTable = (ldmEntry_t*)ptr;
1534 ptr = zc->ldmState.hashTable + ldmHSize;
1535 zc->ldmSequences = (rawSeq*)ptr;
1536 ptr = zc->ldmSequences + maxNbLdmSeq;
1539 zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
1540 memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
1541 zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
1537 1542 zc->maxNbLdmSequences = maxNbLdmSeq;
1538 1543
1539 1544 memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
1540 }
1541 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
1542
1543 /* sequences storage */
1544 zc->seqStore.maxNbSeq = maxNbSeq;
1545 zc->seqStore.sequencesStart = (seqDef*)ptr;
1546 ptr = zc->seqStore.sequencesStart + maxNbSeq;
1547 zc->seqStore.llCode = (BYTE*) ptr;
1548 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
1549 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
1550 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
1551 /* ZSTD_wildcopy() is used to copy into the literals buffer,
1552 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1553 */
1554 zc->seqStore.maxNbLit = blockSize;
1555 ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
1556
1557 /* ldm bucketOffsets table */
1558 if (params.ldmParams.enableLdm) {
1559 size_t const ldmBucketSize =
1560 ((size_t)1) << (params.ldmParams.hashLog -
1561 params.ldmParams.bucketSizeLog);
1562 memset(ptr, 0, ldmBucketSize);
1563 zc->ldmState.bucketOffsets = (BYTE*)ptr;
1564 ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
1565 1545 ZSTD_window_clear(&zc->ldmState.window);
1566 1546 }
1567 ZSTD_referenceExternalSequences(zc, NULL, 0);
1568
1569 /* buffers */
1570 zc->inBuffSize = buffInSize;
1571 zc->inBuff = (char*)ptr;
1572 zc->outBuffSize = buffOutSize;
1573 zc->outBuff = zc->inBuff + buffInSize;
1547
1548 DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
1574 1549
1575 1550 return 0;
1576 1551 }
@@ -1604,15 +1579,15 static const size_t attachDictSizeCutoff
1604 1579 };
1605 1580
1606 1581 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
1607 ZSTD_CCtx_params params,
1582 const ZSTD_CCtx_params* params,
1608 1583 U64 pledgedSrcSize)
1609 1584 {
1610 1585 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
1611 1586 return ( pledgedSrcSize <= cutoff
1612 1587 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
1613 || params.attachDictPref == ZSTD_dictForceAttach )
1614 && params.attachDictPref != ZSTD_dictForceCopy
1615 && !params.forceWindow; /* dictMatchState isn't correctly
1588 || params->attachDictPref == ZSTD_dictForceAttach )
1589 && params->attachDictPref != ZSTD_dictForceCopy
1590 && !params->forceWindow; /* dictMatchState isn't correctly
1616 1591 * handled in _enforceMaxDist */
1617 1592 }
1618 1593
@@ -1630,8 +1605,8 ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCt
1630 1605 * has its own tables. */
1631 1606 params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
1632 1607 params.cParams.windowLog = windowLog;
1633 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1634 ZSTDcrp_continue, zbuff);
1608 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1609 ZSTDcrp_makeClean, zbuff));
1635 1610 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
1636 1611 }
1637 1612
@@ -1679,30 +1654,36 static size_t ZSTD_resetCCtx_byCopyingCD
1679 1654 /* Copy only compression parameters related to tables. */
1680 1655 params.cParams = *cdict_cParams;
1681 1656 params.cParams.windowLog = windowLog;
1682 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1683 ZSTDcrp_noMemset, zbuff);
1657 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1658 ZSTDcrp_leaveDirty, zbuff));
1684 1659 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
1685 1660 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
1686 1661 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
1687 1662 }
1688 1663
1664 ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
1665
1689 1666 /* copy tables */
1690 1667 { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
1691 1668 size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
1692 size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
1693 assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
1694 assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
1695 assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
1696 assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
1697 memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
1669
1670 memcpy(cctx->blockState.matchState.hashTable,
1671 cdict->matchState.hashTable,
1672 hSize * sizeof(U32));
1673 memcpy(cctx->blockState.matchState.chainTable,
1674 cdict->matchState.chainTable,
1675 chainSize * sizeof(U32));
1698 1676 }
1699 1677
1700 1678 /* Zero the hashTable3, since the cdict never fills it */
1701 { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
1679 { int const h3log = cctx->blockState.matchState.hashLog3;
1680 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
1702 1681 assert(cdict->matchState.hashLog3 == 0);
1703 1682 memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
1704 1683 }
1705 1684
1685 ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
1686
1706 1687 /* copy dictionary offsets */
1707 1688 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
1708 1689 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
@@ -1724,7 +1705,7 static size_t ZSTD_resetCCtx_byCopyingCD
1724 1705 * in-place. We decide here which strategy to use. */
1725 1706 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
1726 1707 const ZSTD_CDict* cdict,
1727 ZSTD_CCtx_params params,
1708 const ZSTD_CCtx_params* params,
1728 1709 U64 pledgedSrcSize,
1729 1710 ZSTD_buffered_policy_e zbuff)
1730 1711 {
@@ -1734,10 +1715,10 static size_t ZSTD_resetCCtx_usingCDict(
1734 1715
1735 1716 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1736 1717 return ZSTD_resetCCtx_byAttachingCDict(
1737 cctx, cdict, params, pledgedSrcSize, zbuff);
1718 cctx, cdict, *params, pledgedSrcSize, zbuff);
1738 1719 } else {
1739 1720 return ZSTD_resetCCtx_byCopyingCDict(
1740 cctx, cdict, params, pledgedSrcSize, zbuff);
1721 cctx, cdict, *params, pledgedSrcSize, zbuff);
1741 1722 }
1742 1723 }
1743 1724
@@ -1763,7 +1744,7 static size_t ZSTD_copyCCtx_internal(ZST
1763 1744 params.cParams = srcCCtx->appliedParams.cParams;
1764 1745 params.fParams = fParams;
1765 1746 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
1766 ZSTDcrp_noMemset, zbuff);
1747 ZSTDcrp_leaveDirty, zbuff);
1767 1748 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
1768 1749 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
1769 1750 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
@@ -1771,16 +1752,27 static size_t ZSTD_copyCCtx_internal(ZST
1771 1752 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
1772 1753 }
1773 1754
1755 ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
1756
1774 1757 /* copy tables */
1775 1758 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
1776 1759 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
1777 size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3;
1778 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
1779 assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
1780 assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize);
1781 memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */
1760 int const h3log = srcCCtx->blockState.matchState.hashLog3;
1761 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
1762
1763 memcpy(dstCCtx->blockState.matchState.hashTable,
1764 srcCCtx->blockState.matchState.hashTable,
1765 hSize * sizeof(U32));
1766 memcpy(dstCCtx->blockState.matchState.chainTable,
1767 srcCCtx->blockState.matchState.chainTable,
1768 chainSize * sizeof(U32));
1769 memcpy(dstCCtx->blockState.matchState.hashTable3,
1770 srcCCtx->blockState.matchState.hashTable3,
1771 h3Size * sizeof(U32));
1782 1772 }
1783 1773
1774 ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
1775
1784 1776 /* copy dictionary offsets */
1785 1777 {
1786 1778 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
@@ -1831,6 +1823,20 ZSTD_reduceTable_internal (U32* const ta
1831 1823 int rowNb;
1832 1824 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
1833 1825 assert(size < (1U<<31)); /* can be casted to int */
1826
1827 #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
1828 /* To validate that the table re-use logic is sound, and that we don't
1829 * access table space that we haven't cleaned, we re-"poison" the table
1830 * space every time we mark it dirty.
1831 *
1832 * This function however is intended to operate on those dirty tables and
1833 * re-clean them. So when this function is used correctly, we can unpoison
1834 * the memory it operated on. This introduces a blind spot though, since
1835 * if we now try to operate on __actually__ poisoned memory, we will not
1836 * detect that. */
1837 __msan_unpoison(table, size * sizeof(U32));
1838 #endif
1839
1834 1840 for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
1835 1841 int column;
1836 1842 for (column=0; column<ZSTD_ROWSIZE; column++) {
@@ -1938,7 +1944,7 ZSTD_compressSequences_internal(seqStore
1938 1944 ZSTD_entropyCTables_t* nextEntropy,
1939 1945 const ZSTD_CCtx_params* cctxParams,
1940 1946 void* dst, size_t dstCapacity,
1941 void* workspace, size_t wkspSize,
1947 void* entropyWorkspace, size_t entropyWkspSize,
1942 1948 const int bmi2)
1943 1949 {
1944 1950 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
@@ -1971,7 +1977,7 ZSTD_compressSequences_internal(seqStore
1971 1977 ZSTD_disableLiteralsCompression(cctxParams),
1972 1978 op, dstCapacity,
1973 1979 literals, litSize,
1974 workspace, wkspSize,
1980 entropyWorkspace, entropyWkspSize,
1975 1981 bmi2);
1976 1982 FORWARD_IF_ERROR(cSize);
1977 1983 assert(cSize <= dstCapacity);
@@ -1981,12 +1987,17 ZSTD_compressSequences_internal(seqStore
1981 1987 /* Sequences Header */
1982 1988 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
1983 1989 dstSize_tooSmall);
1984 if (nbSeq < 0x7F)
1990 if (nbSeq < 128) {
1985 1991 *op++ = (BYTE)nbSeq;
1986 else if (nbSeq < LONGNBSEQ)
1987 op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
1988 else
1989 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
1992 } else if (nbSeq < LONGNBSEQ) {
1993 op[0] = (BYTE)((nbSeq>>8) + 0x80);
1994 op[1] = (BYTE)nbSeq;
1995 op+=2;
1996 } else {
1997 op[0]=0xFF;
1998 MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
1999 op+=3;
2000 }
1990 2001 assert(op <= oend);
1991 2002 if (nbSeq==0) {
1992 2003 /* Copy the old tables over as if we repeated them */
@@ -2002,7 +2013,7 ZSTD_compressSequences_internal(seqStore
2002 2013 ZSTD_seqToCodes(seqStorePtr);
2003 2014 /* build CTable for Literal Lengths */
2004 2015 { unsigned max = MaxLL;
2005 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
2016 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
2006 2017 DEBUGLOG(5, "Building LL table");
2007 2018 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
2008 2019 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
@@ -2012,10 +2023,14 ZSTD_compressSequences_internal(seqStore
2012 2023 ZSTD_defaultAllowed, strategy);
2013 2024 assert(set_basic < set_compressed && set_rle < set_compressed);
2014 2025 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2015 { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
2016 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
2017 prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
2018 workspace, wkspSize);
2026 { size_t const countSize = ZSTD_buildCTable(
2027 op, (size_t)(oend - op),
2028 CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
2029 count, max, llCodeTable, nbSeq,
2030 LL_defaultNorm, LL_defaultNormLog, MaxLL,
2031 prevEntropy->fse.litlengthCTable,
2032 sizeof(prevEntropy->fse.litlengthCTable),
2033 entropyWorkspace, entropyWkspSize);
2019 2034 FORWARD_IF_ERROR(countSize);
2020 2035 if (LLtype == set_compressed)
2021 2036 lastNCount = op;
@@ -2024,7 +2039,8 ZSTD_compressSequences_internal(seqStore
2024 2039 } }
2025 2040 /* build CTable for Offsets */
2026 2041 { unsigned max = MaxOff;
2027 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
2042 size_t const mostFrequent = HIST_countFast_wksp(
2043 count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
2028 2044 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
2029 2045 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
2030 2046 DEBUGLOG(5, "Building OF table");
@@ -2035,10 +2051,14 ZSTD_compressSequences_internal(seqStore
2035 2051 OF_defaultNorm, OF_defaultNormLog,
2036 2052 defaultPolicy, strategy);
2037 2053 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2038 { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
2039 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2040 prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
2041 workspace, wkspSize);
2054 { size_t const countSize = ZSTD_buildCTable(
2055 op, (size_t)(oend - op),
2056 CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
2057 count, max, ofCodeTable, nbSeq,
2058 OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2059 prevEntropy->fse.offcodeCTable,
2060 sizeof(prevEntropy->fse.offcodeCTable),
2061 entropyWorkspace, entropyWkspSize);
2042 2062 FORWARD_IF_ERROR(countSize);
2043 2063 if (Offtype == set_compressed)
2044 2064 lastNCount = op;
@@ -2047,7 +2067,8 ZSTD_compressSequences_internal(seqStore
2047 2067 } }
2048 2068 /* build CTable for MatchLengths */
2049 2069 { unsigned max = MaxML;
2050 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
2070 size_t const mostFrequent = HIST_countFast_wksp(
2071 count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
2051 2072 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
2052 2073 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
2053 2074 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
@@ -2056,10 +2077,14 ZSTD_compressSequences_internal(seqStore
2056 2077 ML_defaultNorm, ML_defaultNormLog,
2057 2078 ZSTD_defaultAllowed, strategy);
2058 2079 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2059 { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
2060 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
2061 prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
2062 workspace, wkspSize);
2080 { size_t const countSize = ZSTD_buildCTable(
2081 op, (size_t)(oend - op),
2082 CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
2083 count, max, mlCodeTable, nbSeq,
2084 ML_defaultNorm, ML_defaultNormLog, MaxML,
2085 prevEntropy->fse.matchlengthCTable,
2086 sizeof(prevEntropy->fse.matchlengthCTable),
2087 entropyWorkspace, entropyWkspSize);
2063 2088 FORWARD_IF_ERROR(countSize);
2064 2089 if (MLtype == set_compressed)
2065 2090 lastNCount = op;
@@ -2107,13 +2132,13 ZSTD_compressSequences(seqStore_t* seqSt
2107 2132 const ZSTD_CCtx_params* cctxParams,
2108 2133 void* dst, size_t dstCapacity,
2109 2134 size_t srcSize,
2110 void* workspace, size_t wkspSize,
2135 void* entropyWorkspace, size_t entropyWkspSize,
2111 2136 int bmi2)
2112 2137 {
2113 2138 size_t const cSize = ZSTD_compressSequences_internal(
2114 2139 seqStorePtr, prevEntropy, nextEntropy, cctxParams,
2115 2140 dst, dstCapacity,
2116 workspace, wkspSize, bmi2);
2141 entropyWorkspace, entropyWkspSize, bmi2);
2117 2142 if (cSize == 0) return 0;
2118 2143 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2119 2144 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
@@ -2264,11 +2289,99 static size_t ZSTD_buildSeqStore(ZSTD_CC
2264 2289 return ZSTDbss_compress;
2265 2290 }
2266 2291
2292 static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
2293 {
2294 const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
2295 const seqDef* seqs = seqStore->sequencesStart;
2296 size_t seqsSize = seqStore->sequences - seqs;
2297
2298 ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
2299 size_t i; size_t position; int repIdx;
2300
2301 assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
2302 for (i = 0, position = 0; i < seqsSize; ++i) {
2303 outSeqs[i].offset = seqs[i].offset;
2304 outSeqs[i].litLength = seqs[i].litLength;
2305 outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
2306
2307 if (i == seqStore->longLengthPos) {
2308 if (seqStore->longLengthID == 1) {
2309 outSeqs[i].litLength += 0x10000;
2310 } else if (seqStore->longLengthID == 2) {
2311 outSeqs[i].matchLength += 0x10000;
2312 }
2313 }
2314
2315 if (outSeqs[i].offset <= ZSTD_REP_NUM) {
2316 outSeqs[i].rep = outSeqs[i].offset;
2317 repIdx = (unsigned int)i - outSeqs[i].offset;
2318
2319 if (outSeqs[i].litLength == 0) {
2320 if (outSeqs[i].offset < 3) {
2321 --repIdx;
2322 } else {
2323 repIdx = (unsigned int)i - 1;
2324 }
2325 ++outSeqs[i].rep;
2326 }
2327 assert(repIdx >= -3);
2328 outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
2329 if (outSeqs[i].rep == 4) {
2330 --outSeqs[i].offset;
2331 }
2332 } else {
2333 outSeqs[i].offset -= ZSTD_REP_NUM;
2334 }
2335
2336 position += outSeqs[i].litLength;
2337 outSeqs[i].matchPos = (unsigned int)position;
2338 position += outSeqs[i].matchLength;
2339 }
2340 zc->seqCollector.seqIndex += seqsSize;
2341 }
2342
2343 size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
2344 size_t outSeqsSize, const void* src, size_t srcSize)
2345 {
2346 const size_t dstCapacity = ZSTD_compressBound(srcSize);
2347 void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
2348 SeqCollector seqCollector;
2349
2350 RETURN_ERROR_IF(dst == NULL, memory_allocation);
2351
2352 seqCollector.collectSequences = 1;
2353 seqCollector.seqStart = outSeqs;
2354 seqCollector.seqIndex = 0;
2355 seqCollector.maxSequences = outSeqsSize;
2356 zc->seqCollector = seqCollector;
2357
2358 ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
2359 ZSTD_free(dst, ZSTD_defaultCMem);
2360 return zc->seqCollector.seqIndex;
2361 }
2362
2363 /* Returns true if the given block is a RLE block */
2364 static int ZSTD_isRLE(const BYTE *ip, size_t length) {
2365 size_t i;
2366 if (length < 2) return 1;
2367 for (i = 1; i < length; ++i) {
2368 if (ip[0] != ip[i]) return 0;
2369 }
2370 return 1;
2371 }
2372
2267 2373 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
2268 2374 void* dst, size_t dstCapacity,
2269 const void* src, size_t srcSize)
2375 const void* src, size_t srcSize, U32 frame)
2270 2376 {
2377 /* This is the upper bound for the length of an rle block.
2378 * This isn't the actual upper bound. Finding the real threshold
2379 * needs further investigation.
2380 */
2381 const U32 rleMaxLength = 25;
2271 2382 size_t cSize;
2383 const BYTE* ip = (const BYTE*)src;
2384 BYTE* op = (BYTE*)dst;
2272 2385 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
2273 2386 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
2274 2387 (unsigned)zc->blockState.matchState.nextToUpdate);
@@ -2278,6 +2391,11 static size_t ZSTD_compressBlock_interna
2278 2391 if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
2279 2392 }
2280 2393
2394 if (zc->seqCollector.collectSequences) {
2395 ZSTD_copyBlockSequences(zc);
2396 return 0;
2397 }
2398
2281 2399 /* encode sequences and literals */
2282 2400 cSize = ZSTD_compressSequences(&zc->seqStore,
2283 2401 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
@@ -2287,8 +2405,21 static size_t ZSTD_compressBlock_interna
2287 2405 zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
2288 2406 zc->bmi2);
2289 2407
2408 if (frame &&
2409 /* We don't want to emit our first block as a RLE even if it qualifies because
2410 * doing so will cause the decoder (cli only) to throw a "should consume all input error."
2411 * This is only an issue for zstd <= v1.4.3
2412 */
2413 !zc->isFirstBlock &&
2414 cSize < rleMaxLength &&
2415 ZSTD_isRLE(ip, srcSize))
2416 {
2417 cSize = 1;
2418 op[0] = ip[0];
2419 }
2420
2290 2421 out:
2291 if (!ZSTD_isError(cSize) && cSize != 0) {
2422 if (!ZSTD_isError(cSize) && cSize > 1) {
2292 2423 /* confirm repcodes and entropy tables when emitting a compressed block */
2293 2424 ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
2294 2425 zc->blockState.prevCBlock = zc->blockState.nextCBlock;
@@ -2305,7 +2436,11 out:
2305 2436 }
2306 2437
2307 2438
2308 static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
2439 static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
2440 ZSTD_cwksp* ws,
2441 ZSTD_CCtx_params const* params,
2442 void const* ip,
2443 void const* iend)
2309 2444 {
2310 2445 if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
2311 2446 U32 const maxDist = (U32)1 << params->cParams.windowLog;
@@ -2314,7 +2449,9 static void ZSTD_overflowCorrectIfNeeded
2314 2449 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
2315 2450 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
2316 2451 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2452 ZSTD_cwksp_mark_tables_dirty(ws);
2317 2453 ZSTD_reduceIndex(ms, params, correction);
2454 ZSTD_cwksp_mark_tables_clean(ws);
2318 2455 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
2319 2456 else ms->nextToUpdate -= correction;
2320 2457 /* invalidate dictionaries on overflow correction */
@@ -2323,7 +2460,6 static void ZSTD_overflowCorrectIfNeeded
2323 2460 }
2324 2461 }
2325 2462
2326
2327 2463 /*! ZSTD_compress_frameChunk() :
2328 2464 * Compress a chunk of data into one or multiple blocks.
2329 2465 * All blocks will be terminated, all input will be consumed.
@@ -2357,7 +2493,8 static size_t ZSTD_compress_frameChunk (
2357 2493 "not enough space to store compressed block");
2358 2494 if (remaining < blockSize) blockSize = remaining;
2359 2495
2360 ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
2496 ZSTD_overflowCorrectIfNeeded(
2497 ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
2361 2498 ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
2362 2499
2363 2500 /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
@@ -2365,15 +2502,16 static size_t ZSTD_compress_frameChunk (
2365 2502
2366 2503 { size_t cSize = ZSTD_compressBlock_internal(cctx,
2367 2504 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
2368 ip, blockSize);
2505 ip, blockSize, 1 /* frame */);
2369 2506 FORWARD_IF_ERROR(cSize);
2370
2371 2507 if (cSize == 0) { /* block is not compressible */
2372 2508 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
2373 2509 FORWARD_IF_ERROR(cSize);
2374 2510 } else {
2375 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2376 MEM_writeLE24(op, cBlockHeader24);
2511 const U32 cBlockHeader = cSize == 1 ?
2512 lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
2513 lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2514 MEM_writeLE24(op, cBlockHeader);
2377 2515 cSize += ZSTD_blockHeaderSize;
2378 2516 }
2379 2517
@@ -2383,6 +2521,7 static size_t ZSTD_compress_frameChunk (
2383 2521 op += cSize;
2384 2522 assert(dstCapacity >= cSize);
2385 2523 dstCapacity -= cSize;
2524 cctx->isFirstBlock = 0;
2386 2525 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
2387 2526 (unsigned)cSize);
2388 2527 } }
@@ -2393,25 +2532,25 static size_t ZSTD_compress_frameChunk (
2393 2532
2394 2533
2395 2534 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2396 ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID)
2535 const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
2397 2536 { BYTE* const op = (BYTE*)dst;
2398 2537 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2399 U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
2400 U32 const checksumFlag = params.fParams.checksumFlag>0;
2401 U32 const windowSize = (U32)1 << params.cParams.windowLog;
2402 U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2403 BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2404 U32 const fcsCode = params.fParams.contentSizeFlag ?
2538 U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
2539 U32 const checksumFlag = params->fParams.checksumFlag>0;
2540 U32 const windowSize = (U32)1 << params->cParams.windowLog;
2541 U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2542 BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2543 U32 const fcsCode = params->fParams.contentSizeFlag ?
2405 2544 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
2406 2545 BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2407 2546 size_t pos=0;
2408 2547
2409 assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
2548 assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
2410 2549 RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall);
2411 2550 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
2412 !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
2413
2414 if (params.format == ZSTD_f_zstd1) {
2551 !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
2552
2553 if (params->format == ZSTD_f_zstd1) {
2415 2554 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
2416 2555 pos = 4;
2417 2556 }
@@ -2477,7 +2616,7 static size_t ZSTD_compressContinue_inte
2477 2616 "missing init (ZSTD_compressBegin)");
2478 2617
2479 2618 if (frame && (cctx->stage==ZSTDcs_init)) {
2480 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
2619 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
2481 2620 cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
2482 2621 FORWARD_IF_ERROR(fhSize);
2483 2622 assert(fhSize <= dstCapacity);
@@ -2497,13 +2636,15 static size_t ZSTD_compressContinue_inte
2497 2636
2498 2637 if (!frame) {
2499 2638 /* overflow check and correction for block mode */
2500 ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
2639 ZSTD_overflowCorrectIfNeeded(
2640 ms, &cctx->workspace, &cctx->appliedParams,
2641 src, (BYTE const*)src + srcSize);
2501 2642 }
2502 2643
2503 2644 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
2504 2645 { size_t const cSize = frame ?
2505 2646 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2506 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2647 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
2507 2648 FORWARD_IF_ERROR(cSize);
2508 2649 cctx->consumedSrcSize += srcSize;
2509 2650 cctx->producedCSize += (cSize + fhSize);
@@ -2550,6 +2691,7 size_t ZSTD_compressBlock(ZSTD_CCtx* cct
2550 2691 * @return : 0, or an error code
2551 2692 */
2552 2693 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
2694 ZSTD_cwksp* ws,
2553 2695 ZSTD_CCtx_params const* params,
2554 2696 const void* src, size_t srcSize,
2555 2697 ZSTD_dictTableLoadMethod_e dtlm)
@@ -2570,7 +2712,7 static size_t ZSTD_loadDictionaryContent
2570 2712 size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
2571 2713 const BYTE* const ichunk = ip + chunk;
2572 2714
2573 ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);
2715 ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
2574 2716
2575 2717 switch(params->cParams.strategy)
2576 2718 {
@@ -2629,10 +2771,11 static size_t ZSTD_checkDictNCount(short
2629 2771 /*! ZSTD_loadZstdDictionary() :
2630 2772 * @return : dictID, or an error code
2631 2773 * assumptions : magic number supposed already checked
2632 * dictSize supposed > 8
2774 * dictSize supposed >= 8
2633 2775 */
2634 2776 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
2635 2777 ZSTD_matchState_t* ms,
2778 ZSTD_cwksp* ws,
2636 2779 ZSTD_CCtx_params const* params,
2637 2780 const void* dict, size_t dictSize,
2638 2781 ZSTD_dictTableLoadMethod_e dtlm,
@@ -2645,7 +2788,7 static size_t ZSTD_loadZstdDictionary(ZS
2645 2788 size_t dictID;
2646 2789
2647 2790 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2648 assert(dictSize > 8);
2791 assert(dictSize >= 8);
2649 2792 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
2650 2793
2651 2794 dictPtr += 4; /* skip magic number */
@@ -2728,7 +2871,8 static size_t ZSTD_loadZstdDictionary(ZS
2728 2871 bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
2729 2872 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
2730 2873 bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
2731 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
2874 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
2875 ms, ws, params, dictPtr, dictContentSize, dtlm));
2732 2876 return dictID;
2733 2877 }
2734 2878 }
@@ -2738,6 +2882,7 static size_t ZSTD_loadZstdDictionary(ZS
2738 2882 static size_t
2739 2883 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
2740 2884 ZSTD_matchState_t* ms,
2885 ZSTD_cwksp* ws,
2741 2886 const ZSTD_CCtx_params* params,
2742 2887 const void* dict, size_t dictSize,
2743 2888 ZSTD_dictContentType_e dictContentType,
@@ -2745,27 +2890,35 ZSTD_compress_insertDictionary(ZSTD_comp
2745 2890 void* workspace)
2746 2891 {
2747 2892 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
2748 if ((dict==NULL) || (dictSize<=8)) return 0;
2893 if ((dict==NULL) || (dictSize<8)) {
2894 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
2895 return 0;
2896 }
2749 2897
2750 2898 ZSTD_reset_compressedBlockState(bs);
2751 2899
2752 2900 /* dict restricted modes */
2753 2901 if (dictContentType == ZSTD_dct_rawContent)
2754 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
2902 return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm);
2755 2903
2756 2904 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
2757 2905 if (dictContentType == ZSTD_dct_auto) {
2758 2906 DEBUGLOG(4, "raw content dictionary detected");
2759 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
2907 return ZSTD_loadDictionaryContent(
2908 ms, ws, params, dict, dictSize, dtlm);
2760 2909 }
2761 2910 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
2762 2911 assert(0); /* impossible */
2763 2912 }
2764 2913
2765 2914 /* dict as full zstd dictionary */
2766 return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
2915 return ZSTD_loadZstdDictionary(
2916 bs, ms, ws, params, dict, dictSize, dtlm, workspace);
2767 2917 }
2768 2918
2919 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
2920 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
2921
2769 2922 /*! ZSTD_compressBegin_internal() :
2770 2923 * @return : 0, or an error code */
2771 2924 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2773,23 +2926,34 static size_t ZSTD_compressBegin_interna
2773 2926 ZSTD_dictContentType_e dictContentType,
2774 2927 ZSTD_dictTableLoadMethod_e dtlm,
2775 2928 const ZSTD_CDict* cdict,
2776 ZSTD_CCtx_params params, U64 pledgedSrcSize,
2929 const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
2777 2930 ZSTD_buffered_policy_e zbuff)
2778 2931 {
2779 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
2932 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
2780 2933 /* params are supposed to be fully validated at this point */
2781 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
2934 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
2782 2935 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2783
2784 if (cdict && cdict->dictContentSize>0) {
2936 if ( (cdict)
2937 && (cdict->dictContentSize > 0)
2938 && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
2939 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
2940 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
2941 || cdict->compressionLevel == 0)
2942 && (params->attachDictPref != ZSTD_dictForceLoad) ) {
2785 2943 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
2786 2944 }
2787 2945
2788 FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
2789 ZSTDcrp_continue, zbuff) );
2790 { size_t const dictID = ZSTD_compress_insertDictionary(
2791 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
2792 &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
2946 FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
2947 ZSTDcrp_makeClean, zbuff) );
2948 { size_t const dictID = cdict ?
2949 ZSTD_compress_insertDictionary(
2950 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
2951 &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize,
2952 dictContentType, dtlm, cctx->entropyWorkspace)
2953 : ZSTD_compress_insertDictionary(
2954 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
2955 &cctx->workspace, params, dict, dictSize,
2956 dictContentType, dtlm, cctx->entropyWorkspace);
2793 2957 FORWARD_IF_ERROR(dictID);
2794 2958 assert(dictID <= UINT_MAX);
2795 2959 cctx->dictID = (U32)dictID;
@@ -2802,12 +2966,12 size_t ZSTD_compressBegin_advanced_inter
2802 2966 ZSTD_dictContentType_e dictContentType,
2803 2967 ZSTD_dictTableLoadMethod_e dtlm,
2804 2968 const ZSTD_CDict* cdict,
2805 ZSTD_CCtx_params params,
2969 const ZSTD_CCtx_params* params,
2806 2970 unsigned long long pledgedSrcSize)
2807 2971 {
2808 DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog);
2972 DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
2809 2973 /* compression parameters verification and optimization */
2810 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
2974 FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) );
2811 2975 return ZSTD_compressBegin_internal(cctx,
2812 2976 dict, dictSize, dictContentType, dtlm,
2813 2977 cdict,
@@ -2822,21 +2986,21 size_t ZSTD_compressBegin_advanced(ZSTD_
2822 2986 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2823 2987 {
2824 2988 ZSTD_CCtx_params const cctxParams =
2825 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2989 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
2826 2990 return ZSTD_compressBegin_advanced_internal(cctx,
2827 2991 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
2828 2992 NULL /*cdict*/,
2829 cctxParams, pledgedSrcSize);
2993 &cctxParams, pledgedSrcSize);
2830 2994 }
2831 2995
2832 2996 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
2833 2997 {
2834 2998 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
2835 2999 ZSTD_CCtx_params const cctxParams =
2836 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3000 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
2837 3001 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
2838 3002 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
2839 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
3003 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
2840 3004 }
2841 3005
2842 3006 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
@@ -2859,7 +3023,7 static size_t ZSTD_writeEpilogue(ZSTD_CC
2859 3023
2860 3024 /* special case : empty frame */
2861 3025 if (cctx->stage == ZSTDcs_init) {
2862 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
3026 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
2863 3027 FORWARD_IF_ERROR(fhSize);
2864 3028 dstCapacity -= fhSize;
2865 3029 op += fhSize;
@@ -2920,13 +3084,13 static size_t ZSTD_compress_internal (ZS
2920 3084 ZSTD_parameters params)
2921 3085 {
2922 3086 ZSTD_CCtx_params const cctxParams =
2923 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3087 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
2924 3088 DEBUGLOG(4, "ZSTD_compress_internal");
2925 3089 return ZSTD_compress_advanced_internal(cctx,
2926 3090 dst, dstCapacity,
2927 3091 src, srcSize,
2928 3092 dict, dictSize,
2929 cctxParams);
3093 &cctxParams);
2930 3094 }
2931 3095
2932 3096 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
@@ -2950,7 +3114,7 size_t ZSTD_compress_advanced_internal(
2950 3114 void* dst, size_t dstCapacity,
2951 3115 const void* src, size_t srcSize,
2952 3116 const void* dict,size_t dictSize,
2953 ZSTD_CCtx_params params)
3117 const ZSTD_CCtx_params* params)
2954 3118 {
2955 3119 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
2956 3120 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
@@ -2966,9 +3130,9 size_t ZSTD_compress_usingDict(ZSTD_CCtx
2966 3130 int compressionLevel)
2967 3131 {
2968 3132 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
2969 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3133 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
2970 3134 assert(params.fParams.contentSizeFlag == 1);
2971 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
3135 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
2972 3136 }
2973 3137
2974 3138 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3003,8 +3167,11 size_t ZSTD_estimateCDictSize_advanced(
3003 3167 ZSTD_dictLoadMethod_e dictLoadMethod)
3004 3168 {
3005 3169 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
3006 return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
3007 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
3170 return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
3171 + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
3172 + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
3173 + (dictLoadMethod == ZSTD_dlm_byRef ? 0
3174 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
3008 3175 }
3009 3176
3010 3177 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
@@ -3017,7 +3184,9 size_t ZSTD_sizeof_CDict(const ZSTD_CDic
3017 3184 {
3018 3185 if (cdict==NULL) return 0; /* support sizeof on NULL */
3019 3186 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
3020 return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
3187 /* cdict may be in the workspace */
3188 return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
3189 + ZSTD_cwksp_sizeof(&cdict->workspace);
3021 3190 }
3022 3191
3023 3192 static size_t ZSTD_initCDict_internal(
@@ -3031,28 +3200,29 static size_t ZSTD_initCDict_internal(
3031 3200 assert(!ZSTD_checkCParams(cParams));
3032 3201 cdict->matchState.cParams = cParams;
3033 3202 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
3034 cdict->dictBuffer = NULL;
3035 3203 cdict->dictContent = dictBuffer;
3036 3204 } else {
3037 void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem);
3038 cdict->dictBuffer = internalBuffer;
3205 void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
3206 RETURN_ERROR_IF(!internalBuffer, memory_allocation);
3039 3207 cdict->dictContent = internalBuffer;
3040 RETURN_ERROR_IF(!internalBuffer, memory_allocation);
3041 3208 memcpy(internalBuffer, dictBuffer, dictSize);
3042 3209 }
3043 3210 cdict->dictContentSize = dictSize;
3044 3211
3212 cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
3213
3214
3045 3215 /* Reset the state to no dictionary */
3046 3216 ZSTD_reset_compressedBlockState(&cdict->cBlockState);
3047 { void* const end = ZSTD_reset_matchState(&cdict->matchState,
3048 (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
3049 &cParams,
3050 ZSTDcrp_continue, ZSTD_resetTarget_CDict);
3051 assert(end == (char*)cdict->workspace + cdict->workspaceSize);
3052 (void)end;
3053 }
3217 FORWARD_IF_ERROR(ZSTD_reset_matchState(
3218 &cdict->matchState,
3219 &cdict->workspace,
3220 &cParams,
3221 ZSTDcrp_makeClean,
3222 ZSTDirp_reset,
3223 ZSTD_resetTarget_CDict));
3054 3224 /* (Maybe) load the dictionary
3055 * Skips loading the dictionary if it is <= 8 bytes.
3225 * Skips loading the dictionary if it is < 8 bytes.
3056 3226 */
3057 3227 { ZSTD_CCtx_params params;
3058 3228 memset(&params, 0, sizeof(params));
@@ -3060,9 +3230,9 static size_t ZSTD_initCDict_internal(
3060 3230 params.fParams.contentSizeFlag = 1;
3061 3231 params.cParams = cParams;
3062 3232 { size_t const dictID = ZSTD_compress_insertDictionary(
3063 &cdict->cBlockState, &cdict->matchState, &params,
3064 cdict->dictContent, cdict->dictContentSize,
3065 dictContentType, ZSTD_dtlm_full, cdict->workspace);
3233 &cdict->cBlockState, &cdict->matchState, &cdict->workspace,
3234 &params, cdict->dictContent, cdict->dictContentSize,
3235 dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
3066 3236 FORWARD_IF_ERROR(dictID);
3067 3237 assert(dictID <= (size_t)(U32)-1);
3068 3238 cdict->dictID = (U32)dictID;
@@ -3080,18 +3250,29 ZSTD_CDict* ZSTD_createCDict_advanced(co
3080 3250 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
3081 3251 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
3082 3252
3083 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
3084 size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
3253 { size_t const workspaceSize =
3254 ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
3255 ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
3256 ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
3257 (dictLoadMethod == ZSTD_dlm_byRef ? 0
3258 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
3085 3259 void* const workspace = ZSTD_malloc(workspaceSize, customMem);
3086
3087 if (!cdict || !workspace) {
3088 ZSTD_free(cdict, customMem);
3260 ZSTD_cwksp ws;
3261 ZSTD_CDict* cdict;
3262
3263 if (!workspace) {
3089 3264 ZSTD_free(workspace, customMem);
3090 3265 return NULL;
3091 3266 }
3267
3268 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
3269
3270 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
3271 assert(cdict != NULL);
3272 ZSTD_cwksp_move(&cdict->workspace, &ws);
3092 3273 cdict->customMem = customMem;
3093 cdict->workspace = workspace;
3094 cdict->workspaceSize = workspaceSize;
3274 cdict->compressionLevel = 0; /* signals advanced API usage */
3275
3095 3276 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3096 3277 dictBuffer, dictSize,
3097 3278 dictLoadMethod, dictContentType,
@@ -3107,9 +3288,12 ZSTD_CDict* ZSTD_createCDict_advanced(co
3107 3288 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
3108 3289 {
3109 3290 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3110 return ZSTD_createCDict_advanced(dict, dictSize,
3111 ZSTD_dlm_byCopy, ZSTD_dct_auto,
3112 cParams, ZSTD_defaultCMem);
3291 ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
3292 ZSTD_dlm_byCopy, ZSTD_dct_auto,
3293 cParams, ZSTD_defaultCMem);
3294 if (cdict)
3295 cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
3296 return cdict;
3113 3297 }
3114 3298
3115 3299 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
@@ -3124,9 +3308,11 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
3124 3308 {
3125 3309 if (cdict==NULL) return 0; /* support free on NULL */
3126 3310 { ZSTD_customMem const cMem = cdict->customMem;
3127 ZSTD_free(cdict->workspace, cMem);
3128 ZSTD_free(cdict->dictBuffer, cMem);
3129 ZSTD_free(cdict, cMem);
3311 int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
3312 ZSTD_cwksp_free(&cdict->workspace, cMem);
3313 if (!cdictInWorkspace) {
3314 ZSTD_free(cdict, cMem);
3315 }
3130 3316 return 0;
3131 3317 }
3132 3318 }
@@ -3152,28 +3338,30 const ZSTD_CDict* ZSTD_initStaticCDict(
3152 3338 ZSTD_compressionParameters cParams)
3153 3339 {
3154 3340 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
3155 size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
3156 + HUF_WORKSPACE_SIZE + matchStateSize;
3157 ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
3158 void* ptr;
3341 size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
3342 + (dictLoadMethod == ZSTD_dlm_byRef ? 0
3343 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
3344 + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
3345 + matchStateSize;
3346 ZSTD_CDict* cdict;
3347
3159 3348 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
3349
3350 {
3351 ZSTD_cwksp ws;
3352 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
3353 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
3354 if (cdict == NULL) return NULL;
3355 ZSTD_cwksp_move(&cdict->workspace, &ws);
3356 }
3357
3160 3358 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
3161 3359 (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
3162 3360 if (workspaceSize < neededSize) return NULL;
3163 3361
3164 if (dictLoadMethod == ZSTD_dlm_byCopy) {
3165 memcpy(cdict+1, dict, dictSize);
3166 dict = cdict+1;
3167 ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
3168 } else {
3169 ptr = cdict+1;
3170 }
3171 cdict->workspace = ptr;
3172 cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
3173
3174 3362 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3175 3363 dict, dictSize,
3176 ZSTD_dlm_byRef, dictContentType,
3364 dictLoadMethod, dictContentType,
3177 3365 cParams) ))
3178 3366 return NULL;
3179 3367
@@ -3195,7 +3383,15 size_t ZSTD_compressBegin_usingCDict_adv
3195 3383 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
3196 3384 RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
3197 3385 { ZSTD_CCtx_params params = cctx->requestedParams;
3198 params.cParams = ZSTD_getCParamsFromCDict(cdict);
3386 params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
3387 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
3388 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
3389 || cdict->compressionLevel == 0 )
3390 && (params.attachDictPref != ZSTD_dictForceLoad) ?
3391 ZSTD_getCParamsFromCDict(cdict)
3392 : ZSTD_getCParams(cdict->compressionLevel,
3393 pledgedSrcSize,
3394 cdict->dictContentSize);
3199 3395 /* Increase window log to fit the entire dictionary and source if the
3200 3396 * source size is known. Limit the increase to 19, which is the
3201 3397 * window log for compression level 1 with the largest source size.
@@ -3209,7 +3405,7 size_t ZSTD_compressBegin_usingCDict_adv
3209 3405 return ZSTD_compressBegin_internal(cctx,
3210 3406 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
3211 3407 cdict,
3212 params, pledgedSrcSize,
3408 &params, pledgedSrcSize,
3213 3409 ZSTDb_not_buffered);
3214 3410 }
3215 3411 }
@@ -3300,7 +3496,7 static size_t ZSTD_resetCStream_internal
3300 3496 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
3301 3497 dict, dictSize, dictContentType, ZSTD_dtlm_fast,
3302 3498 cdict,
3303 params, pledgedSrcSize,
3499 &params, pledgedSrcSize,
3304 3500 ZSTDb_buffered) );
3305 3501
3306 3502 cctx->inToCompress = 0;
@@ -3334,13 +3530,14 size_t ZSTD_resetCStream(ZSTD_CStream* z
3334 3530 * Assumption 2 : either dict, or cdict, is defined, not both */
3335 3531 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
3336 3532 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
3337 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
3533 const ZSTD_CCtx_params* params,
3534 unsigned long long pledgedSrcSize)
3338 3535 {
3339 3536 DEBUGLOG(4, "ZSTD_initCStream_internal");
3340 3537 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3341 3538 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3342 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
3343 zcs->requestedParams = params;
3539 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
3540 zcs->requestedParams = *params;
3344 3541 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3345 3542 if (dict) {
3346 3543 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
@@ -3379,7 +3576,7 size_t ZSTD_initCStream_usingCDict(ZSTD_
3379 3576 /* ZSTD_initCStream_advanced() :
3380 3577 * pledgedSrcSize must be exact.
3381 3578 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
3382 * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */
3579 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
3383 3580 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
3384 3581 const void* dict, size_t dictSize,
3385 3582 ZSTD_parameters params, unsigned long long pss)
@@ -3393,7 +3590,7 size_t ZSTD_initCStream_advanced(ZSTD_CS
3393 3590 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3394 3591 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3395 3592 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
3396 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3593 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params);
3397 3594 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
3398 3595 return 0;
3399 3596 }
@@ -3643,7 +3840,7 size_t ZSTD_compressStream2( ZSTD_CCtx*
3643 3840 if (cctx->mtctx == NULL) {
3644 3841 DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
3645 3842 params.nbWorkers);
3646 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
3843 cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
3647 3844 RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation);
3648 3845 }
3649 3846 /* mt compression */
@@ -3771,8 +3968,8 static const ZSTD_compressionParameters
3771 3968 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
3772 3969 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
3773 3970 { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
3774 { 21, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
3775 { 21, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
3971 { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
3972 { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
3776 3973 { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */
3777 3974 { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
3778 3975 { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */
@@ -3796,8 +3993,8 static const ZSTD_compressionParameters
3796 3993 /* W, C, H, S, L, T, strat */
3797 3994 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3798 3995 { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
3799 { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */
3800 { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */
3996 { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
3997 { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
3801 3998 { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
3802 3999 { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
3803 4000 { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
@@ -3823,8 +4020,8 static const ZSTD_compressionParameters
3823 4020 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3824 4021 { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
3825 4022 { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
3826 { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */
3827 { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */
4023 { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
4024 { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
3828 4025 { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
3829 4026 { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
3830 4027 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
@@ -3849,7 +4046,7 static const ZSTD_compressionParameters
3849 4046 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3850 4047 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
3851 4048 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
3852 { 14, 14, 15, 2, 4, 1, ZSTD_dfast }, /* level 3 */
4049 { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
3853 4050 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
3854 4051 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
3855 4052 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
@@ -19,6 +19,7
19 19 * Dependencies
20 20 ***************************************/
21 21 #include "zstd_internal.h"
22 #include "zstd_cwksp.h"
22 23 #ifdef ZSTD_MULTITHREAD
23 24 # include "zstdmt_compress.h"
24 25 #endif
@@ -192,6 +193,13 typedef struct {
192 193 size_t capacity; /* The capacity starting from `seq` pointer */
193 194 } rawSeqStore_t;
194 195
196 typedef struct {
197 int collectSequences;
198 ZSTD_Sequence* seqStart;
199 size_t seqIndex;
200 size_t maxSequences;
201 } SeqCollector;
202
195 203 struct ZSTD_CCtx_params_s {
196 204 ZSTD_format_e format;
197 205 ZSTD_compressionParameters cParams;
@@ -203,6 +211,9 struct ZSTD_CCtx_params_s {
203 211 size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
204 212 * No target when targetCBlockSize == 0.
205 213 * There is no guarantee on compressed block size */
214 int srcSizeHint; /* User's best guess of source size.
215 * Hint is not valid when srcSizeHint == 0.
216 * There is no guarantee that hint is close to actual source size */
206 217
207 218 ZSTD_dictAttachPref_e attachDictPref;
208 219 ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -228,9 +239,7 struct ZSTD_CCtx_s {
228 239 ZSTD_CCtx_params appliedParams;
229 240 U32 dictID;
230 241
231 int workSpaceOversizedDuration;
232 void* workSpace;
233 size_t workSpaceSize;
242 ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
234 243 size_t blockSize;
235 244 unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
236 245 unsigned long long consumedSrcSize;
@@ -238,6 +247,8 struct ZSTD_CCtx_s {
238 247 XXH64_state_t xxhState;
239 248 ZSTD_customMem customMem;
240 249 size_t staticSize;
250 SeqCollector seqCollector;
251 int isFirstBlock;
241 252
242 253 seqStore_t seqStore; /* sequences storage ptrs */
243 254 ldmState_t ldmState; /* long distance matching state */
@@ -337,26 +348,57 MEM_STATIC size_t ZSTD_minGain(size_t sr
337 348 return (srcSize >> minlog) + 2;
338 349 }
339 350
351 /*! ZSTD_safecopyLiterals() :
352 * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
353 * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
354 * large copies.
355 */
356 static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
357 assert(iend > ilimit_w);
358 if (ip <= ilimit_w) {
359 ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
360 op += ilimit_w - ip;
361 ip = ilimit_w;
362 }
363 while (ip < iend) *op++ = *ip++;
364 }
365
340 366 /*! ZSTD_storeSeq() :
341 * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
342 * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
367 * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
368 * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
343 369 * `mlBase` : matchLength - MINMATCH
370 * Allowed to overread literals up to litLimit.
344 371 */
345 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
372 HINT_INLINE UNUSED_ATTR
373 void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
346 374 {
375 BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
376 BYTE const* const litEnd = literals + litLength;
347 377 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
348 378 static const BYTE* g_start = NULL;
349 379 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
350 380 { U32 const pos = (U32)((const BYTE*)literals - g_start);
351 381 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
352 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
382 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
353 383 }
354 384 #endif
355 385 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
356 386 /* copy Literals */
357 387 assert(seqStorePtr->maxNbLit <= 128 KB);
358 388 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
359 ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
389 assert(literals + litLength <= litLimit);
390 if (litEnd <= litLimit_w) {
391 /* Common case we can use wildcopy.
392 * First copy 16 bytes, because literals are likely short.
393 */
394 assert(WILDCOPY_OVERLENGTH >= 16);
395 ZSTD_copy16(seqStorePtr->lit, literals);
396 if (litLength > 16) {
397 ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
398 }
399 } else {
400 ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
401 }
360 402 seqStorePtr->lit += litLength;
361 403
362 404 /* literal Length */
@@ -368,7 +410,7 MEM_STATIC void ZSTD_storeSeq(seqStore_t
368 410 seqStorePtr->sequences[0].litLength = (U16)litLength;
369 411
370 412 /* match offset */
371 seqStorePtr->sequences[0].offset = offsetCode + 1;
413 seqStorePtr->sequences[0].offset = offCode + 1;
372 414
373 415 /* match Length */
374 416 if (mlBase>0xFFFF) {
@@ -910,7 +952,7 ZSTD_compressionParameters ZSTD_getCPara
910 952 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
911 953 const void* dict, size_t dictSize,
912 954 const ZSTD_CDict* cdict,
913 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
955 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
914 956
915 957 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
916 958
@@ -925,7 +967,7 size_t ZSTD_compressBegin_advanced_inter
925 967 ZSTD_dictContentType_e dictContentType,
926 968 ZSTD_dictTableLoadMethod_e dtlm,
927 969 const ZSTD_CDict* cdict,
928 ZSTD_CCtx_params params,
970 const ZSTD_CCtx_params* params,
929 971 unsigned long long pledgedSrcSize);
930 972
931 973 /* ZSTD_compress_advanced_internal() :
@@ -934,7 +976,7 size_t ZSTD_compress_advanced_internal(Z
934 976 void* dst, size_t dstCapacity,
935 977 const void* src, size_t srcSize,
936 978 const void* dict,size_t dictSize,
937 ZSTD_CCtx_params params);
979 const ZSTD_CCtx_params* params);
938 980
939 981
940 982 /* ZSTD_writeLastEmptyBlock() :
@@ -70,7 +70,7 size_t ZSTD_compressLiterals (ZSTD_hufCT
70 70 ZSTD_strategy strategy, int disableLiteralCompression,
71 71 void* dst, size_t dstCapacity,
72 72 const void* src, size_t srcSize,
73 void* workspace, size_t wkspSize,
73 void* entropyWorkspace, size_t entropyWorkspaceSize,
74 74 const int bmi2)
75 75 {
76 76 size_t const minGain = ZSTD_minGain(srcSize, strategy);
@@ -99,10 +99,15 size_t ZSTD_compressLiterals (ZSTD_hufCT
99 99 { HUF_repeat repeat = prevHuf->repeatMode;
100 100 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
101 101 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
102 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
103 workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
104 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
105 workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
102 cLitSize = singleStream ?
103 HUF_compress1X_repeat(
104 ostart+lhSize, dstCapacity-lhSize, src, srcSize,
105 255, 11, entropyWorkspace, entropyWorkspaceSize,
106 (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
107 HUF_compress4X_repeat(
108 ostart+lhSize, dstCapacity-lhSize, src, srcSize,
109 255, 11, entropyWorkspace, entropyWorkspaceSize,
110 (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
106 111 if (repeat != HUF_repeat_none) {
107 112 /* reused the existing table */
108 113 hType = set_repeat;
@@ -23,7 +23,7 size_t ZSTD_compressLiterals (ZSTD_hufCT
23 23 ZSTD_strategy strategy, int disableLiteralCompression,
24 24 void* dst, size_t dstCapacity,
25 25 const void* src, size_t srcSize,
26 void* workspace, size_t wkspSize,
26 void* entropyWorkspace, size_t entropyWorkspaceSize,
27 27 const int bmi2);
28 28
29 29 #endif /* ZSTD_COMPRESS_LITERALS_H */
@@ -222,7 +222,7 ZSTD_buildCTable(void* dst, size_t dstCa
222 222 const BYTE* codeTable, size_t nbSeq,
223 223 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
224 224 const FSE_CTable* prevCTable, size_t prevCTableSize,
225 void* workspace, size_t workspaceSize)
225 void* entropyWorkspace, size_t entropyWorkspaceSize)
226 226 {
227 227 BYTE* op = (BYTE*)dst;
228 228 const BYTE* const oend = op + dstCapacity;
@@ -238,7 +238,7 ZSTD_buildCTable(void* dst, size_t dstCa
238 238 memcpy(nextCTable, prevCTable, prevCTableSize);
239 239 return 0;
240 240 case set_basic:
241 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
241 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */
242 242 return 0;
243 243 case set_compressed: {
244 244 S16 norm[MaxSeq + 1];
@@ -252,7 +252,7 ZSTD_buildCTable(void* dst, size_t dstCa
252 252 FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
253 253 { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
254 254 FORWARD_IF_ERROR(NCountSize);
255 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
255 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
256 256 return NCountSize;
257 257 }
258 258 }
@@ -35,7 +35,7 ZSTD_buildCTable(void* dst, size_t dstCa
35 35 const BYTE* codeTable, size_t nbSeq,
36 36 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
37 37 const FSE_CTable* prevCTable, size_t prevCTableSize,
38 void* workspace, size_t workspaceSize);
38 void* entropyWorkspace, size_t entropyWorkspaceSize);
39 39
40 40 size_t ZSTD_encodeSequences(
41 41 void* dst, size_t dstCapacity,
@@ -148,7 +148,7 size_t ZSTD_compressBlock_doubleFast_gen
148 148 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
149 149 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
150 150 ip++;
151 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
151 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
152 152 goto _match_stored;
153 153 }
154 154
@@ -157,7 +157,7 size_t ZSTD_compressBlock_doubleFast_gen
157 157 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
158 158 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
159 159 ip++;
160 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
160 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
161 161 goto _match_stored;
162 162 }
163 163
@@ -247,7 +247,7 size_t ZSTD_compressBlock_doubleFast_gen
247 247 offset_2 = offset_1;
248 248 offset_1 = offset;
249 249
250 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
250 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
251 251
252 252 _match_stored:
253 253 /* match found */
@@ -278,7 +278,7 size_t ZSTD_compressBlock_doubleFast_gen
278 278 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
279 279 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
280 280 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
281 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
281 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
282 282 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
283 283 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
284 284 ip += repLength2;
@@ -297,7 +297,7 size_t ZSTD_compressBlock_doubleFast_gen
297 297 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
298 298 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
299 299 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
300 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
300 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
301 301 ip += rLength;
302 302 anchor = ip;
303 303 continue; /* faster when present ... (?) */
@@ -411,7 +411,7 static size_t ZSTD_compressBlock_doubleF
411 411 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
412 412 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
413 413 ip++;
414 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
414 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
415 415 } else {
416 416 if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
417 417 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -422,7 +422,7 static size_t ZSTD_compressBlock_doubleF
422 422 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
423 423 offset_2 = offset_1;
424 424 offset_1 = offset;
425 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
425 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
426 426
427 427 } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
428 428 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -447,7 +447,7 static size_t ZSTD_compressBlock_doubleF
447 447 }
448 448 offset_2 = offset_1;
449 449 offset_1 = offset;
450 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
450 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
451 451
452 452 } else {
453 453 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -479,7 +479,7 static size_t ZSTD_compressBlock_doubleF
479 479 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
480 480 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
481 481 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
482 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
482 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
483 483 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
484 484 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
485 485 ip += repLength2;
@@ -8,7 +8,7
8 8 * You may select, at your option, one of the above-listed licenses.
9 9 */
10 10
11 #include "zstd_compress_internal.h"
11 #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
12 12 #include "zstd_fast.h"
13 13
14 14
@@ -43,8 +43,8 void ZSTD_fillHashTable(ZSTD_matchState_
43 43 }
44 44
45 45
46 FORCE_INLINE_TEMPLATE
47 size_t ZSTD_compressBlock_fast_generic(
46 FORCE_INLINE_TEMPLATE size_t
47 ZSTD_compressBlock_fast_generic(
48 48 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
49 49 void const* src, size_t srcSize,
50 50 U32 const mls)
@@ -74,8 +74,7 size_t ZSTD_compressBlock_fast_generic(
74 74 DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
75 75 ip0 += (ip0 == prefixStart);
76 76 ip1 = ip0 + 1;
77 {
78 U32 const maxRep = (U32)(ip0 - prefixStart);
77 { U32 const maxRep = (U32)(ip0 - prefixStart);
79 78 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
80 79 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
81 80 }
@@ -118,8 +117,7 size_t ZSTD_compressBlock_fast_generic(
118 117 match0 = match1;
119 118 goto _offset;
120 119 }
121 {
122 size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
120 { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
123 121 assert(step >= 2);
124 122 ip0 += step;
125 123 ip1 += step;
@@ -138,7 +136,7 size_t ZSTD_compressBlock_fast_generic(
138 136 _match: /* Requires: ip0, match0, offcode */
139 137 /* Count the forward length */
140 138 mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
141 ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
139 ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
142 140 /* match found */
143 141 ip0 += mLength;
144 142 anchor = ip0;
@@ -150,16 +148,15 size_t ZSTD_compressBlock_fast_generic(
150 148 hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
151 149 hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
152 150
153 while ( (ip0 <= ilimit)
154 && ( (offset_2>0)
155 & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
151 while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
152 && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
156 153 /* store sequence */
157 154 size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
158 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
155 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
159 156 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
160 157 ip0 += rLength;
161 158 ip1 = ip0 + 1;
162 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
159 ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
163 160 anchor = ip0;
164 161 continue; /* faster when present (confirmed on gcc-8) ... (?) */
165 162 }
@@ -179,8 +176,7 size_t ZSTD_compressBlock_fast(
179 176 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
180 177 void const* src, size_t srcSize)
181 178 {
182 ZSTD_compressionParameters const* cParams = &ms->cParams;
183 U32 const mls = cParams->minMatch;
179 U32 const mls = ms->cParams.minMatch;
184 180 assert(ms->dictMatchState == NULL);
185 181 switch(mls)
186 182 {
@@ -265,7 +261,7 size_t ZSTD_compressBlock_fast_dictMatch
265 261 const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
266 262 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
267 263 ip++;
268 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
264 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
269 265 } else if ( (matchIndex <= prefixStartIndex) ) {
270 266 size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
271 267 U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -285,7 +281,7 size_t ZSTD_compressBlock_fast_dictMatch
285 281 } /* catch up */
286 282 offset_2 = offset_1;
287 283 offset_1 = offset;
288 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
284 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
289 285 }
290 286 } else if (MEM_read32(match) != MEM_read32(ip)) {
291 287 /* it's not a match, and we're not going to check the dictionary */
@@ -300,7 +296,7 size_t ZSTD_compressBlock_fast_dictMatch
300 296 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
301 297 offset_2 = offset_1;
302 298 offset_1 = offset;
303 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
299 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
304 300 }
305 301
306 302 /* match found */
@@ -325,7 +321,7 size_t ZSTD_compressBlock_fast_dictMatch
325 321 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
326 322 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
327 323 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
328 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
324 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
329 325 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
330 326 ip += repLength2;
331 327 anchor = ip;
@@ -348,8 +344,7 size_t ZSTD_compressBlock_fast_dictMatch
348 344 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
349 345 void const* src, size_t srcSize)
350 346 {
351 ZSTD_compressionParameters const* cParams = &ms->cParams;
352 U32 const mls = cParams->minMatch;
347 U32 const mls = ms->cParams.minMatch;
353 348 assert(ms->dictMatchState != NULL);
354 349 switch(mls)
355 350 {
@@ -408,16 +403,17 static size_t ZSTD_compressBlock_fast_ex
408 403 const U32 repIndex = current + 1 - offset_1;
409 404 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
410 405 const BYTE* const repMatch = repBase + repIndex;
411 size_t mLength;
412 406 hashTable[h] = current; /* update hash table */
413 407 assert(offset_1 <= current +1); /* check repIndex */
414 408
415 409 if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
416 410 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
417 411 const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
418 mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
412 size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
419 413 ip++;
420 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
414 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
415 ip += rLength;
416 anchor = ip;
421 417 } else {
422 418 if ( (matchIndex < dictStartIndex) ||
423 419 (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -427,19 +423,15 static size_t ZSTD_compressBlock_fast_ex
427 423 }
428 424 { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
429 425 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
430 U32 offset;
431 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
426 U32 const offset = current - matchIndex;
427 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
432 428 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
433 offset = current - matchIndex;
434 offset_2 = offset_1;
435 offset_1 = offset;
436 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
429 offset_2 = offset_1; offset_1 = offset; /* update offset history */
430 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
431 ip += mLength;
432 anchor = ip;
437 433 } }
438 434
439 /* found a match : store it */
440 ip += mLength;
441 anchor = ip;
442
443 435 if (ip <= ilimit) {
444 436 /* Fill Table */
445 437 hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@@ -448,13 +440,13 static size_t ZSTD_compressBlock_fast_ex
448 440 while (ip <= ilimit) {
449 441 U32 const current2 = (U32)(ip-base);
450 442 U32 const repIndex2 = current2 - offset_2;
451 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
443 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
452 444 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional underflow */
453 445 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
454 446 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
455 447 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
456 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
457 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
448 { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
449 ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
458 450 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
459 451 ip += repLength2;
460 452 anchor = ip;
@@ -476,8 +468,7 size_t ZSTD_compressBlock_fast_extDict(
476 468 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
477 469 void const* src, size_t srcSize)
478 470 {
479 ZSTD_compressionParameters const* cParams = &ms->cParams;
480 U32 const mls = cParams->minMatch;
471 U32 const mls = ms->cParams.minMatch;
481 472 switch(mls)
482 473 {
483 474 default: /* includes case 3 */
@@ -810,7 +810,7 ZSTD_compressBlock_lazy_generic(
810 810 /* store sequence */
811 811 _storeSequence:
812 812 { size_t const litLength = start - anchor;
813 ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
813 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
814 814 anchor = ip = start + matchLength;
815 815 }
816 816
@@ -828,7 +828,7 ZSTD_compressBlock_lazy_generic(
828 828 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
829 829 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
830 830 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
831 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
831 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
832 832 ip += matchLength;
833 833 anchor = ip;
834 834 continue;
@@ -843,7 +843,7 ZSTD_compressBlock_lazy_generic(
843 843 /* store sequence */
844 844 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
845 845 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
846 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
846 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
847 847 ip += matchLength;
848 848 anchor = ip;
849 849 continue; /* faster when present ... (?) */
@@ -1051,7 +1051,7 size_t ZSTD_compressBlock_lazy_extDict_g
1051 1051 /* store sequence */
1052 1052 _storeSequence:
1053 1053 { size_t const litLength = start - anchor;
1054 ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1054 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1055 1055 anchor = ip = start + matchLength;
1056 1056 }
1057 1057
@@ -1066,7 +1066,7 size_t ZSTD_compressBlock_lazy_extDict_g
1066 1066 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1067 1067 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1068 1068 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
1069 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1069 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1070 1070 ip += matchLength;
1071 1071 anchor = ip;
1072 1072 continue; /* faster when present ... (?) */
@@ -49,9 +49,9 size_t ZSTD_ldm_getTableSize(ldmParams_t
49 49 {
50 50 size_t const ldmHSize = ((size_t)1) << params.hashLog;
51 51 size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
52 size_t const ldmBucketSize =
53 ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
54 size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
52 size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
53 size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
54 + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
55 55 return params.enableLdm ? totalSize : 0;
56 56 }
57 57
@@ -583,7 +583,7 size_t ZSTD_ldm_blockCompress(rawSeqStor
583 583 rep[i] = rep[i-1];
584 584 rep[0] = sequence.offset;
585 585 /* Store the sequence */
586 ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
586 ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
587 587 sequence.offset + ZSTD_REP_MOVE,
588 588 sequence.matchLength - MINMATCH);
589 589 ip += sequence.matchLength;
@@ -1098,7 +1098,7 ZSTD_compressBlock_opt_generic(ZSTD_matc
1098 1098
1099 1099 assert(anchor + llen <= iend);
1100 1100 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1101 ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
1101 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
1102 1102 anchor += advance;
1103 1103 ip = anchor;
1104 1104 } }
@@ -668,7 +668,7 static void ZSTDMT_compressionJob(void*
668 668
669 669 /* init */
670 670 if (job->cdict) {
671 size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
671 size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
672 672 assert(job->firstJob); /* only allowed for first job */
673 673 if (ZSTD_isError(initError)) JOB_ERROR(initError);
674 674 } else { /* srcStart points at reloaded section */
@@ -680,7 +680,7 static void ZSTDMT_compressionJob(void*
680 680 job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
681 681 ZSTD_dtlm_fast,
682 682 NULL, /*cdict*/
683 jobParams, pledgedSrcSize);
683 &jobParams, pledgedSrcSize);
684 684 if (ZSTD_isError(initError)) JOB_ERROR(initError);
685 685 } }
686 686
@@ -927,12 +927,18 static void ZSTDMT_releaseAllJobResource
927 927 unsigned jobID;
928 928 DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
929 929 for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
930 /* Copy the mutex/cond out */
931 ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
932 ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
933
930 934 DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
931 935 ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
932 mtctx->jobs[jobID].dstBuff = g_nullBuffer;
933 mtctx->jobs[jobID].cSize = 0;
936
937 /* Clear the job description, but keep the mutex/cond */
938 memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
939 mtctx->jobs[jobID].job_mutex = mutex;
940 mtctx->jobs[jobID].job_cond = cond;
934 941 }
935 memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
936 942 mtctx->inBuff.buffer = g_nullBuffer;
937 943 mtctx->inBuff.filled = 0;
938 944 mtctx->allJobsCompleted = 1;
@@ -1028,9 +1034,9 size_t ZSTDMT_getMTCtxParameter(ZSTDMT_C
1028 1034
1029 1035 /* Sets parameters relevant to the compression job,
1030 1036 * initializing others to default values. */
1031 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
1037 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
1032 1038 {
1033 ZSTD_CCtx_params jobParams = params;
1039 ZSTD_CCtx_params jobParams = *params;
1034 1040 /* Clear parameters related to multithreading */
1035 1041 jobParams.forceWindow = 0;
1036 1042 jobParams.nbWorkers = 0;
@@ -1151,16 +1157,16 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt
1151 1157 /* ===== Multi-threaded compression ===== */
1152 1158 /* ------------------------------------------ */
1153 1159
1154 static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1160 static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
1155 1161 {
1156 1162 unsigned jobLog;
1157 if (params.ldmParams.enableLdm) {
1163 if (params->ldmParams.enableLdm) {
1158 1164 /* In Long Range Mode, the windowLog is typically oversized.
1159 1165 * In which case, it's preferable to determine the jobSize
1160 1166 * based on chainLog instead. */
1161 jobLog = MAX(21, params.cParams.chainLog + 4);
1167 jobLog = MAX(21, params->cParams.chainLog + 4);
1162 1168 } else {
1163 jobLog = MAX(20, params.cParams.windowLog + 2);
1169 jobLog = MAX(20, params->cParams.windowLog + 2);
1164 1170 }
1165 1171 return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
1166 1172 }
@@ -1193,27 +1199,27 static int ZSTDMT_overlapLog(int ovlog,
1193 1199 return ovlog;
1194 1200 }
1195 1201
1196 static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
1202 static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
1197 1203 {
1198 int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
1199 int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
1204 int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
1205 int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
1200 1206 assert(0 <= overlapRLog && overlapRLog <= 8);
1201 if (params.ldmParams.enableLdm) {
1207 if (params->ldmParams.enableLdm) {
1202 1208 /* In Long Range Mode, the windowLog is typically oversized.
1203 1209 * In which case, it's preferable to determine the jobSize
1204 1210 * based on chainLog instead.
1205 1211 * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
1206 ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1212 ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1207 1213 - overlapRLog;
1208 1214 }
1209 1215 assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
1210 DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
1216 DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
1211 1217 DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
1212 1218 return (ovLog==0) ? 0 : (size_t)1 << ovLog;
1213 1219 }
1214 1220
1215 1221 static unsigned
1216 ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
1222 ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
1217 1223 {
1218 1224 assert(nbWorkers>0);
1219 1225 { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1236,9 +1242,9 static size_t ZSTDMT_compress_advanced_i
1236 1242 const ZSTD_CDict* cdict,
1237 1243 ZSTD_CCtx_params params)
1238 1244 {
1239 ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1240 size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
1241 unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1245 ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
1246 size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
1247 unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
1242 1248 size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1243 1249 size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
1244 1250 const char* const srcStart = (const char*)src;
@@ -1256,7 +1262,7 static size_t ZSTDMT_compress_advanced_i
1256 1262 ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
1257 1263 DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
1258 1264 if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
1259 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
1265 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
1260 1266 }
1261 1267
1262 1268 assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@@ -1404,12 +1410,12 size_t ZSTDMT_initCStream_internal(
1404 1410
1405 1411 mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1406 1412 if (mtctx->singleBlockingThread) {
1407 ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
1413 ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
1408 1414 DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
1409 1415 assert(singleThreadParams.nbWorkers == 0);
1410 1416 return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
1411 1417 dict, dictSize, cdict,
1412 singleThreadParams, pledgedSrcSize);
1418 &singleThreadParams, pledgedSrcSize);
1413 1419 }
1414 1420
1415 1421 DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1435,11 +1441,11 size_t ZSTDMT_initCStream_internal(
1435 1441 mtctx->cdict = cdict;
1436 1442 }
1437 1443
1438 mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
1444 mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
1439 1445 DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
1440 1446 mtctx->targetSectionSize = params.jobSize;
1441 1447 if (mtctx->targetSectionSize == 0) {
1442 mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1448 mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
1443 1449 }
1444 1450 assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
1445 1451
@@ -61,7 +61,9
61 61 * Error Management
62 62 ****************************************************************/
63 63 #define HUF_isError ERR_isError
64 #ifndef CHECK_F
64 65 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
66 #endif
65 67
66 68
67 69 /* **************************************************************
@@ -88,10 +88,7 size_t ZSTD_estimateDCtxSize(void) { ret
88 88
89 89 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
90 90 {
91 size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
92 ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
93 ZSTD_FRAMEHEADERSIZE_PREFIX;
94 ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
91 size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
95 92 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
96 93 assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
97 94 return startingInputLength;
@@ -376,7 +373,7 unsigned long long ZSTD_findDecompressed
376 373 {
377 374 unsigned long long totalDstSize = 0;
378 375
379 while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
376 while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
380 377 U32 const magicNumber = MEM_readLE32(src);
381 378
382 379 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -629,11 +626,12 static size_t ZSTD_decompressFrame(ZSTD_
629 626
630 627 /* check */
631 628 RETURN_ERROR_IF(
632 remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
629 remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
633 630 srcSize_wrong);
634 631
635 632 /* Frame Header */
636 { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
633 { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
634 ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
637 635 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
638 636 RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
639 637 srcSize_wrong);
@@ -714,7 +712,7 static size_t ZSTD_decompressMultiFrame(
714 712 dictSize = ZSTD_DDict_dictSize(ddict);
715 713 }
716 714
717 while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
715 while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
718 716
719 717 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
720 718 if (ZSTD_isLegacy(src, srcSize)) {
@@ -1098,7 +1096,7 ZSTD_loadDEntropy(ZSTD_entropyDTables_t*
1098 1096 size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
1099 1097 for (i=0; i<3; i++) {
1100 1098 U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
1101 RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
1099 RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
1102 1100 dictionary_corrupted);
1103 1101 entropy->rep[i] = rep;
1104 1102 } }
@@ -1267,7 +1265,7 size_t ZSTD_DCtx_loadDictionary_advanced
1267 1265 {
1268 1266 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
1269 1267 ZSTD_clearDict(dctx);
1270 if (dict && dictSize >= 8) {
1268 if (dict && dictSize != 0) {
1271 1269 dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
1272 1270 RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
1273 1271 dctx->ddict = dctx->ddictLocal;
@@ -1300,14 +1298,14 size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dc
1300 1298
1301 1299
1302 1300 /* ZSTD_initDStream_usingDict() :
1303 * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
1301 * return : expected size, aka ZSTD_startingInputLength().
1304 1302 * this function cannot fail */
1305 1303 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
1306 1304 {
1307 1305 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
1308 1306 FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
1309 1307 FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
1310 return ZSTD_FRAMEHEADERSIZE_PREFIX;
1308 return ZSTD_startingInputLength(zds->format);
1311 1309 }
1312 1310
1313 1311 /* note : this variant can't fail */
@@ -1324,16 +1322,16 size_t ZSTD_initDStream_usingDDict(ZSTD_
1324 1322 {
1325 1323 FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
1326 1324 FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
1327 return ZSTD_FRAMEHEADERSIZE_PREFIX;
1325 return ZSTD_startingInputLength(dctx->format);
1328 1326 }
1329 1327
1330 1328 /* ZSTD_resetDStream() :
1331 * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
1329 * return : expected size, aka ZSTD_startingInputLength().
1332 1330 * this function cannot fail */
1333 1331 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
1334 1332 {
1335 1333 FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
1336 return ZSTD_FRAMEHEADERSIZE_PREFIX;
1334 return ZSTD_startingInputLength(dctx->format);
1337 1335 }
1338 1336
1339 1337
@@ -1564,7 +1562,7 size_t ZSTD_decompressStream(ZSTD_DStrea
1564 1562 zds->lhSize += remainingInput;
1565 1563 }
1566 1564 input->pos = input->size;
1567 return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1565 return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
1568 1566 }
1569 1567 assert(ip != NULL);
1570 1568 memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@@ -573,38 +573,118 typedef struct {
573 573 size_t pos;
574 574 } seqState_t;
575 575
576 /*! ZSTD_overlapCopy8() :
577 * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
578 * If the offset is < 8 then the offset is spread to at least 8 bytes.
579 *
580 * Precondition: *ip <= *op
581 * Postcondition: *op - *ip >= 8
582 */
583 static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
584 assert(*ip <= *op);
585 if (offset < 8) {
586 /* close range match, overlap */
587 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
588 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
589 int const sub2 = dec64table[offset];
590 (*op)[0] = (*ip)[0];
591 (*op)[1] = (*ip)[1];
592 (*op)[2] = (*ip)[2];
593 (*op)[3] = (*ip)[3];
594 *ip += dec32table[offset];
595 ZSTD_copy4(*op+4, *ip);
596 *ip -= sub2;
597 } else {
598 ZSTD_copy8(*op, *ip);
599 }
600 *ip += 8;
601 *op += 8;
602 assert(*op - *ip >= 8);
603 }
576 604
577 /* ZSTD_execSequenceLast7():
578 * exceptional case : decompress a match starting within last 7 bytes of output buffer.
579 * requires more careful checks, to ensure there is no overflow.
580 * performance does not matter though.
581 * note : this case is supposed to be never generated "naturally" by reference encoder,
582 * since in most cases it needs at least 8 bytes to look for a match.
583 * but it's allowed by the specification. */
605 /*! ZSTD_safecopy() :
606 * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
607 * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
608 * This function is only called in the uncommon case where the sequence is near the end of the block. It
609 * should be fast for a single long sequence, but can be slow for several short sequences.
610 *
611 * @param ovtype controls the overlap detection
612 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
613 * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
614 * The src buffer must be before the dst buffer.
615 */
616 static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
617 ptrdiff_t const diff = op - ip;
618 BYTE* const oend = op + length;
619
620 assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
621 (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
622
623 if (length < 8) {
624 /* Handle short lengths. */
625 while (op < oend) *op++ = *ip++;
626 return;
627 }
628 if (ovtype == ZSTD_overlap_src_before_dst) {
629 /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
630 assert(length >= 8);
631 ZSTD_overlapCopy8(&op, &ip, diff);
632 assert(op - ip >= 8);
633 assert(op <= oend);
634 }
635
636 if (oend <= oend_w) {
637 /* No risk of overwrite. */
638 ZSTD_wildcopy(op, ip, length, ovtype);
639 return;
640 }
641 if (op <= oend_w) {
642 /* Wildcopy until we get close to the end. */
643 assert(oend > oend_w);
644 ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
645 ip += oend_w - op;
646 op = oend_w;
647 }
648 /* Handle the leftovers. */
649 while (op < oend) *op++ = *ip++;
650 }
651
652 /* ZSTD_execSequenceEnd():
653 * This version handles cases that are near the end of the output buffer. It requires
654 * more careful checks to make sure there is no overflow. By separating out these hard
655 * and unlikely cases, we can speed up the common cases.
656 *
657 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
658 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
659 */
584 660 FORCE_NOINLINE
585 size_t ZSTD_execSequenceLast7(BYTE* op,
586 BYTE* const oend, seq_t sequence,
587 const BYTE** litPtr, const BYTE* const litLimit,
588 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
661 size_t ZSTD_execSequenceEnd(BYTE* op,
662 BYTE* const oend, seq_t sequence,
663 const BYTE** litPtr, const BYTE* const litLimit,
664 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
589 665 {
590 666 BYTE* const oLitEnd = op + sequence.litLength;
591 667 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
592 668 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
593 669 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
594 670 const BYTE* match = oLitEnd - sequence.offset;
671 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
595 672
596 /* check */
597 RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
673 /* bounds checks */
674 assert(oLitEnd < oMatchEnd);
675 RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
598 676 RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
599 677
600 678 /* copy literals */
601 while (op < oLitEnd) *op++ = *(*litPtr)++;
679 ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
680 op = oLitEnd;
681 *litPtr = iLitEnd;
602 682
603 683 /* copy Match */
604 if (sequence.offset > (size_t)(oLitEnd - base)) {
684 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
605 685 /* offset beyond prefix */
606 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
607 match = dictEnd - (base-match);
686 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
687 match = dictEnd - (prefixStart-match);
608 688 if (match + sequence.matchLength <= dictEnd) {
609 689 memmove(oLitEnd, match, sequence.matchLength);
610 690 return sequenceLength;
@@ -614,13 +694,12 size_t ZSTD_execSequenceLast7(BYTE* op,
614 694 memmove(oLitEnd, match, length1);
615 695 op = oLitEnd + length1;
616 696 sequence.matchLength -= length1;
617 match = base;
697 match = prefixStart;
618 698 } }
619 while (op < oMatchEnd) *op++ = *match++;
699 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
620 700 return sequenceLength;
621 701 }
622 702
623
624 703 HINT_INLINE
625 704 size_t ZSTD_execSequence(BYTE* op,
626 705 BYTE* const oend, seq_t sequence,
@@ -634,20 +713,29 size_t ZSTD_execSequence(BYTE* op,
634 713 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
635 714 const BYTE* match = oLitEnd - sequence.offset;
636 715
637 /* check */
638 RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
639 RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
640 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
716 /* Errors and uncommon cases handled here. */
717 assert(oLitEnd < oMatchEnd);
718 if (iLitEnd > litLimit || oMatchEnd > oend_w)
719 return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
720
721 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
722 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
723 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
724 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
641 725
642 /* copy Literals */
643 if (sequence.litLength > 8)
644 ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
645 else
646 ZSTD_copy8(op, *litPtr);
726 /* Copy Literals:
727 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
728 * We likely don't need the full 32-byte wildcopy.
729 */
730 assert(WILDCOPY_OVERLENGTH >= 16);
731 ZSTD_copy16(op, (*litPtr));
732 if (sequence.litLength > 16) {
733 ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
734 }
647 735 op = oLitEnd;
648 736 *litPtr = iLitEnd; /* update for next sequence */
649 737
650 /* copy Match */
738 /* Copy Match */
651 739 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
652 740 /* offset beyond prefix -> go into extDict */
653 741 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -662,123 +750,33 size_t ZSTD_execSequence(BYTE* op,
662 750 op = oLitEnd + length1;
663 751 sequence.matchLength -= length1;
664 752 match = prefixStart;
665 if (op > oend_w || sequence.matchLength < MINMATCH) {
666 U32 i;
667 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
668 return sequenceLength;
669 }
670 753 } }
671 /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
672
673 /* match within prefix */
674 if (sequence.offset < 8) {
675 /* close range match, overlap */
676 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
677 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
678 int const sub2 = dec64table[sequence.offset];
679 op[0] = match[0];
680 op[1] = match[1];
681 op[2] = match[2];
682 op[3] = match[3];
683 match += dec32table[sequence.offset];
684 ZSTD_copy4(op+4, match);
685 match -= sub2;
686 } else {
687 ZSTD_copy8(op, match);
688 }
689 op += 8; match += 8;
690
691 if (oMatchEnd > oend-(16-MINMATCH)) {
692 if (op < oend_w) {
693 ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
694 match += oend_w - op;
695 op = oend_w;
696 }
697 while (op < oMatchEnd) *op++ = *match++;
698 } else {
699 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
700 }
701 return sequenceLength;
702 }
703
704
705 HINT_INLINE
706 size_t ZSTD_execSequenceLong(BYTE* op,
707 BYTE* const oend, seq_t sequence,
708 const BYTE** litPtr, const BYTE* const litLimit,
709 const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
710 {
711 BYTE* const oLitEnd = op + sequence.litLength;
712 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
713 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
714 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
715 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
716 const BYTE* match = sequence.match;
717
718 /* check */
719 RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
720 RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
721 if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
754 /* Match within prefix of 1 or more bytes */
755 assert(op <= oMatchEnd);
756 assert(oMatchEnd <= oend_w);
757 assert(match >= prefixStart);
758 assert(sequence.matchLength >= 1);
722 759
723 /* copy Literals */
724 if (sequence.litLength > 8)
725 ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
726 else
727 ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
728
729 op = oLitEnd;
730 *litPtr = iLitEnd; /* update for next sequence */
760 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
761 * without overlap checking.
762 */
763 if (sequence.offset >= WILDCOPY_VECLEN) {
764 /* We bet on a full wildcopy for matches, since we expect matches to be
765 * longer than literals (in general). In silesia, ~10% of matches are longer
766 * than 16 bytes.
767 */
768 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
769 return sequenceLength;
770 }
771 assert(sequence.offset < WILDCOPY_VECLEN);
731 772
732 /* copy Match */
733 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
734 /* offset beyond prefix */
735 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
736 if (match + sequence.matchLength <= dictEnd) {
737 memmove(oLitEnd, match, sequence.matchLength);
738 return sequenceLength;
739 }
740 /* span extDict & currentPrefixSegment */
741 { size_t const length1 = dictEnd - match;
742 memmove(oLitEnd, match, length1);
743 op = oLitEnd + length1;
744 sequence.matchLength -= length1;
745 match = prefixStart;
746 if (op > oend_w || sequence.matchLength < MINMATCH) {
747 U32 i;
748 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
749 return sequenceLength;
750 }
751 } }
752 assert(op <= oend_w);
753 assert(sequence.matchLength >= MINMATCH);
773 /* Copy 8 bytes and spread the offset to be >= 8. */
774 ZSTD_overlapCopy8(&op, &match, sequence.offset);
754 775
755 /* match within prefix */
756 if (sequence.offset < 8) {
757 /* close range match, overlap */
758 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
759 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
760 int const sub2 = dec64table[sequence.offset];
761 op[0] = match[0];
762 op[1] = match[1];
763 op[2] = match[2];
764 op[3] = match[3];
765 match += dec32table[sequence.offset];
766 ZSTD_copy4(op+4, match);
767 match -= sub2;
768 } else {
769 ZSTD_copy8(op, match);
770 }
771 op += 8; match += 8;
772
773 if (oMatchEnd > oend-(16-MINMATCH)) {
774 if (op < oend_w) {
775 ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
776 match += oend_w - op;
777 op = oend_w;
778 }
779 while (op < oMatchEnd) *op++ = *match++;
780 } else {
781 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
776 /* If the match length is > 8 bytes, then continue with the wildcopy. */
777 if (sequence.matchLength > 8) {
778 assert(op < oMatchEnd);
779 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
782 780 }
783 781 return sequenceLength;
784 782 }
@@ -1098,7 +1096,7 ZSTD_decompressSequencesLong_body(
1098 1096 /* decode and decompress */
1099 1097 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1100 1098 seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1101 size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1099 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1102 1100 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1103 1101 PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1104 1102 sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1109,7 +1107,7 ZSTD_decompressSequencesLong_body(
1109 1107 /* finish queue */
1110 1108 seqNb -= seqAdvance;
1111 1109 for ( ; seqNb<nbSeq ; seqNb++) {
1112 size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1110 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1113 1111 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1114 1112 op += oneSeqSize;
1115 1113 }
@@ -36,16 +36,17 extern "C" {
36 36 *****************************************************************/
37 37 /* Deprecation warnings */
38 38 /* Should these warnings be a problem,
39 it is generally possible to disable them,
40 typically with -Wno-deprecated-declarations for gcc
41 or _CRT_SECURE_NO_WARNINGS in Visual.
42 Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
39 * it is generally possible to disable them,
40 * typically with -Wno-deprecated-declarations for gcc
41 * or _CRT_SECURE_NO_WARNINGS in Visual.
42 * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
43 */
43 44 #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
44 45 # define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
45 46 #else
46 47 # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
47 48 # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
48 # elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
49 # elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
49 50 # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
50 51 # elif defined(__GNUC__) && (__GNUC__ >= 3)
51 52 # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
@@ -638,8 +638,8 void COVER_warnOnSmallCorpus(size_t maxD
638 638 "compared to the source size %u! "
639 639 "size(source)/size(dictionary) = %f, but it should be >= "
640 640 "10! This may lead to a subpar dictionary! We recommend "
641 "training on sources at least 10x, and up to 100x the "
642 "size of the dictionary!\n", (U32)maxDictSize,
641 "training on sources at least 10x, and preferably 100x "
642 "the size of the dictionary! \n", (U32)maxDictSize,
643 643 (U32)nbDmers, ratio);
644 644 }
645 645
@@ -571,7 +571,7 static void ZDICT_fillNoise(void* buffer
571 571 unsigned const prime1 = 2654435761U;
572 572 unsigned const prime2 = 2246822519U;
573 573 unsigned acc = prime1;
574 size_t p=0;;
574 size_t p=0;
575 575 for (p=0; p<length; p++) {
576 576 acc *= prime2;
577 577 ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
@@ -15,6 +15,7 extern "C" {
15 15 #define ZSTD_H_235446
16 16
17 17 /* ====== Dependency ======*/
18 #include <limits.h> /* INT_MAX */
18 19 #include <stddef.h> /* size_t */
19 20
20 21
@@ -71,7 +72,7 extern "C" {
71 72 /*------ Version ------*/
72 73 #define ZSTD_VERSION_MAJOR 1
73 74 #define ZSTD_VERSION_MINOR 4
74 #define ZSTD_VERSION_RELEASE 3
75 #define ZSTD_VERSION_RELEASE 4
75 76
76 77 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
77 78 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@@ -196,9 +197,13 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(v
196 197 ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
197 198
198 199 /*! ZSTD_compressCCtx() :
199 * Same as ZSTD_compress(), using an explicit ZSTD_CCtx
200 * The function will compress at requested compression level,
201 * ignoring any other parameter */
200 * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
201 * Important : in order to behave similarly to `ZSTD_compress()`,
202 * this function compresses at requested compression level,
203 * __ignoring any other parameter__ .
204 * If any advanced parameter was set using the advanced API,
205 * they will all be reset. Only `compressionLevel` remains.
206 */
202 207 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
203 208 void* dst, size_t dstCapacity,
204 209 const void* src, size_t srcSize,
@@ -233,7 +238,7 ZSTDLIB_API size_t ZSTD_decompressDCtx(Z
233 238 * using ZSTD_CCtx_set*() functions.
234 239 * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
235 240 * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
236 * They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()
241 * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
237 242 *
238 243 * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
239 244 *
@@ -261,18 +266,26 typedef enum {
261 266
262 267 /* compression parameters
263 268 * Note: When compressing with a ZSTD_CDict these parameters are superseded
264 * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
265 * for more info (superseded-by-cdict). */
266 ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
269 * by the parameters used to construct the ZSTD_CDict.
270 * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
271 ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
272 * Note that exact compression parameters are dynamically determined,
273 * depending on both compression level and srcSize (when known).
267 274 * Default level is ZSTD_CLEVEL_DEFAULT==3.
268 275 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
269 276 * Note 1 : it's possible to pass a negative compression level.
270 * Note 2 : setting a level sets all default values of other compression parameters */
277 * Note 2 : setting a level resets all other compression parameters to default */
278 /* Advanced compression parameters :
279 * It's possible to pin down compression parameters to some specific values.
280 * In which case, these values are no longer dynamically selected by the compressor */
271 281 ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
282 * This will set a memory budget for streaming decompression,
283 * with larger values requiring more memory
284 * and typically compressing more.
272 285 * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
273 286 * Special: value 0 means "use default windowLog".
274 287 * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
275 * requires explicitly allowing such window size at decompression stage if using streaming. */
288 * requires explicitly allowing such size at streaming decompression stage. */
276 289 ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
277 290 * Resulting memory usage is (1 << (hashLog+2)).
278 291 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
@@ -283,13 +296,13 typedef enum {
283 296 * Resulting memory usage is (1 << (chainLog+2)).
284 297 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
285 298 * Larger tables result in better and slower compression.
286 * This parameter is useless when using "fast" strategy.
299 * This parameter is useless for "fast" strategy.
287 300 * It's still useful when using "dfast" strategy,
288 301 * in which case it defines a secondary probe table.
289 302 * Special: value 0 means "use default chainLog". */
290 303 ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
291 304 * More attempts result in better and slower compression.
292 * This parameter is useless when using "fast" and "dFast" strategies.
305 * This parameter is useless for "fast" and "dFast" strategies.
293 306 * Special: value 0 means "use default searchLog". */
294 307 ZSTD_c_minMatch=105, /* Minimum size of searched matches.
295 308 * Note that Zstandard can still find matches of smaller size,
@@ -344,7 +357,7 typedef enum {
344 357 ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
345 358 * Content size must be known at the beginning of compression.
346 359 * This is automatically the case when using ZSTD_compress2(),
347 * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
360 * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
348 361 ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
349 362 ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
350 363
@@ -363,7 +376,7 typedef enum {
363 376 * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
364 377 * 0 means default, which is dynamically determined based on compression parameters.
365 378 * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
366 * The minimum size is automatically and transparently enforced */
379 * The minimum size is automatically and transparently enforced. */
367 380 ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
368 381 * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
369 382 * It helps preserve compression ratio, while each job is compressed in parallel.
@@ -386,6 +399,7 typedef enum {
386 399 * ZSTD_c_forceAttachDict
387 400 * ZSTD_c_literalCompressionMode
388 401 * ZSTD_c_targetCBlockSize
402 * ZSTD_c_srcSizeHint
389 403 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
390 404 * note : never ever use experimentalParam? names directly;
391 405 * also, the enums values themselves are unstable and can still change.
@@ -396,6 +410,7 typedef enum {
396 410 ZSTD_c_experimentalParam4=1001,
397 411 ZSTD_c_experimentalParam5=1002,
398 412 ZSTD_c_experimentalParam6=1003,
413 ZSTD_c_experimentalParam7=1004
399 414 } ZSTD_cParameter;
400 415
401 416 typedef struct {
@@ -793,12 +808,17 ZSTDLIB_API size_t ZSTD_decompress_using
793 808 typedef struct ZSTD_CDict_s ZSTD_CDict;
794 809
795 810 /*! ZSTD_createCDict() :
796 * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
797 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
811 * When compressing multiple messages or blocks using the same dictionary,
812 * it's recommended to digest the dictionary only once, since it's a costly operation.
813 * ZSTD_createCDict() will create a state from digesting a dictionary.
814 * The resulting state can be used for future compression operations with very limited startup cost.
798 815 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
799 * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
800 * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
801 * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
816 * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
817 * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
818 * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
819 * in which case the only thing that it transports is the @compressionLevel.
820 * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
821 * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
802 822 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
803 823 int compressionLevel);
804 824
@@ -925,7 +945,7 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS
925 945 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
926 946 * It's a CPU consuming operation, with non-negligible impact on latency.
927 947 * If there is a need to use the same prefix multiple times, consider loadDictionary instead.
928 * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
948 * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
929 949 * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
930 950 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
931 951 const void* prefix, size_t prefixSize);
@@ -969,7 +989,7 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZS
969 989 * Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
970 990 * Prefix buffer must remain unmodified up to the end of frame,
971 991 * reached when ZSTD_decompressStream() returns 0.
972 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
992 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
973 993 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
974 994 * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
975 995 * A full dictionary is more costly, as it requires building tables.
@@ -1014,8 +1034,8 ZSTDLIB_API size_t ZSTD_sizeof_DDict(con
1014 1034 * Some of them might be removed in the future (especially when redundant with existing stable functions)
1015 1035 * ***************************************************************************************/
1016 1036
1017 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size required to query frame header size */
1018 #define ZSTD_FRAMEHEADERSIZE_MIN 6
1037 #define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
1038 #define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
1019 1039 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
1020 1040 #define ZSTD_SKIPPABLEHEADERSIZE 8
1021 1041
@@ -1063,6 +1083,8 ZSTDLIB_API size_t ZSTD_sizeof_DDict(con
1063 1083 /* Advanced parameter bounds */
1064 1084 #define ZSTD_TARGETCBLOCKSIZE_MIN 64
1065 1085 #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
1086 #define ZSTD_SRCSIZEHINT_MIN 0
1087 #define ZSTD_SRCSIZEHINT_MAX INT_MAX
1066 1088
1067 1089 /* internal */
1068 1090 #define ZSTD_HASHLOG3_MAX 17
@@ -1073,6 +1095,24 ZSTDLIB_API size_t ZSTD_sizeof_DDict(con
1073 1095 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
1074 1096
1075 1097 typedef struct {
1098 unsigned int matchPos; /* Match pos in dst */
1099 /* If seqDef.offset > 3, then this is seqDef.offset - 3
1100 * If seqDef.offset <= 3, then this is the corresponding repeat offset
1101 * But if seqDef.offset < 3 and litLength == 0, this is the
1102 * repeat offset before the corresponding repeat offset
1103 * And if seqDef.offset == 3 and litLength == 0, this is the
1104 * most recent repeat offset - 1
1105 */
1106 unsigned int offset;
1107 unsigned int litLength; /* Literal length */
1108 unsigned int matchLength; /* Match length */
1109 /* 0 when seq not rep and seqDef.offset otherwise
1110 * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
1111 */
1112 unsigned int rep;
1113 } ZSTD_Sequence;
1114
1115 typedef struct {
1076 1116 unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
1077 1117 unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
1078 1118 unsigned hashLog; /**< dispatch table : larger == faster, more memory */
@@ -1101,21 +1141,12 typedef enum {
1101 1141
1102 1142 typedef enum {
1103 1143 ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
1104 ZSTD_dlm_byRef = 1, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
1144 ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
1105 1145 } ZSTD_dictLoadMethod_e;
1106 1146
1107 1147 typedef enum {
1108 /* Opened question : should we have a format ZSTD_f_auto ?
1109 * Today, it would mean exactly the same as ZSTD_f_zstd1.
1110 * But, in the future, should several formats become supported,
1111 * on the compression side, it would mean "default format".
1112 * On the decompression side, it would mean "automatic format detection",
1113 * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
1114 * Since meaning is a little different, another option could be to define different enums for compression and decompression.
1115 * This question could be kept for later, when there are actually multiple formats to support,
1116 * but there is also the question of pinning enum values, and pinning value `0` is especially important */
1117 1148 ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
1118 ZSTD_f_zstd1_magicless = 1, /* Variant of zstd frame format, without initial 4-bytes magic number.
1149 ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
1119 1150 * Useful to save 4 bytes per generated frame.
1120 1151 * The decoder cannot recognise this format automatically, so it must be explicitly instructed to expect it. */
1121 1152 } ZSTD_format_e;
@@ -1126,7 +1157,7 typedef enum {
1126 1157 * to evolve and should be considered only in the context of extremely
1127 1158 * advanced performance tuning.
1128 1159 *
1129 * Zstd currently supports the use of a CDict in two ways:
1160 * Zstd currently supports the use of a CDict in three ways:
1130 1161 *
1131 1162 * - The contents of the CDict can be copied into the working context. This
1132 1163 * means that the compression can search both the dictionary and input
@@ -1142,6 +1173,12 typedef enum {
1142 1173 * working context's tables can be reused). For small inputs, this can be
1143 1174 * faster than copying the CDict's tables.
1144 1175 *
1176 * - The CDict's tables are not used at all, and instead we use the working
1177 * context alone to reload the dictionary and use params based on the source
1178 * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
1179 * This method is effective when the dictionary sizes are very small relative
1180 * to the input size, and the input size is fairly large to begin with.
1181 *
1145 1182 * Zstd has a simple internal heuristic that selects which strategy to use
1146 1183 * at the beginning of a compression. However, if experimentation shows that
1147 1184 * Zstd is making poor choices, it is possible to override that choice with
@@ -1150,6 +1187,7 typedef enum {
1150 1187 ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
1151 1188 ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
1152 1189 ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
1190 ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
1153 1191 } ZSTD_dictAttachPref_e;
1154 1192
1155 1193 typedef enum {
@@ -1158,7 +1196,7 typedef enum {
1158 1196 * levels will be compressed. */
1159 1197 ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
1160 1198 * emitted if Huffman compression is not profitable. */
1161 ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */
1199 ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
1162 1200 } ZSTD_literalCompressionMode_e;
1163 1201
1164 1202
@@ -1210,20 +1248,38 ZSTDLIB_API unsigned long long ZSTD_deco
1210 1248 * or an error code (if srcSize is too small) */
1211 1249 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
1212 1250
1251 /*! ZSTD_getSequences() :
1252 * Extract sequences from the sequence store
1253 * zc can be used to insert custom compression params.
1254 * This function invokes ZSTD_compress2
1255 * @return : number of sequences extracted
1256 */
1257 ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
1258 size_t outSeqsSize, const void* src, size_t srcSize);
1259
1213 1260
1214 1261 /***************************************
1215 1262 * Memory management
1216 1263 ***************************************/
1217 1264
1218 1265 /*! ZSTD_estimate*() :
1219 * These functions make it possible to estimate memory usage
1220 * of a future {D,C}Ctx, before its creation.
1221 * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
1222 * It will also consider src size to be arbitrarily "large", which is worst case.
1223 * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
1224 * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
1225 * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
1226 * Note : CCtx size estimation is only correct for single-threaded compression. */
1266 * These functions make it possible to estimate memory usage of a future
1267 * {D,C}Ctx, before its creation.
1268 *
1269 * ZSTD_estimateCCtxSize() will provide a budget large enough for any
1270 * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
1271 * this estimate does not include space for a window buffer, so this estimate
1272 * is guaranteed to be enough for single-shot compressions, but not streaming
1273 * compressions. It will however assume the input may be arbitrarily large,
1274 * which is the worst case. If srcSize is known to always be small,
1275 * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
1276 * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
1277 * ZSTD_getCParams() to create cParams from compressionLevel.
1278 * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
1279 * ZSTD_CCtxParams_setParameter().
1280 *
1281 * Note: only single-threaded compression is supported.
1282 * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. */
1227 1283 ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
1228 1284 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
1229 1285 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
@@ -1334,7 +1390,8 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict
1334 1390 * Create a digested dictionary for compression
1335 1391 * Dictionary content is just referenced, not duplicated.
1336 1392 * As a consequence, `dictBuffer` **must** outlive CDict,
1337 * and its content must remain unmodified throughout the lifetime of CDict. */
1393 * and its content must remain unmodified throughout the lifetime of CDict.
1394 * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
1338 1395 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
1339 1396
1340 1397 /*! ZSTD_getCParams() :
@@ -1361,7 +1418,9 ZSTDLIB_API size_t ZSTD_checkCParams(ZST
1361 1418 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
1362 1419
1363 1420 /*! ZSTD_compress_advanced() :
1364 * Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure) */
1421 * Note : this function is now DEPRECATED.
1422 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
1423 * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
1365 1424 ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
1366 1425 void* dst, size_t dstCapacity,
1367 1426 const void* src, size_t srcSize,
@@ -1369,7 +1428,9 ZSTDLIB_API size_t ZSTD_compress_advance
1369 1428 ZSTD_parameters params);
1370 1429
1371 1430 /*! ZSTD_compress_usingCDict_advanced() :
1372 * Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters */
1431 * Note : this function is now REDUNDANT.
1432 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
1433 * This prototype will be marked as deprecated and generate compilation warning in some future version */
1373 1434 ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
1374 1435 void* dst, size_t dstCapacity,
1375 1436 const void* src, size_t srcSize,
@@ -1441,6 +1502,12 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_a
1441 1502 * There is no guarantee on compressed block size (default:0) */
1442 1503 #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
1443 1504
1505 /* User's best guess of source size.
1506 * Hint is not valid when srcSizeHint == 0.
1507 * There is no guarantee that hint is close to actual source size,
1508 * but compression ratio may regress significantly if guess considerably underestimates */
1509 #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
1510
1444 1511 /*! ZSTD_CCtx_getParameter() :
1445 1512 * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
1446 1513 * and store it into int* value.
@@ -1613,8 +1680,13 ZSTDLIB_API size_t ZSTD_decompressStream
1613 1680 * pledgedSrcSize must be correct. If it is not known at init time, use
1614 1681 * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
1615 1682 * "0" also disables frame content size field. It may be enabled in the future.
1683 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1616 1684 */
1617 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
1685 ZSTDLIB_API size_t
1686 ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
1687 int compressionLevel,
1688 unsigned long long pledgedSrcSize);
1689
1618 1690 /**! ZSTD_initCStream_usingDict() :
1619 1691 * This function is deprecated, and is equivalent to:
1620 1692 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -1623,42 +1695,66 ZSTDLIB_API size_t ZSTD_initCStream_srcS
1623 1695 *
1624 1696 * Creates an internal CDict (incompatible with static CCtx), except if
1625 1697 * dict == NULL or dictSize < 8, in which case no dict is used.
1626 * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if
1698 * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
1627 1699 * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
1700 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1628 1701 */
1629 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
1702 ZSTDLIB_API size_t
1703 ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
1704 const void* dict, size_t dictSize,
1705 int compressionLevel);
1706
1630 1707 /**! ZSTD_initCStream_advanced() :
1631 1708 * This function is deprecated, and is approximately equivalent to:
1632 1709 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
1633 * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
1710 * // Pseudocode: Set each zstd parameter and leave the rest as-is.
1711 * for ((param, value) : params) {
1712 * ZSTD_CCtx_setParameter(zcs, param, value);
1713 * }
1634 1714 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
1635 1715 * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
1636 1716 *
1637 * pledgedSrcSize must be correct. If srcSize is not known at init time, use
1638 * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy.
1717 * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
1718 * pledgedSrcSize must be correct.
1719 * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
1720 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1639 1721 */
1640 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
1641 ZSTD_parameters params, unsigned long long pledgedSrcSize);
1722 ZSTDLIB_API size_t
1723 ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
1724 const void* dict, size_t dictSize,
1725 ZSTD_parameters params,
1726 unsigned long long pledgedSrcSize);
1727
1642 1728 /**! ZSTD_initCStream_usingCDict() :
1643 1729 * This function is deprecated, and equivalent to:
1644 1730 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
1645 1731 * ZSTD_CCtx_refCDict(zcs, cdict);
1646 1732 *
1647 1733 * note : cdict will just be referenced, and must outlive the compression session
1734 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1648 1735 */
1649 1736 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
1737
1650 1738 /**! ZSTD_initCStream_usingCDict_advanced() :
1651 * This function is deprecated, and is approximately equivalent to:
1739 * This function is DEPRECATED, and is approximately equivalent to:
1652 1740 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
1653 * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
1741 * // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
1742 * for ((fParam, value) : fParams) {
1743 * ZSTD_CCtx_setParameter(zcs, fParam, value);
1744 * }
1654 1745 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
1655 1746 * ZSTD_CCtx_refCDict(zcs, cdict);
1656 1747 *
1657 1748 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
1658 1749 * pledgedSrcSize must be correct. If srcSize is not known at init time, use
1659 1750 * value ZSTD_CONTENTSIZE_UNKNOWN.
1751 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1660 1752 */
1661 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);
1753 ZSTDLIB_API size_t
1754 ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
1755 const ZSTD_CDict* cdict,
1756 ZSTD_frameParameters fParams,
1757 unsigned long long pledgedSrcSize);
1662 1758
1663 1759 /*! ZSTD_resetCStream() :
1664 1760 * This function is deprecated, and is equivalent to:
@@ -1673,6 +1769,7 ZSTDLIB_API size_t ZSTD_initCStream_usin
1673 1769 * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
1674 1770 * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
1675 1771 * @return : 0, or an error code (which can be tested using ZSTD_isError())
1772 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1676 1773 */
1677 1774 ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
1678 1775
@@ -1718,8 +1815,10 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_
1718 1815 * ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
1719 1816 *
1720 1817 * note: no dictionary will be used if dict == NULL or dictSize < 8
1818 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1721 1819 */
1722 1820 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
1821
1723 1822 /**
1724 1823 * This function is deprecated, and is equivalent to:
1725 1824 *
@@ -1727,14 +1826,17 ZSTDLIB_API size_t ZSTD_initDStream_usin
1727 1826 * ZSTD_DCtx_refDDict(zds, ddict);
1728 1827 *
1729 1828 * note : ddict is only referenced, so it must outlive the decompression session
1829 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1730 1830 */
1731 1831 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
1832
1732 1833 /**
1733 1834 * This function is deprecated, and is equivalent to:
1734 1835 *
1735 1836 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
1736 1837 *
1737 1838 * re-use decompression parameters from previous init; saves dictionary loading
1839 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
1738 1840 */
1739 1841 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
1740 1842
@@ -1908,7 +2010,7 ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne
1908 2010
1909 2011 /*!
1910 2012 Block functions produce and decode raw zstd blocks, without frame metadata.
1911 Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
2013 Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
1912 2014 But users will have to take charge of the metadata needed to regenerate data, such as compressed and content sizes.
1913 2015
1914 2016 A few rules to respect :
General Comments 0
You need to be logged in to leave comments. Login now