##// END OF EJS Templates
zstd: vendor python-zstandard 0.7.0...
Gregory Szorc -
r30895:c32454d6 default
parent child Browse files
Show More
@@ -0,0 +1,132 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
15
/* get_frame_parameters(data): parse a zstd frame header out of `data`.
 *
 * Returns a new FrameParametersObject on success.
 * Raises TypeError if the argument is not bytes, ZstdError if the header is
 * invalid or if more input is required to parse it.
 */
FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
	const char* source;
	Py_ssize_t sourceSize; /* NOTE(review): "y#"/"s#" with Py_ssize_t assumes PY_SSIZE_T_CLEAN is in effect - presumably set in python-zstandard.h; confirm */
	ZSTD_frameParams params;
	FrameParametersObject* result = NULL;
	size_t zresult;

	/* "y#" (Py3) accepts bytes only; "s#" (Py2) would also accept unicode */
#if PY_MAJOR_VERSION >= 3
	if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
#else
	if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
#endif
		&source, &sourceSize)) {
		return NULL;
	}

	/* Needed for Python 2 to reject unicode */
	if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
		PyErr_SetString(PyExc_TypeError, "argument must be bytes");
		return NULL;
	}

	zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);

	if (ZSTD_isError(zresult)) {
		PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
		return NULL;
	}

	/* a non-zero, non-error result is the number of additional input bytes
	   needed before the header can be parsed */
	if (zresult) {
		PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
		return NULL;
	}

	result = PyObject_New(FrameParametersObject, &FrameParametersType);
	if (!result) {
		return NULL;
	}

	/* copy parsed fields onto the Python object (exposed via the members table) */
	result->frameContentSize = params.frameContentSize;
	result->windowSize = params.windowSize;
	result->dictID = params.dictID;
	result->checksumFlag = params.checksumFlag ? 1 : 0;

	return result;
}
62
/* tp_dealloc : FrameParameters holds no owned references, so just free the object. */
static void FrameParameters_dealloc(PyObject* self) {
	PyObject_Del(self);
}
66
/* Read-only attribute descriptors exposing the parsed frame header fields. */
static PyMemberDef FrameParameters_members[] = {
	{ "content_size", T_ULONGLONG,
	  offsetof(FrameParametersObject, frameContentSize), READONLY,
	  "frame content size" },
	{ "window_size", T_UINT,
	  offsetof(FrameParametersObject, windowSize), READONLY,
	  "window size" },
	{ "dict_id", T_UINT,
	  offsetof(FrameParametersObject, dictID), READONLY,
	  "dictionary ID" },
	{ "has_checksum", T_BOOL,
	  offsetof(FrameParametersObject, checksumFlag), READONLY,
	  "checksum flag" },
	{ NULL }
};
82
/* Python type object for FrameParameters.
 * tp_new is 0, so instances can only be created from C
 * (see get_frame_parameters); Python code cannot instantiate the type. */
PyTypeObject FrameParametersType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"FrameParameters", /* tp_name */
	sizeof(FrameParametersObject), /* tp_basicsize */
	0, /* tp_itemsize */
	(destructor)FrameParameters_dealloc, /* tp_dealloc */
	0, /* tp_print */
	0, /* tp_getattr */
	0, /* tp_setattr */
	0, /* tp_compare */
	0, /* tp_repr */
	0, /* tp_as_number */
	0, /* tp_as_sequence */
	0, /* tp_as_mapping */
	0, /* tp_hash */
	0, /* tp_call */
	0, /* tp_str */
	0, /* tp_getattro */
	0, /* tp_setattro */
	0, /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT, /* tp_flags */
	FrameParameters__doc__, /* tp_doc */
	0, /* tp_traverse */
	0, /* tp_clear */
	0, /* tp_richcompare */
	0, /* tp_weaklistoffset */
	0, /* tp_iter */
	0, /* tp_iternext */
	0, /* tp_methods */
	FrameParameters_members, /* tp_members */
	0, /* tp_getset */
	0, /* tp_base */
	0, /* tp_dict */
	0, /* tp_descr_get */
	0, /* tp_descr_set */
	0, /* tp_dictoffset */
	0, /* tp_init */
	0, /* tp_alloc */
	0, /* tp_new */
};
123
124 void frameparams_module_init(PyObject* mod) {
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
126 if (PyType_Ready(&FrameParametersType) < 0) {
127 return;
128 }
129
130 Py_IncRef((PyObject*)&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
132 }
@@ -0,0 +1,194 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Dependencies ======= */
12 #include <stddef.h> /* size_t */
13 #include <stdlib.h> /* malloc, calloc, free */
14 #include "pool.h"
15
16 /* ====== Compiler specifics ====== */
17 #if defined(_MSC_VER)
18 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
19 #endif
20
21
22 #ifdef ZSTD_MULTITHREAD
23
24 #include "threading.h" /* pthread adaptation */
25
/* A job is a function and an opaque argument */
typedef struct POOL_job_s {
    POOL_function function;   /* job entry point */
    void *opaque;             /* argument passed verbatim to `function` */
} POOL_job;

struct POOL_ctx_s {
    /* Keep track of the threads */
    pthread_t *threads;
    size_t numThreads;

    /* The queue is a circular buffer */
    POOL_job *queue;
    size_t queueHead;         /* next job to pop */
    size_t queueTail;         /* next free slot; head == tail means empty */
    size_t queueSize;         /* capacity + 1 (one slot sacrificed to tell empty from full) */
    /* The mutex protects the queue */
    pthread_mutex_t queueMutex;
    /* Condition variable for pushers to wait on when the queue is full */
    pthread_cond_t queuePushCond;
    /* Condition variables for poppers to wait on when the queue is empty */
    pthread_cond_t queuePopCond;
    /* Indicates if the queue is shutting down */
    int shutdown;
};
51
/* POOL_thread() :
   Work thread for the thread pool.
   Waits for jobs and executes them.
   @returns : NULL on failure else non-null.
*/
static void* POOL_thread(void* opaque) {
    POOL_ctx* const ctx = (POOL_ctx*)opaque;
    if (!ctx) { return NULL; }
    for (;;) {
        /* Lock the mutex and wait for a non-empty queue or until shutdown */
        pthread_mutex_lock(&ctx->queueMutex);
        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);   /* releases the mutex while waiting */
        }
        /* empty => shutting down: so stop */
        if (ctx->queueHead == ctx->queueTail) {
            pthread_mutex_unlock(&ctx->queueMutex);
            return opaque;   /* non-NULL, distinguishing clean exit from the !ctx failure path */
        }
        /* Pop a job off the queue */
        {   POOL_job const job = ctx->queue[ctx->queueHead];   /* copy by value before dropping the lock */
            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
            /* Unlock the mutex, signal a pusher, and run the job */
            pthread_mutex_unlock(&ctx->queueMutex);
            pthread_cond_signal(&ctx->queuePushCond);
            job.function(job.opaque);   /* run outside the lock so other workers can proceed */
        }
    }
    /* Unreachable */
}
82
/* POOL_create() :
 * Spawn `numThreads` workers sharing a circular job queue of `queueSize`.
 * Returns NULL on bad parameters or on allocation/thread-creation failure;
 * partially built state is torn down through POOL_free(). */
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
    POOL_ctx *ctx;
    /* Check the parameters */
    if (!numThreads || !queueSize) { return NULL; }
    /* Allocate the context and zero initialize */
    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
    if (!ctx) { return NULL; }
    /* Initialize the job queue.
     * It needs one extra space since one space is wasted to differentiate empty
     * and full queues.
     */
    ctx->queueSize = queueSize + 1;
    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
    ctx->queueHead = 0;
    ctx->queueTail = 0;
    /* NOTE(review): init return codes are not checked; pthread_mutex_init can
       fail on some platforms - confirm this is acceptable here */
    pthread_mutex_init(&ctx->queueMutex, NULL);
    pthread_cond_init(&ctx->queuePushCond, NULL);
    pthread_cond_init(&ctx->queuePopCond, NULL);
    ctx->shutdown = 0;
    /* Allocate space for the thread handles */
    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
    ctx->numThreads = 0;
    /* Check for errors */
    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
    /* Initialize the threads */
    {   size_t i;
        for (i = 0; i < numThreads; ++i) {
            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
                ctx->numThreads = i;   /* only the i already-started threads get joined by POOL_free */
                POOL_free(ctx);
                return NULL;
        }   }
        ctx->numThreads = numThreads;
    }
    return ctx;
}
119
/*! POOL_join() :
    Shutdown the queue, wake any sleeping threads, and join all of the threads.
    Jobs still queued at shutdown are never executed (workers exit on empty).
*/
static void POOL_join(POOL_ctx *ctx) {
    /* Shut down the queue */
    pthread_mutex_lock(&ctx->queueMutex);
    ctx->shutdown = 1;   /* set under the mutex so waiters re-check safely */
    pthread_mutex_unlock(&ctx->queueMutex);
    /* Wake up sleeping threads */
    pthread_cond_broadcast(&ctx->queuePushCond);
    pthread_cond_broadcast(&ctx->queuePopCond);
    /* Join all of the threads */
    {   size_t i;
        for (i = 0; i < ctx->numThreads; ++i) {
            pthread_join(ctx->threads[i], NULL);
    }   }
}
137
138 void POOL_free(POOL_ctx *ctx) {
139 if (!ctx) { return; }
140 POOL_join(ctx);
141 pthread_mutex_destroy(&ctx->queueMutex);
142 pthread_cond_destroy(&ctx->queuePushCond);
143 pthread_cond_destroy(&ctx->queuePopCond);
144 if (ctx->queue) free(ctx->queue);
145 if (ctx->threads) free(ctx->threads);
146 free(ctx);
147 }
148
/* Enqueue `function(opaque)` for a worker; blocks while the queue is full.
 * If the pool is shutting down, the job is silently dropped. */
void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
    if (!ctx) { return; }

    pthread_mutex_lock(&ctx->queueMutex);
    {   POOL_job const job = {function, opaque};
        /* Wait until there is space in the queue for the new job */
        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
        while (ctx->queueHead == newTail && !ctx->shutdown) {
            pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
            newTail = (ctx->queueTail + 1) % ctx->queueSize;   /* recompute: tail may have moved */
        }
        /* The queue is still going => there is space */
        if (!ctx->shutdown) {
            ctx->queue[ctx->queueTail] = job;
            ctx->queueTail = newTail;
        }
    }
    pthread_mutex_unlock(&ctx->queueMutex);
    pthread_cond_signal(&ctx->queuePopCond);   /* wake one worker to pick the job up */
}
170
171 #else /* ZSTD_MULTITHREAD not defined */
172 /* No multi-threading support */
173
/* We don't need any data, but if it is empty malloc() might return NULL. */
struct POOL_ctx_s {
    int data;   /* dummy member so sizeof(POOL_ctx) > 0 */
};
178
179 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
180 (void)numThreads;
181 (void)queueSize;
182 return (POOL_ctx *)malloc(sizeof(POOL_ctx));
183 }
184
185 void POOL_free(POOL_ctx *ctx) {
186 if (ctx) free(ctx);
187 }
188
/* No worker threads : run the job synchronously on the caller's thread. */
void POOL_add(void *ctx, POOL_function function, void *opaque) {
    (void)ctx;
    function(opaque);
}
193
194 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,56 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
#ifndef POOL_H
#define POOL_H

#if defined (__cplusplus)
extern "C" {
#endif


#include <stddef.h>   /* size_t */

/* Opaque thread-pool handle; the definition lives in pool.c and differs
 * depending on whether ZSTD_MULTITHREAD is defined. */
typedef struct POOL_ctx_s POOL_ctx;

/*! POOL_create() :
    Create a thread pool with at most `numThreads` threads.
    `numThreads` must be at least 1.
    The maximum number of queued jobs before blocking is `queueSize`.
    `queueSize` must be at least 1.
    @return : The POOL_ctx pointer on success else NULL.
*/
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);

/*! POOL_free() :
    Free a thread pool returned by POOL_create().
    Joins all workers first; accepts NULL.
*/
void POOL_free(POOL_ctx *ctx);

/*! POOL_function :
    The function type that can be added to a thread pool.
*/
typedef void (*POOL_function)(void *);
/*! POOL_add_function :
    The function type for a generic thread pool add function.
*/
typedef void (*POOL_add_function)(void *, POOL_function, void *);

/*! POOL_add() :
    Add the job `function(opaque)` to the thread pool.
    Possibly blocks until there is room in the queue.
    Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
void POOL_add(void *ctx, POOL_function function, void *opaque);


#if defined (__cplusplus)
}
#endif

#endif
@@ -0,0 +1,79 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 /**
15 * This file will hold wrapper for systems, which do not support pthreads
16 */
17
18 /* ====== Compiler specifics ====== */
19 #if defined(_MSC_VER)
20 # pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
21 #endif
22
23
24 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
25
26 /**
27 * Windows minimalist Pthread Wrapper, based on :
28 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
29 */
30
31
32 /* === Dependencies === */
33 #include <process.h>
34 #include <errno.h>
35 #include "threading.h"
36
37
38 /* === Implementation === */
39
/* Thread entry point handed to _beginthreadex() : unwraps the pthread_t shim,
 * runs the user routine, and stashes its result back into thread->arg so
 * _pthread_join() can report it. */
static unsigned __stdcall worker(void *arg)
{
    pthread_t* const thread = (pthread_t*) arg;
    thread->arg = thread->start_routine(thread->arg);   /* arg slot doubles as the return-value slot */
    return 0;
}
46
/* Minimal pthread_create() emulation over _beginthreadex().
 * `unused` stands in for pthread attributes and is ignored.
 * @return : 0 on success, errno from _beginthreadex() on failure. */
int pthread_create(pthread_t* thread, const void* unused,
            void* (*start_routine) (void*), void* arg)
{
    (void)unused;
    thread->arg = arg;
    thread->start_routine = start_routine;
    /* _beginthreadex (rather than CreateThread) so the CRT per-thread state is initialized */
    thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);

    if (!thread->handle)
        return errno;
    else
        return 0;
}
60
61 int _pthread_join(pthread_t * thread, void **value_ptr)
62 {
63 DWORD result;
64
65 if (!thread->handle) return 0;
66
67 result = WaitForSingleObject(thread->handle, INFINITE);
68 switch (result) {
69 case WAIT_OBJECT_0:
70 if (value_ptr) *value_ptr = thread->arg;
71 return 0;
72 case WAIT_ABANDONED:
73 return EINVAL;
74 default:
75 return GetLastError();
76 }
77 }
78
79 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,104 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 #ifndef THREADING_H_938743
15 #define THREADING_H_938743
16
17 #if defined (__cplusplus)
18 extern "C" {
19 #endif
20
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)

/**
 * Windows minimalist Pthread Wrapper, based on :
 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
 */

/* Condition variables require Vista+ APIs : force the target version to 0x0600 */
#ifdef WINVER
#  undef WINVER
#endif
#define WINVER       0x0600

#ifdef _WIN32_WINNT
#  undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0600

#ifndef WIN32_LEAN_AND_MEAN
#  define WIN32_LEAN_AND_MEAN
#endif

#include <windows.h>

/* mutex : mapped onto CRITICAL_SECTION (the attr argument `b` is ignored) */
#define pthread_mutex_t           CRITICAL_SECTION
#define pthread_mutex_init(a,b)   InitializeCriticalSection((a))
#define pthread_mutex_destroy(a)  DeleteCriticalSection((a))
#define pthread_mutex_lock(a)     EnterCriticalSection((a))
#define pthread_mutex_unlock(a)   LeaveCriticalSection((a))

/* condition variable : mapped onto CONDITION_VARIABLE (no destroy needed on Windows) */
#define pthread_cond_t            CONDITION_VARIABLE
#define pthread_cond_init(a, b)   InitializeConditionVariable((a))
#define pthread_cond_destroy(a)   /* No delete */
#define pthread_cond_wait(a, b)   SleepConditionVariableCS((a), (b), INFINITE)
#define pthread_cond_signal(a)    WakeConditionVariable((a))
#define pthread_cond_broadcast(a) WakeAllConditionVariable((a))

/* pthread_create() and pthread_join() */
typedef struct {
    HANDLE handle;                   /* _beginthreadex handle */
    void* (*start_routine)(void*);   /* user entry point */
    void* arg;                       /* user argument in; routine's return value out */
} pthread_t;

int pthread_create(pthread_t* thread, const void* unused,
                   void* (*start_routine) (void*), void* arg);

#define pthread_join(a, b) _pthread_join(&(a), (b))
int _pthread_join(pthread_t* thread, void** value_ptr);

/**
 * add here more wrappers as required
 */


#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
/* ===   POSIX Systems   === */
#  include <pthread.h>

#else  /* ZSTD_MULTITHREAD not defined */
/* No multithreading support */

#define pthread_mutex_t int   /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
#define pthread_mutex_init(a,b)
#define pthread_mutex_destroy(a)
#define pthread_mutex_lock(a)
#define pthread_mutex_unlock(a)

#define pthread_cond_t int
#define pthread_cond_init(a,b)
#define pthread_cond_destroy(a)
#define pthread_cond_wait(a,b)
#define pthread_cond_signal(a)
#define pthread_cond_broadcast(a)

/* do not use pthread_t */

#endif /* ZSTD_MULTITHREAD */
99
100 #if defined (__cplusplus)
101 }
102 #endif
103
104 #endif /* THREADING_H_938743 */
This diff has been collapsed as it changes many lines, (740 lines changed) Show them Hide them
@@ -0,0 +1,740 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Tuning parameters ====== */
12 #define ZSTDMT_NBTHREADS_MAX 128
13
14
15 /* ====== Compiler specifics ====== */
16 #if defined(_MSC_VER)
17 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
18 #endif
19
20
21 /* ====== Dependencies ====== */
22 #include <stdlib.h> /* malloc */
23 #include <string.h> /* memcpy */
24 #include "pool.h" /* threadpool */
25 #include "threading.h" /* mutex */
26 #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27 #include "zstdmt_compress.h"
28 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
29 #include "xxhash.h"
30
31
/* ======   Debug   ====== */
#if 0   /* flip to 1 for debug traces; POSIX-only (unistd.h, sys/times.h) */

#  include <stdio.h>
#  include <unistd.h>
#  include <sys/times.h>
   static unsigned g_debugLevel = 3;
#  define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
#  define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }

   /* hex-dump the first n bytes at p, at debug level l */
#  define DEBUG_PRINTHEX(l,p,n) { \
    unsigned debug_u;                   \
    for (debug_u=0; debug_u<(n); debug_u++)           \
        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
    DEBUGLOGRAW(l, " \n");                       \
}

/* wall clock in microseconds (times() granularity), used for mutex timing below */
static unsigned long long GetCurrentClockTimeMicroseconds()
{
   static clock_t _ticksPerSecond = 0;
   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);

   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
   return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
}

#define MUTEX_WAIT_TIME_DLEVEL 5
/* PTHREAD_MUTEX_LOCK : like pthread_mutex_lock(), but logs acquisitions that
 * took longer than 1 ms when the debug level is high enough */
#define PTHREAD_MUTEX_LOCK(mutex) \
if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
   unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
   pthread_mutex_lock(mutex); \
   unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
   unsigned long long elapsedTime = (afterTime-beforeTime); \
   if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
      DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
               elapsedTime, #mutex); \
   } \
} else pthread_mutex_lock(mutex);

#else

#  define DEBUGLOG(l, ...)      {}    /* disabled */
#  define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
#  define DEBUG_PRINTHEX(l,p,n) {}

#endif
78
79
/* =====   Buffer Pool   ===== */

typedef struct buffer_s {
    void* start;   /* malloc'd region; may be NULL (see g_nullBuffer) */
    size_t size;   /* usable size of `start` */
} buffer_t;

static const buffer_t g_nullBuffer = { NULL, 0 };

/* Recycles scratch buffers between jobs. Accessed from the main thread only
 * (see assumption on ZSTDMT_getBuffer / ZSTDMT_releaseBuffer). */
typedef struct ZSTDMT_bufferPool_s {
    unsigned totalBuffers;   /* capacity of bTable */
    unsigned nbBuffers;      /* number of buffers currently parked in bTable */
    buffer_t bTable[1];      /* variable size : allocated with totalBuffers entries */
} ZSTDMT_bufferPool;
94
/* Allocate a buffer pool sized for `nbThreads` : 2*nbThreads + 2 slots
 * (presumably two buffers in flight per thread plus spares - sized
 * heuristically). Returns NULL on allocation failure. */
static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
{
    unsigned const maxNbBuffers = 2*nbThreads + 2;
    /* `-1` because bTable[1] already accounts for one entry */
    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
    if (bufPool==NULL) return NULL;
    bufPool->totalBuffers = maxNbBuffers;
    bufPool->nbBuffers = 0;
    return bufPool;
}
104
/* Free the pool and every buffer parked in it. NULL is accepted. */
static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
{
    unsigned u;
    if (!bufPool) return;   /* compatibility with free on NULL */
    for (u=0; u<bufPool->totalBuffers; u++)
        free(bufPool->bTable[u].start);   /* unused slots are zeroed by calloc -> free(NULL) no-op */
    free(bufPool);
}
113
/* assumption : invocation from main thread only ! */
/* Get a buffer of at least `bSize` bytes : re-use a pooled buffer when its
 * size fits the request, otherwise allocate fresh. On malloc failure the
 * returned buffer has start==NULL and size==0. */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
{
    if (pool->nbBuffers) {   /* try to use an existing buffer */
        buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
        size_t const availBufferSize = buf.size;
        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize))   /* large enough, but not too much */
            return buf;
        free(buf.start);   /* size conditions not respected : scratch this buffer and create a new one */
    }
    /* create new buffer */
    {   buffer_t buffer;
        void* const start = malloc(bSize);
        if (start==NULL) bSize = 0;
        buffer.start = start;   /* note : start can be NULL if malloc fails ! */
        buffer.size = bSize;
        return buffer;
    }
}
133
134 /* store buffer for later re-use, up to pool capacity */
135 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
136 {
137 if (buf.start == NULL) return; /* release on NULL */
138 if (pool->nbBuffers < pool->totalBuffers) {
139 pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
140 return;
141 }
142 /* Reached bufferPool capacity (should not happen) */
143 free(buf.start);
144 }
145
146
/* =====   CCtx Pool   ===== */

/* Recycles compression contexts between jobs; main-thread access only. */
typedef struct {
    unsigned totalCCtx;   /* capacity of cctx[] (== nbThreads, see createCCtxPool) */
    unsigned availCCtx;   /* number of contexts currently parked */
    ZSTD_CCtx* cctx[1];   /* variable size */
} ZSTDMT_CCtxPool;
154
/* assumption : CCtxPool invocation only from main thread */

/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
    unsigned u;
    for (u=0; u<pool->totalCCtx; u++)
        ZSTD_freeCCtx(pool->cctx[u]);   /* note : compatible with free on NULL */
    free(pool);
}
165
/* ZSTDMT_createCCtxPool() :
 * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx()
 * Only one CCtx is created eagerly; further ones are created on demand by
 * ZSTDMT_getCCtx(). Returns NULL on allocation failure. */
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
{
    /* `-1` because cctx[1] already accounts for one slot */
    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
    if (!cctxPool) return NULL;
    cctxPool->totalCCtx = nbThreads;
    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
    cctxPool->cctx[0] = ZSTD_createCCtx();
    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
    DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
    return cctxPool;
}
179
180 static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
181 {
182 if (pool->availCCtx) {
183 pool->availCCtx--;
184 return pool->cctx[pool->availCCtx];
185 }
186 return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
187 }
188
/* Return a borrowed CCtx to the pool; frees it if the pool is already full.
 * NULL is accepted. */
static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return;   /* compatibility with release on NULL */
    if (pool->availCCtx < pool->totalCCtx)
        pool->cctx[pool->availCCtx++] = cctx;
    else
        /* pool overflow : should not happen, since totalCCtx==nbThreads */
        ZSTD_freeCCtx(cctx);
}
198
199
/* =====   Thread worker   ===== */

/* Input-gathering buffer : `filled` bytes of `buffer` are valid. */
typedef struct {
    buffer_t buffer;
    size_t filled;
} inBuff_t;

/* One unit of work handed to POOL_add() (see ZSTDMT_compressChunk).
 * Completion is published under jobCompleted_mutex and announced on
 * jobCompleted_cond. */
typedef struct {
    ZSTD_CCtx* cctx;                  /* borrowed from the CCtx pool */
    buffer_t src;                     /* owned source buffer (released via buffer pool) */
    const void* srcStart;             /* start of the (dictionary + payload) region */
    size_t srcSize;                   /* payload size, not counting dictSize */
    size_t dictSize;                  /* prefix of srcStart used as dictionary */
    buffer_t dstBuff;                 /* destination for compressed output */
    size_t cSize;                     /* result : compressed size, or a ZSTD error code */
    size_t dstFlushed;                /* bytes of dstBuff already flushed to the caller */
    unsigned firstChunk;              /* 1 if first chunk of the frame (emits the header) */
    unsigned lastChunk;               /* 1 if last chunk (emits the frame epilogue) */
    unsigned jobCompleted;            /* written under jobCompleted_mutex */
    unsigned jobScanned;
    pthread_mutex_t* jobCompleted_mutex;
    pthread_cond_t* jobCompleted_cond;
    ZSTD_parameters params;
    ZSTD_CDict* cdict;                /* optional pre-digested dictionary */
    unsigned long long fullFrameSize; /* total frame size, passed to compressBegin */
} ZSTDMT_jobDescription;
226
/* ZSTDMT_compressChunk() : POOL_function type */
/* Worker entry point : compress one chunk into job->dstBuff and publish the
 * result in job->cSize (which doubles as the error slot on failure). */
void ZSTDMT_compressChunk(void* jobDescription)
{
    ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
    const void* const src = (const char*)job->srcStart + job->dictSize;   /* skip the dictionary prefix */
    buffer_t const dstBuff = job->dstBuff;
    DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
    if (job->cdict) {
        size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
        if (job->cdict) DEBUGLOG(3, "using CDict ");
        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
    } else {
        size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
        ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
    }
    if (!job->firstChunk) {  /* flush frame header */
        /* non-first chunks must not keep the frame header : emit it with an
           empty input, then overwrite dstBuff from the start below; rep codes
           are invalidated since this chunk starts mid-frame.
           NOTE(review): relies on ZSTD_compressContinue(.., srcSize=0)
           producing exactly the header - confirm against zstd internals. */
        size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
        if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
        ZSTD_invalidateRepCodes(job->cctx);
    }

    DEBUGLOG(4, "Compressing : ");
    DEBUG_PRINTHEX(4, job->srcStart, 12);
    job->cSize = (job->lastChunk) ?   /* last chunk signal */
                 ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
                 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
    DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);

_endJob:
    /* publish completion under the shared mutex, then wake any waiter */
    PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
    job->jobCompleted = 1;
    job->jobScanned = 0;
    pthread_cond_signal(job->jobCompleted_cond);
    pthread_mutex_unlock(job->jobCompleted_mutex);
}
263
264
265 /* ------------------------------------------ */
266 /* ===== Multi-threaded compression ===== */
267 /* ------------------------------------------ */
268
/* Multi-threaded compression state.
 * Allocated by ZSTDMT_createCCtx() with (jobIDMask+1) job slots appended. */
struct ZSTDMT_CCtx_s {
    POOL_ctx* factory;                   /* worker thread pool */
    ZSTDMT_bufferPool* buffPool;
    ZSTDMT_CCtxPool* cctxPool;
    pthread_mutex_t jobCompleted_mutex;  /* guards jobs[].jobCompleted */
    pthread_cond_t jobCompleted_cond;
    size_t targetSectionSize;
    size_t marginSize;
    size_t inBuffSize;
    size_t dictSize;
    size_t targetDictSize;
    inBuff_t inBuff;                     /* input-gathering buffer */
    ZSTD_parameters params;
    XXH64_state_t xxhState;              /* running checksum state for the frame */
    unsigned nbThreads;
    unsigned jobIDMask;                  /* nbJobs - 1 ; nbJobs is a power of 2 */
    unsigned doneJobID;                  /* presumably next job to collect/flush - confirm in streaming code */
    unsigned nextJobID;                  /* presumably next job slot to post - confirm in streaming code */
    unsigned frameEnded;
    unsigned allJobsCompleted;
    unsigned overlapRLog;                /* overlap reduction log, set via ZSTDMT_setMTCtxParameter */
    unsigned long long frameContentSize;
    size_t sectionSize;
    ZSTD_CDict* cdict;
    ZSTD_CStream* cstream;               /* single-thread fallback, only when nbThreads==1 */
    ZSTDMT_jobDescription jobs[1];       /* variable size (must lies at the end) */
};
296
/* Create a multi-threaded compression context for nbThreads (1..ZSTDMT_NBTHREADS_MAX).
 * The job table size is the next power of 2 above nbThreads+2 so job IDs can
 * wrap with a simple mask. Returns NULL on bad argument or allocation failure. */
ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
{
    ZSTDMT_CCtx* cctx;
    U32 const minNbJobs = nbThreads + 2;
    U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
    U32 const nbJobs = 1 << nbJobsLog2;   /* power of 2 >= minNbJobs */
    DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
             nbThreads, minNbJobs, nbJobsLog2, nbJobs);
    if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
    cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
    if (!cctx) return NULL;
    cctx->nbThreads = nbThreads;
    cctx->jobIDMask = nbJobs - 1;
    cctx->allJobsCompleted = 1;
    cctx->sectionSize = 0;
    cctx->overlapRLog = 3;
    cctx->factory = POOL_create(nbThreads, 1);
    cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
    cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
    /* bitwise | on the (0/1) results of ! : intentionally branchless check */
    if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) {  /* one object was not created */
        ZSTDMT_freeCCtx(cctx);
        return NULL;
    }
    if (nbThreads==1) {
        /* single-thread mode compresses through a regular CStream */
        cctx->cstream = ZSTD_createCStream();
        if (!cctx->cstream) {
            ZSTDMT_freeCCtx(cctx); return NULL;
    }   }
    pthread_mutex_init(&cctx->jobCompleted_mutex, NULL);   /* Todo : check init function return */
    pthread_cond_init(&cctx->jobCompleted_cond, NULL);
    DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
    return cctx;
}
330
/* ZSTDMT_releaseAllJobResources() :
 * Ensure all workers are killed first.
 * Returns every buffer/CCtx held by job slots to their pools and zeroes the
 * job table, then marks all jobs completed. */
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
{
    unsigned jobID;
    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
        mtctx->jobs[jobID].src = g_nullBuffer;
        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
        mtctx->jobs[jobID].cctx = NULL;
    }
    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));   /* also clears completion flags */
    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
    mtctx->inBuff.buffer = g_nullBuffer;
    mtctx->allJobsCompleted = 1;
}
349
/* Free an MT context and everything it owns. Always returns 0.
 * NULL is accepted. */
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
{
    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
    POOL_free(mtctx->factory);   /* joins all workers, so job resources are no longer in use */
    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx);   /* stop workers first */
    ZSTDMT_freeBufferPool(mtctx->buffPool);   /* release job resources into pools first */
    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
    ZSTD_freeCDict(mtctx->cdict);
    ZSTD_freeCStream(mtctx->cstream);
    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
    pthread_cond_destroy(&mtctx->jobCompleted_cond);
    free(mtctx);
    return 0;
}
364
/* Set an MT-specific compression parameter.
 * Returns 0 on success, a ZSTD error code for unknown parameters.
 * NOTE(review): the parameter type is spelled `ZSDTMT_parameter` (sic) - it
 * must match the declaration in zstdmt_compress.h; confirm before renaming. */
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
{
    switch(parameter)
    {
    case ZSTDMT_p_sectionSize :
        mtctx->sectionSize = value;
        return 0;
    case ZSTDMT_p_overlapSectionLog :
        DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
        /* stored as a *reduction* log : 9 - value, clamped to 0 */
        mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
        return 0;
    default :
        return ERROR(compressionParameter_unsupported);
    }
}
380
381
382 /* ------------------------------------------ */
383 /* ===== Multi-threaded compression ===== */
384 /* ------------------------------------------ */
385
/* ZSTDMT_compressCCtx() :
 * One-pass parallel compression.
 * Splits src into up to nbThreads chunks, posts one compression job per chunk
 * to the thread pool, then collects the results in order and concatenates
 * them into dst.
 * @return : compressed size written into dst, or an error code (ZSTD_isError()) */
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                           void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize,
                           int compressionLevel)
{
    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
    /* each chunk targets ~4 windows of data */
    size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
    unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
    unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
    /* round chunk size up so the last chunk is not left tiny */
    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
    size_t remainingSrcSize = srcSize;
    const char* const srcStart = (const char*)src;
    size_t frameStartPos = 0;

    DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
    DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
    params.fParams.contentSizeFlag = 1;

    if (nbChunks==1) {   /* fallback to single-thread mode */
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
        return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    }

    /* ---- dispatch phase : one job per chunk ---- */
    {   unsigned u;
        for (u=0; u<nbChunks; u++) {
            size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
            /* chunk 0 compresses directly into dst; later chunks get a pool buffer */
            size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity;
            buffer_t const dstAsBuffer = { dst, dstCapacity };
            buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer;
            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);

            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
                /* allocation failure : record it as this job's result and stop dispatching */
                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
                mtctx->jobs[u].jobCompleted = 1;
                nbChunks = u+1;
                break;   /* let's wait for previous jobs to complete, but don't start new ones */
            }

            mtctx->jobs[u].srcStart = srcStart + frameStartPos;
            mtctx->jobs[u].srcSize = chunkSize;
            mtctx->jobs[u].fullFrameSize = srcSize;
            mtctx->jobs[u].params = params;
            mtctx->jobs[u].dstBuff = dstBuffer;
            mtctx->jobs[u].cctx = cctx;
            mtctx->jobs[u].firstChunk = (u==0);
            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
            mtctx->jobs[u].jobCompleted = 0;
            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;

            DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
            DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);

            frameStartPos += chunkSize;
            remainingSrcSize -= chunkSize;
    }   }
    /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */

    /* ---- collection phase : wait for each job in order, copy its output ---- */
    {   unsigned chunkID;
        size_t error = 0, dstPos = 0;
        for (chunkID=0; chunkID<nbChunks; chunkID++) {
            DEBUGLOG(3, "waiting for chunk %u ", chunkID);
            PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
            /* predicate loop : pthread_cond_wait can wake spuriously */
            while (mtctx->jobs[chunkID].jobCompleted==0) {
                DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
                pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
            }
            pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
            DEBUGLOG(3, "ready to write chunk %u ", chunkID);

            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
            mtctx->jobs[chunkID].cctx = NULL;
            mtctx->jobs[chunkID].srcStart = NULL;
            {   size_t const cSize = mtctx->jobs[chunkID].cSize;
                /* once an error is seen, keep draining jobs (to release resources) but stop writing */
                if (ZSTD_isError(cSize)) error = cSize;
                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
                if (chunkID) {   /* note : chunk 0 is already written directly into dst */
                    if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
                    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
                    mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
                }
                dstPos += cSize ;
            }
        }
        if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos);
        return error ? error : dstPos;
    }

}
477
478
479 /* ====================================== */
480 /* ======= Streaming API ======= */
481 /* ====================================== */
482
483 static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
484 while (zcs->doneJobID < zcs->nextJobID) {
485 unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
486 PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
487 while (zcs->jobs[jobID].jobCompleted==0) {
488 DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
489 pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
490 }
491 pthread_mutex_unlock(&zcs->jobCompleted_mutex);
492 zcs->doneJobID++;
493 }
494 }
495
496
/* ZSTDMT_initCStream_internal() :
 * (Re)starts a streaming compression session.
 * updateDict : if non-zero, replace the stored CDict with one built from (dict, dictSize);
 *              if zero, keep the previously stored dictionary (reset path).
 * pledgedSrcSize : optional, 0 == unknown.
 * @return : 0 on success, or an error code (memory_allocation) */
static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
                    const void* dict, size_t dictSize, unsigned updateDict,
                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_customMem const cmem = { NULL, NULL, NULL };
    DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
    /* single-thread : delegate entirely to the regular streaming API */
    if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
    if (zcs->allJobsCompleted == 0) {   /* previous job not correctly finished */
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        zcs->allJobsCompleted = 1;
    }
    zcs->params = params;
    if (updateDict) {
        ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
        if (dict && dictSize) {
            zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
            if (zcs->cdict == NULL) return ERROR(memory_allocation);
    }   }
    zcs->frameContentSize = pledgedSrcSize;
    /* overlapRLog >= 9 means "no overlap" ; otherwise overlap = window >> overlapRLog */
    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
    DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
    /* section size : explicit setting if any, else ~4 windows; clamped below by
     * ZSTDMT_SECTION_SIZE_MIN and by the overlap size */
    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
    DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
    zcs->marginSize = zcs->targetSectionSize >> 2;
    /* input buffer must hold : overlap (dict preload) + one section + margin */
    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
    /* reset per-frame state */
    zcs->inBuff.filled = 0;
    zcs->dictSize = 0;
    zcs->doneJobID = 0;
    zcs->nextJobID = 0;
    zcs->frameEnded = 0;
    zcs->allJobsCompleted = 0;
    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
    return 0;
}
537
538 size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
539 const void* dict, size_t dictSize,
540 ZSTD_parameters params, unsigned long long pledgedSrcSize)
541 {
542 return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
543 }
544
545 /* ZSTDMT_resetCStream() :
546 * pledgedSrcSize is optional and can be zero == unknown */
547 size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
548 {
549 if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
550 return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
551 }
552
553 size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
554 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
555 return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
556 }
557
558
/* ZSTDMT_createCompressionJob() :
 * Hands the currently-buffered input (dict preload + srcSize bytes) to a new
 * worker job, then prepares a fresh input buffer pre-filled with the overlap
 * region (unless endFrame).
 * endFrame : non-zero when this job is the last section of the frame.
 * @return : 0 on success, or an error code (memory_allocation) */
static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
{
    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;

    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
        /* allocation failure : mark slot completed so the drain below terminates,
         * then release everything */
        zcs->jobs[jobID].jobCompleted = 1;
        zcs->nextJobID++;
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        return ERROR(memory_allocation);
    }

    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
    zcs->jobs[jobID].src = zcs->inBuff.buffer;
    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
    zcs->jobs[jobID].srcSize = srcSize;
    zcs->jobs[jobID].dictSize = zcs->dictSize;   /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
    zcs->jobs[jobID].params = zcs->params;
    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;   /* do not calculate checksum within sections, just keep it in header for first section */
    /* only the first job may use the stored dictionary */
    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
    zcs->jobs[jobID].dstBuff = dstBuffer;
    zcs->jobs[jobID].cctx = cctx;
    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
    zcs->jobs[jobID].lastChunk = endFrame;
    zcs->jobs[jobID].jobCompleted = 0;
    zcs->jobs[jobID].dstFlushed = 0;
    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;

    /* get a new buffer for next input */
    if (!endFrame) {
        /* keep up to targetDictSize trailing bytes as overlap for the next section */
        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
            zcs->jobs[jobID].jobCompleted = 1;
            zcs->nextJobID++;
            ZSTDMT_waitForAllJobsCompleted(zcs);
            ZSTDMT_releaseAllJobResources(zcs);
            return ERROR(memory_allocation);
        }
        DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
        /* keep : overlap tail + any bytes beyond the compressed section */
        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
        DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
        /* note : source buffer still belongs to the posted job; memmove copies
         * out of it into the freshly-acquired buffer */
        memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
        DEBUGLOG(5, "new inBuff pre-filled");
        zcs->dictSize = newDictSize;
    } else {
        /* frame is ending : no further input buffer is needed */
        zcs->inBuff.buffer = g_nullBuffer;
        zcs->inBuff.filled = 0;
        zcs->dictSize = 0;
        zcs->frameEnded = 1;
        if (zcs->nextJobID == 0)
            zcs->params.fParams.checksumFlag = 0;   /* single chunk : checksum is calculated directly within worker thread */
    }

    DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
    zcs->nextJobID++;
    return 0;
}
623
624
/* ZSTDMT_flushNextJob() :
 * Flushes (part of) the oldest unflushed job's compressed output into `output`.
 * output : will be updated with amount of data flushed .
 * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
 * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
{
    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
    while (zcs->jobs[wJobID].jobCompleted==0) {
        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }   /* nothing ready to be flushed => skip */
        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);   /* block when nothing available to flush */
    }
    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
    /* compression job completed : output can be flushed */
    {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];   /* local copy; persistent fields written back explicitly */
        if (!job.jobScanned) {
            /* first visit of this completed job : check result, release its cctx
             * and source buffer, fold its input into the frame checksum */
            if (ZSTD_isError(job.cSize)) {
                DEBUGLOG(5, "compression error detected ");
                ZSTDMT_waitForAllJobsCompleted(zcs);
                ZSTDMT_releaseAllJobResources(zcs);
                return job.cSize;
            }
            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
            zcs->jobs[wJobID].cctx = NULL;
            DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
            if (zcs->params.fParams.checksumFlag) {
                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
                if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {   /* write checksum at end of last section */
                    U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
                    DEBUGLOG(4, "writing checksum : %08X \n", checksum);
                    MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
                    job.cSize += 4;
                    zcs->jobs[wJobID].cSize += 4;
            }   }
            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
            zcs->jobs[wJobID].srcStart = NULL;
            zcs->jobs[wJobID].src = g_nullBuffer;
            zcs->jobs[wJobID].jobScanned = 1;   /* skip the above on subsequent partial flushes */
        }
        /* copy as much compressed output as the caller's buffer accepts */
        {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
            DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
            output->pos += toWrite;
            job.dstFlushed += toWrite;
        }
        if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
            zcs->jobs[wJobID].jobCompleted = 0;
            zcs->doneJobID++;
        } else {
            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* persist partial-flush progress */
        }
        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
        return 0;   /* everything flushed */
}   }
686
687
688 size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
689 {
690 size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
691 if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */
692 if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
693
694 /* fill input buffer */
695 { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
696 memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
697 input->pos += toLoad;
698 zcs->inBuff.filled += toLoad;
699 }
700
701 if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
702 && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */
703 CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
704 }
705
706 /* check for data to flush */
707 CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */
708
709 /* recommended next input size : fill current input buffer */
710 return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
711 }
712
713
/* ZSTDMT_flushStream_internal() :
 * Compresses whatever input remains buffered (if any, or if the frame must be
 * ended), then flushes the oldest job, blocking until data is available.
 * endFrame : non-zero to close the current frame.
 * @return : see ZSTDMT_flushNextJob() */
static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
{
    /* bytes buffered beyond the dictionary/overlap preload */
    size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;

    if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
    /* post a (possibly empty) final job, if the round buffer has room */
    if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
       && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
        CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
    }

    /* check if there is any data available to flush */
    DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
    return ZSTDMT_flushNextJob(zcs, output, 1);
}
728
729
730 size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
731 {
732 if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
733 return ZSTDMT_flushStream_internal(zcs, output, 0);
734 }
735
736 size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
737 {
738 if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
739 return ZSTDMT_flushStream_internal(zcs, output, 1);
740 }
@@ -0,0 +1,78 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 #ifndef ZSTDMT_COMPRESS_H
11 #define ZSTDMT_COMPRESS_H
12
13 #if defined (__cplusplus)
14 extern "C" {
15 #endif
16
17
18 /* Note : All prototypes defined in this file shall be considered experimental.
19 * There is no guarantee of API continuity (yet) on any of these prototypes */
20
21 /* === Dependencies === */
22 #include <stddef.h> /* size_t */
23 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
24 #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
25
26
27 /* === Simple one-pass functions === */
28
29 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
30 ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
31 ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);
32
33 ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
34 void* dst, size_t dstCapacity,
35 const void* src, size_t srcSize,
36 int compressionLevel);
37
38
39 /* === Streaming functions === */
40
41 ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
42 ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
43
44 ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
45
46 ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
47 ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
48
49
50 /* === Advanced functions and parameters === */
51
52 #ifndef ZSTDMT_SECTION_SIZE_MIN
53 # define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
54 #endif
55
56 ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */
57 ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
58
59 /* ZSDTMT_parameter :
60 * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
61 typedef enum {
62 ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
63 ZSTDMT_p_overlapSectionLog /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
64 } ZSDTMT_parameter;
65
66 /* ZSTDMT_setMTCtxParameter() :
67 * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
68 * The function must be called typically after ZSTD_createCCtx().
69 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
70 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
71 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);
72
73
74 #if defined (__cplusplus)
75 }
76 #endif
77
78 #endif /* ZSTDMT_COMPRESS_H */
This diff has been collapsed as it changes many lines, (1021 lines changed) Show them Hide them
@@ -0,0 +1,1021 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 /*-*************************************
11 * Dependencies
12 ***************************************/
13 #include <stdio.h> /* fprintf */
14 #include <stdlib.h> /* malloc, free, qsort */
15 #include <string.h> /* memset */
16 #include <time.h> /* clock */
17
18 #include "mem.h" /* read */
19 #include "pool.h"
20 #include "threading.h"
21 #include "zstd_internal.h" /* includes zstd.h */
22 #ifndef ZDICT_STATIC_LINKING_ONLY
23 #define ZDICT_STATIC_LINKING_ONLY
24 #endif
25 #include "zdict.h"
26
27 /*-*************************************
28 * Constants
29 ***************************************/
30 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
31
32 /*-*************************************
33 * Console display
34 ***************************************/
35 static int g_displayLevel = 2;
36 #define DISPLAY(...) \
37 { \
38 fprintf(stderr, __VA_ARGS__); \
39 fflush(stderr); \
40 }
41 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
42 if (displayLevel >= l) { \
43 DISPLAY(__VA_ARGS__); \
44 } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
45 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
46
47 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
48 if (displayLevel >= l) { \
49 if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
50 g_time = clock(); \
51 DISPLAY(__VA_ARGS__); \
52 if (displayLevel >= 4) \
53 fflush(stdout); \
54 } \
55 }
56 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
57 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
58 static clock_t g_time = 0;
59
60 /*-*************************************
61 * Hash table
62 ***************************************
63 * A small specialized hash map for storing activeDmers.
64 * The map does not resize, so if it becomes full it will loop forever.
65 * Thus, the map must be large enough to store every value.
66 * The map implements linear probing and keeps its load less than 0.5.
67 */
68
#define MAP_EMPTY_VALUE ((U32)-1)
/* One slot of the hash map : a key and its associated counter.
 * value == MAP_EMPTY_VALUE marks an unused slot. */
typedef struct COVER_map_pair_t_s {
  U32 key;
  U32 value;
} COVER_map_pair_t;

/* Fixed-size open-addressing hash map (linear probing), size = 1 << sizeLog. */
typedef struct COVER_map_s {
  COVER_map_pair_t *data; /* slot array of `size` entries */
  U32 sizeLog;
  U32 size;               /* always a power of two */
  U32 sizeMask;           /* size - 1, for cheap modular probing */
} COVER_map_t;
81
82 /**
83 * Clear the map.
84 */
static void COVER_map_clear(COVER_map_t *map) {
  /* memset truncates MAP_EMPTY_VALUE ((U32)-1) to the byte 0xFF; filling with
   * 0xFF sets every slot's value (and key) to MAP_EMPTY_VALUE, i.e. empty. */
  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
}
88
89 /**
90 * Initializes a map of the given size.
91 * Returns 1 on success and 0 on failure.
92 * The map must be destroyed with COVER_map_destroy().
93 * The map is only guaranteed to be large enough to hold size elements.
94 */
95 static int COVER_map_init(COVER_map_t *map, U32 size) {
96 map->sizeLog = ZSTD_highbit32(size) + 2;
97 map->size = (U32)1 << map->sizeLog;
98 map->sizeMask = map->size - 1;
99 map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
100 if (!map->data) {
101 map->sizeLog = 0;
102 map->size = 0;
103 return 0;
104 }
105 COVER_map_clear(map);
106 return 1;
107 }
108
109 /**
110 * Internal hash function
111 */
112 static const U32 prime4bytes = 2654435761U;
113 static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
114 return (key * prime4bytes) >> (32 - map->sizeLog);
115 }
116
117 /**
118 * Helper function that returns the index that a key should be placed into.
119 */
120 static U32 COVER_map_index(COVER_map_t *map, U32 key) {
121 const U32 hash = COVER_map_hash(map, key);
122 U32 i;
123 for (i = hash;; i = (i + 1) & map->sizeMask) {
124 COVER_map_pair_t *pos = &map->data[i];
125 if (pos->value == MAP_EMPTY_VALUE) {
126 return i;
127 }
128 if (pos->key == key) {
129 return i;
130 }
131 }
132 }
133
134 /**
135 * Returns the pointer to the value for key.
136 * If key is not in the map, it is inserted and the value is set to 0.
137 * The map must not be full.
138 */
139 static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
140 COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
141 if (pos->value == MAP_EMPTY_VALUE) {
142 pos->key = key;
143 pos->value = 0;
144 }
145 return &pos->value;
146 }
147
148 /**
149 * Deletes key from the map if present.
150 */
/* Deletes key from the map if present, using backward-shift deletion :
 * instead of leaving a tombstone, later entries in the probe chain are shifted
 * back so linear probing still finds every remaining key. */
static void COVER_map_remove(COVER_map_t *map, U32 key) {
  U32 i = COVER_map_index(map, key);
  COVER_map_pair_t *del = &map->data[i];
  /* `shift` = distance between the hole (del) and the slot being examined */
  U32 shift = 1;
  if (del->value == MAP_EMPTY_VALUE) {
    return;   /* key not present */
  }
  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
    COVER_map_pair_t *const pos = &map->data[i];
    /* If the position is empty we are done */
    if (pos->value == MAP_EMPTY_VALUE) {
      del->value = MAP_EMPTY_VALUE;
      return;
    }
    /* If pos can be moved to del do so */
    /* (safe iff pos's probe chain started at or before the hole,
     *  i.e. its displacement from its hash is >= shift) */
    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
      del->key = pos->key;
      del->value = pos->value;
      del = pos;   /* pos becomes the new hole */
      shift = 1;
    } else {
      ++shift;
    }
  }
}
176
177 /**
178 * Destroyes a map that is inited with COVER_map_init().
179 */
180 static void COVER_map_destroy(COVER_map_t *map) {
181 if (map->data) {
182 free(map->data);
183 }
184 map->data = NULL;
185 map->size = 0;
186 }
187
188 /*-*************************************
189 * Context
190 ***************************************/
191
/* Shared state for one dictionary-training pass (fixed dmer length `d`). */
typedef struct {
  const BYTE *samples;        /* all training samples, concatenated */
  size_t *offsets;            /* sample boundary offsets within `samples`;
                               * searched (binary search) to map a position to
                               * its sample — see COVER_group. NOTE(review):
                               * exact start/end convention set by ctx init,
                               * which is outside this view */
  const size_t *samplesSizes; /* size of each individual sample */
  size_t nbSamples;
  U32 *suffix;                /* suffix array of dmer positions; each group's
                               * head is later overwritten with the group's
                               * frequency (see COVER_group) */
  size_t suffixSize;
  U32 *freqs;                 /* frequency per dmerId */
  U32 *dmerAt;                /* dmerAt[pos] = dmerId of the dmer at pos */
  unsigned d;                 /* dmer length in bytes (memcmp width) */
} COVER_ctx_t;
203
204 /* We need a global context for qsort... */
205 static COVER_ctx_t *g_ctx = NULL;
206
207 /*-*************************************
208 * Helper functions
209 ***************************************/
210
211 /**
212 * Returns the sum of the sample sizes.
213 */
/**
 * Returns the sum of the sample sizes.
 */
static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  const size_t *const end = samplesSizes + nbSamples;
  size_t total = 0;
  while (samplesSizes < end) {
    total += *samplesSizes++;
  }
  return total;
}
222
223 /**
224 * Returns -1 if the dmer at lp is less than the dmer at rp.
225 * Return 0 if the dmers at lp and rp are equal.
226 * Returns 1 if the dmer at lp is greater than the dmer at rp.
227 */
228 static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
229 const U32 lhs = *(const U32 *)lp;
230 const U32 rhs = *(const U32 *)rp;
231 return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
232 }
233
234 /**
235 * Same as COVER_cmp() except ties are broken by pointer value
236 * NOTE: g_ctx must be set to call this function. A global is required because
237 * qsort doesn't take an opaque pointer.
238 */
239 static int COVER_strict_cmp(const void *lp, const void *rp) {
240 int result = COVER_cmp(g_ctx, lp, rp);
241 if (result == 0) {
242 result = lp < rp ? -1 : 1;
243 }
244 return result;
245 }
246
247 /**
248 * Returns the first pointer in [first, last) whose element does not compare
249 * less than value. If no such element exists it returns last.
250 */
/**
 * Returns the first pointer in [first, last) whose element does not compare
 * less than value. If no such element exists it returns last.
 * (classic binary-search lower bound over a sorted range)
 */
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
                                       size_t value) {
  while (first < last) {
    const size_t *const mid = first + (last - first) / 2;
    if (*mid < value) {
      first = mid + 1;   /* answer lies strictly after mid */
    } else {
      last = mid;        /* mid is a candidate */
    }
  }
  return first;
}
267
268 /**
269 * Generic groupBy function.
270 * Groups an array sorted by cmp into groups with equivalent values.
271 * Calls grp for each group.
272 */
273 static void
274 COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
275 int (*cmp)(COVER_ctx_t *, const void *, const void *),
276 void (*grp)(COVER_ctx_t *, const void *, const void *)) {
277 const BYTE *ptr = (const BYTE *)data;
278 size_t num = 0;
279 while (num < count) {
280 const BYTE *grpEnd = ptr + size;
281 ++num;
282 while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
283 grpEnd += size;
284 ++num;
285 }
286 grp(ctx, ptr, grpEnd);
287 ptr = grpEnd;
288 }
289 }
290
291 /*-*************************************
292 * Cover functions
293 ***************************************/
294
295 /**
296 * Called on each group of positions with the same dmer.
297 * Counts the frequency of each dmer and saves it in the suffix array.
298 * Fills `ctx->dmerAt`.
299 */
static void COVER_group(COVER_ctx_t *ctx, const void *group,
                        const void *groupEnd) {
  /* The group consists of all the positions with the same first d bytes. */
  const U32 *grpPtr = (const U32 *)group;
  const U32 *grpEnd = (const U32 *)groupEnd;
  /* The dmerId is how we will reference this dmer.
   * This allows us to map the whole dmer space to a much smaller space, the
   * size of the suffix array.
   */
  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
  /* Count the number of samples this dmer shows up in */
  U32 freq = 0;
  /* Details */
  const size_t *curOffsetPtr = ctx->offsets;
  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
   * different sample than the last.
   */
  size_t curSampleEnd = ctx->offsets[0];
  for (; grpPtr != grpEnd; ++grpPtr) {
    /* Save the dmerId for this position so we can get back to it. */
    ctx->dmerAt[*grpPtr] = dmerId;
    /* Dictionaries only help for the first reference to the dmer.
     * After that zstd can reference the match from the previous reference.
     * So only count each dmer once for each sample it is in.
     */
    if (*grpPtr < curSampleEnd) {
      continue;
    }
    freq += 1;
    /* Binary search to find the end of the sample *grpPtr is in.
     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
     * search because the loop is over.
     * NOTE(review): relies on ctx->offsets being sorted ascending and on the
     * group positions (*grpPtr) being in increasing order, so the search can
     * resume from curOffsetPtr.
     */
    if (grpPtr + 1 != grpEnd) {
      const size_t *sampleEndPtr =
          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
      curSampleEnd = *sampleEndPtr;
      curOffsetPtr = sampleEndPtr + 1;
    }
  }
  /* At this point we are never going to look at this segment of the suffix
   * array again. We take advantage of this fact to save memory.
   * We store the frequency of the dmer in the first position of the group,
   * which is dmerId.
   */
  ctx->suffix[dmerId] = freq;
}
348
349 /**
350 * A segment is a range in the source as well as the score of the segment.
351 */
typedef struct {
  U32 begin;    /* first position of the segment (inclusive) */
  U32 end;      /* one past the last position (exclusive) */
  double score; /* sum of frequencies of the distinct dmers covered
                 * (computed by COVER_selectSegment) */
} COVER_segment_t;
357
358 /**
359 * Selects the best segment in an epoch.
360 * Segments of are scored according to the function:
361 *
362 * Let F(d) be the frequency of dmer d.
363 * Let S_i be the dmer at position i of segment S which has length k.
364 *
365 * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
366 *
367 * Once the dmer d is in the dictionay we set F(d) = 0.
368 */
/* Sliding-window scan of [begin, end) : keeps a multiset of the dmers inside a
 * k-sized window (activeDmers), scoring each distinct dmer once, and returns
 * the highest-scoring window, trimmed of zero-frequency edges. Frequencies of
 * the chosen dmers are zeroed in `freqs` so later epochs don't re-count them. */
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                           COVER_map_t *activeDmers, U32 begin,
                                           U32 end, COVER_params_t parameters) {
  /* Constants */
  const U32 k = parameters.k;
  const U32 d = parameters.d;
  const U32 dmersInK = k - d + 1;
  /* Try each segment (activeSegment) and save the best (bestSegment) */
  COVER_segment_t bestSegment = {0, 0, 0};
  COVER_segment_t activeSegment;
  /* Reset the activeDmers in the segment */
  COVER_map_clear(activeDmers);
  /* The activeSegment starts at the beginning of the epoch. */
  activeSegment.begin = begin;
  activeSegment.end = begin;
  activeSegment.score = 0;
  /* Slide the activeSegment through the whole epoch.
   * Save the best segment in bestSegment.
   */
  while (activeSegment.end < end) {
    /* The dmerId for the dmer at the next position */
    U32 newDmer = ctx->dmerAt[activeSegment.end];
    /* The entry in activeDmers for this dmerId */
    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
    /* If the dmer isn't already present in the segment add its score. */
    if (*newDmerOcc == 0) {
      /* The paper suggest using the L-0.5 norm, but experiments show that it
       * doesn't help.
       */
      activeSegment.score += freqs[newDmer];
    }
    /* Add the dmer to the segment */
    activeSegment.end += 1;
    *newDmerOcc += 1;

    /* If the window is now too large, drop the first position */
    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
      U32 delDmer = ctx->dmerAt[activeSegment.begin];
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
      activeSegment.begin += 1;
      *delDmerOcc -= 1;
      /* If this is the last occurence of the dmer, subtract its score */
      if (*delDmerOcc == 0) {
        COVER_map_remove(activeDmers, delDmer);
        activeSegment.score -= freqs[delDmer];
      }
    }

    /* If this segment is the best so far save it */
    if (activeSegment.score > bestSegment.score) {
      bestSegment = activeSegment;
    }
  }
  {
    /* Trim off the zero frequency head and tail from the segment. */
    /* (positions whose dmer frequency is already 0 contribute nothing) */
    U32 newBegin = bestSegment.end;
    U32 newEnd = bestSegment.begin;
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      U32 freq = freqs[ctx->dmerAt[pos]];
      if (freq != 0) {
        newBegin = MIN(newBegin, pos);
        newEnd = pos + 1;
      }
    }
    bestSegment.begin = newBegin;
    bestSegment.end = newEnd;
  }
  {
    /* Zero out the frequency of each dmer covered by the chosen segment. */
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      freqs[ctx->dmerAt[pos]] = 0;
    }
  }
  return bestSegment;
}
446
447 /**
448 * Check the validity of the parameters.
449 * Returns non-zero if the parameters are valid and 0 otherwise.
450 */
451 static int COVER_checkParameters(COVER_params_t parameters) {
452 /* k and d are required parameters */
453 if (parameters.d == 0 || parameters.k == 0) {
454 return 0;
455 }
456 /* d <= k */
457 if (parameters.d > parameters.k) {
458 return 0;
459 }
460 return 1;
461 }
462
463 /**
464 * Clean up a context initialized with `COVER_ctx_init()`.
465 */
466 static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
467 if (!ctx) {
468 return;
469 }
470 if (ctx->suffix) {
471 free(ctx->suffix);
472 ctx->suffix = NULL;
473 }
474 if (ctx->freqs) {
475 free(ctx->freqs);
476 ctx->freqs = NULL;
477 }
478 if (ctx->dmerAt) {
479 free(ctx->dmerAt);
480 ctx->dmerAt = NULL;
481 }
482 if (ctx->offsets) {
483 free(ctx->offsets);
484 ctx->offsets = NULL;
485 }
486 }
487
/**
 * Prepare a context for dictionary building.
 * The context is only dependent on the parameter `d` and can be used multiple
 * times.
 * Returns 1 on success or zero on error.
 * The context must be destroyed with `COVER_ctx_destroy()`.
 */
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                          const size_t *samplesSizes, unsigned nbSamples,
                          unsigned d) {
  const BYTE *const samples = (const BYTE *)samplesBuffer;
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
  /* Checks: need at least one full dmer and a bounded total size */
  if (totalSamplesSize < d ||
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
                 (COVER_MAX_SAMPLES_SIZE >> 20));
    return 0;
  }
  /* Zero the context so COVER_ctx_destroy() is safe on any failure path */
  memset(ctx, 0, sizeof(*ctx));
  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
               (U32)totalSamplesSize);
  ctx->samples = samples;
  ctx->samplesSizes = samplesSizes;
  ctx->nbSamples = nbSamples;
  /* Partial suffix array: one entry per dmer starting position */
  ctx->suffixSize = totalSamplesSize - d + 1;
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* Maps index to the dmerID */
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* The offsets of each file */
  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
    COVER_ctx_destroy(ctx);
    return 0;
  }
  ctx->freqs = NULL;
  ctx->d = d;

  /* Fill offsets from the samplesSizes: offsets[i] is the start of sample i,
   * offsets[nbSamples] is the total size (one-past-the-end). */
  {
    U32 i;
    ctx->offsets[0] = 0;
    for (i = 1; i <= nbSamples; ++i) {
      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
    }
  }
  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
  {
    /* suffix is a partial suffix array.
     * It only sorts suffixes by their first parameters.d bytes.
     * The sort is stable, so each dmer group is sorted by position in input.
     */
    U32 i;
    for (i = 0; i < ctx->suffixSize; ++i) {
      ctx->suffix[i] = i;
    }
    /* qsort doesn't take an opaque pointer, so pass as a global.
     * NOTE(review): this makes initialization non-reentrant — confirm callers
     * never run COVER_ctx_init() concurrently. */
    g_ctx = ctx;
    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
  }
  DISPLAYLEVEL(2, "Computing frequencies\n");
  /* For each dmer group (group of positions with the same first d bytes):
   * 1. For each position we set dmerAt[position] = dmerID. The dmerID is
   *    (groupBeginPtr - suffix). This allows us to go from position to
   *    dmerID so we can look up values in freq.
   * 2. We calculate how many samples the dmer occurs in and save it in
   *    freqs[dmerId].
   */
  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
                &COVER_group);
  /* The suffix array is no longer needed; its storage is reused for freqs
   * (filled in by COVER_group above). */
  ctx->freqs = ctx->suffix;
  ctx->suffix = NULL;
  return 1;
}
565
566 /**
567 * Given the prepared context build the dictionary.
568 */
569 static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
570 COVER_map_t *activeDmers, void *dictBuffer,
571 size_t dictBufferCapacity,
572 COVER_params_t parameters) {
573 BYTE *const dict = (BYTE *)dictBuffer;
574 size_t tail = dictBufferCapacity;
575 /* Divide the data up into epochs of equal size.
576 * We will select at least one segment from each epoch.
577 */
578 const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
579 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
580 size_t epoch;
581 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
582 epochSize);
583 /* Loop through the epochs until there are no more segments or the dictionary
584 * is full.
585 */
586 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
587 const U32 epochBegin = (U32)(epoch * epochSize);
588 const U32 epochEnd = epochBegin + epochSize;
589 size_t segmentSize;
590 /* Select a segment */
591 COVER_segment_t segment = COVER_selectSegment(
592 ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
593 /* Trim the segment if necessary and if it is empty then we are done */
594 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
595 if (segmentSize == 0) {
596 break;
597 }
598 /* We fill the dictionary from the back to allow the best segments to be
599 * referenced with the smallest offsets.
600 */
601 tail -= segmentSize;
602 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
603 DISPLAYUPDATE(
604 2, "\r%u%% ",
605 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
606 }
607 DISPLAYLEVEL(2, "\r%79s\r", "");
608 return tail;
609 }
610
611 /**
612 * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
613 * dictionary.
614 */
615 static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
616 ZDICT_params_t zdictParams;
617 memset(&zdictParams, 0, sizeof(zdictParams));
618 zdictParams.notificationLevel = 1;
619 zdictParams.dictID = parameters.dictID;
620 zdictParams.compressionLevel = parameters.compressionLevel;
621 return zdictParams;
622 }
623
/**
 * Constructs a dictionary using a heuristic based on the following paper:
 *
 * Liao, Petri, Moffat, Wirth
 * Effective Construction of Relative Lempel-Ziv Dictionaries
 * Published in WWW 2016.
 *
 * Fills `dictBuffer` and returns the dictionary size, or a zstd error code
 * (from ZDICT_finalizeDictionary) on failure.
 */
ZDICTLIB_API size_t COVER_trainFromBuffer(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
  BYTE *const dict = (BYTE *)dictBuffer;
  COVER_ctx_t ctx;
  COVER_map_t activeDmers;
  /* Checks */
  if (!COVER_checkParameters(parameters)) {
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }
  /* Initialize global data */
  g_displayLevel = parameters.notificationLevel;
  /* Initialize context and activeDmers */
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                      parameters.d)) {
    return ERROR(GENERIC);
  }
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    COVER_ctx_destroy(&ctx);
    return ERROR(GENERIC);
  }

  DISPLAYLEVEL(2, "Building dictionary\n");
  {
    /* Fill the back of the buffer with segments, then let ZDICT write the
     * entropy-table header in front of them. */
    const size_t tail =
        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
                              dictBufferCapacity, parameters);
    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    const size_t dictionarySize = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        samplesBuffer, samplesSizes, nbSamples, zdictParams);
    /* On error, dictionarySize is an error code and is returned as-is.
     * NOTE(review): ZSTD_isError is used on a ZDICT result — presumably the
     * two share an error code space; confirm against zdict.h. */
    if (!ZSTD_isError(dictionarySize)) {
      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                   (U32)dictionarySize);
    }
    COVER_ctx_destroy(&ctx);
    COVER_map_destroy(&activeDmers);
    return dictionarySize;
  }
}
682
/**
 * COVER_best_t is used for two purposes:
 * 1. Synchronizing threads.
 * 2. Saving the best parameters and dictionary.
 *
 * All of the methods except COVER_best_init() are thread safe if zstd is
 * compiled with multithreaded support.
 */
typedef struct COVER_best_s {
  pthread_mutex_t mutex;     /* Guards every field below */
  pthread_cond_t cond;       /* Broadcast when liveJobs drops to 0 */
  size_t liveJobs;           /* Number of in-flight parameter trials */
  void *dict;                /* Best dictionary so far (malloc'd, owned) */
  size_t dictSize;           /* Size in bytes of `dict` */
  COVER_params_t parameters; /* Parameters that produced `dict` */
  size_t compressedSize;     /* Total compressed size achieved by `dict` */
} COVER_best_t;
700
701 /**
702 * Initialize the `COVER_best_t`.
703 */
704 static void COVER_best_init(COVER_best_t *best) {
705 if (!best) {
706 return;
707 }
708 pthread_mutex_init(&best->mutex, NULL);
709 pthread_cond_init(&best->cond, NULL);
710 best->liveJobs = 0;
711 best->dict = NULL;
712 best->dictSize = 0;
713 best->compressedSize = (size_t)-1;
714 memset(&best->parameters, 0, sizeof(best->parameters));
715 }
716
717 /**
718 * Wait until liveJobs == 0.
719 */
720 static void COVER_best_wait(COVER_best_t *best) {
721 if (!best) {
722 return;
723 }
724 pthread_mutex_lock(&best->mutex);
725 while (best->liveJobs != 0) {
726 pthread_cond_wait(&best->cond, &best->mutex);
727 }
728 pthread_mutex_unlock(&best->mutex);
729 }
730
731 /**
732 * Call COVER_best_wait() and then destroy the COVER_best_t.
733 */
734 static void COVER_best_destroy(COVER_best_t *best) {
735 if (!best) {
736 return;
737 }
738 COVER_best_wait(best);
739 if (best->dict) {
740 free(best->dict);
741 }
742 pthread_mutex_destroy(&best->mutex);
743 pthread_cond_destroy(&best->cond);
744 }
745
746 /**
747 * Called when a thread is about to be launched.
748 * Increments liveJobs.
749 */
750 static void COVER_best_start(COVER_best_t *best) {
751 if (!best) {
752 return;
753 }
754 pthread_mutex_lock(&best->mutex);
755 ++best->liveJobs;
756 pthread_mutex_unlock(&best->mutex);
757 }
758
759 /**
760 * Called when a thread finishes executing, both on error or success.
761 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
762 * If this dictionary is the best so far save it and its parameters.
763 */
764 static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
765 COVER_params_t parameters, void *dict,
766 size_t dictSize) {
767 if (!best) {
768 return;
769 }
770 {
771 size_t liveJobs;
772 pthread_mutex_lock(&best->mutex);
773 --best->liveJobs;
774 liveJobs = best->liveJobs;
775 /* If the new dictionary is better */
776 if (compressedSize < best->compressedSize) {
777 /* Allocate space if necessary */
778 if (!best->dict || best->dictSize < dictSize) {
779 if (best->dict) {
780 free(best->dict);
781 }
782 best->dict = malloc(dictSize);
783 if (!best->dict) {
784 best->compressedSize = ERROR(GENERIC);
785 best->dictSize = 0;
786 return;
787 }
788 }
789 /* Save the dictionary, parameters, and size */
790 memcpy(best->dict, dict, dictSize);
791 best->dictSize = dictSize;
792 best->parameters = parameters;
793 best->compressedSize = compressedSize;
794 }
795 pthread_mutex_unlock(&best->mutex);
796 if (liveJobs == 0) {
797 pthread_cond_broadcast(&best->cond);
798 }
799 }
800 }
801
/**
 * Parameters for COVER_tryParameters().
 * Heap-allocated by the caller; ownership transfers to COVER_tryParameters(),
 * which frees it.
 */
typedef struct COVER_tryParameters_data_s {
  const COVER_ctx_t *ctx;     /* Shared, read-only training context (not owned) */
  COVER_best_t *best;         /* Where results are reported (not owned) */
  size_t dictBufferCapacity;  /* Maximum dictionary size to build */
  COVER_params_t parameters;  /* The specific k/d combination to try */
} COVER_tryParameters_data_t;
811
/**
 * Tries a set of parameters and updates the COVER_best_t with the results.
 * This function is thread safe if zstd is compiled with multithreaded support.
 * It takes its parameters as an *OWNING* opaque pointer to support threading.
 */
static void COVER_tryParameters(void *opaque) {
  /* Save parameters as local variables */
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
  const COVER_ctx_t *const ctx = data->ctx;
  const COVER_params_t parameters = data->parameters;
  size_t dictBufferCapacity = data->dictBufferCapacity;
  /* Start in the error state: any goto _cleanup before compression reports
   * ERROR(GENERIC) to COVER_best_finish(). */
  size_t totalCompressedSize = ERROR(GENERIC);
  /* Allocate space for hash table, dict, and freqs */
  COVER_map_t activeDmers;
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    /* NOTE(review): _cleanup calls COVER_map_destroy on a map whose init
     * failed — presumably that is safe; confirm against the map impl. */
    goto _cleanup;
  }
  if (!dict || !freqs) {
    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
    goto _cleanup;
  }
  /* Copy the frequencies because we need to modify them */
  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
  /* Build the dictionary */
  {
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                              dictBufferCapacity, parameters);
    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    /* dictBufferCapacity is reused to hold the finalized dictionary size */
    dictBufferCapacity = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
    if (ZDICT_isError(dictBufferCapacity)) {
      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
      goto _cleanup;
    }
  }
  /* Check total compressed size: compress every sample with the candidate
   * dictionary and sum the sizes — this is the metric being minimized. */
  {
    /* Pointers */
    ZSTD_CCtx *cctx;
    ZSTD_CDict *cdict;
    void *dst;
    /* Local variables */
    size_t dstCapacity;
    size_t i;
    /* Allocate dst with enough space to compress the maximum sized sample */
    {
      size_t maxSampleSize = 0;
      for (i = 0; i < ctx->nbSamples; ++i) {
        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
      }
      dstCapacity = ZSTD_compressBound(maxSampleSize);
      dst = malloc(dstCapacity);
    }
    /* Create the cctx and cdict */
    cctx = ZSTD_createCCtx();
    cdict =
        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
    if (!dst || !cctx || !cdict) {
      goto _compressCleanup;
    }
    /* Compress each sample and sum their sizes (or error) */
    totalCompressedSize = 0;
    for (i = 0; i < ctx->nbSamples; ++i) {
      const size_t size = ZSTD_compress_usingCDict(
          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
          ctx->samplesSizes[i], cdict);
      if (ZSTD_isError(size)) {
        totalCompressedSize = ERROR(GENERIC);
        goto _compressCleanup;
      }
      totalCompressedSize += size;
    }
  _compressCleanup:
    /* ZSTD_free* handle NULL, per the zstd API. */
    ZSTD_freeCCtx(cctx);
    ZSTD_freeCDict(cdict);
    if (dst) {
      free(dst);
    }
  }

_cleanup:
  /* Always report, even on error, so liveJobs is decremented and waiters in
   * COVER_best_wait() can make progress. */
  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
                    dictBufferCapacity);
  free(data);
  COVER_map_destroy(&activeDmers);
  if (dict) {
    free(dict);
  }
  if (freqs) {
    free(freqs);
  }
}
908
909 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
910 size_t dictBufferCapacity,
911 const void *samplesBuffer,
912 const size_t *samplesSizes,
913 unsigned nbSamples,
914 COVER_params_t *parameters) {
915 /* constants */
916 const unsigned nbThreads = parameters->nbThreads;
917 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
918 const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
919 const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
920 const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
921 const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
922 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
923 const unsigned kIterations =
924 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
925 /* Local variables */
926 const int displayLevel = parameters->notificationLevel;
927 unsigned iteration = 1;
928 unsigned d;
929 unsigned k;
930 COVER_best_t best;
931 POOL_ctx *pool = NULL;
932 /* Checks */
933 if (kMinK < kMaxD || kMaxK < kMinK) {
934 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
935 return ERROR(GENERIC);
936 }
937 if (nbSamples == 0) {
938 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
939 return ERROR(GENERIC);
940 }
941 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
942 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
943 ZDICT_DICTSIZE_MIN);
944 return ERROR(dstSize_tooSmall);
945 }
946 if (nbThreads > 1) {
947 pool = POOL_create(nbThreads, 1);
948 if (!pool) {
949 return ERROR(memory_allocation);
950 }
951 }
952 /* Initialization */
953 COVER_best_init(&best);
954 /* Turn down global display level to clean up display at level 2 and below */
955 g_displayLevel = parameters->notificationLevel - 1;
956 /* Loop through d first because each new value needs a new context */
957 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
958 kIterations);
959 for (d = kMinD; d <= kMaxD; d += 2) {
960 /* Initialize the context for this value of d */
961 COVER_ctx_t ctx;
962 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
963 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
964 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
965 COVER_best_destroy(&best);
966 return ERROR(GENERIC);
967 }
968 /* Loop through k reusing the same context */
969 for (k = kMinK; k <= kMaxK; k += kStepSize) {
970 /* Prepare the arguments */
971 COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
972 sizeof(COVER_tryParameters_data_t));
973 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
974 if (!data) {
975 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
976 COVER_best_destroy(&best);
977 COVER_ctx_destroy(&ctx);
978 return ERROR(GENERIC);
979 }
980 data->ctx = &ctx;
981 data->best = &best;
982 data->dictBufferCapacity = dictBufferCapacity;
983 data->parameters = *parameters;
984 data->parameters.k = k;
985 data->parameters.d = d;
986 data->parameters.steps = kSteps;
987 /* Check the parameters */
988 if (!COVER_checkParameters(data->parameters)) {
989 DISPLAYLEVEL(1, "Cover parameters incorrect\n");
990 continue;
991 }
992 /* Call the function and pass ownership of data to it */
993 COVER_best_start(&best);
994 if (pool) {
995 POOL_add(pool, &COVER_tryParameters, data);
996 } else {
997 COVER_tryParameters(data);
998 }
999 /* Print status */
1000 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
1001 (U32)((iteration * 100) / kIterations));
1002 ++iteration;
1003 }
1004 COVER_best_wait(&best);
1005 COVER_ctx_destroy(&ctx);
1006 }
1007 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
1008 /* Fill the output buffer and parameters with output of the best parameters */
1009 {
1010 const size_t dictSize = best.dictSize;
1011 if (ZSTD_isError(best.compressedSize)) {
1012 COVER_best_destroy(&best);
1013 return best.compressedSize;
1014 }
1015 *parameters = best.parameters;
1016 memcpy(dictBuffer, best.dict, dictSize);
1017 COVER_best_destroy(&best);
1018 POOL_free(pool);
1019 return dictSize;
1020 }
1021 }
@@ -1,6 +1,33 b''
1 Version History
1 Version History
2 ===============
2 ===============
3
3
4 0.7.0 (released 2017-02-07)
5 ---------------------------
6
7 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
8 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
9 decompression of *content-only dictionary chains*.
10 * CFFI module fully implemented; all tests run against both C extension and
11 CFFI implementation.
12 * Vendored version of zstd updated to 1.1.3.
13 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
14 to avoid extra memory allocation of dict data.
15 * Add function names to error messages (by using ":name" in PyArg_Parse*
16 functions).
17 * Reuse decompression context across operations. Previously, we created a
18 new ZSTD_DCtx for each decompress(). This was measured to slow down
19 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
20 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
21 things faster in the process.
22 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
23 of bytes written.
24 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
25 written to the underlying output object.
26 * CompressionParameters instances now expose their values as attributes.
27 * CompressionParameters instances no longer are subscriptable nor behave
28 as tuples (backwards incompatible). Use attributes to obtain values.
29 * DictParameters instances now expose their values as attributes.
30
4 0.6.0 (released 2017-01-14)
31 0.6.0 (released 2017-01-14)
5 ---------------------------
32 ---------------------------
6
33
@@ -4,10 +4,11 b' python-zstandard'
4
4
5 This project provides Python bindings for interfacing with the
5 This project provides Python bindings for interfacing with the
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface is provided.
7 and CFFI interface are provided.
8
8
9 The primary goal of the extension is to provide a Pythonic interface to
9 The primary goal of the project is to provide a rich interface to the
10 the underlying C API. This means exposing most of the features and flexibility
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
12
13
13 The canonical home for this project is
14 The canonical home for this project is
@@ -23,6 +24,9 b' with the current API and that functional'
23 may be some backwards incompatible changes before 1.0. Though the author
24 may be some backwards incompatible changes before 1.0. Though the author
24 does not intend to make any major changes to the Python API.
25 does not intend to make any major changes to the Python API.
25
26
27 This project is vendored and distributed with Mercurial 4.1, where it is
28 used in a production capacity.
29
26 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
30 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
27 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
31 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
28 confident the extension is stable and works as advertised on these
32 confident the extension is stable and works as advertised on these
@@ -48,14 +52,15 b' low level compression and decompression '
48 support compression without the framing headers. But the author doesn't
52 support compression without the framing headers. But the author doesn't
49 believe it a high priority at this time.
53 believe it a high priority at this time.
50
54
51 The CFFI bindings are half-baked and need to be finished.
55 The CFFI bindings are feature complete and all tests run against both
56 the C extension and CFFI bindings to ensure behavior parity.
52
57
53 Requirements
58 Requirements
54 ============
59 ============
55
60
56 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, and 3.5
61 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
57 on common platforms (Linux, Windows, and OS X). Only x86_64 is currently
62 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
58 well-tested as an architecture.
63 currently well-tested as an architecture.
59
64
60 Installing
65 Installing
61 ==========
66 ==========
@@ -106,15 +111,11 b' compressing at several hundred MB/s and '
106 Comparison to Other Python Bindings
111 Comparison to Other Python Bindings
107 ===================================
112 ===================================
108
113
109 https://pypi.python.org/pypi/zstd is an alternative Python binding to
114 https://pypi.python.org/pypi/zstd is an alternate Python binding to
110 Zstandard. At the time this was written, the latest release of that
115 Zstandard. At the time this was written, the latest release of that
111 package (1.0.0.2) had the following significant differences from this package:
116 package (1.1.2) only exposed the simple APIs for compression and decompression.
112
117 This package exposes much more of the zstd API, including streaming and
113 * It only exposes the simple API for compression and decompression operations.
118 dictionary compression. This package also has CFFI support.
114 This extension exposes the streaming API, dictionary training, and more.
115 * It adds a custom framing header to compressed data and there is no way to
116 disable it. This means that data produced with that module cannot be used by
117 other Zstandard implementations.
118
119
119 Bundling of Zstandard Source Code
120 Bundling of Zstandard Source Code
120 =================================
121 =================================
@@ -260,6 +261,10 b' A ``flush()`` method can be called to ev'
260 compressor's internal state into the output object. This may result in 0 or
261 compressor's internal state into the output object. This may result in 0 or
261 more ``write()`` calls to the output object.
262 more ``write()`` calls to the output object.
262
263
264 Both ``write()`` and ``flush()`` return the number of bytes written to the
265 object's ``write()``. In many cases, small inputs do not accumulate enough
266 data to cause a write and ``write()`` will return ``0``.
267
263 If the size of the data being fed to this streaming compressor is known,
268 If the size of the data being fed to this streaming compressor is known,
264 you can declare it before compression begins::
269 you can declare it before compression begins::
265
270
@@ -476,6 +481,10 b' This behaves similarly to ``zstd.ZstdCom'
476 the decompressor by calling ``write(data)`` and decompressed output is written
481 the decompressor by calling ``write(data)`` and decompressed output is written
477 to the output object by calling its ``write(data)`` method.
482 to the output object by calling its ``write(data)`` method.
478
483
484 Calls to ``write()`` will return the number of bytes written to the output
485 object. Not all inputs will result in bytes being written, so return values
486 of ``0`` are possible.
487
479 The size of chunks being ``write()`` to the destination can be specified::
488 The size of chunks being ``write()`` to the destination can be specified::
480
489
481 dctx = zstd.ZstdDecompressor()
490 dctx = zstd.ZstdDecompressor()
@@ -576,6 +585,53 b' Here is how this API should be used::'
576 data = dobj.decompress(compressed_chunk_0)
585 data = dobj.decompress(compressed_chunk_0)
577 data = dobj.decompress(compressed_chunk_1)
586 data = dobj.decompress(compressed_chunk_1)
578
587
588 Content-Only Dictionary Chain Decompression
589 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
590
591 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
592 zstd frames produced using chained *content-only* dictionary compression. Such
593 a list of frames is produced by compressing discrete inputs where each
594 non-initial input is compressed with a *content-only* dictionary consisting
595 of the content of the previous input.
596
597 For example, say you have the following inputs::
598
599 inputs = [b'input 1', b'input 2', b'input 3']
600
601 The zstd frame chain consists of:
602
603 1. ``b'input 1'`` compressed in standalone/discrete mode
604 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
605 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary
606
607 Each zstd frame **must** have the content size written.
608
609 The following Python code can be used to produce a *content-only dictionary
610 chain*::
611
612 def make_chain(inputs):
613 frames = []
614
615 # First frame is compressed in standalone/discrete mode.
616 zctx = zstd.ZstdCompressor(write_content_size=True)
617 frames.append(zctx.compress(inputs[0]))
618
619 # Subsequent frames use the previous fulltext as a content-only dictionary
620 for i, raw in enumerate(inputs[1:]):
621 dict_data = zstd.ZstdCompressionDict(inputs[i])
622 zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
623 frames.append(zctx.compress(raw))
624
625 return frames
626
627 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
628 element in the input chain.
629
630 It is possible to implement *content-only dictionary chain* decompression
631 on top of other Python APIs. However, this function will likely be significantly
632 faster, especially for long input chains, as it avoids the overhead of
633 instantiating and passing around intermediate objects between C and Python.
634
579 Choosing an API
635 Choosing an API
580 ---------------
636 ---------------
581
637
@@ -634,6 +690,13 b' Instances can be constructed from bytes:'
634
690
635 dict_data = zstd.ZstdCompressionDict(data)
691 dict_data = zstd.ZstdCompressionDict(data)
636
692
693 It is possible to construct a dictionary from *any* data. Unless the
694 data begins with a magic header, the dictionary will be treated as
695 *content-only*. *Content-only* dictionaries allow compression operations
696 that follow to reference raw data within the content. For one use of
697 *content-only* dictionaries, see
698 ``ZstdDecompressor.decompress_content_dict_chain()``.
699
637 More interestingly, instances can be created by *training* on sample data::
700 More interestingly, instances can be created by *training* on sample data::
638
701
639 dict_data = zstd.train_dictionary(size, samples)
702 dict_data = zstd.train_dictionary(size, samples)
@@ -700,19 +763,57 b' You can then configure a compressor to u'
700
763
701 cctx = zstd.ZstdCompressor(compression_params=params)
764 cctx = zstd.ZstdCompressor(compression_params=params)
702
765
703 The members of the ``CompressionParameters`` tuple are as follows::
766 The members/attributes of ``CompressionParameters`` instances are as follows::
704
767
705 * 0 - Window log
768 * window_log
706 * 1 - Chain log
769 * chain_log
707 * 2 - Hash log
770 * hash_log
708 * 3 - Search log
771 * search_log
709 * 4 - Search length
772 * search_length
710 * 5 - Target length
773 * target_length
711 * 6 - Strategy (one of the ``zstd.STRATEGY_`` constants)
774 * strategy
775
776 This is the order the arguments are passed to the constructor if not using
777 named arguments.
712
778
713 You'll need to read the Zstandard documentation for what these parameters
779 You'll need to read the Zstandard documentation for what these parameters
714 do.
780 do.
715
781
782 Frame Inspection
783 ----------------
784
785 Data emitted from zstd compression is encapsulated in a *frame*. This frame
786 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
787 the frame in more detail. For more info, see
788 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
789
790 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
791 instance and returns a ``FrameParameters`` object describing the frame.
792
793 Depending on which fields are present in the frame and their values, the
794 length of the frame parameters varies. If insufficient bytes are passed
795 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
796 frame parameters can be parsed, pass in at least 18 bytes.
797
798 ``FrameParameters`` instances have the following attributes:
799
800 content_size
801 Integer size of original, uncompressed content. This will be ``0`` if the
802 original content size isn't written to the frame (controlled with the
803 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
804 content size was ``0``.
805
806 window_size
807 Integer size of maximum back-reference distance in compressed data.
808
809 dict_id
810 Integer of dictionary ID used for compression. ``0`` if no dictionary
811 ID was used or if the dictionary ID was ``0``.
812
813 has_checksum
814 Bool indicating whether a 4 byte content checksum is stored at the end
815 of the frame.
816
716 Misc Functionality
817 Misc Functionality
717 ------------------
818 ------------------
718
819
@@ -776,19 +877,32 b' TARGETLENGTH_MIN'
776 TARGETLENGTH_MAX
877 TARGETLENGTH_MAX
777 Maximum value for compression parameter
878 Maximum value for compression parameter
778 STRATEGY_FAST
879 STRATEGY_FAST
779 Compression strategory
880 Compression strategy
780 STRATEGY_DFAST
881 STRATEGY_DFAST
781 Compression strategory
882 Compression strategy
782 STRATEGY_GREEDY
883 STRATEGY_GREEDY
783 Compression strategory
884 Compression strategy
784 STRATEGY_LAZY
885 STRATEGY_LAZY
785 Compression strategory
886 Compression strategy
786 STRATEGY_LAZY2
887 STRATEGY_LAZY2
787 Compression strategory
888 Compression strategy
788 STRATEGY_BTLAZY2
889 STRATEGY_BTLAZY2
789 Compression strategory
890 Compression strategy
790 STRATEGY_BTOPT
891 STRATEGY_BTOPT
791 Compression strategory
892 Compression strategy
893
894 Performance Considerations
895 --------------------------
896
897 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
898 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
899 or ``ZstdDecompressor`` instance for multiple operations is faster than
900 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
901 operation. The differences are magnified as the size of data decreases. For
902 example, the difference between *context* reuse and non-reuse for 100,000
903 100 byte inputs will be significant (possiby over 10x faster to reuse contexts)
904 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
905 time spent doing compression dwarfs time spent creating new *contexts*).
792
906
793 Note on Zstandard's *Experimental* API
907 Note on Zstandard's *Experimental* API
794 ======================================
908 ======================================
@@ -28,7 +28,8 b' ZstdCompressionDict* train_dictionary(Py'
28 void* dict;
28 void* dict;
29 ZstdCompressionDict* result;
29 ZstdCompressionDict* result;
30
30
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary",
32 kwlist,
32 &capacity,
33 &capacity,
33 &PyList_Type, &samples,
34 &PyList_Type, &samples,
34 (PyObject*)&DictParametersType, &parameters)) {
35 (PyObject*)&DictParametersType, &parameters)) {
@@ -57,7 +58,6 b' ZstdCompressionDict* train_dictionary(Py'
57 sampleItem = PyList_GetItem(samples, sampleIndex);
58 sampleItem = PyList_GetItem(samples, sampleIndex);
58 if (!PyBytes_Check(sampleItem)) {
59 if (!PyBytes_Check(sampleItem)) {
59 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
60 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
60 /* TODO probably need to perform DECREF here */
61 return NULL;
61 return NULL;
62 }
62 }
63 samplesSize += PyBytes_GET_SIZE(sampleItem);
63 samplesSize += PyBytes_GET_SIZE(sampleItem);
@@ -133,10 +133,11 b' static int ZstdCompressionDict_init(Zstd'
133 self->dictSize = 0;
133 self->dictSize = 0;
134
134
135 #if PY_MAJOR_VERSION >= 3
135 #if PY_MAJOR_VERSION >= 3
136 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
136 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
137 #else
137 #else
138 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
138 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
139 #endif
139 #endif
140 &source, &sourceSize)) {
140 return -1;
141 return -1;
141 }
142 }
142
143
@@ -25,7 +25,8 b' CompressionParametersObject* get_compres'
25 ZSTD_compressionParameters params;
25 ZSTD_compressionParameters params;
26 CompressionParametersObject* result;
26 CompressionParametersObject* result;
27
27
28 if (!PyArg_ParseTuple(args, "i|Kn", &compressionLevel, &sourceSize, &dictSize)) {
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
29 return NULL;
30 return NULL;
30 }
31 }
31
32
@@ -47,12 +48,85 b' CompressionParametersObject* get_compres'
47 return result;
48 return result;
48 }
49 }
49
50
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
53 "window_log",
54 "chain_log",
55 "hash_log",
56 "search_log",
57 "search_length",
58 "target_length",
59 "strategy",
60 NULL
61 };
62
63 unsigned windowLog;
64 unsigned chainLog;
65 unsigned hashLog;
66 unsigned searchLog;
67 unsigned searchLength;
68 unsigned targetLength;
69 unsigned strategy;
70
71 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
72 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
73 &targetLength, &strategy)) {
74 return -1;
75 }
76
77 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
78 PyErr_SetString(PyExc_ValueError, "invalid window log value");
79 return -1;
80 }
81
82 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
84 return -1;
85 }
86
87 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
89 return -1;
90 }
91
92 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid search log value");
94 return -1;
95 }
96
97 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search length value");
99 return -1;
100 }
101
102 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid target length value");
104 return -1;
105 }
106
107 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
108 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
109 return -1;
110 }
111
112 self->windowLog = windowLog;
113 self->chainLog = chainLog;
114 self->hashLog = hashLog;
115 self->searchLog = searchLog;
116 self->searchLength = searchLength;
117 self->targetLength = targetLength;
118 self->strategy = strategy;
119
120 return 0;
121 }
122
50 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
123 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
51 CompressionParametersObject* params;
124 CompressionParametersObject* params;
52 ZSTD_compressionParameters zparams;
125 ZSTD_compressionParameters zparams;
53 PyObject* result;
126 PyObject* result;
54
127
55 if (!PyArg_ParseTuple(args, "O!", &CompressionParametersType, &params)) {
128 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
129 &CompressionParametersType, &params)) {
56 return NULL;
130 return NULL;
57 }
131 }
58
132
@@ -64,113 +138,33 b' PyObject* estimate_compression_context_s'
64 PyDoc_STRVAR(CompressionParameters__doc__,
138 PyDoc_STRVAR(CompressionParameters__doc__,
65 "CompressionParameters: low-level control over zstd compression");
139 "CompressionParameters: low-level control over zstd compression");
66
140
67 static PyObject* CompressionParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
68 CompressionParametersObject* self;
69 unsigned windowLog;
70 unsigned chainLog;
71 unsigned hashLog;
72 unsigned searchLog;
73 unsigned searchLength;
74 unsigned targetLength;
75 unsigned strategy;
76
77 if (!PyArg_ParseTuple(args, "IIIIIII", &windowLog, &chainLog, &hashLog, &searchLog,
78 &searchLength, &targetLength, &strategy)) {
79 return NULL;
80 }
81
82 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid window log value");
84 return NULL;
85 }
86
87 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
89 return NULL;
90 }
91
92 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
94 return NULL;
95 }
96
97 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search log value");
99 return NULL;
100 }
101
102 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid search length value");
104 return NULL;
105 }
106
107 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
108 PyErr_SetString(PyExc_ValueError, "invalid target length value");
109 return NULL;
110 }
111
112 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
113 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
114 return NULL;
115 }
116
117 self = (CompressionParametersObject*)subtype->tp_alloc(subtype, 1);
118 if (!self) {
119 return NULL;
120 }
121
122 self->windowLog = windowLog;
123 self->chainLog = chainLog;
124 self->hashLog = hashLog;
125 self->searchLog = searchLog;
126 self->searchLength = searchLength;
127 self->targetLength = targetLength;
128 self->strategy = strategy;
129
130 return (PyObject*)self;
131 }
132
133 static void CompressionParameters_dealloc(PyObject* self) {
141 static void CompressionParameters_dealloc(PyObject* self) {
134 PyObject_Del(self);
142 PyObject_Del(self);
135 }
143 }
136
144
137 static Py_ssize_t CompressionParameters_length(PyObject* self) {
145 static PyMemberDef CompressionParameters_members[] = {
138 return 7;
146 { "window_log", T_UINT,
139 }
147 offsetof(CompressionParametersObject, windowLog), READONLY,
140
148 "window log" },
141 static PyObject* CompressionParameters_item(PyObject* o, Py_ssize_t i) {
149 { "chain_log", T_UINT,
142 CompressionParametersObject* self = (CompressionParametersObject*)o;
150 offsetof(CompressionParametersObject, chainLog), READONLY,
143
151 "chain log" },
144 switch (i) {
152 { "hash_log", T_UINT,
145 case 0:
153 offsetof(CompressionParametersObject, hashLog), READONLY,
146 return PyLong_FromLong(self->windowLog);
154 "hash log" },
147 case 1:
155 { "search_log", T_UINT,
148 return PyLong_FromLong(self->chainLog);
156 offsetof(CompressionParametersObject, searchLog), READONLY,
149 case 2:
157 "search log" },
150 return PyLong_FromLong(self->hashLog);
158 { "search_length", T_UINT,
151 case 3:
159 offsetof(CompressionParametersObject, searchLength), READONLY,
152 return PyLong_FromLong(self->searchLog);
160 "search length" },
153 case 4:
161 { "target_length", T_UINT,
154 return PyLong_FromLong(self->searchLength);
162 offsetof(CompressionParametersObject, targetLength), READONLY,
155 case 5:
163 "target length" },
156 return PyLong_FromLong(self->targetLength);
164 { "strategy", T_INT,
157 case 6:
165 offsetof(CompressionParametersObject, strategy), READONLY,
158 return PyLong_FromLong(self->strategy);
166 "strategy" },
159 default:
167 { NULL }
160 PyErr_SetString(PyExc_IndexError, "index out of range");
161 return NULL;
162 }
163 }
164
165 static PySequenceMethods CompressionParameters_sq = {
166 CompressionParameters_length, /* sq_length */
167 0, /* sq_concat */
168 0, /* sq_repeat */
169 CompressionParameters_item, /* sq_item */
170 0, /* sq_ass_item */
171 0, /* sq_contains */
172 0, /* sq_inplace_concat */
173 0 /* sq_inplace_repeat */
174 };
168 };
175
169
176 PyTypeObject CompressionParametersType = {
170 PyTypeObject CompressionParametersType = {
@@ -185,7 +179,7 b' PyTypeObject CompressionParametersType ='
185 0, /* tp_compare */
179 0, /* tp_compare */
186 0, /* tp_repr */
180 0, /* tp_repr */
187 0, /* tp_as_number */
181 0, /* tp_as_number */
188 &CompressionParameters_sq, /* tp_as_sequence */
182 0, /* tp_as_sequence */
189 0, /* tp_as_mapping */
183 0, /* tp_as_mapping */
190 0, /* tp_hash */
184 0, /* tp_hash */
191 0, /* tp_call */
185 0, /* tp_call */
@@ -193,7 +187,7 b' PyTypeObject CompressionParametersType ='
193 0, /* tp_getattro */
187 0, /* tp_getattro */
194 0, /* tp_setattro */
188 0, /* tp_setattro */
195 0, /* tp_as_buffer */
189 0, /* tp_as_buffer */
196 Py_TPFLAGS_DEFAULT, /* tp_flags */
190 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
197 CompressionParameters__doc__, /* tp_doc */
191 CompressionParameters__doc__, /* tp_doc */
198 0, /* tp_traverse */
192 0, /* tp_traverse */
199 0, /* tp_clear */
193 0, /* tp_clear */
@@ -202,16 +196,16 b' PyTypeObject CompressionParametersType ='
202 0, /* tp_iter */
196 0, /* tp_iter */
203 0, /* tp_iternext */
197 0, /* tp_iternext */
204 0, /* tp_methods */
198 0, /* tp_methods */
205 0, /* tp_members */
199 CompressionParameters_members, /* tp_members */
206 0, /* tp_getset */
200 0, /* tp_getset */
207 0, /* tp_base */
201 0, /* tp_base */
208 0, /* tp_dict */
202 0, /* tp_dict */
209 0, /* tp_descr_get */
203 0, /* tp_descr_get */
210 0, /* tp_descr_set */
204 0, /* tp_descr_set */
211 0, /* tp_dictoffset */
205 0, /* tp_dictoffset */
212 0, /* tp_init */
206 (initproc)CompressionParameters_init, /* tp_init */
213 0, /* tp_alloc */
207 0, /* tp_alloc */
214 CompressionParameters_new, /* tp_new */
208 PyType_GenericNew, /* tp_new */
215 };
209 };
216
210
217 void compressionparams_module_init(PyObject* mod) {
211 void compressionparams_module_init(PyObject* mod) {
@@ -52,7 +52,7 b' static PyObject* ZstdCompressionWriter_e'
52 ZSTD_outBuffer output;
52 ZSTD_outBuffer output;
53 PyObject* res;
53 PyObject* res;
54
54
55 if (!PyArg_ParseTuple(args, "OOO", &exc_type, &exc_value, &exc_tb)) {
55 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 return NULL;
56 return NULL;
57 }
57 }
58
58
@@ -119,11 +119,12 b' static PyObject* ZstdCompressionWriter_w'
119 ZSTD_inBuffer input;
119 ZSTD_inBuffer input;
120 ZSTD_outBuffer output;
120 ZSTD_outBuffer output;
121 PyObject* res;
121 PyObject* res;
122 Py_ssize_t totalWrite = 0;
122
123
123 #if PY_MAJOR_VERSION >= 3
124 #if PY_MAJOR_VERSION >= 3
124 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
125 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
125 #else
126 #else
126 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
127 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
127 #endif
128 #endif
128 return NULL;
129 return NULL;
129 }
130 }
@@ -164,20 +165,21 b' static PyObject* ZstdCompressionWriter_w'
164 #endif
165 #endif
165 output.dst, output.pos);
166 output.dst, output.pos);
166 Py_XDECREF(res);
167 Py_XDECREF(res);
168 totalWrite += output.pos;
167 }
169 }
168 output.pos = 0;
170 output.pos = 0;
169 }
171 }
170
172
171 PyMem_Free(output.dst);
173 PyMem_Free(output.dst);
172
174
173 /* TODO return bytes written */
175 return PyLong_FromSsize_t(totalWrite);
174 Py_RETURN_NONE;
175 }
176 }
176
177
177 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
178 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
178 size_t zresult;
179 size_t zresult;
179 ZSTD_outBuffer output;
180 ZSTD_outBuffer output;
180 PyObject* res;
181 PyObject* res;
182 Py_ssize_t totalWrite = 0;
181
183
182 if (!self->entered) {
184 if (!self->entered) {
183 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
185 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
@@ -215,14 +217,14 b' static PyObject* ZstdCompressionWriter_f'
215 #endif
217 #endif
216 output.dst, output.pos);
218 output.dst, output.pos);
217 Py_XDECREF(res);
219 Py_XDECREF(res);
220 totalWrite += output.pos;
218 }
221 }
219 output.pos = 0;
222 output.pos = 0;
220 }
223 }
221
224
222 PyMem_Free(output.dst);
225 PyMem_Free(output.dst);
223
226
224 /* TODO return bytes written */
227 return PyLong_FromSsize_t(totalWrite);
225 Py_RETURN_NONE;
226 }
228 }
227
229
228 static PyMethodDef ZstdCompressionWriter_methods[] = {
230 static PyMethodDef ZstdCompressionWriter_methods[] = {
@@ -42,9 +42,9 b' static PyObject* ZstdCompressionObj_comp'
42 }
42 }
43
43
44 #if PY_MAJOR_VERSION >= 3
44 #if PY_MAJOR_VERSION >= 3
45 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
45 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
46 #else
46 #else
47 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
47 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
48 #endif
48 #endif
49 return NULL;
49 return NULL;
50 }
50 }
@@ -98,7 +98,7 b' static PyObject* ZstdCompressionObj_flus'
98 PyObject* result = NULL;
98 PyObject* result = NULL;
99 Py_ssize_t resultSize = 0;
99 Py_ssize_t resultSize = 0;
100
100
101 if (!PyArg_ParseTuple(args, "|i", &flushMode)) {
101 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 return NULL;
102 return NULL;
103 }
103 }
104
104
@@ -16,7 +16,7 b' int populate_cdict(ZstdCompressor* compr'
16 Py_BEGIN_ALLOW_THREADS
16 Py_BEGIN_ALLOW_THREADS
17 memset(&zmem, 0, sizeof(zmem));
17 memset(&zmem, 0, sizeof(zmem));
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, *zparams, zmem);
19 compressor->dict->dictSize, 1, *zparams, zmem);
20 Py_END_ALLOW_THREADS
20 Py_END_ALLOW_THREADS
21
21
22 if (!compressor->cdict) {
22 if (!compressor->cdict) {
@@ -128,8 +128,8 b' static int ZstdCompressor_init(ZstdCompr'
128 self->cparams = NULL;
128 self->cparams = NULL;
129 self->cdict = NULL;
129 self->cdict = NULL;
130
130
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist,
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor",
132 &level, &ZstdCompressionDictType, &dict,
132 kwlist, &level, &ZstdCompressionDictType, &dict,
133 &CompressionParametersType, &params,
133 &CompressionParametersType, &params,
134 &writeChecksum, &writeContentSize, &writeDictID)) {
134 &writeChecksum, &writeContentSize, &writeDictID)) {
135 return -1;
135 return -1;
@@ -243,8 +243,8 b' static PyObject* ZstdCompressor_copy_str'
243 PyObject* totalReadPy;
243 PyObject* totalReadPy;
244 PyObject* totalWritePy;
244 PyObject* totalWritePy;
245
245
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk", kwlist, &source, &dest, &sourceSize,
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
247 &inSize, &outSize)) {
247 &source, &dest, &sourceSize, &inSize, &outSize)) {
248 return NULL;
248 return NULL;
249 }
249 }
250
250
@@ -402,9 +402,9 b' static PyObject* ZstdCompressor_compress'
402 ZSTD_parameters zparams;
402 ZSTD_parameters zparams;
403
403
404 #if PY_MAJOR_VERSION >= 3
404 #if PY_MAJOR_VERSION >= 3
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
406 #else
406 #else
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
408 #endif
408 #endif
409 kwlist, &source, &sourceSize, &allowEmpty)) {
409 kwlist, &source, &sourceSize, &allowEmpty)) {
410 return NULL;
410 return NULL;
@@ -512,7 +512,7 b' static ZstdCompressionObj* ZstdCompresso'
512 return NULL;
512 return NULL;
513 }
513 }
514
514
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &inSize)) {
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
516 return NULL;
516 return NULL;
517 }
517 }
518
518
@@ -574,8 +574,8 b' static ZstdCompressorIterator* ZstdCompr'
574 size_t outSize = ZSTD_CStreamOutSize();
574 size_t outSize = ZSTD_CStreamOutSize();
575 ZstdCompressorIterator* result;
575 ZstdCompressorIterator* result;
576
576
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk", kwlist, &reader, &sourceSize,
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
578 &inSize, &outSize)) {
578 &reader, &sourceSize, &inSize, &outSize)) {
579 return NULL;
579 return NULL;
580 }
580 }
581
581
@@ -693,8 +693,8 b' static ZstdCompressionWriter* ZstdCompre'
693 Py_ssize_t sourceSize = 0;
693 Py_ssize_t sourceSize = 0;
694 size_t outSize = ZSTD_CStreamOutSize();
694 size_t outSize = ZSTD_CStreamOutSize();
695
695
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk", kwlist, &writer, &sourceSize,
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
697 &outSize)) {
697 &writer, &sourceSize, &outSize)) {
698 return NULL;
698 return NULL;
699 }
699 }
700
700
@@ -71,11 +71,12 b' static PyObject* ZstdDecompressionWriter'
71 ZSTD_inBuffer input;
71 ZSTD_inBuffer input;
72 ZSTD_outBuffer output;
72 ZSTD_outBuffer output;
73 PyObject* res;
73 PyObject* res;
74 Py_ssize_t totalWrite = 0;
74
75
75 #if PY_MAJOR_VERSION >= 3
76 #if PY_MAJOR_VERSION >= 3
76 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
77 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
77 #else
78 #else
78 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
79 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
79 #endif
80 #endif
80 return NULL;
81 return NULL;
81 }
82 }
@@ -116,15 +117,15 b' static PyObject* ZstdDecompressionWriter'
116 #endif
117 #endif
117 output.dst, output.pos);
118 output.dst, output.pos);
118 Py_XDECREF(res);
119 Py_XDECREF(res);
120 totalWrite += output.pos;
119 output.pos = 0;
121 output.pos = 0;
120 }
122 }
121 }
123 }
122
124
123 PyMem_Free(output.dst);
125 PyMem_Free(output.dst);
124
126
125 /* TODO return bytes written */
127 return PyLong_FromSsize_t(totalWrite);
126 Py_RETURN_NONE;
128 }
127 }
128
129
129 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
131 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
@@ -41,9 +41,9 b' static PyObject* DecompressionObj_decomp'
41 }
41 }
42
42
43 #if PY_MAJOR_VERSION >= 3
43 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTuple(args, "y#",
44 if (!PyArg_ParseTuple(args, "y#:decompress",
45 #else
45 #else
46 if (!PyArg_ParseTuple(args, "s#",
46 if (!PyArg_ParseTuple(args, "s#:decompress",
47 #endif
47 #endif
48 &source, &sourceSize)) {
48 &source, &sourceSize)) {
49 return NULL;
49 return NULL;
@@ -59,23 +59,19 b' static int Decompressor_init(ZstdDecompr'
59
59
60 ZstdCompressionDict* dict = NULL;
60 ZstdCompressionDict* dict = NULL;
61
61
62 self->refdctx = NULL;
62 self->dctx = NULL;
63 self->dict = NULL;
63 self->dict = NULL;
64 self->ddict = NULL;
64 self->ddict = NULL;
65
65
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!", kwlist,
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
67 &ZstdCompressionDictType, &dict)) {
67 &ZstdCompressionDictType, &dict)) {
68 return -1;
68 return -1;
69 }
69 }
70
70
71 /* Instead of creating a ZSTD_DCtx for every decompression operation,
72 we create an instance at object creation time and recycle it via
73 ZSTD_copyDCTx() on each use. This means each use is a malloc+memcpy
74 instead of a malloc+init. */
75 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
71 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
76 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
72 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
77 self->refdctx = ZSTD_createDCtx();
73 self->dctx = ZSTD_createDCtx();
78 if (!self->refdctx) {
74 if (!self->dctx) {
79 PyErr_NoMemory();
75 PyErr_NoMemory();
80 goto except;
76 goto except;
81 }
77 }
@@ -88,17 +84,17 b' static int Decompressor_init(ZstdDecompr'
88 return 0;
84 return 0;
89
85
90 except:
86 except:
91 if (self->refdctx) {
87 if (self->dctx) {
92 ZSTD_freeDCtx(self->refdctx);
88 ZSTD_freeDCtx(self->dctx);
93 self->refdctx = NULL;
89 self->dctx = NULL;
94 }
90 }
95
91
96 return -1;
92 return -1;
97 }
93 }
98
94
99 static void Decompressor_dealloc(ZstdDecompressor* self) {
95 static void Decompressor_dealloc(ZstdDecompressor* self) {
100 if (self->refdctx) {
96 if (self->dctx) {
101 ZSTD_freeDCtx(self->refdctx);
97 ZSTD_freeDCtx(self->dctx);
102 }
98 }
103
99
104 Py_XDECREF(self->dict);
100 Py_XDECREF(self->dict);
@@ -150,8 +146,8 b' static PyObject* Decompressor_copy_strea'
150 PyObject* totalReadPy;
146 PyObject* totalReadPy;
151 PyObject* totalWritePy;
147 PyObject* totalWritePy;
152
148
153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk", kwlist, &source,
149 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
154 &dest, &inSize, &outSize)) {
150 &source, &dest, &inSize, &outSize)) {
155 return NULL;
151 return NULL;
156 }
152 }
157
153
@@ -243,7 +239,7 b' static PyObject* Decompressor_copy_strea'
243 Py_DecRef(totalReadPy);
239 Py_DecRef(totalReadPy);
244 Py_DecRef(totalWritePy);
240 Py_DecRef(totalWritePy);
245
241
246 finally:
242 finally:
247 if (output.dst) {
243 if (output.dst) {
248 PyMem_Free(output.dst);
244 PyMem_Free(output.dst);
249 }
245 }
@@ -291,28 +287,19 b' PyObject* Decompressor_decompress(ZstdDe'
291 unsigned long long decompressedSize;
287 unsigned long long decompressedSize;
292 size_t destCapacity;
288 size_t destCapacity;
293 PyObject* result = NULL;
289 PyObject* result = NULL;
294 ZSTD_DCtx* dctx = NULL;
295 void* dictData = NULL;
290 void* dictData = NULL;
296 size_t dictSize = 0;
291 size_t dictSize = 0;
297 size_t zresult;
292 size_t zresult;
298
293
299 #if PY_MAJOR_VERSION >= 3
294 #if PY_MAJOR_VERSION >= 3
300 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", kwlist,
295 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
301 #else
296 #else
302 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", kwlist,
297 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
303 #endif
298 #endif
304 &source, &sourceSize, &maxOutputSize)) {
299 kwlist, &source, &sourceSize, &maxOutputSize)) {
305 return NULL;
300 return NULL;
306 }
301 }
307
302
308 dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx));
309 if (!dctx) {
310 PyErr_NoMemory();
311 return NULL;
312 }
313
314 ZSTD_copyDCtx(dctx, self->refdctx);
315
316 if (self->dict) {
303 if (self->dict) {
317 dictData = self->dict->dictData;
304 dictData = self->dict->dictData;
318 dictSize = self->dict->dictSize;
305 dictSize = self->dict->dictSize;
@@ -320,12 +307,12 b' PyObject* Decompressor_decompress(ZstdDe'
320
307
321 if (dictData && !self->ddict) {
308 if (dictData && !self->ddict) {
322 Py_BEGIN_ALLOW_THREADS
309 Py_BEGIN_ALLOW_THREADS
323 self->ddict = ZSTD_createDDict(dictData, dictSize);
310 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
324 Py_END_ALLOW_THREADS
311 Py_END_ALLOW_THREADS
325
312
326 if (!self->ddict) {
313 if (!self->ddict) {
327 PyErr_SetString(ZstdError, "could not create decompression dict");
314 PyErr_SetString(ZstdError, "could not create decompression dict");
328 goto except;
315 return NULL;
329 }
316 }
330 }
317 }
331
318
@@ -335,7 +322,7 b' PyObject* Decompressor_decompress(ZstdDe'
335 if (0 == maxOutputSize) {
322 if (0 == maxOutputSize) {
336 PyErr_SetString(ZstdError, "input data invalid or missing content size "
323 PyErr_SetString(ZstdError, "input data invalid or missing content size "
337 "in frame header");
324 "in frame header");
338 goto except;
325 return NULL;
339 }
326 }
340 else {
327 else {
341 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
328 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
@@ -348,45 +335,39 b' PyObject* Decompressor_decompress(ZstdDe'
348 }
335 }
349
336
350 if (!result) {
337 if (!result) {
351 goto except;
338 return NULL;
352 }
339 }
353
340
354 Py_BEGIN_ALLOW_THREADS
341 Py_BEGIN_ALLOW_THREADS
355 if (self->ddict) {
342 if (self->ddict) {
356 zresult = ZSTD_decompress_usingDDict(dctx, PyBytes_AsString(result), destCapacity,
343 zresult = ZSTD_decompress_usingDDict(self->dctx,
344 PyBytes_AsString(result), destCapacity,
357 source, sourceSize, self->ddict);
345 source, sourceSize, self->ddict);
358 }
346 }
359 else {
347 else {
360 zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize);
348 zresult = ZSTD_decompressDCtx(self->dctx,
349 PyBytes_AsString(result), destCapacity, source, sourceSize);
361 }
350 }
362 Py_END_ALLOW_THREADS
351 Py_END_ALLOW_THREADS
363
352
364 if (ZSTD_isError(zresult)) {
353 if (ZSTD_isError(zresult)) {
365 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
354 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
366 goto except;
355 Py_DecRef(result);
356 return NULL;
367 }
357 }
368 else if (decompressedSize && zresult != decompressedSize) {
358 else if (decompressedSize && zresult != decompressedSize) {
369 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
359 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
370 zresult, decompressedSize);
360 zresult, decompressedSize);
371 goto except;
361 Py_DecRef(result);
362 return NULL;
372 }
363 }
373 else if (zresult < destCapacity) {
364 else if (zresult < destCapacity) {
374 if (_PyBytes_Resize(&result, zresult)) {
365 if (_PyBytes_Resize(&result, zresult)) {
375 goto except;
366 Py_DecRef(result);
367 return NULL;
376 }
368 }
377 }
369 }
378
370
379 goto finally;
380
381 except:
382 Py_DecRef(result);
383 result = NULL;
384
385 finally:
386 if (dctx) {
387 PyMem_FREE(dctx);
388 }
389
390 return result;
371 return result;
391 }
372 }
392
373
@@ -455,8 +436,8 b' static ZstdDecompressorIterator* Decompr'
455 ZstdDecompressorIterator* result;
436 ZstdDecompressorIterator* result;
456 size_t skipBytes = 0;
437 size_t skipBytes = 0;
457
438
458 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk", kwlist, &reader,
439 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
459 &inSize, &outSize, &skipBytes)) {
440 &reader, &inSize, &outSize, &skipBytes)) {
460 return NULL;
441 return NULL;
461 }
442 }
462
443
@@ -534,19 +515,14 b' static ZstdDecompressorIterator* Decompr'
534 goto finally;
515 goto finally;
535
516
536 except:
517 except:
537 if (result->reader) {
518 Py_CLEAR(result->reader);
538 Py_DECREF(result->reader);
539 result->reader = NULL;
540 }
541
519
542 if (result->buffer) {
520 if (result->buffer) {
543 PyBuffer_Release(result->buffer);
521 PyBuffer_Release(result->buffer);
544 Py_DECREF(result->buffer);
522 Py_CLEAR(result->buffer);
545 result->buffer = NULL;
546 }
523 }
547
524
548 Py_DECREF(result);
525 Py_CLEAR(result);
549 result = NULL;
550
526
551 finally:
527 finally:
552
528
@@ -577,7 +553,8 b' static ZstdDecompressionWriter* Decompre'
577 size_t outSize = ZSTD_DStreamOutSize();
553 size_t outSize = ZSTD_DStreamOutSize();
578 ZstdDecompressionWriter* result;
554 ZstdDecompressionWriter* result;
579
555
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k", kwlist, &writer, &outSize)) {
556 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
557 &writer, &outSize)) {
581 return NULL;
558 return NULL;
582 }
559 }
583
560
@@ -605,6 +582,200 b' static ZstdDecompressionWriter* Decompre'
605 return result;
582 return result;
606 }
583 }
607
584
585 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
586 "Decompress a series of chunks using the content dictionary chaining technique\n"
587 );
588
589 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
590 static char* kwlist[] = {
591 "frames",
592 NULL
593 };
594
595 PyObject* chunks;
596 Py_ssize_t chunksLen;
597 Py_ssize_t chunkIndex;
598 char parity = 0;
599 PyObject* chunk;
600 char* chunkData;
601 Py_ssize_t chunkSize;
602 ZSTD_DCtx* dctx = NULL;
603 size_t zresult;
604 ZSTD_frameParams frameParams;
605 void* buffer1 = NULL;
606 size_t buffer1Size = 0;
607 size_t buffer1ContentSize = 0;
608 void* buffer2 = NULL;
609 size_t buffer2Size = 0;
610 size_t buffer2ContentSize = 0;
611 void* destBuffer = NULL;
612 PyObject* result = NULL;
613
614 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
615 kwlist, &PyList_Type, &chunks)) {
616 return NULL;
617 }
618
619 chunksLen = PyList_Size(chunks);
620 if (!chunksLen) {
621 PyErr_SetString(PyExc_ValueError, "empty input chain");
622 return NULL;
623 }
624
625 /* The first chunk should not be using a dictionary. We handle it specially. */
626 chunk = PyList_GetItem(chunks, 0);
627 if (!PyBytes_Check(chunk)) {
628 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
629 return NULL;
630 }
631
632 /* We require that all chunks be zstd frames and that they have content size set. */
633 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
634 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
635 if (ZSTD_isError(zresult)) {
636 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
637 return NULL;
638 }
639 else if (zresult) {
640 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
641 return NULL;
642 }
643
644 if (0 == frameParams.frameContentSize) {
645 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
646 return NULL;
647 }
648
649 dctx = ZSTD_createDCtx();
650 if (!dctx) {
651 PyErr_NoMemory();
652 goto finally;
653 }
654
655 buffer1Size = frameParams.frameContentSize;
656 buffer1 = PyMem_Malloc(buffer1Size);
657 if (!buffer1) {
658 goto finally;
659 }
660
661 Py_BEGIN_ALLOW_THREADS
662 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
663 Py_END_ALLOW_THREADS
664 if (ZSTD_isError(zresult)) {
665 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
666 goto finally;
667 }
668
669 buffer1ContentSize = zresult;
670
671 /* Special case of a simple chain. */
672 if (1 == chunksLen) {
673 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
674 goto finally;
675 }
676
677 /* This should ideally look at next chunk. But this is slightly simpler. */
678 buffer2Size = frameParams.frameContentSize;
679 buffer2 = PyMem_Malloc(buffer2Size);
680 if (!buffer2) {
681 goto finally;
682 }
683
684 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
685 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
686 used as a content dictionary) and the other holds the new fulltext. The
687 buffers grow when needed but never decrease in size. This limits the
688 memory allocator overhead.
689 */
690 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
691 chunk = PyList_GetItem(chunks, chunkIndex);
692 if (!PyBytes_Check(chunk)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
694 goto finally;
695 }
696
697 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
698 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
699 if (ZSTD_isError(zresult)) {
700 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
701 goto finally;
702 }
703 else if (zresult) {
704 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
705 goto finally;
706 }
707
708 if (0 == frameParams.frameContentSize) {
709 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
710 goto finally;
711 }
712
713 parity = chunkIndex % 2;
714
715 /* This could definitely be abstracted to reduce code duplication. */
716 if (parity) {
717 /* Resize destination buffer to hold larger content. */
718 if (buffer2Size < frameParams.frameContentSize) {
719 buffer2Size = frameParams.frameContentSize;
720 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
721 if (!destBuffer) {
722 goto finally;
723 }
724 buffer2 = destBuffer;
725 }
726
727 Py_BEGIN_ALLOW_THREADS
728 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
729 chunkData, chunkSize, buffer1, buffer1ContentSize);
730 Py_END_ALLOW_THREADS
731 if (ZSTD_isError(zresult)) {
732 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
733 chunkIndex, ZSTD_getErrorName(zresult));
734 goto finally;
735 }
736 buffer2ContentSize = zresult;
737 }
738 else {
739 if (buffer1Size < frameParams.frameContentSize) {
740 buffer1Size = frameParams.frameContentSize;
741 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
742 if (!destBuffer) {
743 goto finally;
744 }
745 buffer1 = destBuffer;
746 }
747
748 Py_BEGIN_ALLOW_THREADS
749 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
750 chunkData, chunkSize, buffer2, buffer2ContentSize);
751 Py_END_ALLOW_THREADS
752 if (ZSTD_isError(zresult)) {
753 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
754 chunkIndex, ZSTD_getErrorName(zresult));
755 goto finally;
756 }
757 buffer1ContentSize = zresult;
758 }
759 }
760
761 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
762 parity ? buffer2ContentSize : buffer1ContentSize);
763
764 finally:
765 if (buffer2) {
766 PyMem_Free(buffer2);
767 }
768 if (buffer1) {
769 PyMem_Free(buffer1);
770 }
771
772 if (dctx) {
773 ZSTD_freeDCtx(dctx);
774 }
775
776 return result;
777 }
778
608 static PyMethodDef Decompressor_methods[] = {
779 static PyMethodDef Decompressor_methods[] = {
609 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
780 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
610 Decompressor_copy_stream__doc__ },
781 Decompressor_copy_stream__doc__ },
@@ -616,6 +787,8 b' static PyMethodDef Decompressor_methods['
616 Decompressor_read_from__doc__ },
787 Decompressor_read_from__doc__ },
617 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
788 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
618 Decompressor_write_to__doc__ },
789 Decompressor_write_to__doc__ },
790 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
791 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
619 { NULL, NULL }
792 { NULL, NULL }
620 };
793 };
621
794
@@ -18,8 +18,8 b' static PyObject* DictParameters_new(PyTy'
18 unsigned notificationLevel;
18 unsigned notificationLevel;
19 unsigned dictID;
19 unsigned dictID;
20
20
21 if (!PyArg_ParseTuple(args, "IiII", &selectivityLevel, &compressionLevel,
21 if (!PyArg_ParseTuple(args, "IiII:DictParameters",
22 &notificationLevel, &dictID)) {
22 &selectivityLevel, &compressionLevel, &notificationLevel, &dictID)) {
23 return NULL;
23 return NULL;
24 }
24 }
25
25
@@ -40,6 +40,22 b' static void DictParameters_dealloc(PyObj'
40 PyObject_Del(self);
40 PyObject_Del(self);
41 }
41 }
42
42
43 static PyMemberDef DictParameters_members[] = {
44 { "selectivity_level", T_UINT,
45 offsetof(DictParametersObject, selectivityLevel), READONLY,
46 "selectivity level" },
47 { "compression_level", T_INT,
48 offsetof(DictParametersObject, compressionLevel), READONLY,
49 "compression level" },
50 { "notification_level", T_UINT,
51 offsetof(DictParametersObject, notificationLevel), READONLY,
52 "notification level" },
53 { "dict_id", T_UINT,
54 offsetof(DictParametersObject, dictID), READONLY,
55 "dictionary ID" },
56 { NULL }
57 };
58
43 static Py_ssize_t DictParameters_length(PyObject* self) {
59 static Py_ssize_t DictParameters_length(PyObject* self) {
44 return 4;
60 return 4;
45 }
61 }
@@ -102,7 +118,7 b' PyTypeObject DictParametersType = {'
102 0, /* tp_iter */
118 0, /* tp_iter */
103 0, /* tp_iternext */
119 0, /* tp_iternext */
104 0, /* tp_methods */
120 0, /* tp_methods */
105 0, /* tp_members */
121 DictParameters_members, /* tp_members */
106 0, /* tp_getset */
122 0, /* tp_getset */
107 0, /* tp_base */
123 0, /* tp_base */
108 0, /* tp_dict */
124 0, /* tp_dict */
@@ -8,6 +8,7 b''
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11
12
12 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
@@ -15,7 +16,7 b''
15 #include "zstd.h"
16 #include "zstd.h"
16 #include "zdict.h"
17 #include "zdict.h"
17
18
18 #define PYTHON_ZSTANDARD_VERSION "0.6.0"
19 #define PYTHON_ZSTANDARD_VERSION "0.7.0"
19
20
20 typedef enum {
21 typedef enum {
21 compressorobj_flush_finish,
22 compressorobj_flush_finish,
@@ -37,6 +38,16 b' extern PyTypeObject CompressionParameter'
37
38
38 typedef struct {
39 typedef struct {
39 PyObject_HEAD
40 PyObject_HEAD
41 unsigned long long frameContentSize;
42 unsigned windowSize;
43 unsigned dictID;
44 char checksumFlag;
45 } FrameParametersObject;
46
47 extern PyTypeObject FrameParametersType;
48
49 typedef struct {
50 PyObject_HEAD
40 unsigned selectivityLevel;
51 unsigned selectivityLevel;
41 int compressionLevel;
52 int compressionLevel;
42 unsigned notificationLevel;
53 unsigned notificationLevel;
@@ -115,7 +126,7 b' extern PyTypeObject ZstdCompressorIterat'
115 typedef struct {
126 typedef struct {
116 PyObject_HEAD
127 PyObject_HEAD
117
128
118 ZSTD_DCtx* refdctx;
129 ZSTD_DCtx* dctx;
119
130
120 ZstdCompressionDict* dict;
131 ZstdCompressionDict* dict;
121 ZSTD_DDict* ddict;
132 ZSTD_DDict* ddict;
@@ -172,6 +183,7 b' typedef struct {'
172
183
173 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
184 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
174 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
185 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
186 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
175 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
187 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
176 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
188 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
177 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
189 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
@@ -9,6 +9,7 b' from __future__ import absolute_import'
9 import cffi
9 import cffi
10 import distutils.ccompiler
10 import distutils.ccompiler
11 import os
11 import os
12 import re
12 import subprocess
13 import subprocess
13 import tempfile
14 import tempfile
14
15
@@ -19,6 +20,8 b" SOURCES = ['zstd/%s' % p for p in ("
19 'common/entropy_common.c',
20 'common/entropy_common.c',
20 'common/error_private.c',
21 'common/error_private.c',
21 'common/fse_decompress.c',
22 'common/fse_decompress.c',
23 'common/pool.c',
24 'common/threading.c',
22 'common/xxhash.c',
25 'common/xxhash.c',
23 'common/zstd_common.c',
26 'common/zstd_common.c',
24 'compress/fse_compress.c',
27 'compress/fse_compress.c',
@@ -26,10 +29,17 b" SOURCES = ['zstd/%s' % p for p in ("
26 'compress/zstd_compress.c',
29 'compress/zstd_compress.c',
27 'decompress/huf_decompress.c',
30 'decompress/huf_decompress.c',
28 'decompress/zstd_decompress.c',
31 'decompress/zstd_decompress.c',
32 'dictBuilder/cover.c',
29 'dictBuilder/divsufsort.c',
33 'dictBuilder/divsufsort.c',
30 'dictBuilder/zdict.c',
34 'dictBuilder/zdict.c',
31 )]
35 )]
32
36
37 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
38 ('zstd.h',),
39 ('common', 'pool.h'),
40 ('dictBuilder', 'zdict.h'),
41 )]
42
33 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
43 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
34 'zstd',
44 'zstd',
35 'zstd/common',
45 'zstd/common',
@@ -53,56 +63,92 b" if compiler.compiler_type == 'unix':"
53 args.extend([
63 args.extend([
54 '-E',
64 '-E',
55 '-DZSTD_STATIC_LINKING_ONLY',
65 '-DZSTD_STATIC_LINKING_ONLY',
66 '-DZDICT_STATIC_LINKING_ONLY',
56 ])
67 ])
57 elif compiler.compiler_type == 'msvc':
68 elif compiler.compiler_type == 'msvc':
58 args = [compiler.cc]
69 args = [compiler.cc]
59 args.extend([
70 args.extend([
60 '/EP',
71 '/EP',
61 '/DZSTD_STATIC_LINKING_ONLY',
72 '/DZSTD_STATIC_LINKING_ONLY',
73 '/DZDICT_STATIC_LINKING_ONLY',
62 ])
74 ])
63 else:
75 else:
64 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
76 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
65
77
66 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
78 def preprocess(path):
67 # This can lead to duplicate declarations. So we strip this include from the
79 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
68 # preprocessor invocation.
80 # This can lead to duplicate declarations. So we strip this include from the
81 # preprocessor invocation.
82 with open(path, 'rb') as fh:
83 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
69
84
70 with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh:
85 fd, input_file = tempfile.mkstemp(suffix='.h')
71 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
86 os.write(fd, b''.join(lines))
72
87 os.close(fd)
73 fd, input_file = tempfile.mkstemp(suffix='.h')
74 os.write(fd, b''.join(lines))
75 os.close(fd)
76
88
77 args.append(input_file)
89 try:
90 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
91 output = process.communicate()[0]
92 ret = process.poll()
93 if ret:
94 raise Exception('preprocessor exited with error')
78
95
79 try:
96 return output
80 process = subprocess.Popen(args, stdout=subprocess.PIPE)
97 finally:
81 output = process.communicate()[0]
98 os.unlink(input_file)
82 ret = process.poll()
83 if ret:
84 raise Exception('preprocessor exited with error')
85 finally:
86 os.unlink(input_file)
87
99
88 def normalize_output():
100
101 def normalize_output(output):
89 lines = []
102 lines = []
90 for line in output.splitlines():
103 for line in output.splitlines():
91 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
104 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
92 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
105 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
93 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
106 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
94
107
108 if line.startswith(b'__attribute__((deprecated('):
109 continue
110 elif b'__declspec(deprecated(' in line:
111 continue
112
95 lines.append(line)
113 lines.append(line)
96
114
97 return b'\n'.join(lines)
115 return b'\n'.join(lines)
98
116
117
99 ffi = cffi.FFI()
118 ffi = cffi.FFI()
100 ffi.set_source('_zstd_cffi', '''
119 ffi.set_source('_zstd_cffi', '''
120 #include "mem.h"
101 #define ZSTD_STATIC_LINKING_ONLY
121 #define ZSTD_STATIC_LINKING_ONLY
102 #include "zstd.h"
122 #include "zstd.h"
123 #define ZDICT_STATIC_LINKING_ONLY
124 #include "pool.h"
125 #include "zdict.h"
103 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
126 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
104
127
105 ffi.cdef(normalize_output().decode('latin1'))
128 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
129
130 sources = []
131
132 for header in HEADERS:
133 preprocessed = preprocess(header)
134 sources.append(normalize_output(preprocessed))
135
136 # Do another pass over source and find constants that were preprocessed
137 # away.
138 with open(header, 'rb') as fh:
139 for line in fh:
140 line = line.strip()
141 m = DEFINE.match(line)
142 if not m:
143 continue
144
145 # The parser doesn't like some constants with complex values.
146 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
147 continue
148
149 sources.append(m.group(0) + b' ...')
150
151 ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources))
106
152
107 if __name__ == '__main__':
153 if __name__ == '__main__':
108 ffi.compile()
154 ffi.compile()
@@ -62,6 +62,7 b' setup('
62 'Programming Language :: Python :: 3.3',
62 'Programming Language :: Python :: 3.3',
63 'Programming Language :: Python :: 3.4',
63 'Programming Language :: Python :: 3.4',
64 'Programming Language :: Python :: 3.5',
64 'Programming Language :: Python :: 3.5',
65 'Programming Language :: Python :: 3.6',
65 ],
66 ],
66 keywords='zstandard zstd compression',
67 keywords='zstandard zstd compression',
67 ext_modules=extensions,
68 ext_modules=extensions,
@@ -12,6 +12,8 b" zstd_sources = ['zstd/%s' % p for p in ("
12 'common/entropy_common.c',
12 'common/entropy_common.c',
13 'common/error_private.c',
13 'common/error_private.c',
14 'common/fse_decompress.c',
14 'common/fse_decompress.c',
15 'common/pool.c',
16 'common/threading.c',
15 'common/xxhash.c',
17 'common/xxhash.c',
16 'common/zstd_common.c',
18 'common/zstd_common.c',
17 'compress/fse_compress.c',
19 'compress/fse_compress.c',
@@ -19,11 +21,13 b" zstd_sources = ['zstd/%s' % p for p in ("
19 'compress/zstd_compress.c',
21 'compress/zstd_compress.c',
20 'decompress/huf_decompress.c',
22 'decompress/huf_decompress.c',
21 'decompress/zstd_decompress.c',
23 'decompress/zstd_decompress.c',
24 'dictBuilder/cover.c',
22 'dictBuilder/divsufsort.c',
25 'dictBuilder/divsufsort.c',
23 'dictBuilder/zdict.c',
26 'dictBuilder/zdict.c',
24 )]
27 )]
25
28
26 zstd_sources_legacy = ['zstd/%s' % p for p in (
29 zstd_sources_legacy = ['zstd/%s' % p for p in (
30 'deprecated/zbuff_common.c',
27 'deprecated/zbuff_compress.c',
31 'deprecated/zbuff_compress.c',
28 'deprecated/zbuff_decompress.c',
32 'deprecated/zbuff_decompress.c',
29 'legacy/zstd_v01.c',
33 'legacy/zstd_v01.c',
@@ -63,6 +67,7 b' ext_sources = ['
63 'c-ext/decompressoriterator.c',
67 'c-ext/decompressoriterator.c',
64 'c-ext/decompressionwriter.c',
68 'c-ext/decompressionwriter.c',
65 'c-ext/dictparams.c',
69 'c-ext/dictparams.c',
70 'c-ext/frameparams.c',
66 ]
71 ]
67
72
68 zstd_depends = [
73 zstd_depends = [
@@ -1,4 +1,50 b''
1 import inspect
1 import io
2 import io
3 import types
4
5
6 def make_cffi(cls):
7 """Decorator to add CFFI versions of each test method."""
8
9 try:
10 import zstd_cffi
11 except ImportError:
12 return cls
13
14 # If CFFI version is available, dynamically construct test methods
15 # that use it.
16
17 for attr in dir(cls):
18 fn = getattr(cls, attr)
19 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
20 continue
21
22 if not fn.__name__.startswith('test_'):
23 continue
24
25 name = '%s_cffi' % fn.__name__
26
27 # Replace the "zstd" symbol with the CFFI module instance. Then copy
28 # the function object and install it in a new attribute.
29 if isinstance(fn, types.FunctionType):
30 globs = dict(fn.__globals__)
31 globs['zstd'] = zstd_cffi
32 new_fn = types.FunctionType(fn.__code__, globs, name,
33 fn.__defaults__, fn.__closure__)
34 new_method = new_fn
35 else:
36 globs = dict(fn.__func__.func_globals)
37 globs['zstd'] = zstd_cffi
38 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
39 fn.__func__.func_defaults,
40 fn.__func__.func_closure)
41 new_method = types.UnboundMethodType(new_fn, fn.im_self,
42 fn.im_class)
43
44 setattr(cls, name, new_method)
45
46 return cls
47
2
48
3 class OpCountingBytesIO(io.BytesIO):
49 class OpCountingBytesIO(io.BytesIO):
4 def __init__(self, *args, **kwargs):
50 def __init__(self, *args, **kwargs):
@@ -10,7 +10,10 b' except ImportError:'
10
10
11 import zstd
11 import zstd
12
12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14
17
15
18
16 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
@@ -19,6 +22,7 b' else:'
19 next = lambda it: it.next()
22 next = lambda it: it.next()
20
23
21
24
25 @make_cffi
22 class TestCompressor(unittest.TestCase):
26 class TestCompressor(unittest.TestCase):
23 def test_level_bounds(self):
27 def test_level_bounds(self):
24 with self.assertRaises(ValueError):
28 with self.assertRaises(ValueError):
@@ -28,18 +32,17 b' class TestCompressor(unittest.TestCase):'
28 zstd.ZstdCompressor(level=23)
32 zstd.ZstdCompressor(level=23)
29
33
30
34
35 @make_cffi
31 class TestCompressor_compress(unittest.TestCase):
36 class TestCompressor_compress(unittest.TestCase):
32 def test_compress_empty(self):
37 def test_compress_empty(self):
33 cctx = zstd.ZstdCompressor(level=1)
38 cctx = zstd.ZstdCompressor(level=1)
34 cctx.compress(b'')
39 result = cctx.compress(b'')
35
40 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
36 cctx = zstd.ZstdCompressor(level=22)
41 params = zstd.get_frame_parameters(result)
37 cctx.compress(b'')
42 self.assertEqual(params.content_size, 0)
38
43 self.assertEqual(params.window_size, 524288)
39 def test_compress_empty(self):
44 self.assertEqual(params.dict_id, 0)
40 cctx = zstd.ZstdCompressor(level=1)
45 self.assertFalse(params.has_checksum, 0)
41 self.assertEqual(cctx.compress(b''),
42 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
43
46
44 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
47 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
45 # is fixed.
48 # is fixed.
@@ -59,6 +62,13 b' class TestCompressor_compress(unittest.T'
59 self.assertEqual(len(result), 999)
62 self.assertEqual(len(result), 999)
60 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
63 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
61
64
65 # This matches the test for read_from() below.
66 cctx = zstd.ZstdCompressor(level=1)
67 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
68 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
69 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
70 b'\x02\x09\x00\x00\x6f')
71
62 def test_write_checksum(self):
72 def test_write_checksum(self):
63 cctx = zstd.ZstdCompressor(level=1)
73 cctx = zstd.ZstdCompressor(level=1)
64 no_checksum = cctx.compress(b'foobar')
74 no_checksum = cctx.compress(b'foobar')
@@ -67,6 +77,12 b' class TestCompressor_compress(unittest.T'
67
77
68 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
78 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
69
79
80 no_params = zstd.get_frame_parameters(no_checksum)
81 with_params = zstd.get_frame_parameters(with_checksum)
82
83 self.assertFalse(no_params.has_checksum)
84 self.assertTrue(with_params.has_checksum)
85
70 def test_write_content_size(self):
86 def test_write_content_size(self):
71 cctx = zstd.ZstdCompressor(level=1)
87 cctx = zstd.ZstdCompressor(level=1)
72 no_size = cctx.compress(b'foobar' * 256)
88 no_size = cctx.compress(b'foobar' * 256)
@@ -75,6 +91,11 b' class TestCompressor_compress(unittest.T'
75
91
76 self.assertEqual(len(with_size), len(no_size) + 1)
92 self.assertEqual(len(with_size), len(no_size) + 1)
77
93
94 no_params = zstd.get_frame_parameters(no_size)
95 with_params = zstd.get_frame_parameters(with_size)
96 self.assertEqual(no_params.content_size, 0)
97 self.assertEqual(with_params.content_size, 1536)
98
78 def test_no_dict_id(self):
99 def test_no_dict_id(self):
79 samples = []
100 samples = []
80 for i in range(128):
101 for i in range(128):
@@ -92,6 +113,11 b' class TestCompressor_compress(unittest.T'
92
113
93 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
114 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
94
115
116 no_params = zstd.get_frame_parameters(no_dict_id)
117 with_params = zstd.get_frame_parameters(with_dict_id)
118 self.assertEqual(no_params.dict_id, 0)
119 self.assertEqual(with_params.dict_id, 1584102229)
120
95 def test_compress_dict_multiple(self):
121 def test_compress_dict_multiple(self):
96 samples = []
122 samples = []
97 for i in range(128):
123 for i in range(128):
@@ -107,6 +133,7 b' class TestCompressor_compress(unittest.T'
107 cctx.compress(b'foo bar foobar foo bar foobar')
133 cctx.compress(b'foo bar foobar foo bar foobar')
108
134
109
135
136 @make_cffi
110 class TestCompressor_compressobj(unittest.TestCase):
137 class TestCompressor_compressobj(unittest.TestCase):
111 def test_compressobj_empty(self):
138 def test_compressobj_empty(self):
112 cctx = zstd.ZstdCompressor(level=1)
139 cctx = zstd.ZstdCompressor(level=1)
@@ -127,6 +154,12 b' class TestCompressor_compressobj(unittes'
127 self.assertEqual(len(result), 999)
154 self.assertEqual(len(result), 999)
128 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
155 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
129
156
157 params = zstd.get_frame_parameters(result)
158 self.assertEqual(params.content_size, 0)
159 self.assertEqual(params.window_size, 1048576)
160 self.assertEqual(params.dict_id, 0)
161 self.assertFalse(params.has_checksum)
162
130 def test_write_checksum(self):
163 def test_write_checksum(self):
131 cctx = zstd.ZstdCompressor(level=1)
164 cctx = zstd.ZstdCompressor(level=1)
132 cobj = cctx.compressobj()
165 cobj = cctx.compressobj()
@@ -135,6 +168,15 b' class TestCompressor_compressobj(unittes'
135 cobj = cctx.compressobj()
168 cobj = cctx.compressobj()
136 with_checksum = cobj.compress(b'foobar') + cobj.flush()
169 with_checksum = cobj.compress(b'foobar') + cobj.flush()
137
170
171 no_params = zstd.get_frame_parameters(no_checksum)
172 with_params = zstd.get_frame_parameters(with_checksum)
173 self.assertEqual(no_params.content_size, 0)
174 self.assertEqual(with_params.content_size, 0)
175 self.assertEqual(no_params.dict_id, 0)
176 self.assertEqual(with_params.dict_id, 0)
177 self.assertFalse(no_params.has_checksum)
178 self.assertTrue(with_params.has_checksum)
179
138 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
180 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
139
181
140 def test_write_content_size(self):
182 def test_write_content_size(self):
@@ -145,6 +187,15 b' class TestCompressor_compressobj(unittes'
145 cobj = cctx.compressobj(size=len(b'foobar' * 256))
187 cobj = cctx.compressobj(size=len(b'foobar' * 256))
146 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
188 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
147
189
190 no_params = zstd.get_frame_parameters(no_size)
191 with_params = zstd.get_frame_parameters(with_size)
192 self.assertEqual(no_params.content_size, 0)
193 self.assertEqual(with_params.content_size, 1536)
194 self.assertEqual(no_params.dict_id, 0)
195 self.assertEqual(with_params.dict_id, 0)
196 self.assertFalse(no_params.has_checksum)
197 self.assertFalse(with_params.has_checksum)
198
148 self.assertEqual(len(with_size), len(no_size) + 1)
199 self.assertEqual(len(with_size), len(no_size) + 1)
149
200
150 def test_compress_after_finished(self):
201 def test_compress_after_finished(self):
@@ -187,6 +238,7 b' class TestCompressor_compressobj(unittes'
187 self.assertEqual(header, b'\x01\x00\x00')
238 self.assertEqual(header, b'\x01\x00\x00')
188
239
189
240
241 @make_cffi
190 class TestCompressor_copy_stream(unittest.TestCase):
242 class TestCompressor_copy_stream(unittest.TestCase):
191 def test_no_read(self):
243 def test_no_read(self):
192 source = object()
244 source = object()
@@ -229,6 +281,12 b' class TestCompressor_copy_stream(unittes'
229 self.assertEqual(r, 255 * 16384)
281 self.assertEqual(r, 255 * 16384)
230 self.assertEqual(w, 999)
282 self.assertEqual(w, 999)
231
283
284 params = zstd.get_frame_parameters(dest.getvalue())
285 self.assertEqual(params.content_size, 0)
286 self.assertEqual(params.window_size, 1048576)
287 self.assertEqual(params.dict_id, 0)
288 self.assertFalse(params.has_checksum)
289
232 def test_write_checksum(self):
290 def test_write_checksum(self):
233 source = io.BytesIO(b'foobar')
291 source = io.BytesIO(b'foobar')
234 no_checksum = io.BytesIO()
292 no_checksum = io.BytesIO()
@@ -244,6 +302,15 b' class TestCompressor_copy_stream(unittes'
244 self.assertEqual(len(with_checksum.getvalue()),
302 self.assertEqual(len(with_checksum.getvalue()),
245 len(no_checksum.getvalue()) + 4)
303 len(no_checksum.getvalue()) + 4)
246
304
305 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
306 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
307 self.assertEqual(no_params.content_size, 0)
308 self.assertEqual(with_params.content_size, 0)
309 self.assertEqual(no_params.dict_id, 0)
310 self.assertEqual(with_params.dict_id, 0)
311 self.assertFalse(no_params.has_checksum)
312 self.assertTrue(with_params.has_checksum)
313
247 def test_write_content_size(self):
314 def test_write_content_size(self):
248 source = io.BytesIO(b'foobar' * 256)
315 source = io.BytesIO(b'foobar' * 256)
249 no_size = io.BytesIO()
316 no_size = io.BytesIO()
@@ -268,6 +335,15 b' class TestCompressor_copy_stream(unittes'
268 self.assertEqual(len(with_size.getvalue()),
335 self.assertEqual(len(with_size.getvalue()),
269 len(no_size.getvalue()) + 1)
336 len(no_size.getvalue()) + 1)
270
337
338 no_params = zstd.get_frame_parameters(no_size.getvalue())
339 with_params = zstd.get_frame_parameters(with_size.getvalue())
340 self.assertEqual(no_params.content_size, 0)
341 self.assertEqual(with_params.content_size, 1536)
342 self.assertEqual(no_params.dict_id, 0)
343 self.assertEqual(with_params.dict_id, 0)
344 self.assertFalse(no_params.has_checksum)
345 self.assertFalse(with_params.has_checksum)
346
271 def test_read_write_size(self):
347 def test_read_write_size(self):
272 source = OpCountingBytesIO(b'foobarfoobar')
348 source = OpCountingBytesIO(b'foobarfoobar')
273 dest = OpCountingBytesIO()
349 dest = OpCountingBytesIO()
@@ -288,18 +364,25 b' def compress(data, level):'
288 return buffer.getvalue()
364 return buffer.getvalue()
289
365
290
366
367 @make_cffi
291 class TestCompressor_write_to(unittest.TestCase):
368 class TestCompressor_write_to(unittest.TestCase):
292 def test_empty(self):
369 def test_empty(self):
293 self.assertEqual(compress(b'', 1),
370 result = compress(b'', 1)
294 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
371 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
372
373 params = zstd.get_frame_parameters(result)
374 self.assertEqual(params.content_size, 0)
375 self.assertEqual(params.window_size, 524288)
376 self.assertEqual(params.dict_id, 0)
377 self.assertFalse(params.has_checksum)
295
378
296 def test_multiple_compress(self):
379 def test_multiple_compress(self):
297 buffer = io.BytesIO()
380 buffer = io.BytesIO()
298 cctx = zstd.ZstdCompressor(level=5)
381 cctx = zstd.ZstdCompressor(level=5)
299 with cctx.write_to(buffer) as compressor:
382 with cctx.write_to(buffer) as compressor:
300 compressor.write(b'foo')
383 self.assertEqual(compressor.write(b'foo'), 0)
301 compressor.write(b'bar')
384 self.assertEqual(compressor.write(b'bar'), 0)
302 compressor.write(b'x' * 8192)
385 self.assertEqual(compressor.write(b'x' * 8192), 0)
303
386
304 result = buffer.getvalue()
387 result = buffer.getvalue()
305 self.assertEqual(result,
388 self.assertEqual(result,
@@ -318,11 +401,23 b' class TestCompressor_write_to(unittest.T'
318 buffer = io.BytesIO()
401 buffer = io.BytesIO()
319 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
402 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
320 with cctx.write_to(buffer) as compressor:
403 with cctx.write_to(buffer) as compressor:
321 compressor.write(b'foo')
404 self.assertEqual(compressor.write(b'foo'), 0)
322 compressor.write(b'bar')
405 self.assertEqual(compressor.write(b'bar'), 0)
323 compressor.write(b'foo' * 16384)
406 self.assertEqual(compressor.write(b'foo' * 16384), 634)
324
407
325 compressed = buffer.getvalue()
408 compressed = buffer.getvalue()
409
410 params = zstd.get_frame_parameters(compressed)
411 self.assertEqual(params.content_size, 0)
412 self.assertEqual(params.window_size, 1024)
413 self.assertEqual(params.dict_id, d.dict_id())
414 self.assertFalse(params.has_checksum)
415
416 self.assertEqual(compressed[0:32],
417 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
418 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
419 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
420
326 h = hashlib.sha1(compressed).hexdigest()
421 h = hashlib.sha1(compressed).hexdigest()
327 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
422 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
328
423
@@ -332,11 +427,18 b' class TestCompressor_write_to(unittest.T'
332 buffer = io.BytesIO()
427 buffer = io.BytesIO()
333 cctx = zstd.ZstdCompressor(compression_params=params)
428 cctx = zstd.ZstdCompressor(compression_params=params)
334 with cctx.write_to(buffer) as compressor:
429 with cctx.write_to(buffer) as compressor:
335 compressor.write(b'foo')
430 self.assertEqual(compressor.write(b'foo'), 0)
336 compressor.write(b'bar')
431 self.assertEqual(compressor.write(b'bar'), 0)
337 compressor.write(b'foobar' * 16384)
432 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
338
433
339 compressed = buffer.getvalue()
434 compressed = buffer.getvalue()
435
436 params = zstd.get_frame_parameters(compressed)
437 self.assertEqual(params.content_size, 0)
438 self.assertEqual(params.window_size, 1048576)
439 self.assertEqual(params.dict_id, 0)
440 self.assertFalse(params.has_checksum)
441
340 h = hashlib.sha1(compressed).hexdigest()
442 h = hashlib.sha1(compressed).hexdigest()
341 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
443 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
342
444
@@ -344,12 +446,21 b' class TestCompressor_write_to(unittest.T'
344 no_checksum = io.BytesIO()
446 no_checksum = io.BytesIO()
345 cctx = zstd.ZstdCompressor(level=1)
447 cctx = zstd.ZstdCompressor(level=1)
346 with cctx.write_to(no_checksum) as compressor:
448 with cctx.write_to(no_checksum) as compressor:
347 compressor.write(b'foobar')
449 self.assertEqual(compressor.write(b'foobar'), 0)
348
450
349 with_checksum = io.BytesIO()
451 with_checksum = io.BytesIO()
350 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
452 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
351 with cctx.write_to(with_checksum) as compressor:
453 with cctx.write_to(with_checksum) as compressor:
352 compressor.write(b'foobar')
454 self.assertEqual(compressor.write(b'foobar'), 0)
455
456 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
457 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
458 self.assertEqual(no_params.content_size, 0)
459 self.assertEqual(with_params.content_size, 0)
460 self.assertEqual(no_params.dict_id, 0)
461 self.assertEqual(with_params.dict_id, 0)
462 self.assertFalse(no_params.has_checksum)
463 self.assertTrue(with_params.has_checksum)
353
464
354 self.assertEqual(len(with_checksum.getvalue()),
465 self.assertEqual(len(with_checksum.getvalue()),
355 len(no_checksum.getvalue()) + 4)
466 len(no_checksum.getvalue()) + 4)
@@ -358,12 +469,12 b' class TestCompressor_write_to(unittest.T'
358 no_size = io.BytesIO()
469 no_size = io.BytesIO()
359 cctx = zstd.ZstdCompressor(level=1)
470 cctx = zstd.ZstdCompressor(level=1)
360 with cctx.write_to(no_size) as compressor:
471 with cctx.write_to(no_size) as compressor:
361 compressor.write(b'foobar' * 256)
472 self.assertEqual(compressor.write(b'foobar' * 256), 0)
362
473
363 with_size = io.BytesIO()
474 with_size = io.BytesIO()
364 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
475 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
365 with cctx.write_to(with_size) as compressor:
476 with cctx.write_to(with_size) as compressor:
366 compressor.write(b'foobar' * 256)
477 self.assertEqual(compressor.write(b'foobar' * 256), 0)
367
478
368 # Source size is not known in streaming mode, so header not
479 # Source size is not known in streaming mode, so header not
369 # written.
480 # written.
@@ -373,7 +484,16 b' class TestCompressor_write_to(unittest.T'
373 # Declaring size will write the header.
484 # Declaring size will write the header.
374 with_size = io.BytesIO()
485 with_size = io.BytesIO()
375 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
486 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
376 compressor.write(b'foobar' * 256)
487 self.assertEqual(compressor.write(b'foobar' * 256), 0)
488
489 no_params = zstd.get_frame_parameters(no_size.getvalue())
490 with_params = zstd.get_frame_parameters(with_size.getvalue())
491 self.assertEqual(no_params.content_size, 0)
492 self.assertEqual(with_params.content_size, 1536)
493 self.assertEqual(no_params.dict_id, 0)
494 self.assertEqual(with_params.dict_id, 0)
495 self.assertFalse(no_params.has_checksum)
496 self.assertFalse(with_params.has_checksum)
377
497
378 self.assertEqual(len(with_size.getvalue()),
498 self.assertEqual(len(with_size.getvalue()),
379 len(no_size.getvalue()) + 1)
499 len(no_size.getvalue()) + 1)
@@ -390,12 +510,21 b' class TestCompressor_write_to(unittest.T'
390 with_dict_id = io.BytesIO()
510 with_dict_id = io.BytesIO()
391 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
511 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
392 with cctx.write_to(with_dict_id) as compressor:
512 with cctx.write_to(with_dict_id) as compressor:
393 compressor.write(b'foobarfoobar')
513 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
394
514
395 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
515 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
396 no_dict_id = io.BytesIO()
516 no_dict_id = io.BytesIO()
397 with cctx.write_to(no_dict_id) as compressor:
517 with cctx.write_to(no_dict_id) as compressor:
398 compressor.write(b'foobarfoobar')
518 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
519
520 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
521 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
522 self.assertEqual(no_params.content_size, 0)
523 self.assertEqual(with_params.content_size, 0)
524 self.assertEqual(no_params.dict_id, 0)
525 self.assertEqual(with_params.dict_id, d.dict_id())
526 self.assertFalse(no_params.has_checksum)
527 self.assertFalse(with_params.has_checksum)
399
528
400 self.assertEqual(len(with_dict_id.getvalue()),
529 self.assertEqual(len(with_dict_id.getvalue()),
401 len(no_dict_id.getvalue()) + 4)
530 len(no_dict_id.getvalue()) + 4)
@@ -412,9 +541,9 b' class TestCompressor_write_to(unittest.T'
412 cctx = zstd.ZstdCompressor(level=3)
541 cctx = zstd.ZstdCompressor(level=3)
413 dest = OpCountingBytesIO()
542 dest = OpCountingBytesIO()
414 with cctx.write_to(dest, write_size=1) as compressor:
543 with cctx.write_to(dest, write_size=1) as compressor:
415 compressor.write(b'foo')
544 self.assertEqual(compressor.write(b'foo'), 0)
416 compressor.write(b'bar')
545 self.assertEqual(compressor.write(b'bar'), 0)
417 compressor.write(b'foobar')
546 self.assertEqual(compressor.write(b'foobar'), 0)
418
547
419 self.assertEqual(len(dest.getvalue()), dest._write_count)
548 self.assertEqual(len(dest.getvalue()), dest._write_count)
420
549
@@ -422,15 +551,15 b' class TestCompressor_write_to(unittest.T'
422 cctx = zstd.ZstdCompressor(level=3)
551 cctx = zstd.ZstdCompressor(level=3)
423 dest = OpCountingBytesIO()
552 dest = OpCountingBytesIO()
424 with cctx.write_to(dest) as compressor:
553 with cctx.write_to(dest) as compressor:
425 compressor.write(b'foo')
554 self.assertEqual(compressor.write(b'foo'), 0)
426 self.assertEqual(dest._write_count, 0)
555 self.assertEqual(dest._write_count, 0)
427 compressor.flush()
556 self.assertEqual(compressor.flush(), 12)
428 self.assertEqual(dest._write_count, 1)
557 self.assertEqual(dest._write_count, 1)
429 compressor.write(b'bar')
558 self.assertEqual(compressor.write(b'bar'), 0)
430 self.assertEqual(dest._write_count, 1)
559 self.assertEqual(dest._write_count, 1)
431 compressor.flush()
560 self.assertEqual(compressor.flush(), 6)
432 self.assertEqual(dest._write_count, 2)
561 self.assertEqual(dest._write_count, 2)
433 compressor.write(b'baz')
562 self.assertEqual(compressor.write(b'baz'), 0)
434
563
435 self.assertEqual(dest._write_count, 3)
564 self.assertEqual(dest._write_count, 3)
436
565
@@ -438,10 +567,10 b' class TestCompressor_write_to(unittest.T'
438 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
567 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
439 dest = OpCountingBytesIO()
568 dest = OpCountingBytesIO()
440 with cctx.write_to(dest) as compressor:
569 with cctx.write_to(dest) as compressor:
441 compressor.write(b'foobar' * 8192)
570 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
442 count = dest._write_count
571 count = dest._write_count
443 offset = dest.tell()
572 offset = dest.tell()
444 compressor.flush()
573 self.assertEqual(compressor.flush(), 23)
445 self.assertGreater(dest._write_count, count)
574 self.assertGreater(dest._write_count, count)
446 self.assertGreater(dest.tell(), offset)
575 self.assertGreater(dest.tell(), offset)
447 offset = dest.tell()
576 offset = dest.tell()
@@ -456,18 +585,22 b' class TestCompressor_write_to(unittest.T'
456 self.assertEqual(header, b'\x01\x00\x00')
585 self.assertEqual(header, b'\x01\x00\x00')
457
586
458
587
588 @make_cffi
459 class TestCompressor_read_from(unittest.TestCase):
589 class TestCompressor_read_from(unittest.TestCase):
460 def test_type_validation(self):
590 def test_type_validation(self):
461 cctx = zstd.ZstdCompressor()
591 cctx = zstd.ZstdCompressor()
462
592
463 # Object with read() works.
593 # Object with read() works.
464 cctx.read_from(io.BytesIO())
594 for chunk in cctx.read_from(io.BytesIO()):
595 pass
465
596
466 # Buffer protocol works.
597 # Buffer protocol works.
467 cctx.read_from(b'foobar')
598 for chunk in cctx.read_from(b'foobar'):
599 pass
468
600
469 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
601 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
470 cctx.read_from(True)
602 for chunk in cctx.read_from(True):
603 pass
471
604
472 def test_read_empty(self):
605 def test_read_empty(self):
473 cctx = zstd.ZstdCompressor(level=1)
606 cctx = zstd.ZstdCompressor(level=1)
@@ -521,6 +654,12 b' class TestCompressor_read_from(unittest.'
521 # We should get the same output as the one-shot compression mechanism.
654 # We should get the same output as the one-shot compression mechanism.
522 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
655 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
523
656
657 params = zstd.get_frame_parameters(b''.join(chunks))
658 self.assertEqual(params.content_size, 0)
659 self.assertEqual(params.window_size, 262144)
660 self.assertEqual(params.dict_id, 0)
661 self.assertFalse(params.has_checksum)
662
524 # Now check the buffer protocol.
663 # Now check the buffer protocol.
525 it = cctx.read_from(source.getvalue())
664 it = cctx.read_from(source.getvalue())
526 chunks = list(it)
665 chunks = list(it)
@@ -13,6 +13,12 b' except ImportError:'
13
13
14 import zstd
14 import zstd
15
15
16 from . common import (
17 make_cffi,
18 )
19
20
21 @make_cffi
16 class TestCompressionParameters(unittest.TestCase):
22 class TestCompressionParameters(unittest.TestCase):
17 def test_init_bad_arg_type(self):
23 def test_init_bad_arg_type(self):
18 with self.assertRaises(TypeError):
24 with self.assertRaises(TypeError):
@@ -42,7 +48,81 b' class TestCompressionParameters(unittest'
42 p = zstd.get_compression_parameters(1)
48 p = zstd.get_compression_parameters(1)
43 self.assertIsInstance(p, zstd.CompressionParameters)
49 self.assertIsInstance(p, zstd.CompressionParameters)
44
50
45 self.assertEqual(p[0], 19)
51 self.assertEqual(p.window_log, 19)
52
53 def test_members(self):
54 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
55 self.assertEqual(p.window_log, 10)
56 self.assertEqual(p.chain_log, 6)
57 self.assertEqual(p.hash_log, 7)
58 self.assertEqual(p.search_log, 4)
59 self.assertEqual(p.search_length, 5)
60 self.assertEqual(p.target_length, 8)
61 self.assertEqual(p.strategy, 1)
62
63
64 @make_cffi
65 class TestFrameParameters(unittest.TestCase):
66 def test_invalid_type(self):
67 with self.assertRaises(TypeError):
68 zstd.get_frame_parameters(None)
69
70 with self.assertRaises(TypeError):
71 zstd.get_frame_parameters(u'foobarbaz')
72
73 def test_invalid_input_sizes(self):
74 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
75 zstd.get_frame_parameters(b'')
76
77 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
78 zstd.get_frame_parameters(zstd.FRAME_HEADER)
79
80 def test_invalid_frame(self):
81 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
82 zstd.get_frame_parameters(b'foobarbaz')
83
84 def test_attributes(self):
85 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
86 self.assertEqual(params.content_size, 0)
87 self.assertEqual(params.window_size, 1024)
88 self.assertEqual(params.dict_id, 0)
89 self.assertFalse(params.has_checksum)
90
91 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
92 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
93 self.assertEqual(params.content_size, 0)
94 self.assertEqual(params.window_size, 1024)
95 self.assertEqual(params.dict_id, 255)
96 self.assertFalse(params.has_checksum)
97
98 # Lowest 3rd bit indicates if checksum is present.
99 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
100 self.assertEqual(params.content_size, 0)
101 self.assertEqual(params.window_size, 1024)
102 self.assertEqual(params.dict_id, 0)
103 self.assertTrue(params.has_checksum)
104
105 # Upper 2 bits indicate content size.
106 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
107 self.assertEqual(params.content_size, 511)
108 self.assertEqual(params.window_size, 1024)
109 self.assertEqual(params.dict_id, 0)
110 self.assertFalse(params.has_checksum)
111
112 # Window descriptor is 2nd byte after frame header.
113 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
114 self.assertEqual(params.content_size, 0)
115 self.assertEqual(params.window_size, 262144)
116 self.assertEqual(params.dict_id, 0)
117 self.assertFalse(params.has_checksum)
118
119 # Set multiple things.
120 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
121 self.assertEqual(params.content_size, 272)
122 self.assertEqual(params.window_size, 262144)
123 self.assertEqual(params.dict_id, 15)
124 self.assertTrue(params.has_checksum)
125
46
126
47 if hypothesis:
127 if hypothesis:
48 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
128 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
@@ -65,6 +145,8 b' if hypothesis:'
65 zstd.STRATEGY_BTLAZY2,
145 zstd.STRATEGY_BTLAZY2,
66 zstd.STRATEGY_BTOPT))
146 zstd.STRATEGY_BTOPT))
67
147
148
149 @make_cffi
68 class TestCompressionParametersHypothesis(unittest.TestCase):
150 class TestCompressionParametersHypothesis(unittest.TestCase):
69 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
151 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
70 s_searchlength, s_targetlength, s_strategy)
152 s_searchlength, s_targetlength, s_strategy)
@@ -73,9 +155,6 b' if hypothesis:'
73 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
155 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
74 searchlog, searchlength,
156 searchlog, searchlength,
75 targetlength, strategy)
157 targetlength, strategy)
76 self.assertEqual(tuple(p),
77 (windowlog, chainlog, hashlog, searchlog,
78 searchlength, targetlength, strategy))
79
158
80 # Verify we can instantiate a compressor with the supplied values.
159 # Verify we can instantiate a compressor with the supplied values.
81 # ZSTD_checkCParams moves the goal posts on us from what's advertised
160 # ZSTD_checkCParams moves the goal posts on us from what's advertised
@@ -10,7 +10,10 b' except ImportError:'
10
10
11 import zstd
11 import zstd
12
12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14
17
15
18
16 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
@@ -19,6 +22,7 b' else:'
19 next = lambda it: it.next()
22 next = lambda it: it.next()
20
23
21
24
25 @make_cffi
22 class TestDecompressor_decompress(unittest.TestCase):
26 class TestDecompressor_decompress(unittest.TestCase):
23 def test_empty_input(self):
27 def test_empty_input(self):
24 dctx = zstd.ZstdDecompressor()
28 dctx = zstd.ZstdDecompressor()
@@ -119,6 +123,7 b' class TestDecompressor_decompress(unitte'
119 self.assertEqual(decompressed, sources[i])
123 self.assertEqual(decompressed, sources[i])
120
124
121
125
126 @make_cffi
122 class TestDecompressor_copy_stream(unittest.TestCase):
127 class TestDecompressor_copy_stream(unittest.TestCase):
123 def test_no_read(self):
128 def test_no_read(self):
124 source = object()
129 source = object()
@@ -180,6 +185,7 b' class TestDecompressor_copy_stream(unitt'
180 self.assertEqual(dest._write_count, len(dest.getvalue()))
185 self.assertEqual(dest._write_count, len(dest.getvalue()))
181
186
182
187
188 @make_cffi
183 class TestDecompressor_decompressobj(unittest.TestCase):
189 class TestDecompressor_decompressobj(unittest.TestCase):
184 def test_simple(self):
190 def test_simple(self):
185 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
@@ -207,6 +213,7 b' def decompress_via_writer(data):'
207 return buffer.getvalue()
213 return buffer.getvalue()
208
214
209
215
216 @make_cffi
210 class TestDecompressor_write_to(unittest.TestCase):
217 class TestDecompressor_write_to(unittest.TestCase):
211 def test_empty_roundtrip(self):
218 def test_empty_roundtrip(self):
212 cctx = zstd.ZstdCompressor()
219 cctx = zstd.ZstdCompressor()
@@ -256,14 +263,14 b' class TestDecompressor_write_to(unittest'
256 buffer = io.BytesIO()
263 buffer = io.BytesIO()
257 cctx = zstd.ZstdCompressor(dict_data=d)
264 cctx = zstd.ZstdCompressor(dict_data=d)
258 with cctx.write_to(buffer) as compressor:
265 with cctx.write_to(buffer) as compressor:
259 compressor.write(orig)
266 self.assertEqual(compressor.write(orig), 1544)
260
267
261 compressed = buffer.getvalue()
268 compressed = buffer.getvalue()
262 buffer = io.BytesIO()
269 buffer = io.BytesIO()
263
270
264 dctx = zstd.ZstdDecompressor(dict_data=d)
271 dctx = zstd.ZstdDecompressor(dict_data=d)
265 with dctx.write_to(buffer) as decompressor:
272 with dctx.write_to(buffer) as decompressor:
266 decompressor.write(compressed)
273 self.assertEqual(decompressor.write(compressed), len(orig))
267
274
268 self.assertEqual(buffer.getvalue(), orig)
275 self.assertEqual(buffer.getvalue(), orig)
269
276
@@ -291,6 +298,7 b' class TestDecompressor_write_to(unittest'
291 self.assertEqual(dest._write_count, len(dest.getvalue()))
298 self.assertEqual(dest._write_count, len(dest.getvalue()))
292
299
293
300
301 @make_cffi
294 class TestDecompressor_read_from(unittest.TestCase):
302 class TestDecompressor_read_from(unittest.TestCase):
295 def test_type_validation(self):
303 def test_type_validation(self):
296 dctx = zstd.ZstdDecompressor()
304 dctx = zstd.ZstdDecompressor()
@@ -302,7 +310,7 b' class TestDecompressor_read_from(unittes'
302 dctx.read_from(b'foobar')
310 dctx.read_from(b'foobar')
303
311
304 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
312 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
305 dctx.read_from(True)
313 b''.join(dctx.read_from(True))
306
314
307 def test_empty_input(self):
315 def test_empty_input(self):
308 dctx = zstd.ZstdDecompressor()
316 dctx = zstd.ZstdDecompressor()
@@ -351,7 +359,7 b' class TestDecompressor_read_from(unittes'
351 dctx = zstd.ZstdDecompressor()
359 dctx = zstd.ZstdDecompressor()
352
360
353 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
361 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
354 dctx.read_from(b'', skip_bytes=1, read_size=1)
362 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
355
363
356 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
364 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
357 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
365 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
@@ -476,3 +484,94 b' class TestDecompressor_read_from(unittes'
476 self.assertEqual(len(chunk), 1)
484 self.assertEqual(len(chunk), 1)
477
485
478 self.assertEqual(source._read_count, len(source.getvalue()))
486 self.assertEqual(source._read_count, len(source.getvalue()))
487
488
489 @make_cffi
490 class TestDecompressor_content_dict_chain(unittest.TestCase):
491 def test_bad_inputs_simple(self):
492 dctx = zstd.ZstdDecompressor()
493
494 with self.assertRaises(TypeError):
495 dctx.decompress_content_dict_chain(b'foo')
496
497 with self.assertRaises(TypeError):
498 dctx.decompress_content_dict_chain((b'foo', b'bar'))
499
500 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
501 dctx.decompress_content_dict_chain([])
502
503 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
504 dctx.decompress_content_dict_chain([u'foo'])
505
506 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
507 dctx.decompress_content_dict_chain([True])
508
509 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
510 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
511
512 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
513 dctx.decompress_content_dict_chain([b'foo' * 8])
514
515 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
516
517 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
518 dctx.decompress_content_dict_chain([no_size])
519
520 # Corrupt first frame.
521 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
522 frame = frame[0:12] + frame[15:]
523 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
524 dctx.decompress_content_dict_chain([frame])
525
526 def test_bad_subsequent_input(self):
527 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
528
529 dctx = zstd.ZstdDecompressor()
530
531 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
532 dctx.decompress_content_dict_chain([initial, u'foo'])
533
534 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
535 dctx.decompress_content_dict_chain([initial, None])
536
537 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
538 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
539
540 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
541 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
542
543 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
544
545 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
546 dctx.decompress_content_dict_chain([initial, no_size])
547
548 # Corrupt second frame.
549 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
550 frame = cctx.compress(b'bar' * 64)
551 frame = frame[0:12] + frame[15:]
552
553 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
554 dctx.decompress_content_dict_chain([initial, frame])
555
556 def test_simple(self):
557 original = [
558 b'foo' * 64,
559 b'foobar' * 64,
560 b'baz' * 64,
561 b'foobaz' * 64,
562 b'foobarbaz' * 64,
563 ]
564
565 chunks = []
566 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
567 for i, chunk in enumerate(original[1:]):
568 d = zstd.ZstdCompressionDict(original[i])
569 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
570 chunks.append(cctx.compress(chunk))
571
572 for i in range(1, len(original)):
573 chain = chunks[0:i]
574 expected = original[i - 1]
575 dctx = zstd.ZstdDecompressor()
576 decompressed = dctx.decompress_content_dict_chain(chain)
577 self.assertEqual(decompressed, expected)
@@ -5,7 +5,12 b' except ImportError:'
5
5
6 import zstd
6 import zstd
7
7
8 from . common import (
9 make_cffi,
10 )
8
11
12
13 @make_cffi
9 class TestSizes(unittest.TestCase):
14 class TestSizes(unittest.TestCase):
10 def test_decompression_size(self):
15 def test_decompression_size(self):
11 size = zstd.estimate_decompression_context_size()
16 size = zstd.estimate_decompression_context_size()
@@ -7,9 +7,15 b' except ImportError:'
7
7
8 import zstd
8 import zstd
9
9
10 from . common import (
11 make_cffi,
12 )
13
14
15 @make_cffi
10 class TestModuleAttributes(unittest.TestCase):
16 class TestModuleAttributes(unittest.TestCase):
11 def test_version(self):
17 def test_version(self):
12 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 2))
18 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3))
13
19
14 def test_constants(self):
20 def test_constants(self):
15 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -45,4 +51,4 b' class TestModuleAttributes(unittest.Test'
45 )
51 )
46
52
47 for a in attrs:
53 for a in attrs:
48 self.assertTrue(hasattr(zstd, a))
54 self.assertTrue(hasattr(zstd, a), a)
@@ -13,10 +13,14 b' except ImportError:'
13
13
14 import zstd
14 import zstd
15
15
16 from .common import (
17 make_cffi,
18 )
16
19
17 compression_levels = strategies.integers(min_value=1, max_value=22)
20 compression_levels = strategies.integers(min_value=1, max_value=22)
18
21
19
22
23 @make_cffi
20 class TestRoundTrip(unittest.TestCase):
24 class TestRoundTrip(unittest.TestCase):
21 @hypothesis.given(strategies.binary(), compression_levels)
25 @hypothesis.given(strategies.binary(), compression_levels)
22 def test_compress_write_to(self, data, level):
26 def test_compress_write_to(self, data, level):
@@ -7,6 +7,9 b' except ImportError:'
7
7
8 import zstd
8 import zstd
9
9
10 from . common import (
11 make_cffi,
12 )
10
13
11 if sys.version_info[0] >= 3:
14 if sys.version_info[0] >= 3:
12 int_type = int
15 int_type = int
@@ -14,6 +17,7 b' else:'
14 int_type = long
17 int_type = long
15
18
16
19
20 @make_cffi
17 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(unittest.TestCase):
18 def test_no_args(self):
22 def test_no_args(self):
19 with self.assertRaises(TypeError):
23 with self.assertRaises(TypeError):
@@ -34,6 +34,11 b' PyDoc_STRVAR(get_compression_parameters_'
34 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
34 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
35 "optional input size and dictionary size");
35 "optional input size and dictionary size");
36
36
37 PyDoc_STRVAR(get_frame_parameters__doc__,
38 "get_frame_parameters(data)\n"
39 "\n"
40 "Obtains a ``FrameParameters`` instance by parsing data.\n");
41
37 PyDoc_STRVAR(train_dictionary__doc__,
42 PyDoc_STRVAR(train_dictionary__doc__,
38 "train_dictionary(dict_size, samples)\n"
43 "train_dictionary(dict_size, samples)\n"
39 "\n"
44 "\n"
@@ -53,6 +58,8 b' static PyMethodDef zstd_methods[] = {'
53 METH_NOARGS, estimate_decompression_context_size__doc__ },
58 METH_NOARGS, estimate_decompression_context_size__doc__ },
54 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
59 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
55 METH_VARARGS, get_compression_parameters__doc__ },
60 METH_VARARGS, get_compression_parameters__doc__ },
61 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
62 METH_VARARGS, get_frame_parameters__doc__ },
56 { "train_dictionary", (PyCFunction)train_dictionary,
63 { "train_dictionary", (PyCFunction)train_dictionary,
57 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
64 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
58 { NULL, NULL }
65 { NULL, NULL }
@@ -70,6 +77,7 b' void decompressor_module_init(PyObject* '
70 void decompressobj_module_init(PyObject* mod);
77 void decompressobj_module_init(PyObject* mod);
71 void decompressionwriter_module_init(PyObject* mod);
78 void decompressionwriter_module_init(PyObject* mod);
72 void decompressoriterator_module_init(PyObject* mod);
79 void decompressoriterator_module_init(PyObject* mod);
80 void frameparams_module_init(PyObject* mod);
73
81
74 void zstd_module_init(PyObject* m) {
82 void zstd_module_init(PyObject* m) {
75 /* python-zstandard relies on unstable zstd C API features. This means
83 /* python-zstandard relies on unstable zstd C API features. This means
@@ -87,7 +95,7 b' void zstd_module_init(PyObject* m) {'
87 We detect this mismatch here and refuse to load the module if this
95 We detect this mismatch here and refuse to load the module if this
88 scenario is detected.
96 scenario is detected.
89 */
97 */
90 if (ZSTD_VERSION_NUMBER != 10102 || ZSTD_versionNumber() != 10102) {
98 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
91 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
99 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
92 return;
100 return;
93 }
101 }
@@ -104,6 +112,7 b' void zstd_module_init(PyObject* m) {'
104 decompressobj_module_init(m);
112 decompressobj_module_init(m);
105 decompressionwriter_module_init(m);
113 decompressionwriter_module_init(m);
106 decompressoriterator_module_init(m);
114 decompressoriterator_module_init(m);
115 frameparams_module_init(m);
107 }
116 }
108
117
109 #if PY_MAJOR_VERSION >= 3
118 #if PY_MAJOR_VERSION >= 3
@@ -39,7 +39,7 b' extern "C" {'
39 #endif
39 #endif
40
40
41 /* code only tested on 32 and 64 bits systems */
41 /* code only tested on 32 and 64 bits systems */
42 #define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
42 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
44
44
45
45
@@ -43,10 +43,6 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t '
43 * provides error code string from enum */
43 * provides error code string from enum */
44 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
44 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
45
45
46 /* --- ZBUFF Error Management (deprecated) --- */
47 unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
48 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
49
50
46
51 /*=**************************************************************
47 /*=**************************************************************
52 * Custom allocator
48 * Custom allocator
@@ -18,6 +18,20 b' extern "C" {'
18 #include <stddef.h> /* size_t */
18 #include <stddef.h> /* size_t */
19
19
20
20
21 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #else
25 # define ZSTDERRORLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
30 # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
31 #else
32 # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
33 #endif
34
21 /*-****************************************
35 /*-****************************************
22 * error codes list
36 * error codes list
23 ******************************************/
37 ******************************************/
@@ -49,8 +63,8 b' typedef enum {'
49 /*! ZSTD_getErrorCode() :
63 /*! ZSTD_getErrorCode() :
50 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
64 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
51 which can be used to compare directly with enum list published into "error_public.h" */
65 which can be used to compare directly with enum list published into "error_public.h" */
52 ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
66 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
53 const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
67 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
54
68
55
69
56 #if defined (__cplusplus)
70 #if defined (__cplusplus)
@@ -267,4 +267,13 b' MEM_STATIC U32 ZSTD_highbit32(U32 val)'
267 }
267 }
268
268
269
269
270 /* hidden functions */
271
272 /* ZSTD_invalidateRepCodes() :
273 * ensures next compression will not use repcodes from previous block.
274 * Note : only works with regular variant;
275 * do not use with extDict variant ! */
276 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
277
278
270 #endif /* ZSTD_CCOMMON_H_MODULE */
279 #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -51,8 +51,7 b' static void ZSTD_resetSeqStore(seqStore_'
51 /*-*************************************
51 /*-*************************************
52 * Context memory management
52 * Context memory management
53 ***************************************/
53 ***************************************/
54 struct ZSTD_CCtx_s
54 struct ZSTD_CCtx_s {
55 {
56 const BYTE* nextSrc; /* next block here to continue on current prefix */
55 const BYTE* nextSrc; /* next block here to continue on current prefix */
57 const BYTE* base; /* All regular indexes relative to this position */
56 const BYTE* base; /* All regular indexes relative to this position */
58 const BYTE* dictBase; /* extDict indexes relative to this position */
57 const BYTE* dictBase; /* extDict indexes relative to this position */
@@ -61,10 +60,11 b' struct ZSTD_CCtx_s'
61 U32 nextToUpdate; /* index from which to continue dictionary update */
60 U32 nextToUpdate; /* index from which to continue dictionary update */
62 U32 nextToUpdate3; /* index from which to continue dictionary update */
61 U32 nextToUpdate3; /* index from which to continue dictionary update */
63 U32 hashLog3; /* dispatch table : larger == faster, more memory */
62 U32 hashLog3; /* dispatch table : larger == faster, more memory */
64 U32 loadedDictEnd;
63 U32 loadedDictEnd; /* index of end of dictionary */
64 U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
65 ZSTD_compressionStage_e stage;
65 ZSTD_compressionStage_e stage;
66 U32 rep[ZSTD_REP_NUM];
66 U32 rep[ZSTD_REP_NUM];
67 U32 savedRep[ZSTD_REP_NUM];
67 U32 repToConfirm[ZSTD_REP_NUM];
68 U32 dictID;
68 U32 dictID;
69 ZSTD_parameters params;
69 ZSTD_parameters params;
70 void* workSpace;
70 void* workSpace;
@@ -101,7 +101,7 b' ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD'
101 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
101 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
102 if (!cctx) return NULL;
102 if (!cctx) return NULL;
103 memset(cctx, 0, sizeof(ZSTD_CCtx));
103 memset(cctx, 0, sizeof(ZSTD_CCtx));
104 memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
104 cctx->customMem = customMem;
105 return cctx;
105 return cctx;
106 }
106 }
107
107
@@ -119,6 +119,15 b' size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx*'
119 return sizeof(*cctx) + cctx->workSpaceSize;
119 return sizeof(*cctx) + cctx->workSpaceSize;
120 }
120 }
121
121
122 size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
123 {
124 switch(param)
125 {
126 case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127 default: return ERROR(parameter_unknown);
128 }
129 }
130
122 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
131 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
123 {
132 {
124 return &(ctx->seqStore);
133 return &(ctx->seqStore);
@@ -318,6 +327,14 b' static size_t ZSTD_resetCCtx_advanced (Z'
318 }
327 }
319 }
328 }
320
329
330 /* ZSTD_invalidateRepCodes() :
331 * ensures next compression will not use repcodes from previous block.
332 * Note : only works with regular variant;
333 * do not use with extDict variant ! */
334 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
335 int i;
336 for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
337 }
321
338
322 /*! ZSTD_copyCCtx() :
339 /*! ZSTD_copyCCtx() :
323 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
340 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -735,12 +752,19 b' size_t ZSTD_compressSequences(ZSTD_CCtx*'
735 if ((size_t)(op-ostart) >= maxCSize) return 0; }
752 if ((size_t)(op-ostart) >= maxCSize) return 0; }
736
753
737 /* confirm repcodes */
754 /* confirm repcodes */
738 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
755 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
739
756
740 return op - ostart;
757 return op - ostart;
741 }
758 }
742
759
743
760
761 #if 0 /* for debug */
762 # define STORESEQ_DEBUG
763 #include <stdio.h> /* fprintf */
764 U32 g_startDebug = 0;
765 const BYTE* g_start = NULL;
766 #endif
767
744 /*! ZSTD_storeSeq() :
768 /*! ZSTD_storeSeq() :
745 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
769 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
746 `offsetCode` : distance to match, or 0 == repCode.
770 `offsetCode` : distance to match, or 0 == repCode.
@@ -748,13 +772,14 b' size_t ZSTD_compressSequences(ZSTD_CCtx*'
748 */
772 */
749 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
773 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
750 {
774 {
751 #if 0 /* for debug */
775 #ifdef STORESEQ_DEBUG
752 static const BYTE* g_start = NULL;
776 if (g_startDebug) {
753 const U32 pos = (U32)((const BYTE*)literals - g_start);
777 const U32 pos = (U32)((const BYTE*)literals - g_start);
754 if (g_start==NULL) g_start = (const BYTE*)literals;
778 if (g_start==NULL) g_start = (const BYTE*)literals;
755 //if ((pos > 1) && (pos < 50000))
779 if ((pos > 1895000) && (pos < 1895300))
756 printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
780 fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
757 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
781 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
782 }
758 #endif
783 #endif
759 /* copy Literals */
784 /* copy Literals */
760 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
785 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
@@ -1004,8 +1029,8 b' void ZSTD_compressBlock_fast_generic(ZST'
1004 } } }
1029 } } }
1005
1030
1006 /* save reps for next block */
1031 /* save reps for next block */
1007 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1032 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1008 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1033 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1009
1034
1010 /* Last Literals */
1035 /* Last Literals */
1011 { size_t const lastLLSize = iend - anchor;
1036 { size_t const lastLLSize = iend - anchor;
@@ -1119,7 +1144,7 b' static void ZSTD_compressBlock_fast_extD'
1119 } } }
1144 } } }
1120
1145
1121 /* save reps for next block */
1146 /* save reps for next block */
1122 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1147 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1123
1148
1124 /* Last Literals */
1149 /* Last Literals */
1125 { size_t const lastLLSize = iend - anchor;
1150 { size_t const lastLLSize = iend - anchor;
@@ -1273,8 +1298,8 b' void ZSTD_compressBlock_doubleFast_gener'
1273 } } }
1298 } } }
1274
1299
1275 /* save reps for next block */
1300 /* save reps for next block */
1276 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1301 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1277 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1302 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1278
1303
1279 /* Last Literals */
1304 /* Last Literals */
1280 { size_t const lastLLSize = iend - anchor;
1305 { size_t const lastLLSize = iend - anchor;
@@ -1423,7 +1448,7 b' static void ZSTD_compressBlock_doubleFas'
1423 } } }
1448 } } }
1424
1449
1425 /* save reps for next block */
1450 /* save reps for next block */
1426 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1451 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1427
1452
1428 /* Last Literals */
1453 /* Last Literals */
1429 { size_t const lastLLSize = iend - anchor;
1454 { size_t const lastLLSize = iend - anchor;
@@ -1955,8 +1980,8 b' void ZSTD_compressBlock_lazy_generic(ZST'
1955 } }
1980 } }
1956
1981
1957 /* Save reps for next block */
1982 /* Save reps for next block */
1958 ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
1983 ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
1959 ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
1984 ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
1960
1985
1961 /* Last Literals */
1986 /* Last Literals */
1962 { size_t const lastLLSize = iend - anchor;
1987 { size_t const lastLLSize = iend - anchor;
@@ -2150,7 +2175,7 b' void ZSTD_compressBlock_lazy_extDict_gen'
2150 } }
2175 } }
2151
2176
2152 /* Save reps for next block */
2177 /* Save reps for next block */
2153 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
2178 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2154
2179
2155 /* Last Literals */
2180 /* Last Literals */
2156 { size_t const lastLLSize = iend - anchor;
2181 { size_t const lastLLSize = iend - anchor;
@@ -2409,12 +2434,14 b' static size_t ZSTD_compressContinue_inte'
2409
2434
2410 cctx->nextSrc = ip + srcSize;
2435 cctx->nextSrc = ip + srcSize;
2411
2436
2412 { size_t const cSize = frame ?
2437 if (srcSize) {
2438 size_t const cSize = frame ?
2413 ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2439 ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2414 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2440 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2415 if (ZSTD_isError(cSize)) return cSize;
2441 if (ZSTD_isError(cSize)) return cSize;
2416 return cSize + fhSize;
2442 return cSize + fhSize;
2417 }
2443 } else
2444 return fhSize;
2418 }
2445 }
2419
2446
2420
2447
@@ -2450,7 +2477,7 b' static size_t ZSTD_loadDictionaryContent'
2450 zc->dictBase = zc->base;
2477 zc->dictBase = zc->base;
2451 zc->base += ip - zc->nextSrc;
2478 zc->base += ip - zc->nextSrc;
2452 zc->nextToUpdate = zc->dictLimit;
2479 zc->nextToUpdate = zc->dictLimit;
2453 zc->loadedDictEnd = (U32)(iend - zc->base);
2480 zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
2454
2481
2455 zc->nextSrc = iend;
2482 zc->nextSrc = iend;
2456 if (srcSize <= HASH_READ_SIZE) return 0;
2483 if (srcSize <= HASH_READ_SIZE) return 0;
@@ -2557,9 +2584,9 b' static size_t ZSTD_loadDictEntropyStats('
2557 }
2584 }
2558
2585
2559 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
2586 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
2560 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2587 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2561 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2588 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2562 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2589 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2563 dictPtr += 12;
2590 dictPtr += 12;
2564
2591
2565 { U32 offcodeMax = MaxOff;
2592 { U32 offcodeMax = MaxOff;
@@ -2594,7 +2621,6 b' static size_t ZSTD_compress_insertDictio'
2594 }
2621 }
2595 }
2622 }
2596
2623
2597
2598 /*! ZSTD_compressBegin_internal() :
2624 /*! ZSTD_compressBegin_internal() :
2599 * @return : 0, or an error code */
2625 * @return : 0, or an error code */
2600 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2626 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2626,9 +2652,9 b' size_t ZSTD_compressBegin_usingDict(ZSTD'
2626 }
2652 }
2627
2653
2628
2654
2629 size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
2655 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
2630 {
2656 {
2631 return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
2657 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
2632 }
2658 }
2633
2659
2634
2660
@@ -2733,7 +2759,8 b' size_t ZSTD_compress(void* dst, size_t d'
2733 /* ===== Dictionary API ===== */
2759 /* ===== Dictionary API ===== */
2734
2760
2735 struct ZSTD_CDict_s {
2761 struct ZSTD_CDict_s {
2736 void* dictContent;
2762 void* dictBuffer;
2763 const void* dictContent;
2737 size_t dictContentSize;
2764 size_t dictContentSize;
2738 ZSTD_CCtx* refContext;
2765 ZSTD_CCtx* refContext;
2739 }; /* typedef'd tp ZSTD_CDict within "zstd.h" */
2766 }; /* typedef'd tp ZSTD_CDict within "zstd.h" */
@@ -2741,39 +2768,45 b' struct ZSTD_CDict_s {'
2741 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2768 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2742 {
2769 {
2743 if (cdict==NULL) return 0; /* support sizeof on NULL */
2770 if (cdict==NULL) return 0; /* support sizeof on NULL */
2744 return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
2771 return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
2745 }
2772 }
2746
2773
2747 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
2774 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
2775 ZSTD_parameters params, ZSTD_customMem customMem)
2748 {
2776 {
2749 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2777 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2750 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2778 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2751
2779
2752 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2780 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2753 void* const dictContent = ZSTD_malloc(dictSize, customMem);
2754 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2781 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2755
2782
2756 if (!dictContent || !cdict || !cctx) {
2783 if (!cdict || !cctx) {
2757 ZSTD_free(dictContent, customMem);
2758 ZSTD_free(cdict, customMem);
2784 ZSTD_free(cdict, customMem);
2759 ZSTD_free(cctx, customMem);
2785 ZSTD_free(cctx, customMem);
2760 return NULL;
2786 return NULL;
2761 }
2787 }
2762
2788
2763 if (dictSize) {
2789 if ((byReference) || (!dictBuffer) || (!dictSize)) {
2764 memcpy(dictContent, dict, dictSize);
2790 cdict->dictBuffer = NULL;
2791 cdict->dictContent = dictBuffer;
2792 } else {
2793 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
2794 if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
2795 memcpy(internalBuffer, dictBuffer, dictSize);
2796 cdict->dictBuffer = internalBuffer;
2797 cdict->dictContent = internalBuffer;
2765 }
2798 }
2766 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
2799
2800 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2767 if (ZSTD_isError(errorCode)) {
2801 if (ZSTD_isError(errorCode)) {
2768 ZSTD_free(dictContent, customMem);
2802 ZSTD_free(cdict->dictBuffer, customMem);
2803 ZSTD_free(cctx, customMem);
2769 ZSTD_free(cdict, customMem);
2804 ZSTD_free(cdict, customMem);
2770 ZSTD_free(cctx, customMem);
2771 return NULL;
2805 return NULL;
2772 } }
2806 } }
2773
2807
2774 cdict->dictContent = dictContent;
2808 cdict->refContext = cctx;
2775 cdict->dictContentSize = dictSize;
2809 cdict->dictContentSize = dictSize;
2776 cdict->refContext = cctx;
2777 return cdict;
2810 return cdict;
2778 }
2811 }
2779 }
2812 }
@@ -2783,7 +2816,15 b' ZSTD_CDict* ZSTD_createCDict(const void*'
2783 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2816 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2784 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2817 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2785 params.fParams.contentSizeFlag = 1;
2818 params.fParams.contentSizeFlag = 1;
2786 return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
2819 return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
2820 }
2821
2822 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
2823 {
2824 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2825 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2826 params.fParams.contentSizeFlag = 1;
2827 return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
2787 }
2828 }
2788
2829
2789 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
2830 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2791,7 +2832,7 b' size_t ZSTD_freeCDict(ZSTD_CDict* cdict)'
2791 if (cdict==NULL) return 0; /* support free on NULL */
2832 if (cdict==NULL) return 0; /* support free on NULL */
2792 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2833 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2793 ZSTD_freeCCtx(cdict->refContext);
2834 ZSTD_freeCCtx(cdict->refContext);
2794 ZSTD_free(cdict->dictContent, cMem);
2835 ZSTD_free(cdict->dictBuffer, cMem);
2795 ZSTD_free(cdict, cMem);
2836 ZSTD_free(cdict, cMem);
2796 return 0;
2837 return 0;
2797 }
2838 }
@@ -2801,7 +2842,7 b' static ZSTD_parameters ZSTD_getParamsFro'
2801 return ZSTD_getParamsFromCCtx(cdict->refContext);
2842 return ZSTD_getParamsFromCCtx(cdict->refContext);
2802 }
2843 }
2803
2844
2804 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
2845 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2805 {
2846 {
2806 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2847 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2807 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2848 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
@@ -2900,7 +2941,7 b' size_t ZSTD_CStreamOutSize(void) { retur'
2900
2941
2901 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2942 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2902 {
2943 {
2903 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */
2944 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2904
2945
2905 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2946 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2906 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
2947 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
@@ -2937,9 +2978,9 b' size_t ZSTD_initCStream_advanced(ZSTD_CS'
2937 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2978 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2938 }
2979 }
2939
2980
2940 if (dict) {
2981 if (dict && dictSize >= 8) {
2941 ZSTD_freeCDict(zcs->cdictLocal);
2982 ZSTD_freeCDict(zcs->cdictLocal);
2942 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
2983 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
2943 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2984 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2944 zcs->cdict = zcs->cdictLocal;
2985 zcs->cdict = zcs->cdictLocal;
2945 } else zcs->cdict = NULL;
2986 } else zcs->cdict = NULL;
@@ -2956,6 +2997,7 b' size_t ZSTD_initCStream_usingCDict(ZSTD_'
2956 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2997 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2957 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2998 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2958 zcs->cdict = cdict;
2999 zcs->cdict = cdict;
3000 zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
2959 return initError;
3001 return initError;
2960 }
3002 }
2961
3003
@@ -2967,7 +3009,8 b' size_t ZSTD_initCStream_usingDict(ZSTD_C'
2967
3009
2968 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
3010 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
2969 {
3011 {
2970 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3012 ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3013 if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
2971 return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
3014 return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
2972 }
3015 }
2973
3016
@@ -38,7 +38,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto'
38
38
39 ssPtr->cachedLiterals = NULL;
39 ssPtr->cachedLiterals = NULL;
40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
41 ssPtr->staticPrices = 0;
41 ssPtr->staticPrices = 0;
42
42
43 if (ssPtr->litLengthSum == 0) {
43 if (ssPtr->litLengthSum == 0) {
44 if (srcSize <= 1024) ssPtr->staticPrices = 1;
44 if (srcSize <= 1024) ssPtr->staticPrices = 1;
@@ -56,7 +56,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto'
56
56
57 for (u=0; u<=MaxLit; u++) {
57 for (u=0; u<=MaxLit; u++) {
58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
59 ssPtr->litSum += ssPtr->litFreq[u];
59 ssPtr->litSum += ssPtr->litFreq[u];
60 }
60 }
61 for (u=0; u<=MaxLL; u++)
61 for (u=0; u<=MaxLL; u++)
62 ssPtr->litLengthFreq[u] = 1;
62 ssPtr->litLengthFreq[u] = 1;
@@ -634,7 +634,7 b' void ZSTD_compressBlock_opt_generic(ZSTD'
634 } } /* for (cur=0; cur < last_pos; ) */
634 } } /* for (cur=0; cur < last_pos; ) */
635
635
636 /* Save reps for next block */
636 /* Save reps for next block */
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
638
638
639 /* Last Literals */
639 /* Last Literals */
640 { size_t const lastLLSize = iend - anchor;
640 { size_t const lastLLSize = iend - anchor;
@@ -825,7 +825,7 b' void ZSTD_compressBlock_opt_extDict_gene'
825
825
826 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
826 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
827
827
828 if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
828 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
829 best_mlen = matches[match_num-1].len;
829 best_mlen = matches[match_num-1].len;
830 best_off = matches[match_num-1].off;
830 best_off = matches[match_num-1].off;
831 last_pos = cur + 1;
831 last_pos = cur + 1;
@@ -835,7 +835,7 b' void ZSTD_compressBlock_opt_extDict_gene'
835 /* set prices using matches at position = cur */
835 /* set prices using matches at position = cur */
836 for (u = 0; u < match_num; u++) {
836 for (u = 0; u < match_num; u++) {
837 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
837 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
838 best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
838 best_mlen = matches[u].len;
839
839
840 while (mlen <= best_mlen) {
840 while (mlen <= best_mlen) {
841 if (opt[cur].mlen == 1) {
841 if (opt[cur].mlen == 1) {
@@ -907,7 +907,7 b' void ZSTD_compressBlock_opt_extDict_gene'
907 } } /* for (cur=0; cur < last_pos; ) */
907 } } /* for (cur=0; cur < last_pos; ) */
908
908
909 /* Save reps for next block */
909 /* Save reps for next block */
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
911
911
912 /* Last Literals */
912 /* Last Literals */
913 { size_t lastLLSize = iend - anchor;
913 { size_t lastLLSize = iend - anchor;
@@ -1444,7 +1444,7 b' size_t ZSTD_decompress_usingDict(ZSTD_DC'
1444 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1444 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1445 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
1445 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
1446 #endif
1446 #endif
1447 ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
1447 CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
1448 ZSTD_checkContinuity(dctx, dst);
1448 ZSTD_checkContinuity(dctx, dst);
1449 return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
1449 return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
1450 }
1450 }
@@ -1671,9 +1671,9 b' static size_t ZSTD_loadEntropy(ZSTD_DCtx'
1671 }
1671 }
1672
1672
1673 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
1673 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
1674 dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
1674 dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
1675 dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
1675 dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
1676 dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
1676 dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
1677 dictPtr += 12;
1677 dictPtr += 12;
1678
1678
1679 dctx->litEntropy = dctx->fseEntropy = 1;
1679 dctx->litEntropy = dctx->fseEntropy = 1;
@@ -1713,39 +1713,44 b' size_t ZSTD_decompressBegin_usingDict(ZS'
1713 /* ====== ZSTD_DDict ====== */
1713 /* ====== ZSTD_DDict ====== */
1714
1714
1715 struct ZSTD_DDict_s {
1715 struct ZSTD_DDict_s {
1716 void* dict;
1716 void* dictBuffer;
1717 const void* dictContent;
1717 size_t dictSize;
1718 size_t dictSize;
1718 ZSTD_DCtx* refContext;
1719 ZSTD_DCtx* refContext;
1719 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
1720 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
1720
1721
1721 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
1722 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
1722 {
1723 {
1723 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
1724 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
1724 if (!customMem.customAlloc || !customMem.customFree) return NULL;
1725 if (!customMem.customAlloc || !customMem.customFree) return NULL;
1725
1726
1726 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
1727 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
1727 void* const dictContent = ZSTD_malloc(dictSize, customMem);
1728 ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
1728 ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
1729
1729
1730 if (!dictContent || !ddict || !dctx) {
1730 if (!ddict || !dctx) {
1731 ZSTD_free(dictContent, customMem);
1732 ZSTD_free(ddict, customMem);
1731 ZSTD_free(ddict, customMem);
1733 ZSTD_free(dctx, customMem);
1732 ZSTD_free(dctx, customMem);
1734 return NULL;
1733 return NULL;
1735 }
1734 }
1736
1735
1737 if (dictSize) {
1736 if ((byReference) || (!dict) || (!dictSize)) {
1738 memcpy(dictContent, dict, dictSize);
1737 ddict->dictBuffer = NULL;
1738 ddict->dictContent = dict;
1739 } else {
1740 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
1741 if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; }
1742 memcpy(internalBuffer, dict, dictSize);
1743 ddict->dictBuffer = internalBuffer;
1744 ddict->dictContent = internalBuffer;
1739 }
1745 }
1740 { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
1746 { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
1741 if (ZSTD_isError(errorCode)) {
1747 if (ZSTD_isError(errorCode)) {
1742 ZSTD_free(dictContent, customMem);
1748 ZSTD_free(ddict->dictBuffer, customMem);
1743 ZSTD_free(ddict, customMem);
1749 ZSTD_free(ddict, customMem);
1744 ZSTD_free(dctx, customMem);
1750 ZSTD_free(dctx, customMem);
1745 return NULL;
1751 return NULL;
1746 } }
1752 } }
1747
1753
1748 ddict->dict = dictContent;
1749 ddict->dictSize = dictSize;
1754 ddict->dictSize = dictSize;
1750 ddict->refContext = dctx;
1755 ddict->refContext = dctx;
1751 return ddict;
1756 return ddict;
@@ -1758,15 +1763,27 b' ZSTD_DDict* ZSTD_createDDict_advanced(co'
1758 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
1763 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
1759 {
1764 {
1760 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1765 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1761 return ZSTD_createDDict_advanced(dict, dictSize, allocator);
1766 return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);
1762 }
1767 }
1763
1768
1769
1770 /*! ZSTD_createDDict_byReference() :
1771 * Create a digested dictionary, ready to start decompression operation without startup delay.
1772 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
1773 * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
1774 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
1775 {
1776 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1777 return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);
1778 }
1779
1780
1764 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
1781 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
1765 {
1782 {
1766 if (ddict==NULL) return 0; /* support free on NULL */
1783 if (ddict==NULL) return 0; /* support free on NULL */
1767 { ZSTD_customMem const cMem = ddict->refContext->customMem;
1784 { ZSTD_customMem const cMem = ddict->refContext->customMem;
1768 ZSTD_freeDCtx(ddict->refContext);
1785 ZSTD_freeDCtx(ddict->refContext);
1769 ZSTD_free(ddict->dict, cMem);
1786 ZSTD_free(ddict->dictBuffer, cMem);
1770 ZSTD_free(ddict, cMem);
1787 ZSTD_free(ddict, cMem);
1771 return 0;
1788 return 0;
1772 }
1789 }
@@ -1775,7 +1792,7 b' size_t ZSTD_freeDDict(ZSTD_DDict* ddict)'
1775 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
1792 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
1776 {
1793 {
1777 if (ddict==NULL) return 0; /* support sizeof on NULL */
1794 if (ddict==NULL) return 0; /* support sizeof on NULL */
1778 return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize;
1795 return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
1779 }
1796 }
1780
1797
1781 /*! ZSTD_getDictID_fromDict() :
1798 /*! ZSTD_getDictID_fromDict() :
@@ -1796,7 +1813,7 b' unsigned ZSTD_getDictID_fromDict(const v'
1796 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
1813 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
1797 {
1814 {
1798 if (ddict==NULL) return 0;
1815 if (ddict==NULL) return 0;
1799 return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize);
1816 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
1800 }
1817 }
1801
1818
1802 /*! ZSTD_getDictID_fromFrame() :
1819 /*! ZSTD_getDictID_fromFrame() :
@@ -1827,7 +1844,7 b' size_t ZSTD_decompress_usingDDict(ZSTD_D'
1827 const ZSTD_DDict* ddict)
1844 const ZSTD_DDict* ddict)
1828 {
1845 {
1829 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1846 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1830 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
1847 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dictContent, ddict->dictSize);
1831 #endif
1848 #endif
1832 ZSTD_refDCtx(dctx, ddict->refContext);
1849 ZSTD_refDCtx(dctx, ddict->refContext);
1833 ZSTD_checkContinuity(dctx, dst);
1850 ZSTD_checkContinuity(dctx, dst);
@@ -1919,7 +1936,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D'
1919 zds->stage = zdss_loadHeader;
1936 zds->stage = zdss_loadHeader;
1920 zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
1937 zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
1921 ZSTD_freeDDict(zds->ddictLocal);
1938 ZSTD_freeDDict(zds->ddictLocal);
1922 if (dict) {
1939 if (dict && dictSize >= 8) {
1923 zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
1940 zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
1924 if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
1941 if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
1925 } else zds->ddictLocal = NULL;
1942 } else zds->ddictLocal = NULL;
@@ -1956,7 +1973,7 b' size_t ZSTD_setDStreamParameter(ZSTD_DSt'
1956 switch(paramType)
1973 switch(paramType)
1957 {
1974 {
1958 default : return ERROR(parameter_unknown);
1975 default : return ERROR(parameter_unknown);
1959 case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
1976 case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
1960 }
1977 }
1961 return 0;
1978 return 0;
1962 }
1979 }
@@ -2007,7 +2024,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2007 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2024 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2008 { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
2025 { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
2009 if (legacyVersion) {
2026 if (legacyVersion) {
2010 const void* const dict = zds->ddict ? zds->ddict->dict : NULL;
2027 const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
2011 size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
2028 size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
2012 CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
2029 CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
2013 dict, dictSize));
2030 dict, dictSize));
@@ -36,12 +36,11 b''
36 #include <time.h> /* clock */
36 #include <time.h> /* clock */
37
37
38 #include "mem.h" /* read */
38 #include "mem.h" /* read */
39 #include "error_private.h"
40 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
39 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
41 #define HUF_STATIC_LINKING_ONLY
40 #define HUF_STATIC_LINKING_ONLY
42 #include "huf.h"
41 #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
43 #include "zstd_internal.h" /* includes zstd.h */
42 #include "zstd_internal.h" /* includes zstd.h */
44 #include "xxhash.h"
43 #include "xxhash.h" /* XXH64 */
45 #include "divsufsort.h"
44 #include "divsufsort.h"
46 #ifndef ZDICT_STATIC_LINKING_ONLY
45 #ifndef ZDICT_STATIC_LINKING_ONLY
47 # define ZDICT_STATIC_LINKING_ONLY
46 # define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 b''
61 #define NOISELENGTH 32
60 #define NOISELENGTH 32
62
61
63 #define MINRATIO 4
62 #define MINRATIO 4
64 static const int g_compressionLevel_default = 5;
63 static const int g_compressionLevel_default = 6;
65 static const U32 g_selectivity_default = 9;
64 static const U32 g_selectivity_default = 9;
66 static const size_t g_provision_entropySize = 200;
65 static const size_t g_provision_entropySize = 200;
67 static const size_t g_min_fast_dictContent = 192;
66 static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 b' static dictItem ZDICT_analyzePos('
307 } while (length >=MINMATCHLENGTH);
306 } while (length >=MINMATCHLENGTH);
308
307
309 /* look backward */
308 /* look backward */
310 length = MINMATCHLENGTH;
309 length = MINMATCHLENGTH;
311 while ((length >= MINMATCHLENGTH) & (start > 0)) {
310 while ((length >= MINMATCHLENGTH) & (start > 0)) {
312 length = ZDICT_count(b + pos, b + suffix[start - 1]);
311 length = ZDICT_count(b + pos, b + suffix[start - 1]);
313 if (length >= LLIMIT) length = LLIMIT - 1;
312 if (length >= LLIMIT) length = LLIMIT - 1;
314 lengthList[length]++;
313 lengthList[length]++;
315 if (length >= MINMATCHLENGTH) start--;
314 if (length >= MINMATCHLENGTH) start--;
316 }
315 }
317
316
318 /* largest useful length */
317 /* largest useful length */
319 memset(cumulLength, 0, sizeof(cumulLength));
318 memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 b' static void ZDICT_countEStats(EStats_res'
570 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
569 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571 }
570 }
572 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
571 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
573 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
572 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
574
573
575 if (cSize) { /* if == 0; block is not compressible */
574 if (cSize) { /* if == 0; block is not compressible */
576 const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
575 const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 b' static size_t ZDICT_analyzeEntropy(void*'
825 }
824 }
826
825
827
826
827
828 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
829 const void* customDictContent, size_t dictContentSize,
830 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
831 ZDICT_params_t params)
832 {
833 size_t hSize;
834 #define HBUFFSIZE 256
835 BYTE header[HBUFFSIZE];
836 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837 U32 const notificationLevel = params.notificationLevel;
838
839 /* check conditions */
840 if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
841 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
842 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
844 /* dictionary header */
845 MEM_writeLE32(header, ZSTD_DICT_MAGIC);
846 { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
847 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
848 U32 const dictID = params.dictID ? params.dictID : compliantID;
849 MEM_writeLE32(header+4, dictID);
850 }
851 hSize = 8;
852
853 /* entropy tables */
854 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
855 DISPLAYLEVEL(2, "statistics ... \n");
856 { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
857 compressionLevel,
858 samplesBuffer, samplesSizes, nbSamples,
859 customDictContent, dictContentSize,
860 notificationLevel);
861 if (ZDICT_isError(eSize)) return eSize;
862 hSize += eSize;
863 }
864
865 /* copy elements in final buffer ; note : src and dst buffer can overlap */
866 if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
867 { size_t const dictSize = hSize + dictContentSize;
868 char* dictEnd = (char*)dictBuffer + dictSize;
869 memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
870 memcpy(dictBuffer, header, hSize);
871 return dictSize;
872 }
873 }
874
875
828 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
876 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
829 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
877 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
830 ZDICT_params_t params)
878 ZDICT_params_t params)
@@ -19,15 +19,18 b' extern "C" {'
19 #include <stddef.h> /* size_t */
19 #include <stddef.h> /* size_t */
20
20
21
21
22 /*====== Export for Windows ======*/
22 /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23 /*!
23 #if defined(__GNUC__) && (__GNUC__ >= 4)
24 * ZSTD_DLL_EXPORT :
24 # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
25 * Enable exporting of functions when building a Windows DLL
26 */
27 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZDICTLIB_API __declspec(dllexport)
29 #else
25 #else
30 # define ZDICTLIB_API
26 # define ZDICTLIB_VISIBILITY
27 #endif
28 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29 # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
30 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
31 # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
32 #else
33 # define ZDICTLIB_API ZDICTLIB_VISIBILITY
31 #endif
34 #endif
32
35
33
36
@@ -79,27 +82,114 b' typedef struct {'
79 or an error code, which can be tested by ZDICT_isError().
82 or an error code, which can be tested by ZDICT_isError().
80 note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
83 note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
81 */
84 */
82 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
85 ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
86 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
87 ZDICT_params_t parameters);
88
89 /*! COVER_params_t :
90 For all values 0 means default.
91 kMin and d are the only required parameters.
92 */
93 typedef struct {
94 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
98 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99 unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100 unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101 int compressionLevel; /* 0 means default; target a specific zstd compression level */
102 } COVER_params_t;
103
104
105 /*! COVER_trainFromBuffer() :
106 Train a dictionary from an array of samples using the COVER algorithm.
107 Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108 supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109 The resulting dictionary will be saved into `dictBuffer`.
110 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111 or an error code, which can be tested with ZDICT_isError().
112 Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113 Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114 It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115 In general, it's recommended to provide a few thousands samples, but this can vary a lot.
116 It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
117 */
118 ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120 COVER_params_t parameters);
121
122 /*! COVER_optimizeTrainFromBuffer() :
123 The same requirements as above hold for all the parameters except `parameters`.
124 This function tries many parameter combinations and picks the best parameters.
125 `*parameters` is filled with the best parameters found, and the dictionary
126 constructed with those parameters is stored in `dictBuffer`.
127
128 All of the parameters d, k, steps are optional.
129 If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130 if steps is zero it defaults to its default value.
131 If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
133 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134 or an error code, which can be tested with ZDICT_isError().
135 On success `*parameters` contains the parameters selected.
136 Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137 */
138 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139 const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140 COVER_params_t *parameters);
141
142 /*! ZDICT_finalizeDictionary() :
143
144 Given a custom content as a basis for dictionary, and a set of samples,
145 finalize dictionary by adding headers and statistics.
146
147 Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148 supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
150 dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
151 maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
152
153 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154 or an error code, which can be tested by ZDICT_isError().
155 note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156 note 2 : dictBuffer and customDictContent can overlap
157 */
158 #define ZDICT_CONTENTSIZE_MIN 256
159 #define ZDICT_DICTSIZE_MIN 512
160 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161 const void* customDictContent, size_t dictContentSize,
83 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
162 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
84 ZDICT_params_t parameters);
163 ZDICT_params_t parameters);
85
164
86
165
87 /*! ZDICT_addEntropyTablesFromBuffer() :
88
89 Given a content-only dictionary (built using any 3rd party algorithm),
90 add entropy tables computed from an array of samples.
91 Samples must be stored concatenated in a flat buffer `samplesBuffer`,
92 supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
93
166
94 The input dictionary content must be stored *at the end* of `dictBuffer`.
167 /* Deprecation warnings */
95 Its size is `dictContentSize`.
168 /* It is generally possible to disable deprecation warnings from compiler,
96 The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
169 for example with -Wno-deprecated-declarations for gcc
97 starting from its beginning.
170 or _CRT_SECURE_NO_WARNINGS in Visual.
98 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
171 Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
99 */
172 #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
173 # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
174 #else
175 # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
176 # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
177 # define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
178 # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
179 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
180 # elif (ZDICT_GCC_VERSION >= 301)
181 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
182 # elif defined(_MSC_VER)
183 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
184 # else
185 # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
186 # define ZDICT_DEPRECATED(message) ZDICTLIB_API
187 # endif
188 #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
189
190 ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
100 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
191 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
101 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
192 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
102
103
193
104
194
105 #endif /* ZDICT_STATIC_LINKING_ONLY */
195 #endif /* ZDICT_STATIC_LINKING_ONLY */
@@ -20,13 +20,16 b' extern "C" {'
20
20
21 /* ===== ZSTDLIB_API : control library symbols visibility ===== */
21 /* ===== ZSTDLIB_API : control library symbols visibility ===== */
22 #if defined(__GNUC__) && (__GNUC__ >= 4)
22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDLIB_API __attribute__ ((visibility ("default")))
23 # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
24 #else
25 # define ZSTDLIB_API __declspec(dllexport)
25 # define ZSTDLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
26 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
27 # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
30 # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
28 #else
31 #else
29 # define ZSTDLIB_API
32 # define ZSTDLIB_API ZSTDLIB_VISIBILITY
30 #endif
33 #endif
31
34
32
35
@@ -53,7 +56,7 b' extern "C" {'
53 /*------ Version ------*/
56 /*------ Version ------*/
54 #define ZSTD_VERSION_MAJOR 1
57 #define ZSTD_VERSION_MAJOR 1
55 #define ZSTD_VERSION_MINOR 1
58 #define ZSTD_VERSION_MINOR 1
56 #define ZSTD_VERSION_RELEASE 2
59 #define ZSTD_VERSION_RELEASE 3
57
60
58 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
61 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
59 #define ZSTD_QUOTE(str) #str
62 #define ZSTD_QUOTE(str) #str
@@ -170,8 +173,8 b' typedef struct ZSTD_CDict_s ZSTD_CDict;'
170 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
173 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
171 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
174 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
172 * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
175 * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
173 * `dict` can be released after ZSTD_CDict creation. */
176 * `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
174 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
177 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
175
178
176 /*! ZSTD_freeCDict() :
179 /*! ZSTD_freeCDict() :
177 * Function frees memory allocated by ZSTD_createCDict(). */
180 * Function frees memory allocated by ZSTD_createCDict(). */
@@ -191,8 +194,8 b' typedef struct ZSTD_DDict_s ZSTD_DDict;'
191
194
192 /*! ZSTD_createDDict() :
195 /*! ZSTD_createDDict() :
193 * Create a digested dictionary, ready to start decompression operation without startup delay.
196 * Create a digested dictionary, ready to start decompression operation without startup delay.
194 * `dict` can be released after creation. */
197 * dictBuffer can be released after DDict creation, as its content is copied inside DDict */
195 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
198 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
196
199
197 /*! ZSTD_freeDDict() :
200 /*! ZSTD_freeDDict() :
198 * Function frees memory allocated with ZSTD_createDDict() */
201 * Function frees memory allocated with ZSTD_createDDict() */
@@ -325,7 +328,7 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
325 * ***************************************************************************************/
328 * ***************************************************************************************/
326
329
327 /* --- Constants ---*/
330 /* --- Constants ---*/
328 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
331 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
329 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
332 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
330
333
331 #define ZSTD_WINDOWLOG_MAX_32 25
334 #define ZSTD_WINDOWLOG_MAX_32 25
@@ -345,8 +348,9 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
345 #define ZSTD_TARGETLENGTH_MAX 999
348 #define ZSTD_TARGETLENGTH_MAX 999
346
349
347 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
350 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
351 #define ZSTD_FRAMEHEADERSIZE_MIN 6
348 static const size_t ZSTD_frameHeaderSize_prefix = 5;
352 static const size_t ZSTD_frameHeaderSize_prefix = 5;
349 static const size_t ZSTD_frameHeaderSize_min = 6;
353 static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
350 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
354 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
351 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
355 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
352
356
@@ -365,9 +369,9 b' typedef struct {'
365 } ZSTD_compressionParameters;
369 } ZSTD_compressionParameters;
366
370
367 typedef struct {
371 typedef struct {
368 unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
372 unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
369 unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
373 unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
370 unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
374 unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
371 } ZSTD_frameParameters;
375 } ZSTD_frameParameters;
372
376
373 typedef struct {
377 typedef struct {
@@ -397,9 +401,23 b' ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_a'
397 * Gives the amount of memory used by a given ZSTD_CCtx */
401 * Gives the amount of memory used by a given ZSTD_CCtx */
398 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
402 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
399
403
404 typedef enum {
405 ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
406 } ZSTD_CCtxParameter;
407 /*! ZSTD_setCCtxParameter() :
408 * Set advanced parameters, selected through enum ZSTD_CCtxParameter
409 * @result : 0, or an error code (which can be tested with ZSTD_isError()) */
410 ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
411
412 /*! ZSTD_createCDict_byReference() :
413 * Create a digested dictionary for compression
414 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
415 * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
416 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
417
400 /*! ZSTD_createCDict_advanced() :
418 /*! ZSTD_createCDict_advanced() :
401 * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
419 * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
402 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
420 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
403 ZSTD_parameters params, ZSTD_customMem customMem);
421 ZSTD_parameters params, ZSTD_customMem customMem);
404
422
405 /*! ZSTD_sizeof_CDict() :
423 /*! ZSTD_sizeof_CDict() :
@@ -455,6 +473,15 b' ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_a'
455 * Gives the amount of memory used by a given ZSTD_DCtx */
473 * Gives the amount of memory used by a given ZSTD_DCtx */
456 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
474 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
457
475
476 /*! ZSTD_createDDict_byReference() :
477 * Create a digested dictionary, ready to start decompression operation without startup delay.
478 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
479 * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
480 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
481
482 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
483 unsigned byReference, ZSTD_customMem customMem);
484
458 /*! ZSTD_sizeof_DDict() :
485 /*! ZSTD_sizeof_DDict() :
459 * Gives the amount of memory used by a given ZSTD_DDict */
486 * Gives the amount of memory used by a given ZSTD_DDict */
460 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
487 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
@@ -463,13 +490,13 b' ZSTDLIB_API size_t ZSTD_sizeof_DDict(con'
463 * Provides the dictID stored within dictionary.
490 * Provides the dictID stored within dictionary.
464 * if @return == 0, the dictionary is not conformant with Zstandard specification.
491 * if @return == 0, the dictionary is not conformant with Zstandard specification.
465 * It can still be loaded, but as a content-only dictionary. */
492 * It can still be loaded, but as a content-only dictionary. */
466 unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
493 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
467
494
468 /*! ZSTD_getDictID_fromDDict() :
495 /*! ZSTD_getDictID_fromDDict() :
469 * Provides the dictID of the dictionary loaded into `ddict`.
496 * Provides the dictID of the dictionary loaded into `ddict`.
470 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
497 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
471 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
498 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
472 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
499 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
473
500
474 /*! ZSTD_getDictID_fromFrame() :
501 /*! ZSTD_getDictID_fromFrame() :
475 * Provides the dictID required to decompressed the frame stored within `src`.
502 * Provides the dictID required to decompressed the frame stored within `src`.
@@ -481,7 +508,7 b' unsigned ZSTD_getDictID_fromDDict(const '
481 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
508 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
482 * - This is not a Zstandard frame.
509 * - This is not a Zstandard frame.
483 * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
510 * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
484 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
511 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
485
512
486
513
487 /********************************************************************
514 /********************************************************************
@@ -491,7 +518,7 b' unsigned ZSTD_getDictID_fromFrame(const '
491 /*===== Advanced Streaming compression functions =====*/
518 /*===== Advanced Streaming compression functions =====*/
492 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
519 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
493 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
520 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
494 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
521 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
495 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
522 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
496 ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
523 ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
497 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
524 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
@@ -500,9 +527,9 b' ZSTDLIB_API size_t ZSTD_sizeof_CStream(c'
500
527
501
528
502 /*===== Advanced Streaming decompression functions =====*/
529 /*===== Advanced Streaming decompression functions =====*/
503 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
530 typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
504 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
531 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
505 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
532 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
506 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
533 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
507 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
534 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
508 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
535 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -542,10 +569,10 b' ZSTDLIB_API size_t ZSTD_sizeof_DStream(c'
542 In which case, it will "discard" the relevant memory section from its history.
569 In which case, it will "discard" the relevant memory section from its history.
543
570
544 Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
571 Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
545 It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
572 It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
546 Without last block mark, frames will be considered unfinished (broken) by decoders.
573 Without last block mark, frames will be considered unfinished (corrupted) by decoders.
547
574
548 You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
575 `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
549 */
576 */
550
577
551 /*===== Buffer-less streaming compression functions =====*/
578 /*===== Buffer-less streaming compression functions =====*/
@@ -553,6 +580,7 b' ZSTDLIB_API size_t ZSTD_compressBegin(ZS'
553 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
580 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
554 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
581 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
555 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
582 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
583 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
556 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
584 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
557 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
585 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
558
586
This diff has been collapsed as it changes many lines, (1016 lines changed) Show them Hide them
@@ -8,145 +8,1035 b''
8
8
9 from __future__ import absolute_import, unicode_literals
9 from __future__ import absolute_import, unicode_literals
10
10
11 import io
11 import sys
12
12
13 from _zstd_cffi import (
13 from _zstd_cffi import (
14 ffi,
14 ffi,
15 lib,
15 lib,
16 )
16 )
17
17
18 if sys.version_info[0] == 2:
19 bytes_type = str
20 int_type = long
21 else:
22 bytes_type = bytes
23 int_type = int
18
24
19 _CSTREAM_IN_SIZE = lib.ZSTD_CStreamInSize()
25
20 _CSTREAM_OUT_SIZE = lib.ZSTD_CStreamOutSize()
26 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
27 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
28 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
29 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
30
31 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
32
33
34 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
35 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
36 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
37 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
38
39 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
40 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
41 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
42 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
43 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
44 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
45 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
46 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
47 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
48 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
49 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
50 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
51 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
52
53 STRATEGY_FAST = lib.ZSTD_fast
54 STRATEGY_DFAST = lib.ZSTD_dfast
55 STRATEGY_GREEDY = lib.ZSTD_greedy
56 STRATEGY_LAZY = lib.ZSTD_lazy
57 STRATEGY_LAZY2 = lib.ZSTD_lazy2
58 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
59 STRATEGY_BTOPT = lib.ZSTD_btopt
60
61 COMPRESSOBJ_FLUSH_FINISH = 0
62 COMPRESSOBJ_FLUSH_BLOCK = 1
63
64
65 class ZstdError(Exception):
66 pass
21
67
22
68
23 class _ZstdCompressionWriter(object):
69 class CompressionParameters(object):
24 def __init__(self, cstream, writer):
70 def __init__(self, window_log, chain_log, hash_log, search_log,
25 self._cstream = cstream
71 search_length, target_length, strategy):
72 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
73 raise ValueError('invalid window log value')
74
75 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
76 raise ValueError('invalid chain log value')
77
78 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
79 raise ValueError('invalid hash log value')
80
81 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
82 raise ValueError('invalid search log value')
83
84 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
85 raise ValueError('invalid search length value')
86
87 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
88 raise ValueError('invalid target length value')
89
90 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
91 raise ValueError('invalid strategy value')
92
93 self.window_log = window_log
94 self.chain_log = chain_log
95 self.hash_log = hash_log
96 self.search_log = search_log
97 self.search_length = search_length
98 self.target_length = target_length
99 self.strategy = strategy
100
101 def as_compression_parameters(self):
102 p = ffi.new('ZSTD_compressionParameters *')[0]
103 p.windowLog = self.window_log
104 p.chainLog = self.chain_log
105 p.hashLog = self.hash_log
106 p.searchLog = self.search_log
107 p.searchLength = self.search_length
108 p.targetLength = self.target_length
109 p.strategy = self.strategy
110
111 return p
112
113 def get_compression_parameters(level, source_size=0, dict_size=0):
114 params = lib.ZSTD_getCParams(level, source_size, dict_size)
115 return CompressionParameters(window_log=params.windowLog,
116 chain_log=params.chainLog,
117 hash_log=params.hashLog,
118 search_log=params.searchLog,
119 search_length=params.searchLength,
120 target_length=params.targetLength,
121 strategy=params.strategy)
122
123
124 def estimate_compression_context_size(params):
125 if not isinstance(params, CompressionParameters):
126 raise ValueError('argument must be a CompressionParameters')
127
128 cparams = params.as_compression_parameters()
129 return lib.ZSTD_estimateCCtxSize(cparams)
130
131
132 def estimate_decompression_context_size():
133 return lib.ZSTD_estimateDCtxSize()
134
135
136 class ZstdCompressionWriter(object):
137 def __init__(self, compressor, writer, source_size, write_size):
138 self._compressor = compressor
26 self._writer = writer
139 self._writer = writer
140 self._source_size = source_size
141 self._write_size = write_size
142 self._entered = False
27
143
28 def __enter__(self):
144 def __enter__(self):
145 if self._entered:
146 raise ZstdError('cannot __enter__ multiple times')
147
148 self._cstream = self._compressor._get_cstream(self._source_size)
149 self._entered = True
29 return self
150 return self
30
151
31 def __exit__(self, exc_type, exc_value, exc_tb):
152 def __exit__(self, exc_type, exc_value, exc_tb):
153 self._entered = False
154
32 if not exc_type and not exc_value and not exc_tb:
155 if not exc_type and not exc_value and not exc_tb:
33 out_buffer = ffi.new('ZSTD_outBuffer *')
156 out_buffer = ffi.new('ZSTD_outBuffer *')
34 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
157 dst_buffer = ffi.new('char[]', self._write_size)
35 out_buffer.size = _CSTREAM_OUT_SIZE
158 out_buffer.dst = dst_buffer
159 out_buffer.size = self._write_size
36 out_buffer.pos = 0
160 out_buffer.pos = 0
37
161
38 while True:
162 while True:
39 res = lib.ZSTD_endStream(self._cstream, out_buffer)
163 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
40 if lib.ZSTD_isError(res):
164 if lib.ZSTD_isError(zresult):
41 raise Exception('error ending compression stream: %s' % lib.ZSTD_getErrorName)
165 raise ZstdError('error ending compression stream: %s' %
166 ffi.string(lib.ZSTD_getErrorName(zresult)))
42
167
43 if out_buffer.pos:
168 if out_buffer.pos:
44 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
169 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
45 out_buffer.pos = 0
170 out_buffer.pos = 0
46
171
47 if res == 0:
172 if zresult == 0:
48 break
173 break
49
174
175 self._cstream = None
176 self._compressor = None
177
50 return False
178 return False
51
179
180 def memory_size(self):
181 if not self._entered:
182 raise ZstdError('cannot determine size of an inactive compressor; '
183 'call when a context manager is active')
184
185 return lib.ZSTD_sizeof_CStream(self._cstream)
186
52 def write(self, data):
187 def write(self, data):
188 if not self._entered:
189 raise ZstdError('write() must be called from an active context '
190 'manager')
191
192 total_write = 0
193
194 data_buffer = ffi.from_buffer(data)
195
196 in_buffer = ffi.new('ZSTD_inBuffer *')
197 in_buffer.src = data_buffer
198 in_buffer.size = len(data_buffer)
199 in_buffer.pos = 0
200
53 out_buffer = ffi.new('ZSTD_outBuffer *')
201 out_buffer = ffi.new('ZSTD_outBuffer *')
54 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
202 dst_buffer = ffi.new('char[]', self._write_size)
55 out_buffer.size = _CSTREAM_OUT_SIZE
203 out_buffer.dst = dst_buffer
204 out_buffer.size = self._write_size
205 out_buffer.pos = 0
206
207 while in_buffer.pos < in_buffer.size:
208 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
209 if lib.ZSTD_isError(zresult):
210 raise ZstdError('zstd compress error: %s' %
211 ffi.string(lib.ZSTD_getErrorName(zresult)))
212
213 if out_buffer.pos:
214 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
215 total_write += out_buffer.pos
216 out_buffer.pos = 0
217
218 return total_write
219
220 def flush(self):
221 if not self._entered:
222 raise ZstdError('flush must be called from an active context manager')
223
224 total_write = 0
225
226 out_buffer = ffi.new('ZSTD_outBuffer *')
227 dst_buffer = ffi.new('char[]', self._write_size)
228 out_buffer.dst = dst_buffer
229 out_buffer.size = self._write_size
56 out_buffer.pos = 0
230 out_buffer.pos = 0
57
231
58 # TODO can we reuse existing memory?
232 while True:
59 in_buffer = ffi.new('ZSTD_inBuffer *')
233 zresult = lib.ZSTD_flushStream(self._cstream, out_buffer)
60 in_buffer.src = ffi.new('char[]', data)
234 if lib.ZSTD_isError(zresult):
61 in_buffer.size = len(data)
235 raise ZstdError('zstd compress error: %s' %
62 in_buffer.pos = 0
236 ffi.string(lib.ZSTD_getErrorName(zresult)))
63 while in_buffer.pos < in_buffer.size:
237
64 res = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
238 if not out_buffer.pos:
65 if lib.ZSTD_isError(res):
239 break
66 raise Exception('zstd compress error: %s' % lib.ZSTD_getErrorName(res))
240
241 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
242 total_write += out_buffer.pos
243 out_buffer.pos = 0
244
245 return total_write
246
247
248 class ZstdCompressionObj(object):
249 def compress(self, data):
250 if self._finished:
251 raise ZstdError('cannot call compress() after compressor finished')
252
253 data_buffer = ffi.from_buffer(data)
254 source = ffi.new('ZSTD_inBuffer *')
255 source.src = data_buffer
256 source.size = len(data_buffer)
257 source.pos = 0
258
259 chunks = []
260
261 while source.pos < len(data):
262 zresult = lib.ZSTD_compressStream(self._cstream, self._out, source)
263 if lib.ZSTD_isError(zresult):
264 raise ZstdError('zstd compress error: %s' %
265 ffi.string(lib.ZSTD_getErrorName(zresult)))
266
267 if self._out.pos:
268 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
269 self._out.pos = 0
270
271 return b''.join(chunks)
67
272
68 if out_buffer.pos:
273 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
69 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
274 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
70 out_buffer.pos = 0
275 raise ValueError('flush mode not recognized')
276
277 if self._finished:
278 raise ZstdError('compressor object already finished')
279
280 assert self._out.pos == 0
281
282 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
283 zresult = lib.ZSTD_flushStream(self._cstream, self._out)
284 if lib.ZSTD_isError(zresult):
285 raise ZstdError('zstd compress error: %s' %
286 ffi.string(lib.ZSTD_getErrorName(zresult)))
287
288 # Output buffer is guaranteed to hold full block.
289 assert zresult == 0
290
291 if self._out.pos:
292 result = ffi.buffer(self._out.dst, self._out.pos)[:]
293 self._out.pos = 0
294 return result
295 else:
296 return b''
297
298 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
299 self._finished = True
300
301 chunks = []
302
303 while True:
304 zresult = lib.ZSTD_endStream(self._cstream, self._out)
305 if lib.ZSTD_isError(zresult):
306 raise ZstdError('error ending compression stream: %s' %
307 ffi.string(lib.ZSTD_getErroName(zresult)))
308
309 if self._out.pos:
310 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
311 self._out.pos = 0
312
313 if not zresult:
314 break
315
316 # GC compression stream immediately.
317 self._cstream = None
318
319 return b''.join(chunks)
71
320
72
321
73 class ZstdCompressor(object):
322 class ZstdCompressor(object):
74 def __init__(self, level=3, dict_data=None, compression_params=None):
323 def __init__(self, level=3, dict_data=None, compression_params=None,
75 if dict_data:
324 write_checksum=False, write_content_size=False,
76 raise Exception('dict_data not yet supported')
325 write_dict_id=True):
77 if compression_params:
326 if level < 1:
78 raise Exception('compression_params not yet supported')
327 raise ValueError('level must be greater than 0')
328 elif level > lib.ZSTD_maxCLevel():
329 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
79
330
80 self._compression_level = level
331 self._compression_level = level
332 self._dict_data = dict_data
333 self._cparams = compression_params
334 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
335 self._fparams.checksumFlag = write_checksum
336 self._fparams.contentSizeFlag = write_content_size
337 self._fparams.noDictIDFlag = not write_dict_id
81
338
82 def compress(self, data):
339 cctx = lib.ZSTD_createCCtx()
83 # Just use the stream API for now.
340 if cctx == ffi.NULL:
84 output = io.BytesIO()
341 raise MemoryError()
85 with self.write_to(output) as compressor:
342
86 compressor.write(data)
343 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
87 return output.getvalue()
344
345 def compress(self, data, allow_empty=False):
346 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
347 raise ValueError('cannot write empty inputs when writing content sizes')
348
349 # TODO use a CDict for performance.
350 dict_data = ffi.NULL
351 dict_size = 0
352
353 if self._dict_data:
354 dict_data = self._dict_data.as_bytes()
355 dict_size = len(self._dict_data)
356
357 params = ffi.new('ZSTD_parameters *')[0]
358 if self._cparams:
359 params.cParams = self._cparams.as_compression_parameters()
360 else:
361 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
362 dict_size)
363 params.fParams = self._fparams
364
365 dest_size = lib.ZSTD_compressBound(len(data))
366 out = new_nonzero('char[]', dest_size)
88
367
89 def copy_stream(self, ifh, ofh):
368 zresult = lib.ZSTD_compress_advanced(self._cctx,
90 cstream = self._get_cstream()
369 ffi.addressof(out), dest_size,
370 data, len(data),
371 dict_data, dict_size,
372 params)
373
374 if lib.ZSTD_isError(zresult):
375 raise ZstdError('cannot compress: %s' %
376 ffi.string(lib.ZSTD_getErrorName(zresult)))
377
378 return ffi.buffer(out, zresult)[:]
379
380 def compressobj(self, size=0):
381 cstream = self._get_cstream(size)
382 cobj = ZstdCompressionObj()
383 cobj._cstream = cstream
384 cobj._out = ffi.new('ZSTD_outBuffer *')
385 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
386 cobj._out.dst = cobj._dst_buffer
387 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
388 cobj._out.pos = 0
389 cobj._compressor = self
390 cobj._finished = False
391
392 return cobj
393
394 def copy_stream(self, ifh, ofh, size=0,
395 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
396 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
397
398 if not hasattr(ifh, 'read'):
399 raise ValueError('first argument must have a read() method')
400 if not hasattr(ofh, 'write'):
401 raise ValueError('second argument must have a write() method')
402
403 cstream = self._get_cstream(size)
91
404
92 in_buffer = ffi.new('ZSTD_inBuffer *')
405 in_buffer = ffi.new('ZSTD_inBuffer *')
93 out_buffer = ffi.new('ZSTD_outBuffer *')
406 out_buffer = ffi.new('ZSTD_outBuffer *')
94
407
95 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
408 dst_buffer = ffi.new('char[]', write_size)
96 out_buffer.size = _CSTREAM_OUT_SIZE
409 out_buffer.dst = dst_buffer
410 out_buffer.size = write_size
97 out_buffer.pos = 0
411 out_buffer.pos = 0
98
412
99 total_read, total_write = 0, 0
413 total_read, total_write = 0, 0
100
414
101 while True:
415 while True:
102 data = ifh.read(_CSTREAM_IN_SIZE)
416 data = ifh.read(read_size)
103 if not data:
417 if not data:
104 break
418 break
105
419
106 total_read += len(data)
420 data_buffer = ffi.from_buffer(data)
107
421 total_read += len(data_buffer)
108 in_buffer.src = ffi.new('char[]', data)
422 in_buffer.src = data_buffer
109 in_buffer.size = len(data)
423 in_buffer.size = len(data_buffer)
110 in_buffer.pos = 0
424 in_buffer.pos = 0
111
425
112 while in_buffer.pos < in_buffer.size:
426 while in_buffer.pos < in_buffer.size:
113 res = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
427 zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
114 if lib.ZSTD_isError(res):
428 if lib.ZSTD_isError(zresult):
115 raise Exception('zstd compress error: %s' %
429 raise ZstdError('zstd compress error: %s' %
116 lib.ZSTD_getErrorName(res))
430 ffi.string(lib.ZSTD_getErrorName(zresult)))
117
431
118 if out_buffer.pos:
432 if out_buffer.pos:
119 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
433 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
120 total_write = out_buffer.pos
434 total_write += out_buffer.pos
121 out_buffer.pos = 0
435 out_buffer.pos = 0
122
436
123 # We've finished reading. Flush the compressor.
437 # We've finished reading. Flush the compressor.
124 while True:
438 while True:
125 res = lib.ZSTD_endStream(cstream, out_buffer)
439 zresult = lib.ZSTD_endStream(cstream, out_buffer)
126 if lib.ZSTD_isError(res):
440 if lib.ZSTD_isError(zresult):
127 raise Exception('error ending compression stream: %s' %
441 raise ZstdError('error ending compression stream: %s' %
128 lib.ZSTD_getErrorName(res))
442 ffi.string(lib.ZSTD_getErrorName(zresult)))
129
443
130 if out_buffer.pos:
444 if out_buffer.pos:
131 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
445 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
132 total_write += out_buffer.pos
446 total_write += out_buffer.pos
133 out_buffer.pos = 0
447 out_buffer.pos = 0
134
448
135 if res == 0:
449 if zresult == 0:
136 break
450 break
137
451
138 return total_read, total_write
452 return total_read, total_write
139
453
140 def write_to(self, writer):
454 def write_to(self, writer, size=0,
141 return _ZstdCompressionWriter(self._get_cstream(), writer)
455 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
456
457 if not hasattr(writer, 'write'):
458 raise ValueError('must pass an object with a write() method')
459
460 return ZstdCompressionWriter(self, writer, size, write_size)
461
462 def read_from(self, reader, size=0,
463 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
464 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
465 if hasattr(reader, 'read'):
466 have_read = True
467 elif hasattr(reader, '__getitem__'):
468 have_read = False
469 buffer_offset = 0
470 size = len(reader)
471 else:
472 raise ValueError('must pass an object with a read() method or '
473 'conforms to buffer protocol')
474
475 cstream = self._get_cstream(size)
476
477 in_buffer = ffi.new('ZSTD_inBuffer *')
478 out_buffer = ffi.new('ZSTD_outBuffer *')
479
480 in_buffer.src = ffi.NULL
481 in_buffer.size = 0
482 in_buffer.pos = 0
483
484 dst_buffer = ffi.new('char[]', write_size)
485 out_buffer.dst = dst_buffer
486 out_buffer.size = write_size
487 out_buffer.pos = 0
488
489 while True:
490 # We should never have output data sitting around after a previous
491 # iteration.
492 assert out_buffer.pos == 0
493
494 # Collect input data.
495 if have_read:
496 read_result = reader.read(read_size)
497 else:
498 remaining = len(reader) - buffer_offset
499 slice_size = min(remaining, read_size)
500 read_result = reader[buffer_offset:buffer_offset + slice_size]
501 buffer_offset += slice_size
142
502
143 def _get_cstream(self):
503 # No new input data. Break out of the read loop.
504 if not read_result:
505 break
506
507 # Feed all read data into the compressor and emit output until
508 # exhausted.
509 read_buffer = ffi.from_buffer(read_result)
510 in_buffer.src = read_buffer
511 in_buffer.size = len(read_buffer)
512 in_buffer.pos = 0
513
514 while in_buffer.pos < in_buffer.size:
515 zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
516 if lib.ZSTD_isError(zresult):
517 raise ZstdError('zstd compress error: %s' %
518 ffi.string(lib.ZSTD_getErrorName(zresult)))
519
520 if out_buffer.pos:
521 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
522 out_buffer.pos = 0
523 yield data
524
525 assert out_buffer.pos == 0
526
527 # And repeat the loop to collect more data.
528 continue
529
530 # If we get here, input is exhausted. End the stream and emit what
531 # remains.
532 while True:
533 assert out_buffer.pos == 0
534 zresult = lib.ZSTD_endStream(cstream, out_buffer)
535 if lib.ZSTD_isError(zresult):
536 raise ZstdError('error ending compression stream: %s' %
537 ffi.string(lib.ZSTD_getErrorName(zresult)))
538
539 if out_buffer.pos:
540 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
541 out_buffer.pos = 0
542 yield data
543
544 if zresult == 0:
545 break
546
547 def _get_cstream(self, size):
144 cstream = lib.ZSTD_createCStream()
548 cstream = lib.ZSTD_createCStream()
549 if cstream == ffi.NULL:
550 raise MemoryError()
551
145 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
552 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
146
553
147 res = lib.ZSTD_initCStream(cstream, self._compression_level)
554 dict_data = ffi.NULL
148 if lib.ZSTD_isError(res):
555 dict_size = 0
556 if self._dict_data:
557 dict_data = self._dict_data.as_bytes()
558 dict_size = len(self._dict_data)
559
560 zparams = ffi.new('ZSTD_parameters *')[0]
561 if self._cparams:
562 zparams.cParams = self._cparams.as_compression_parameters()
563 else:
564 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
565 size, dict_size)
566 zparams.fParams = self._fparams
567
568 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
569 zparams, size)
570 if lib.ZSTD_isError(zresult):
149 raise Exception('cannot init CStream: %s' %
571 raise Exception('cannot init CStream: %s' %
150 lib.ZSTD_getErrorName(res))
572 ffi.string(lib.ZSTD_getErrorName(zresult)))
151
573
152 return cstream
574 return cstream
575
576
577 class FrameParameters(object):
578 def __init__(self, fparams):
579 self.content_size = fparams.frameContentSize
580 self.window_size = fparams.windowSize
581 self.dict_id = fparams.dictID
582 self.has_checksum = bool(fparams.checksumFlag)
583
584
585 def get_frame_parameters(data):
586 if not isinstance(data, bytes_type):
587 raise TypeError('argument must be bytes')
588
589 params = ffi.new('ZSTD_frameParams *')
590
591 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
592 if lib.ZSTD_isError(zresult):
593 raise ZstdError('cannot get frame parameters: %s' %
594 ffi.string(lib.ZSTD_getErrorName(zresult)))
595
596 if zresult:
597 raise ZstdError('not enough data for frame parameters; need %d bytes' %
598 zresult)
599
600 return FrameParameters(params[0])
601
602
603 class ZstdCompressionDict(object):
604 def __init__(self, data):
605 assert isinstance(data, bytes_type)
606 self._data = data
607
608 def __len__(self):
609 return len(self._data)
610
611 def dict_id(self):
612 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
613
614 def as_bytes(self):
615 return self._data
616
617
618 def train_dictionary(dict_size, samples, parameters=None):
619 if not isinstance(samples, list):
620 raise TypeError('samples must be a list')
621
622 total_size = sum(map(len, samples))
623
624 samples_buffer = new_nonzero('char[]', total_size)
625 sample_sizes = new_nonzero('size_t[]', len(samples))
626
627 offset = 0
628 for i, sample in enumerate(samples):
629 if not isinstance(sample, bytes_type):
630 raise ValueError('samples must be bytes')
631
632 l = len(sample)
633 ffi.memmove(samples_buffer + offset, sample, l)
634 offset += l
635 sample_sizes[i] = l
636
637 dict_data = new_nonzero('char[]', dict_size)
638
639 zresult = lib.ZDICT_trainFromBuffer(ffi.addressof(dict_data), dict_size,
640 ffi.addressof(samples_buffer),
641 ffi.addressof(sample_sizes, 0),
642 len(samples))
643 if lib.ZDICT_isError(zresult):
644 raise ZstdError('Cannot train dict: %s' %
645 ffi.string(lib.ZDICT_getErrorName(zresult)))
646
647 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
648
649
650 class ZstdDecompressionObj(object):
651 def __init__(self, decompressor):
652 self._decompressor = decompressor
653 self._dstream = self._decompressor._get_dstream()
654 self._finished = False
655
656 def decompress(self, data):
657 if self._finished:
658 raise ZstdError('cannot use a decompressobj multiple times')
659
660 in_buffer = ffi.new('ZSTD_inBuffer *')
661 out_buffer = ffi.new('ZSTD_outBuffer *')
662
663 data_buffer = ffi.from_buffer(data)
664 in_buffer.src = data_buffer
665 in_buffer.size = len(data_buffer)
666 in_buffer.pos = 0
667
668 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
669 out_buffer.dst = dst_buffer
670 out_buffer.size = len(dst_buffer)
671 out_buffer.pos = 0
672
673 chunks = []
674
675 while in_buffer.pos < in_buffer.size:
676 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
677 if lib.ZSTD_isError(zresult):
678 raise ZstdError('zstd decompressor error: %s' %
679 ffi.string(lib.ZSTD_getErrorName(zresult)))
680
681 if zresult == 0:
682 self._finished = True
683 self._dstream = None
684 self._decompressor = None
685
686 if out_buffer.pos:
687 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
688 out_buffer.pos = 0
689
690 return b''.join(chunks)
691
692
class ZstdDecompressionWriter(object):
    """Context manager that decompresses data written to it.

    Obtained from ``ZstdDecompressor.write_to()``. While active, each
    ``write()`` call feeds compressed bytes to a streaming decompressor
    and forwards decompressed output to the wrapped ``writer`` object.
    """

    def __init__(self, decompressor, writer, write_size):
        self._decompressor = decompressor
        self._writer = writer
        # Size of the scratch output buffer used for each write() call.
        self._write_size = write_size
        self._dstream = None
        self._entered = False

    def __enter__(self):
        if self._entered:
            raise ZstdError('cannot __enter__ multiple times')

        self._dstream = self._decompressor._get_dstream()
        self._entered = True

        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Dropping the reference releases the native DStream via its GC hook.
        self._entered = False
        self._dstream = None

    def memory_size(self):
        """Return the size in bytes of the underlying ZSTD_DStream.

        Only valid while the context manager is active.
        """
        if not self._dstream:
            # Fixed message: the two literal fragments previously joined into
            # the garbled "inactive decompressor call when context manager".
            raise ZstdError('cannot determine size of inactive decompressor; '
                            'call when context manager is active')

        return lib.ZSTD_sizeof_DStream(self._dstream)

    def write(self, data):
        """Feed compressed ``data`` into the decompressor.

        Returns the number of decompressed bytes written to the wrapped
        writer. Raises ZstdError outside an active context manager or on
        a decompression error.
        """
        if not self._entered:
            raise ZstdError('write must be called from an active context manager')

        total_write = 0

        in_buffer = ffi.new('ZSTD_inBuffer *')
        out_buffer = ffi.new('ZSTD_outBuffer *')

        data_buffer = ffi.from_buffer(data)
        in_buffer.src = data_buffer
        in_buffer.size = len(data_buffer)
        in_buffer.pos = 0

        dst_buffer = ffi.new('char[]', self._write_size)
        out_buffer.dst = dst_buffer
        out_buffer.size = len(dst_buffer)
        out_buffer.pos = 0

        # Consume all input, flushing decompressed output as it is produced.
        while in_buffer.pos < in_buffer.size:
            zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
            if lib.ZSTD_isError(zresult):
                raise ZstdError('zstd decompress error: %s' %
                                ffi.string(lib.ZSTD_getErrorName(zresult)))

            if out_buffer.pos:
                self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
                total_write += out_buffer.pos
                out_buffer.pos = 0

        return total_write
752
753
class ZstdDecompressor(object):
    """Context for performing zstandard decompression.

    Optionally seeded with a ``ZstdCompressionDict``. Exposes one-shot
    (``decompress``), incremental (``decompressobj``), streaming
    (``read_from``, ``write_to``, ``copy_stream``) and content-dict chain
    decompression APIs.
    """

    def __init__(self, dict_data=None):
        # Optional ZstdCompressionDict used to seed decompression.
        self._dict_data = dict_data

        dctx = lib.ZSTD_createDCtx()
        if dctx == ffi.NULL:
            raise MemoryError()

        # Reference DCtx copied into per-call contexts; freed via cffi GC hook.
        self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)

    @property
    def _ddict(self):
        """Create a digested dictionary (ZSTD_DDict) or None if no dict set.

        NOTE(review): the ``__dict__`` assignment below does not actually
        shadow this property (a data descriptor always wins on lookup), so
        a fresh DDict is built on every access. The ``ffi.gc`` wrapper
        ensures each one is released via ZSTD_freeDDict instead of leaking
        the native allocation, which was the case previously.
        """
        if self._dict_data:
            dict_data = self._dict_data.as_bytes()
            dict_size = len(self._dict_data)

            ddict = lib.ZSTD_createDDict(dict_data, dict_size)
            if ddict == ffi.NULL:
                raise ZstdError('could not create decompression dict')

            # Free the native DDict when this cdata is garbage collected.
            ddict = ffi.gc(ddict, lib.ZSTD_freeDDict)
        else:
            ddict = None

        self.__dict__['_ddict'] = ddict
        return ddict

    def decompress(self, data, max_output_size=0):
        """Decompress a complete zstd frame in a single call.

        The frame must declare its content size in its header, or
        ``max_output_size`` must be provided as an upper bound on the
        decompressed size. Returns the decompressed bytes; raises
        ZstdError on invalid input or size mismatch.
        """
        data_buffer = ffi.from_buffer(data)

        # Work on a copy of the reference DCtx so this method is usable
        # repeatedly without mutating shared state.
        orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
        dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
        lib.ZSTD_copyDCtx(dctx, self._refdctx)

        ddict = self._ddict

        output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
        if output_size:
            result_buffer = ffi.new('char[]', output_size)
            result_size = output_size
        else:
            # Frame header does not carry a content size; caller must bound
            # the output explicitly.
            if not max_output_size:
                raise ZstdError('input data invalid or missing content size '
                                'in frame header')

            result_buffer = ffi.new('char[]', max_output_size)
            result_size = max_output_size

        if ddict:
            zresult = lib.ZSTD_decompress_usingDDict(dctx,
                                                     result_buffer, result_size,
                                                     data_buffer, len(data_buffer),
                                                     ddict)
        else:
            zresult = lib.ZSTD_decompressDCtx(dctx,
                                              result_buffer, result_size,
                                              data_buffer, len(data_buffer))
        if lib.ZSTD_isError(zresult):
            raise ZstdError('decompression error: %s' %
                            ffi.string(lib.ZSTD_getErrorName(zresult)))
        elif output_size and zresult != output_size:
            raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
                            (zresult, output_size))

        return ffi.buffer(result_buffer, zresult)[:]

    def decompressobj(self):
        """Return a zlib-style incremental decompression object."""
        return ZstdDecompressionObj(self)

    def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
                  write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
                  skip_bytes=0):
        """Generator yielding decompressed chunks from ``reader``.

        ``reader`` is either an object with a ``read()`` method or an
        object conforming to the buffer protocol. ``skip_bytes`` leading
        bytes are discarded before decompression begins.
        """
        if skip_bytes >= read_size:
            raise ValueError('skip_bytes must be smaller than read_size')

        if hasattr(reader, 'read'):
            have_read = True
        elif hasattr(reader, '__getitem__'):
            have_read = False
            buffer_offset = 0
            size = len(reader)
        else:
            raise ValueError('must pass an object with a read() method or '
                             'conforms to buffer protocol')

        if skip_bytes:
            if have_read:
                reader.read(skip_bytes)
            else:
                if skip_bytes > size:
                    raise ValueError('skip_bytes larger than first input chunk')

                buffer_offset = skip_bytes

        dstream = self._get_dstream()

        in_buffer = ffi.new('ZSTD_inBuffer *')
        out_buffer = ffi.new('ZSTD_outBuffer *')

        dst_buffer = ffi.new('char[]', write_size)
        out_buffer.dst = dst_buffer
        out_buffer.size = len(dst_buffer)
        out_buffer.pos = 0

        while True:
            assert out_buffer.pos == 0

            if have_read:
                read_result = reader.read(read_size)
            else:
                remaining = size - buffer_offset
                slice_size = min(remaining, read_size)
                read_result = reader[buffer_offset:buffer_offset + slice_size]
                buffer_offset += slice_size

            # No new input. Break out of read loop.
            if not read_result:
                break

            # Feed all read data into decompressor and emit output until
            # exhausted.
            read_buffer = ffi.from_buffer(read_result)
            in_buffer.src = read_buffer
            in_buffer.size = len(read_buffer)
            in_buffer.pos = 0

            while in_buffer.pos < in_buffer.size:
                assert out_buffer.pos == 0

                zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
                if lib.ZSTD_isError(zresult):
                    raise ZstdError('zstd decompress error: %s' %
                                    ffi.string(lib.ZSTD_getErrorName(zresult)))

                if out_buffer.pos:
                    data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
                    out_buffer.pos = 0
                    yield data

                if zresult == 0:
                    # End of frame: stop even if more input remains.
                    return

            # Repeat loop to collect more input data.
            continue

        # If we get here, input is exhausted.

    def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
        """Return a ZstdDecompressionWriter forwarding output to ``writer``."""
        if not hasattr(writer, 'write'):
            raise ValueError('must pass an object with a write() method')

        return ZstdDecompressionWriter(self, writer, write_size)

    def copy_stream(self, ifh, ofh,
                    read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
                    write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
        """Decompress ``ifh`` into ``ofh``; returns (bytes_read, bytes_written)."""
        if not hasattr(ifh, 'read'):
            raise ValueError('first argument must have a read() method')
        if not hasattr(ofh, 'write'):
            raise ValueError('second argument must have a write() method')

        dstream = self._get_dstream()

        in_buffer = ffi.new('ZSTD_inBuffer *')
        out_buffer = ffi.new('ZSTD_outBuffer *')

        dst_buffer = ffi.new('char[]', write_size)
        out_buffer.dst = dst_buffer
        out_buffer.size = write_size
        out_buffer.pos = 0

        total_read, total_write = 0, 0

        # Read all available input.
        while True:
            data = ifh.read(read_size)
            if not data:
                break

            data_buffer = ffi.from_buffer(data)
            total_read += len(data_buffer)
            in_buffer.src = data_buffer
            in_buffer.size = len(data_buffer)
            in_buffer.pos = 0

            # Flush all read data to output.
            while in_buffer.pos < in_buffer.size:
                zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
                if lib.ZSTD_isError(zresult):
                    raise ZstdError('zstd decompressor error: %s' %
                                    ffi.string(lib.ZSTD_getErrorName(zresult)))

                if out_buffer.pos:
                    ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
                    total_write += out_buffer.pos
                    out_buffer.pos = 0

            # Continue loop to keep reading.

        return total_read, total_write

    def decompress_content_dict_chain(self, frames):
        """Decompress a chain of frames where each frame uses the previous
        frame's decompressed output as its dictionary.

        ``frames`` is a list of bytes; every frame must declare its content
        size in its header. Returns the decompressed bytes of the final
        frame in the chain.
        """
        if not isinstance(frames, list):
            raise TypeError('argument must be a list')

        if not frames:
            raise ValueError('empty input chain')

        # First chunk should not be using a dictionary. We handle it specially.
        chunk = frames[0]
        if not isinstance(chunk, bytes_type):
            raise ValueError('chunk 0 must be bytes')

        # All chunks should be zstd frames and should have content size set.
        chunk_buffer = ffi.from_buffer(chunk)
        params = ffi.new('ZSTD_frameParams *')
        zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
        if lib.ZSTD_isError(zresult):
            raise ValueError('chunk 0 is not a valid zstd frame')
        elif zresult:
            # Non-zero means more header bytes were needed to parse params.
            raise ValueError('chunk 0 is too small to contain a zstd frame')

        if not params.frameContentSize:
            raise ValueError('chunk 0 missing content size in frame')

        dctx = lib.ZSTD_createDCtx()
        if dctx == ffi.NULL:
            raise MemoryError()

        dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)

        last_buffer = ffi.new('char[]', params.frameContentSize)

        zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
                                          chunk_buffer, len(chunk_buffer))
        if lib.ZSTD_isError(zresult):
            raise ZstdError('could not decompress chunk 0: %s' %
                            ffi.string(lib.ZSTD_getErrorName(zresult)))

        # Special case of chain length of 1
        if len(frames) == 1:
            return ffi.buffer(last_buffer, len(last_buffer))[:]

        i = 1
        while i < len(frames):
            chunk = frames[i]
            if not isinstance(chunk, bytes_type):
                raise ValueError('chunk %d must be bytes' % i)

            chunk_buffer = ffi.from_buffer(chunk)
            zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
            if lib.ZSTD_isError(zresult):
                raise ValueError('chunk %d is not a valid zstd frame' % i)
            elif zresult:
                raise ValueError('chunk %d is too small to contain a zstd frame' % i)

            if not params.frameContentSize:
                raise ValueError('chunk %d missing content size in frame' % i)

            dest_buffer = ffi.new('char[]', params.frameContentSize)

            # Previous frame's output serves as the dictionary for this one.
            zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
                                                    chunk_buffer, len(chunk_buffer),
                                                    last_buffer, len(last_buffer))
            if lib.ZSTD_isError(zresult):
                raise ZstdError('could not decompress chunk %d' % i)

            last_buffer = dest_buffer
            i += 1

        return ffi.buffer(last_buffer, len(last_buffer))[:]

    def _get_dstream(self):
        """Create and initialize a ZSTD_DStream (with dictionary if set)."""
        dstream = lib.ZSTD_createDStream()
        if dstream == ffi.NULL:
            raise MemoryError()

        dstream = ffi.gc(dstream, lib.ZSTD_freeDStream)

        if self._dict_data:
            zresult = lib.ZSTD_initDStream_usingDict(dstream,
                                                     self._dict_data.as_bytes(),
                                                     len(self._dict_data))
        else:
            zresult = lib.ZSTD_initDStream(dstream)

        if lib.ZSTD_isError(zresult):
            raise ZstdError('could not initialize DStream: %s' %
                            ffi.string(lib.ZSTD_getErrorName(zresult)))

        return dstream
@@ -7,7 +7,6 b''
7 contrib/python-zstandard/setup.py not using absolute_import
7 contrib/python-zstandard/setup.py not using absolute_import
8 contrib/python-zstandard/setup_zstd.py not using absolute_import
8 contrib/python-zstandard/setup_zstd.py not using absolute_import
9 contrib/python-zstandard/tests/common.py not using absolute_import
9 contrib/python-zstandard/tests/common.py not using absolute_import
10 contrib/python-zstandard/tests/test_cffi.py not using absolute_import
11 contrib/python-zstandard/tests/test_compressor.py not using absolute_import
10 contrib/python-zstandard/tests/test_compressor.py not using absolute_import
12 contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
11 contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
13 contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
12 contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now