##// END OF EJS Templates
zstd: vendor python-zstandard 0.7.0...
Gregory Szorc -
r30895:c32454d6 default
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,132 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
15
/* get_frame_parameters(data) -> FrameParameters
 *
 * Module-level function: parses the header of a zstd frame contained in a
 * bytes object and returns a new FrameParametersObject describing it.
 * Raises TypeError for non-bytes input, and ZstdError when the header is
 * malformed or when more input bytes are required. */
FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
	const char* source;
	Py_ssize_t sourceSize;
	ZSTD_frameParams params;
	FrameParametersObject* result = NULL;
	size_t zresult;

	/* "y#" (Py3) / "s#" (Py2) expose a borrowed pointer into the bytes object */
#if PY_MAJOR_VERSION >= 3
	if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
#else
	if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
#endif
		&source, &sourceSize)) {
		return NULL;
	}

	/* Needed for Python 2 to reject unicode */
	if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
		PyErr_SetString(PyExc_TypeError, "argument must be bytes");
		return NULL;
	}

	zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);

	if (ZSTD_isError(zresult)) {
		PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
		return NULL;
	}

	/* A non-zero, non-error result means the input was too short; the value
	 * is the number of bytes needed to decode the header. */
	if (zresult) {
		PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
		return NULL;
	}

	result = PyObject_New(FrameParametersObject, &FrameParametersType);
	if (!result) {
		return NULL;
	}

	/* Copy the parsed parameters into the Python-visible object */
	result->frameContentSize = params.frameContentSize;
	result->windowSize = params.windowSize;
	result->dictID = params.dictID;
	result->checksumFlag = params.checksumFlag ? 1 : 0;

	return result;
}
62
/* tp_dealloc: FrameParametersObject owns no references, so releasing the
 * object's own memory is sufficient. */
static void FrameParameters_dealloc(PyObject* self) {
	PyObject_Del(self);
}
66
/* Read-only Python attributes, mapped directly onto the C struct fields of
 * FrameParametersObject via offsetof(). */
static PyMemberDef FrameParameters_members[] = {
	{ "content_size", T_ULONGLONG,
	  offsetof(FrameParametersObject, frameContentSize), READONLY,
	  "frame content size" },
	{ "window_size", T_UINT,
	  offsetof(FrameParametersObject, windowSize), READONLY,
	  "window size" },
	{ "dict_id", T_UINT,
	  offsetof(FrameParametersObject, dictID), READONLY,
	  "dictionary ID" },
	{ "has_checksum", T_BOOL,
	  offsetof(FrameParametersObject, checksumFlag), READONLY,
	  "checksum flag" },
	{ NULL }
};
82
/* Type object for FrameParameters. Instances are created only by
 * get_frame_parameters() (tp_new is 0, so the type is not callable from
 * Python); attribute access goes through FrameParameters_members. */
PyTypeObject FrameParametersType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"FrameParameters", /* tp_name */
	sizeof(FrameParametersObject), /* tp_basicsize */
	0, /* tp_itemsize */
	(destructor)FrameParameters_dealloc, /* tp_dealloc */
	0, /* tp_print */
	0, /* tp_getattr */
	0, /* tp_setattr */
	0, /* tp_compare */
	0, /* tp_repr */
	0, /* tp_as_number */
	0, /* tp_as_sequence */
	0, /* tp_as_mapping */
	0, /* tp_hash */
	0, /* tp_call */
	0, /* tp_str */
	0, /* tp_getattro */
	0, /* tp_setattro */
	0, /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT, /* tp_flags */
	FrameParameters__doc__, /* tp_doc */
	0, /* tp_traverse */
	0, /* tp_clear */
	0, /* tp_richcompare */
	0, /* tp_weaklistoffset */
	0, /* tp_iter */
	0, /* tp_iternext */
	0, /* tp_methods */
	FrameParameters_members, /* tp_members */
	0, /* tp_getset */
	0, /* tp_base */
	0, /* tp_dict */
	0, /* tp_descr_get */
	0, /* tp_descr_set */
	0, /* tp_dictoffset */
	0, /* tp_init */
	0, /* tp_alloc */
	0, /* tp_new */
};
123
/* Called during module import: finalizes FrameParametersType and registers
 * it on the module under the name "FrameParameters".
 * NOTE(review): a PyType_Ready() failure is silently swallowed here (void
 * return), leaving the pending Python exception for the caller to surface. */
void frameparams_module_init(PyObject* mod) {
	Py_TYPE(&FrameParametersType) = &PyType_Type;
	if (PyType_Ready(&FrameParametersType) < 0) {
		return;
	}

	/* PyModule_AddObject steals a reference; INCREF keeps the static type alive */
	Py_IncRef((PyObject*)&FrameParametersType);
	PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
}
@@ -0,0 +1,194 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Dependencies ======= */
12 #include <stddef.h> /* size_t */
13 #include <stdlib.h> /* malloc, calloc, free */
14 #include "pool.h"
15
16 /* ====== Compiler specifics ====== */
17 #if defined(_MSC_VER)
18 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
19 #endif
20
21
22 #ifdef ZSTD_MULTITHREAD
23
24 #include "threading.h" /* pthread adaptation */
25
26 /* A job is a function and an opaque argument */
27 typedef struct POOL_job_s {
28 POOL_function function;
29 void *opaque;
30 } POOL_job;
31
32 struct POOL_ctx_s {
33 /* Keep track of the threads */
34 pthread_t *threads;
35 size_t numThreads;
36
37 /* The queue is a circular buffer */
38 POOL_job *queue;
39 size_t queueHead;
40 size_t queueTail;
41 size_t queueSize;
42 /* The mutex protects the queue */
43 pthread_mutex_t queueMutex;
44 /* Condition variable for pushers to wait on when the queue is full */
45 pthread_cond_t queuePushCond;
46 /* Condition variables for poppers to wait on when the queue is empty */
47 pthread_cond_t queuePopCond;
48 /* Indicates if the queue is shutting down */
49 int shutdown;
50 };
51
52 /* POOL_thread() :
53 Work thread for the thread pool.
54 Waits for jobs and executes them.
55 @returns : NULL on failure else non-null.
56 */
/* POOL_thread() :
   Work thread for the thread pool.
   Waits for jobs and executes them.
   @returns : NULL on failure else non-null.
*/
static void* POOL_thread(void* opaque) {
    POOL_ctx* const ctx = (POOL_ctx*)opaque;
    if (!ctx) { return NULL; }
    for (;;) {
        /* Lock the mutex and wait for a non-empty queue or until shutdown */
        pthread_mutex_lock(&ctx->queueMutex);
        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
        }
        /* empty => shutting down: so stop */
        if (ctx->queueHead == ctx->queueTail) {
            pthread_mutex_unlock(&ctx->queueMutex);
            return opaque;
        }
        /* Pop a job off the circular queue */
        { POOL_job const job = ctx->queue[ctx->queueHead];
            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
            /* Unlock the mutex, signal a pusher, and run the job.
             * The job runs outside the lock, so jobs from different workers
             * may execute concurrently. */
            pthread_mutex_unlock(&ctx->queueMutex);
            pthread_cond_signal(&ctx->queuePushCond);
            job.function(job.opaque);
        }
    }
    /* Unreachable */
}
82
/* Create a pool of `numThreads` workers sharing a circular job queue that
 * holds up to `queueSize` pending jobs. Returns NULL on bad parameters or
 * allocation/thread-creation failure (partially built state is torn down
 * via POOL_free). */
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
    POOL_ctx *ctx;
    /* Check the parameters */
    if (!numThreads || !queueSize) { return NULL; }
    /* Allocate the context and zero initialize */
    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
    if (!ctx) { return NULL; }
    /* Initialize the job queue.
     * It needs one extra space since one space is wasted to differentiate empty
     * and full queues.
     */
    ctx->queueSize = queueSize + 1;
    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
    ctx->queueHead = 0;
    ctx->queueTail = 0;
    /* NOTE(review): return values of mutex/cond init are not checked; on an
     * exotic init failure POOL_free would destroy uninitialized objects. */
    pthread_mutex_init(&ctx->queueMutex, NULL);
    pthread_cond_init(&ctx->queuePushCond, NULL);
    pthread_cond_init(&ctx->queuePopCond, NULL);
    ctx->shutdown = 0;
    /* Allocate space for the thread handles */
    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
    ctx->numThreads = 0;
    /* Check for errors */
    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
    /* Initialize the threads */
    { size_t i;
        for (i = 0; i < numThreads; ++i) {
            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
                /* record how many threads actually started so POOL_free
                 * joins exactly those */
                ctx->numThreads = i;
                POOL_free(ctx);
                return NULL;
            } }
        ctx->numThreads = numThreads;
    }
    return ctx;
}
119
120 /*! POOL_join() :
121 Shutdown the queue, wake any sleeping threads, and join all of the threads.
122 */
123 static void POOL_join(POOL_ctx *ctx) {
124 /* Shut down the queue */
125 pthread_mutex_lock(&ctx->queueMutex);
126 ctx->shutdown = 1;
127 pthread_mutex_unlock(&ctx->queueMutex);
128 /* Wake up sleeping threads */
129 pthread_cond_broadcast(&ctx->queuePushCond);
130 pthread_cond_broadcast(&ctx->queuePopCond);
131 /* Join all of the threads */
132 { size_t i;
133 for (i = 0; i < ctx->numThreads; ++i) {
134 pthread_join(ctx->threads[i], NULL);
135 } }
136 }
137
138 void POOL_free(POOL_ctx *ctx) {
139 if (!ctx) { return; }
140 POOL_join(ctx);
141 pthread_mutex_destroy(&ctx->queueMutex);
142 pthread_cond_destroy(&ctx->queuePushCond);
143 pthread_cond_destroy(&ctx->queuePopCond);
144 if (ctx->queue) free(ctx->queue);
145 if (ctx->threads) free(ctx->threads);
146 free(ctx);
147 }
148
/* Enqueue `function(opaque)` for execution by a worker thread.
 * Blocks while the queue is full; silently drops the job if the pool is
 * shutting down (by design: POOL_join has already been initiated). */
void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
    if (!ctx) { return; }

    pthread_mutex_lock(&ctx->queueMutex);
    { POOL_job const job = {function, opaque};
        /* Wait until there is space in the queue for the new job */
        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
        while (ctx->queueHead == newTail && !ctx->shutdown) {
            pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
            /* recompute: the tail may have moved while we slept */
            newTail = (ctx->queueTail + 1) % ctx->queueSize;
        }
        /* The queue is still going => there is space */
        if (!ctx->shutdown) {
            ctx->queue[ctx->queueTail] = job;
            ctx->queueTail = newTail;
        }
    }
    pthread_mutex_unlock(&ctx->queueMutex);
    /* Wake one worker to pick up the new job */
    pthread_cond_signal(&ctx->queuePopCond);
}
170
171 #else /* ZSTD_MULTITHREAD not defined */
172 /* No multi-threading support */
173
/* We don't need any data, but if it is empty malloc() might return NULL. */
struct POOL_ctx_s {
    int data;
};

/* Single-threaded fallback: the "pool" is just a dummy allocation so the
 * same create/free/add API works without ZSTD_MULTITHREAD. */
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
    (void)numThreads;
    (void)queueSize;
    return (POOL_ctx *)malloc(sizeof(POOL_ctx));
}
184
185 void POOL_free(POOL_ctx *ctx) {
186 if (ctx) free(ctx);
187 }
188
/* Without threads, "adding" a job simply runs it synchronously on the
 * caller's thread. */
void POOL_add(void *ctx, POOL_function function, void *opaque) {
    (void)ctx;
    function(opaque);
}
193
194 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,56 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
#ifndef POOL_H
#define POOL_H

#if defined (__cplusplus)
extern "C" {
#endif


#include <stddef.h> /* size_t */

/* Opaque thread-pool handle; definition lives in pool.c and differs
 * depending on whether ZSTD_MULTITHREAD is enabled. */
typedef struct POOL_ctx_s POOL_ctx;

/*! POOL_create() :
    Create a thread pool with at most `numThreads` threads.
    `numThreads` must be at least 1.
    The maximum number of queued jobs before blocking is `queueSize`.
    `queueSize` must be at least 1.
    @return : The POOL_ctx pointer on success else NULL.
*/
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);

/*! POOL_free() :
    Free a thread pool returned by POOL_create().
    Blocks until all queued jobs have completed.
*/
void POOL_free(POOL_ctx *ctx);

/*! POOL_function :
    The function type that can be added to a thread pool.
*/
typedef void (*POOL_function)(void *);
/*! POOL_add_function :
    The function type for a generic thread pool add function.
*/
typedef void (*POOL_add_function)(void *, POOL_function, void *);

/*! POOL_add() :
    Add the job `function(opaque)` to the thread pool.
    Possibly blocks until there is room in the queue.
    Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
void POOL_add(void *ctx, POOL_function function, void *opaque);


#if defined (__cplusplus)
}
#endif

#endif
@@ -0,0 +1,79 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 /**
15 * This file will hold wrapper for systems, which do not support pthreads
16 */
17
18 /* ====== Compiler specifics ====== */
19 #if defined(_MSC_VER)
20 # pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
21 #endif
22
23
24 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
25
26 /**
27 * Windows minimalist Pthread Wrapper, based on :
28 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
29 */
30
31
32 /* === Dependencies === */
33 #include <process.h>
34 #include <errno.h>
35 #include "threading.h"
36
37
38 /* === Implementation === */
39
40 static unsigned __stdcall worker(void *arg)
41 {
42 pthread_t* const thread = (pthread_t*) arg;
43 thread->arg = thread->start_routine(thread->arg);
44 return 0;
45 }
46
47 int pthread_create(pthread_t* thread, const void* unused,
48 void* (*start_routine) (void*), void* arg)
49 {
50 (void)unused;
51 thread->arg = arg;
52 thread->start_routine = start_routine;
53 thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
54
55 if (!thread->handle)
56 return errno;
57 else
58 return 0;
59 }
60
/* pthread_join emulation: waits for the thread's handle to be signaled and,
 * on success, hands back the start routine's return value (stored into
 * thread->arg by worker()). Returns 0 on success or an error code.
 * NOTE(review): the thread handle is never closed here -- presumably leaked;
 * confirm against upstream zstdmt. */
int _pthread_join(pthread_t * thread, void **value_ptr)
{
    DWORD result;

    /* A zero handle means the thread was never started */
    if (!thread->handle) return 0;

    result = WaitForSingleObject(thread->handle, INFINITE);
    switch (result) {
    case WAIT_OBJECT_0:
        if (value_ptr) *value_ptr = thread->arg;
        return 0;
    case WAIT_ABANDONED:
        return EINVAL;
    default:
        return GetLastError();
    }
}
78
79 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,104 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
#ifndef THREADING_H_938743
#define THREADING_H_938743

#if defined (__cplusplus)
extern "C" {
#endif

#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)

/**
 * Windows minimalist Pthread Wrapper, based on :
 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
 */

/* Condition variables require Windows Vista (0x0600) APIs, so force the
 * target Windows version before including <windows.h>. */
#ifdef WINVER
# undef WINVER
#endif
#define WINVER 0x0600

#ifdef _WIN32_WINNT
# undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0600

#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif

#include <windows.h>

/* mutex : mapped onto a CRITICAL_SECTION (init/destroy cannot fail here,
 * so the pthread return values are discarded) */
#define pthread_mutex_t CRITICAL_SECTION
#define pthread_mutex_init(a,b) InitializeCriticalSection((a))
#define pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define pthread_mutex_lock(a) EnterCriticalSection((a))
#define pthread_mutex_unlock(a) LeaveCriticalSection((a))

/* condition variable : mapped onto a Vista+ CONDITION_VARIABLE */
#define pthread_cond_t CONDITION_VARIABLE
#define pthread_cond_init(a, b) InitializeConditionVariable((a))
#define pthread_cond_destroy(a) /* No delete */
#define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define pthread_cond_signal(a) WakeConditionVariable((a))
#define pthread_cond_broadcast(a) WakeAllConditionVariable((a))

/* pthread_create() and pthread_join() */
typedef struct {
    HANDLE handle;               /* _beginthreadex handle */
    void* (*start_routine)(void*);
    void* arg;                   /* start argument, later reused to hold the return value */
} pthread_t;

int pthread_create(pthread_t* thread, const void* unused,
                   void* (*start_routine) (void*), void* arg);

#define pthread_join(a, b) _pthread_join(&(a), (b))
int _pthread_join(pthread_t* thread, void** value_ptr);

/**
 * add here more wrappers as required
 */


#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>

#else /* ZSTD_MULTITHREAD not defined */
/* No multithreading support */

#define pthread_mutex_t int /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
#define pthread_mutex_init(a,b)
#define pthread_mutex_destroy(a)
#define pthread_mutex_lock(a)
#define pthread_mutex_unlock(a)

#define pthread_cond_t int
#define pthread_cond_init(a,b)
#define pthread_cond_destroy(a)
#define pthread_cond_wait(a,b)
#define pthread_cond_signal(a)
#define pthread_cond_broadcast(a)

/* do not use pthread_t */

#endif /* ZSTD_MULTITHREAD */

#if defined (__cplusplus)
}
#endif

#endif /* THREADING_H_938743 */
This diff has been collapsed as it changes many lines, (740 lines changed) Show them Hide them
@@ -0,0 +1,740 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Tuning parameters ====== */
12 #define ZSTDMT_NBTHREADS_MAX 128
13
14
15 /* ====== Compiler specifics ====== */
16 #if defined(_MSC_VER)
17 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
18 #endif
19
20
21 /* ====== Dependencies ====== */
22 #include <stdlib.h> /* malloc */
23 #include <string.h> /* memcpy */
24 #include "pool.h" /* threadpool */
25 #include "threading.h" /* mutex */
26 #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27 #include "zstdmt_compress.h"
28 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
29 #include "xxhash.h"
30
31
32 /* ====== Debug ====== */
33 #if 0
34
35 # include <stdio.h>
36 # include <unistd.h>
37 # include <sys/times.h>
38 static unsigned g_debugLevel = 3;
39 # define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
40 # define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
41
42 # define DEBUG_PRINTHEX(l,p,n) { \
43 unsigned debug_u; \
44 for (debug_u=0; debug_u<(n); debug_u++) \
45 DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
46 DEBUGLOGRAW(l, " \n"); \
47 }
48
49 static unsigned long long GetCurrentClockTimeMicroseconds()
50 {
51 static clock_t _ticksPerSecond = 0;
52 if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
53
54 struct tms junk; clock_t newTicks = (clock_t) times(&junk);
55 return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
56 }
57
58 #define MUTEX_WAIT_TIME_DLEVEL 5
59 #define PTHREAD_MUTEX_LOCK(mutex) \
60 if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
61 unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
62 pthread_mutex_lock(mutex); \
63 unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
64 unsigned long long elapsedTime = (afterTime-beforeTime); \
65 if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
66 DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
67 elapsedTime, #mutex); \
68 } \
69 } else pthread_mutex_lock(mutex);
70
71 #else
72
73 # define DEBUGLOG(l, ...) {} /* disabled */
74 # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
75 # define DEBUG_PRINTHEX(l,p,n) {}
76
77 #endif
78
79
80 /* ===== Buffer Pool ===== */
81
/* A (start, size) pair describing one heap allocation; start may be NULL. */
typedef struct buffer_s {
    void* start;
    size_t size;
} buffer_t;

static const buffer_t g_nullBuffer = { NULL, 0 };

/* LIFO cache of reusable buffers. bTable is over-allocated past one entry
 * by ZSTDMT_createBufferPool() (struct-hack variable-size array). */
typedef struct ZSTDMT_bufferPool_s {
    unsigned totalBuffers;
    unsigned nbBuffers;
    buffer_t bTable[1];   /* variable size */
} ZSTDMT_bufferPool;
94
95 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
96 {
97 unsigned const maxNbBuffers = 2*nbThreads + 2;
98 ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
99 if (bufPool==NULL) return NULL;
100 bufPool->totalBuffers = maxNbBuffers;
101 bufPool->nbBuffers = 0;
102 return bufPool;
103 }
104
105 static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
106 {
107 unsigned u;
108 if (!bufPool) return; /* compatibility with free on NULL */
109 for (u=0; u<bufPool->totalBuffers; u++)
110 free(bufPool->bTable[u].start);
111 free(bufPool);
112 }
113
/* assumption : invocation from main thread only ! */
/* Hand out a buffer of at least `bSize` bytes: reuse the most recently
 * released buffer when its size fits, otherwise allocate a new one. */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
{
    if (pool->nbBuffers) { /* try to use an existing buffer */
        buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
        size_t const availBufferSize = buf.size;
        if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */
            return buf;
        free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */
    }
    /* create new buffer */
    { buffer_t buffer;
        void* const start = malloc(bSize);
        if (start==NULL) bSize = 0;
        buffer.start = start; /* note : start can be NULL if malloc fails ! */
        buffer.size = bSize;
        return buffer;
    }
}
133
134 /* store buffer for later re-use, up to pool capacity */
135 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
136 {
137 if (buf.start == NULL) return; /* release on NULL */
138 if (pool->nbBuffers < pool->totalBuffers) {
139 pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
140 return;
141 }
142 /* Reached bufferPool capacity (should not happen) */
143 free(buf.start);
144 }
145
146
147 /* ===== CCtx Pool ===== */
148
/* Cache of reusable ZSTD_CCtx objects; cctx[] is over-allocated past one
 * entry by ZSTDMT_createCCtxPool() (struct-hack variable-size array). */
typedef struct {
    unsigned totalCCtx;
    unsigned availCCtx;
    ZSTD_CCtx* cctx[1];   /* variable size */
} ZSTDMT_CCtxPool;

/* assumption : CCtxPool invocation only from main thread */

/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
    unsigned u;
    for (u=0; u<pool->totalCCtx; u++)
        ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
    free(pool);
}
165
/* ZSTDMT_createCCtxPool() :
 * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
/* Only cctx[0] is created eagerly; the zeroed remainder is populated lazily
 * by ZSTDMT_getCCtx() as contexts are borrowed and released. */
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
{
    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
    if (!cctxPool) return NULL;
    cctxPool->totalCCtx = nbThreads;
    cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
    cctxPool->cctx[0] = ZSTD_createCCtx();
    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
    DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
    return cctxPool;
}
179
180 static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
181 {
182 if (pool->availCCtx) {
183 pool->availCCtx--;
184 return pool->cctx[pool->availCCtx];
185 }
186 return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
187 }
188
189 static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
190 {
191 if (cctx==NULL) return; /* compatibility with release on NULL */
192 if (pool->availCCtx < pool->totalCCtx)
193 pool->cctx[pool->availCCtx++] = cctx;
194 else
195 /* pool overflow : should not happen, since totalCCtx==nbThreads */
196 ZSTD_freeCCtx(cctx);
197 }
198
199
200 /* ===== Thread worker ===== */
201
/* Streaming input staging area: a buffer plus how many bytes are filled. */
typedef struct {
    buffer_t buffer;
    size_t filled;
} inBuff_t;

/* Everything a worker needs to compress one chunk. The producer reads the
 * result fields (cSize, jobCompleted, jobScanned) back under
 * jobCompleted_mutex after jobCompleted_cond is signaled. */
typedef struct {
    ZSTD_CCtx* cctx;
    buffer_t src;
    const void* srcStart;
    size_t srcSize;
    size_t dictSize;
    buffer_t dstBuff;
    size_t cSize;        /* compressed size, or a ZSTD error code */
    size_t dstFlushed;
    unsigned firstChunk;
    unsigned lastChunk;
    unsigned jobCompleted;
    unsigned jobScanned;
    pthread_mutex_t* jobCompleted_mutex;
    pthread_cond_t* jobCompleted_cond;
    ZSTD_parameters params;
    ZSTD_CDict* cdict;
    unsigned long long fullFrameSize;
} ZSTDMT_jobDescription;
226
/* ZSTDMT_compressChunk() : POOL_function type */
/* Worker entry point: compresses job->srcSize bytes (which follow
 * job->dictSize bytes of dictionary/overlap at job->srcStart) into
 * job->dstBuff, then marks the job complete and signals the producer.
 * Errors are reported by storing the ZSTD error code into job->cSize. */
void ZSTDMT_compressChunk(void* jobDescription)
{
    ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
    const void* const src = (const char*)job->srcStart + job->dictSize;
    buffer_t const dstBuff = job->dstBuff;
    DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
    if (job->cdict) {
        size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
        if (job->cdict) DEBUGLOG(3, "using CDict ");
        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
    } else {
        size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
        if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
        ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
    }
    if (!job->firstChunk) { /* flush frame header */
        /* Non-first chunks emit the frame header via a 0-byte compress; the
         * following compress calls write to dstBuff.start again, so the
         * header bytes are discarded and only chunk 0 keeps a header.
         * NOTE(review): assumed intent -- confirm against upstream zstdmt. */
        size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
        if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
        ZSTD_invalidateRepCodes(job->cctx);
    }

    DEBUGLOG(4, "Compressing : ");
    DEBUG_PRINTHEX(4, job->srcStart, 12);
    job->cSize = (job->lastChunk) ? /* last chunk signal */
        ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
        ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
    DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);

_endJob:
    /* Publish completion under the shared mutex and wake the producer */
    PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
    job->jobCompleted = 1;
    job->jobScanned = 0;
    pthread_cond_signal(job->jobCompleted_cond);
    pthread_mutex_unlock(job->jobCompleted_mutex);
}
263
264
265 /* ------------------------------------------ */
266 /* ===== Multi-threaded compression ===== */
267 /* ------------------------------------------ */
268
/* Multi-threaded compression context: owns the thread pool, the buffer and
 * cctx pools, streaming state, and a variable-size ring of job slots. */
struct ZSTDMT_CCtx_s {
    POOL_ctx* factory;
    ZSTDMT_bufferPool* buffPool;
    ZSTDMT_CCtxPool* cctxPool;
    /* mutex/cond shared by all job slots to report completion */
    pthread_mutex_t jobCompleted_mutex;
    pthread_cond_t jobCompleted_cond;
    size_t targetSectionSize;
    size_t marginSize;
    size_t inBuffSize;
    size_t dictSize;
    size_t targetDictSize;
    inBuff_t inBuff;
    ZSTD_parameters params;
    XXH64_state_t xxhState;
    unsigned nbThreads;
    unsigned jobIDMask;      /* jobs[] length is jobIDMask+1 (power of 2) */
    unsigned doneJobID;
    unsigned nextJobID;
    unsigned frameEnded;
    unsigned allJobsCompleted;
    unsigned overlapRLog;
    unsigned long long frameContentSize;
    size_t sectionSize;
    ZSTD_CDict* cdict;
    ZSTD_CStream* cstream;   /* only allocated when nbThreads==1 */
    ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */
};
296
/* Create a multi-threaded compression context for `nbThreads` workers
 * (1..ZSTDMT_NBTHREADS_MAX). The job ring is sized to the next power of two
 * above nbThreads+2 so jobIDMask can be used for wraparound.
 * Returns NULL on invalid nbThreads or any allocation failure. */
ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
{
    ZSTDMT_CCtx* cctx;
    U32 const minNbJobs = nbThreads + 2;
    U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
    U32 const nbJobs = 1 << nbJobsLog2;
    DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
        nbThreads, minNbJobs, nbJobsLog2, nbJobs);
    if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
    cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
    if (!cctx) return NULL;
    cctx->nbThreads = nbThreads;
    cctx->jobIDMask = nbJobs - 1;
    cctx->allJobsCompleted = 1;
    cctx->sectionSize = 0;
    cctx->overlapRLog = 3;
    cctx->factory = POOL_create(nbThreads, 1);
    cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
    cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
    /* bitwise | of !ptr results (each 0 or 1): true when any creation failed */
    if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */
        ZSTDMT_freeCCtx(cctx);
        return NULL;
    }
    if (nbThreads==1) {
        cctx->cstream = ZSTD_createCStream();
        if (!cctx->cstream) {
            ZSTDMT_freeCCtx(cctx); return NULL;
        } }
    pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
    pthread_cond_init(&cctx->jobCompleted_cond, NULL);
    DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
    return cctx;
}
330
/* ZSTDMT_releaseAllJobResources() :
 * Ensure all workers are killed first. */
/* Return every job slot's buffers and cctx to their pools, zero the job
 * ring, and mark all jobs completed. */
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
{
    unsigned jobID;
    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
        ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
        mtctx->jobs[jobID].src = g_nullBuffer;
        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
        mtctx->jobs[jobID].cctx = NULL;
    }
    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
    mtctx->inBuff.buffer = g_nullBuffer;
    mtctx->allJobsCompleted = 1;
}
349
/* Destroy a multi-threaded compression context and everything it owns.
 * NULL-safe; always returns 0. */
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
{
    if (mtctx==NULL) return 0; /* compatible with free on NULL */
    POOL_free(mtctx->factory);  /* joins all workers before resources go away */
    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */
    ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */
    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
    ZSTD_freeCDict(mtctx->cdict);
    ZSTD_freeCStream(mtctx->cstream);
    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
    pthread_cond_destroy(&mtctx->jobCompleted_cond);
    free(mtctx);
    return 0;
}
364
/* Set an MT-specific compression parameter. Returns 0 on success or a ZSTD
 * error code for unsupported parameters. */
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
{
    switch(parameter)
    {
    case ZSTDMT_p_sectionSize :
        mtctx->sectionSize = value;
        return 0;
    case ZSTDMT_p_overlapSectionLog :
        DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
        /* stored as a "reverse" log: 0 means maximum overlap, 9+ means none */
        mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
        return 0;
    default :
        return ERROR(compressionParameter_unsupported);
    }
}
380
381
382 /* ------------------------------------------ */
383 /* ===== Multi-threaded compression ===== */
384 /* ------------------------------------------ */
385
/* ZSTDMT_compressCCtx() :
 * One-pass multi-threaded compression.
 * Splits `src` into up to mtctx->nbThreads chunks, posts one compression job
 * per chunk to the thread pool, then collects results in order and
 * concatenates them into `dst`.
 * Chunk 0 compresses directly into `dst`; later chunks use pool buffers that
 * are memcpy'd into place once the preceding chunks are written.
 * @return : compressed size written into `dst`, or an error code
 *           (testable with ZSTD_isError()) */
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                           void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize,
                           int compressionLevel)
{
    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
    /* target chunk size : 4x the compression window */
    size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);
    unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
    unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
    size_t remainingSrcSize = srcSize;
    const char* const srcStart = (const char*)src;
    size_t frameStartPos = 0;

    DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
    DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
    params.fParams.contentSizeFlag = 1;

    if (nbChunks==1) {   /* fallback to single-thread mode */
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
        return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    }

    /* post one job per chunk */
    { unsigned u;
    for (u=0; u<nbChunks; u++) {
        size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
        size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity;
        buffer_t const dstAsBuffer = { dst, dstCapacity };
        /* chunk 0 writes straight into dst; other chunks borrow a pool buffer */
        buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer;
        ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);

        if ((cctx==NULL) || (dstBuffer.start==NULL)) {
            /* allocation failure : record an error result for this slot so the
             * collection loop below reports it, and stop posting new jobs */
            mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */
            mtctx->jobs[u].jobCompleted = 1;
            nbChunks = u+1;
            break;   /* let's wait for previous jobs to complete, but don't start new ones */
        }

        mtctx->jobs[u].srcStart = srcStart + frameStartPos;
        mtctx->jobs[u].srcSize = chunkSize;
        mtctx->jobs[u].fullFrameSize = srcSize;
        mtctx->jobs[u].params = params;
        mtctx->jobs[u].dstBuff = dstBuffer;
        mtctx->jobs[u].cctx = cctx;
        mtctx->jobs[u].firstChunk = (u==0);
        mtctx->jobs[u].lastChunk = (u==nbChunks-1);
        mtctx->jobs[u].jobCompleted = 0;
        mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
        mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;

        DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
        DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
        POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);

        frameStartPos += chunkSize;
        remainingSrcSize -= chunkSize;
    } }
    /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */

    /* collect results in chunk order; on error, keep draining remaining jobs
     * (to release their resources) but remember and report the error */
    { unsigned chunkID;
    size_t error = 0, dstPos = 0;
    for (chunkID=0; chunkID<nbChunks; chunkID++) {
        DEBUGLOG(3, "waiting for chunk %u ", chunkID);
        PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
        while (mtctx->jobs[chunkID].jobCompleted==0) {
            DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
            pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
        }
        pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
        DEBUGLOG(3, "ready to write chunk %u ", chunkID);

        ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
        mtctx->jobs[chunkID].cctx = NULL;
        mtctx->jobs[chunkID].srcStart = NULL;
        { size_t const cSize = mtctx->jobs[chunkID].cSize;
          if (ZSTD_isError(cSize)) error = cSize;
          if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
          if (chunkID) { /* note : chunk 0 is already written directly into dst */
              if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
              ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
              mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
          }
          dstPos += cSize ;
        }
    }
    if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos);
    return error ? error : dstPos;
    }

}
477
478
479 /* ====================================== */
480 /* ======= Streaming API ======= */
481 /* ====================================== */
482
483 static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
484 while (zcs->doneJobID < zcs->nextJobID) {
485 unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
486 PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
487 while (zcs->jobs[jobID].jobCompleted==0) {
488 DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
489 pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
490 }
491 pthread_mutex_unlock(&zcs->jobCompleted_mutex);
492 zcs->doneJobID++;
493 }
494 }
495
496
/* ZSTDMT_initCStream_internal() :
 * (Re)initializes streaming-compression state.
 * updateDict : if non-zero, replaces the cached cdict with one built from
 *              dict/dictSize (NULL/0 simply clears it); if zero, the previous
 *              dictionary is kept (used by ZSTDMT_resetCStream()).
 * pledgedSrcSize : optional, 0 == unknown.
 * Falls back to the single-threaded streaming API when nbThreads==1.
 * @return : 0, or an error code (ZSTD_isError()) */
static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
                                    const void* dict, size_t dictSize, unsigned updateDict,
                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_customMem const cmem = { NULL, NULL, NULL };
    DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
    if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
    if (zcs->allJobsCompleted == 0) {   /* previous job not correctly finished */
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        zcs->allJobsCompleted = 1;
    }
    zcs->params = params;
    if (updateDict) {
        ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
        if (dict && dictSize) {
            zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
            if (zcs->cdict == NULL) return ERROR(memory_allocation);
    }   }
    zcs->frameContentSize = pledgedSrcSize;
    /* overlapRLog >= 9 disables overlap; otherwise overlap = window >> overlapRLog */
    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
    DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
    /* section size : user setting if any, else 4x window; clamped to the
     * minimum job size and to at least the overlap size */
    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
    DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
    zcs->marginSize = zcs->targetSectionSize >> 2;
    /* input buffer must hold : overlap (dict) + one section + margin */
    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
    /* reset per-frame streaming state */
    zcs->inBuff.filled = 0;
    zcs->dictSize = 0;
    zcs->doneJobID = 0;
    zcs->nextJobID = 0;
    zcs->frameEnded = 0;
    zcs->allJobsCompleted = 0;
    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
    return 0;
}
537
538 size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
539 const void* dict, size_t dictSize,
540 ZSTD_parameters params, unsigned long long pledgedSrcSize)
541 {
542 return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
543 }
544
545 /* ZSTDMT_resetCStream() :
546 * pledgedSrcSize is optional and can be zero == unknown */
547 size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
548 {
549 if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
550 return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
551 }
552
553 size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
554 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
555 return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
556 }
557
558
/* ZSTDMT_createCompressionJob() :
 * Posts a compression job covering `srcSize` bytes of fresh input that follow
 * the current overlap window (zcs->dictSize) inside zcs->inBuff.
 * endFrame : non-zero when this is the last job of the frame.
 * Unless endFrame, a new input buffer is obtained and pre-filled with the new
 * overlap window plus any not-yet-compressed leftover input.
 * @return : 0, or an error code (memory allocation failure) */
static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
{
    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;

    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
        /* allocation failure : mark the slot completed so waiters don't hang,
         * then drain and release everything before reporting */
        zcs->jobs[jobID].jobCompleted = 1;
        zcs->nextJobID++;
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        return ERROR(memory_allocation);
    }

    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
    zcs->jobs[jobID].src = zcs->inBuff.buffer;
    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
    zcs->jobs[jobID].srcSize = srcSize;
    zcs->jobs[jobID].dictSize = zcs->dictSize;   /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
    zcs->jobs[jobID].params = zcs->params;
    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;  /* do not calculate checksum within sections, just keep it in header for first section */
    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;   /* external dictionary applies to the first section only */
    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
    zcs->jobs[jobID].dstBuff = dstBuffer;
    zcs->jobs[jobID].cctx = cctx;
    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
    zcs->jobs[jobID].lastChunk = endFrame;
    zcs->jobs[jobID].jobCompleted = 0;
    zcs->jobs[jobID].dstFlushed = 0;
    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;

    /* get a new buffer for next input */
    if (!endFrame) {
        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
            zcs->jobs[jobID].jobCompleted = 1;
            zcs->nextJobID++;
            ZSTDMT_waitForAllJobsCompleted(zcs);
            ZSTDMT_releaseAllJobResources(zcs);
            return ERROR(memory_allocation);
        }
        DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
        DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
        /* move the tail of the old buffer (new overlap + leftover input)
         * to the head of the new one */
        memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
        DEBUGLOG(5, "new inBuff pre-filled");
        zcs->dictSize = newDictSize;
    } else {
        /* last job of the frame : no further input buffer is needed */
        zcs->inBuff.buffer = g_nullBuffer;
        zcs->inBuff.filled = 0;
        zcs->dictSize = 0;
        zcs->frameEnded = 1;
        if (zcs->nextJobID == 0)
            zcs->params.fParams.checksumFlag = 0;   /* single chunk : checksum is calculated directly within worker thread */
    }

    DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
    zcs->nextJobID++;
    return 0;
}
623
624
/* ZSTDMT_flushNextJob() :
 * Flushes (part of) the oldest unflushed job's compressed output into `output`.
 * output : will be updated with amount of data flushed .
 * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
 * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
{
    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
    while (zcs->jobs[wJobID].jobCompleted==0) {
        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }   /* nothing ready to be flushed => skip */
        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);   /* block when nothing available to flush */
    }
    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
    /* compression job completed : output can be flushed */
    { ZSTDMT_jobDescription job = zcs->jobs[wJobID];   /* local copy; fields modified below are written back to the slot explicitly */
      if (!job.jobScanned) {   /* first look at this job's result : validate and fold into checksum */
          if (ZSTD_isError(job.cSize)) {
              DEBUGLOG(5, "compression error detected ");
              ZSTDMT_waitForAllJobsCompleted(zcs);
              ZSTDMT_releaseAllJobResources(zcs);
              return job.cSize;
          }
          ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
          zcs->jobs[wJobID].cctx = NULL;
          DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
          if (zcs->params.fParams.checksumFlag) {
              XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
              if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {   /* write checksum at end of last section */
                  U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
                  DEBUGLOG(4, "writing checksum : %08X \n", checksum);
                  MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
                  job.cSize += 4;
                  zcs->jobs[wJobID].cSize += 4;   /* keep slot in sync with local copy */
          }   }
          ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
          zcs->jobs[wJobID].srcStart = NULL;
          zcs->jobs[wJobID].src = g_nullBuffer;
          zcs->jobs[wJobID].jobScanned = 1;
      }
      /* copy as much compressed data as the output buffer can accept */
      { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
        DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
        memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
        output->pos += toWrite;
        job.dstFlushed += toWrite;
      }
      if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
          ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
          zcs->jobs[wJobID].dstBuff = g_nullBuffer;
          zcs->jobs[wJobID].jobCompleted = 0;   /* slot ready for reuse */
          zcs->doneJobID++;
      } else {
          zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* partial flush : remember progress in the slot */
      }
      /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
      if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
      if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
      zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
      return 0;   /* everything flushed */
}   }
686
687
688 size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
689 {
690 size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
691 if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */
692 if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);
693
694 /* fill input buffer */
695 { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
696 memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
697 input->pos += toLoad;
698 zcs->inBuff.filled += toLoad;
699 }
700
701 if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
702 && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */
703 CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
704 }
705
706 /* check for data to flush */
707 CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */
708
709 /* recommended next input size : fill current input buffer */
710 return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
711 }
712
713
714 static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
715 {
716 size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;
717
718 if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
719 if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
720 && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
721 CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
722 }
723
724 /* check if there is any data available to flush */
725 DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
726 return ZSTDMT_flushNextJob(zcs, output, 1);
727 }
728
729
730 size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
731 {
732 if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
733 return ZSTDMT_flushStream_internal(zcs, output, 0);
734 }
735
736 size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
737 {
738 if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
739 return ZSTDMT_flushStream_internal(zcs, output, 1);
740 }
@@ -0,0 +1,78 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
#ifndef ZSTDMT_COMPRESS_H
#define ZSTDMT_COMPRESS_H

#if defined (__cplusplus)
extern "C" {
#endif


/* Note : All prototypes defined in this file shall be considered experimental.
 *        There is no guarantee of API continuity (yet) on any of these prototypes */

/* ===   Dependencies   === */
#include <stddef.h>                     /* size_t */
#define ZSTD_STATIC_LINKING_ONLY        /* ZSTD_parameters */
#include "zstd.h"                       /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */


/* ===   Simple one-pass functions   === */

typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);

/* one-shot compression, distributed over nbThreads worker threads */
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       int compressionLevel);


/* ===   Streaming functions   === */

ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */

ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);

ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */


/* ===   Advanced functions and parameters   === */

#ifndef ZSTDMT_SECTION_SIZE_MIN
#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
#endif

ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize,  /**< dict can be released after init, a local copy is preserved within zcs */
                                          ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */

/* ZSDTMT_parameter :
 * List of parameters that can be set using ZSTDMT_setMTCtxParameter()
 * NOTE(review): "ZSDTMT" looks like a transposition of "ZSTDMT"; the
 * identifier is kept as-is since it is part of the public API surface. */
typedef enum {
    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
} ZSDTMT_parameter;

/* ZSTDMT_setMTCtxParameter() :
 * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
 * The function must be called typically after ZSTD_createCCtx().
 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);


#if defined (__cplusplus)
}
#endif

#endif /* ZSTDMT_COMPRESS_H */
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -1,90 +1,117 b''
1 Version History
1 Version History
2 ===============
2 ===============
3
3
4 0.7.0 (released 2017-02-07)
5 ---------------------------
6
7 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
8 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
9 decompression of *content-only dictionary chains*.
10 * CFFI module fully implemented; all tests run against both C extension and
11 CFFI implementation.
12 * Vendored version of zstd updated to 1.1.3.
13 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
14 to avoid extra memory allocation of dict data.
15 * Add function names to error messages (by using ":name" in PyArg_Parse*
16 functions).
17 * Reuse decompression context across operations. Previously, we created a
18 new ZSTD_DCtx for each decompress(). This was measured to slow down
19 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
20 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
21 things faster in the process.
22 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
23 of bytes written.
24 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
25 written to the underlying output object.
26 * CompressionParameters instances now expose their values as attributes.
27 * CompressionParameters instances no longer are subscriptable nor behave
28 as tuples (backwards incompatible). Use attributes to obtain values.
29 * DictParameters instances now expose their values as attributes.
30
4 0.6.0 (released 2017-01-14)
31 0.6.0 (released 2017-01-14)
5 ---------------------------
32 ---------------------------
6
33
7 * Support for legacy zstd protocols (build time opt in feature).
34 * Support for legacy zstd protocols (build time opt in feature).
8 * Automation improvements to test against Python 3.6, latest versions
35 * Automation improvements to test against Python 3.6, latest versions
9 of Tox, more deterministic AppVeyor behavior.
36 of Tox, more deterministic AppVeyor behavior.
10 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
37 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
11 source code manually.
38 source code manually.
12 * Vendored version of zstd updated to 1.1.2.
39 * Vendored version of zstd updated to 1.1.2.
13 * Documentation improvements.
40 * Documentation improvements.
14 * Introduce a bench.py script for performing (crude) benchmarks.
41 * Introduce a bench.py script for performing (crude) benchmarks.
15 * ZSTD_CCtx instances are now reused across multiple compress() operations.
42 * ZSTD_CCtx instances are now reused across multiple compress() operations.
16 * ZstdCompressor.write_to() now has a flush() method.
43 * ZstdCompressor.write_to() now has a flush() method.
17 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
44 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
18 flush a block (as opposed to ending the stream).
45 flush a block (as opposed to ending the stream).
19 * Disallow compress(b'') when writing content sizes by default (issue #11).
46 * Disallow compress(b'') when writing content sizes by default (issue #11).
20
47
21 0.5.2 (released 2016-11-12)
48 0.5.2 (released 2016-11-12)
22 ---------------------------
49 ---------------------------
23
50
24 * more packaging fixes for source distribution
51 * more packaging fixes for source distribution
25
52
26 0.5.1 (released 2016-11-12)
53 0.5.1 (released 2016-11-12)
27 ---------------------------
54 ---------------------------
28
55
29 * setup_zstd.py is included in the source distribution
56 * setup_zstd.py is included in the source distribution
30
57
31 0.5.0 (released 2016-11-10)
58 0.5.0 (released 2016-11-10)
32 ---------------------------
59 ---------------------------
33
60
34 * Vendored version of zstd updated to 1.1.1.
61 * Vendored version of zstd updated to 1.1.1.
35 * Continuous integration for Python 3.6 and 3.7
62 * Continuous integration for Python 3.6 and 3.7
36 * Continuous integration for Conda
63 * Continuous integration for Conda
37 * Added compression and decompression APIs providing similar interfaces
64 * Added compression and decompression APIs providing similar interfaces
38 to the standard library ``zlib`` and ``bz2`` modules. This allows
65 to the standard library ``zlib`` and ``bz2`` modules. This allows
39 coding to a common interface.
66 coding to a common interface.
40 * ``zstd.__version__`` is now defined.
67 * ``zstd.__version__`` is now defined.
41 * ``read_from()`` on various APIs now accepts objects implementing the buffer
68 * ``read_from()`` on various APIs now accepts objects implementing the buffer
42 protocol.
69 protocol.
43 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
70 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
44 to pass in an existing buffer with a header without having to create a
71 to pass in an existing buffer with a header without having to create a
45 slice or a new object.
72 slice or a new object.
46 * Implemented ``ZstdCompressionDict.as_bytes()``.
73 * Implemented ``ZstdCompressionDict.as_bytes()``.
47 * Python's memory allocator is now used instead of ``malloc()``.
74 * Python's memory allocator is now used instead of ``malloc()``.
48 * Low-level zstd data structures are reused in more instances, cutting down
75 * Low-level zstd data structures are reused in more instances, cutting down
49 on overhead for certain operations.
76 on overhead for certain operations.
50 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
77 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
51 has now been refactored into a standalone ``setup_zstd.py`` file. This
78 has now been refactored into a standalone ``setup_zstd.py`` file. This
52 allows other projects with ``setup.py`` files to reuse the
79 allows other projects with ``setup.py`` files to reuse the
53 ``distutils`` code for this project without copying code.
80 ``distutils`` code for this project without copying code.
54 * The monolithic ``zstd.c`` file has been split into a header file defining
81 * The monolithic ``zstd.c`` file has been split into a header file defining
55 types and separate ``.c`` source files for the implementation.
82 types and separate ``.c`` source files for the implementation.
56
83
57 History of the Project
84 History of the Project
58 ======================
85 ======================
59
86
60 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
87 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
61 Python extension for use by the Mercurial project. A very hacky prototype
88 Python extension for use by the Mercurial project. A very hacky prototype
62 is sent to the mercurial-devel list for RFC.
89 is sent to the mercurial-devel list for RFC.
63
90
64 2016-09-03 - Most functionality from Zstandard C API implemented. Source
91 2016-09-03 - Most functionality from Zstandard C API implemented. Source
65 code published on https://github.com/indygreg/python-zstandard. Travis-CI
92 code published on https://github.com/indygreg/python-zstandard. Travis-CI
66 automation configured. 0.0.1 release on PyPI.
93 automation configured. 0.0.1 release on PyPI.
67
94
68 2016-09-05 - After the API was rounded out a bit and support for Python
95 2016-09-05 - After the API was rounded out a bit and support for Python
69 2.6 and 2.7 was added, version 0.1 was released to PyPI.
96 2.6 and 2.7 was added, version 0.1 was released to PyPI.
70
97
71 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
98 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
72 was released to PyPI.
99 was released to PyPI.
73
100
74 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
101 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
75 now accepts arguments controlling frame parameters. The source size can now
102 now accepts arguments controlling frame parameters. The source size can now
76 be declared when performing streaming compression. ZstdDecompressor.decompress()
103 be declared when performing streaming compression. ZstdDecompressor.decompress()
77 is implemented. Compression dictionaries are now cached when using the simple
104 is implemented. Compression dictionaries are now cached when using the simple
78 compression and decompression APIs. Memory size APIs added.
105 compression and decompression APIs. Memory size APIs added.
79 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
106 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
80 implemented. This rounds out the major compression/decompression APIs planned
107 implemented. This rounds out the major compression/decompression APIs planned
81 by the author.
108 by the author.
82
109
83 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
110 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
84 decoding a zstd frame (issue #2).
111 decoding a zstd frame (issue #2).
85
112
86 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
113 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
87 write buffer sizes, and a few bug fixes involving failure to read/write
114 write buffer sizes, and a few bug fixes involving failure to read/write
88 all data when buffer sizes were too small to hold remaining data.
115 all data when buffer sizes were too small to hold remaining data.
89
116
90 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
117 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,829 +1,943 b''
1 ================
1 ================
2 python-zstandard
2 python-zstandard
3 ================
3 ================
4
4
5 This project provides Python bindings for interfacing with the
5 This project provides Python bindings for interfacing with the
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface is provided.
7 and CFFI interface are provided.
8
8
9 The primary goal of the extension is to provide a Pythonic interface to
9 The primary goal of the project is to provide a rich interface to the
10 the underlying C API. This means exposing most of the features and flexibility
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
12
13
13 The canonical home for this project is
14 The canonical home for this project is
14 https://github.com/indygreg/python-zstandard.
15 https://github.com/indygreg/python-zstandard.
15
16
16 | |ci-status| |win-ci-status|
17 | |ci-status| |win-ci-status|
17
18
18 State of Project
19 State of Project
19 ================
20 ================
20
21
21 The project is officially in beta state. The author is reasonably satisfied
22 The project is officially in beta state. The author is reasonably satisfied
22 with the current API and that functionality works as advertised. There
23 with the current API and that functionality works as advertised. There
23 may be some backwards incompatible changes before 1.0. Though the author
24 may be some backwards incompatible changes before 1.0. Though the author
24 does not intend to make any major changes to the Python API.
25 does not intend to make any major changes to the Python API.
25
26
27 This project is vendored and distributed with Mercurial 4.1, where it is
28 used in a production capacity.
29
26 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
30 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
27 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
31 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
28 confident the extension is stable and works as advertised on these
32 confident the extension is stable and works as advertised on these
29 platforms.
33 platforms.
30
34
31 Expected Changes
35 Expected Changes
32 ----------------
36 ----------------
33
37
34 The author is reasonably confident in the current state of what's
38 The author is reasonably confident in the current state of what's
35 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
39 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
36 Those APIs likely won't change significantly. Some low-level behavior
40 Those APIs likely won't change significantly. Some low-level behavior
37 (such as naming and types expected by arguments) may change.
41 (such as naming and types expected by arguments) may change.
38
42
39 There will likely be arguments added to control the input and output
43 There will likely be arguments added to control the input and output
40 buffer sizes (currently, certain operations read and write in chunk
44 buffer sizes (currently, certain operations read and write in chunk
41 sizes using zstd's preferred defaults).
45 sizes using zstd's preferred defaults).
42
46
43 There should be an API that accepts an object that conforms to the buffer
47 There should be an API that accepts an object that conforms to the buffer
44 interface and returns an iterator over compressed or decompressed output.
48 interface and returns an iterator over compressed or decompressed output.
45
49
46 The author is on the fence as to whether to support the extremely
50 The author is on the fence as to whether to support the extremely
47 low level compression and decompression APIs. It could be useful to
51 low level compression and decompression APIs. It could be useful to
48 support compression without the framing headers. But the author doesn't
52 support compression without the framing headers. But the author doesn't
49 believe it a high priority at this time.
53 believe it a high priority at this time.
50
54
51 The CFFI bindings are half-baked and need to be finished.
55 The CFFI bindings are feature complete and all tests run against both
56 the C extension and CFFI bindings to ensure behavior parity.
52
57
53 Requirements
58 Requirements
54 ============
59 ============
55
60
56 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, and 3.5
61 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
57 on common platforms (Linux, Windows, and OS X). Only x86_64 is currently
62 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
58 well-tested as an architecture.
63 currently well-tested as an architecture.
59
64
60 Installing
65 Installing
61 ==========
66 ==========
62
67
63 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
68 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
64 So, to install this package::
69 So, to install this package::
65
70
66 $ pip install zstandard
71 $ pip install zstandard
67
72
68 Binary wheels are made available for some platforms. If you need to
73 Binary wheels are made available for some platforms. If you need to
69 install from a source distribution, all you should need is a working C
74 install from a source distribution, all you should need is a working C
70 compiler and the Python development headers/libraries. On many Linux
75 compiler and the Python development headers/libraries. On many Linux
71 distributions, you can install a ``python-dev`` or ``python-devel``
76 distributions, you can install a ``python-dev`` or ``python-devel``
72 package to provide these dependencies.
77 package to provide these dependencies.
73
78
74 Packages are also uploaded to Anaconda Cloud at
79 Packages are also uploaded to Anaconda Cloud at
75 https://anaconda.org/indygreg/zstandard. See that URL for how to install
80 https://anaconda.org/indygreg/zstandard. See that URL for how to install
76 this package with ``conda``.
81 this package with ``conda``.
77
82
78 Performance
83 Performance
79 ===========
84 ===========
80
85
81 Very crude and non-scientific benchmarking (most benchmarks fall in this
86 Very crude and non-scientific benchmarking (most benchmarks fall in this
82 category because proper benchmarking is hard) show that the Python bindings
87 category because proper benchmarking is hard) show that the Python bindings
83 perform within 10% of the native C implementation.
88 perform within 10% of the native C implementation.
84
89
85 The following table compares the performance of compressing and decompressing
90 The following table compares the performance of compressing and decompressing
86 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
91 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
87 obtained with the ``zstd`` program are on the left. The remaining columns detail
92 obtained with the ``zstd`` program are on the left. The remaining columns detail
88 performance of various compression APIs in the Python bindings.
93 performance of various compression APIs in the Python bindings.
89
94
90 +-------+-----------------+-----------------+-----------------+---------------+
95 +-------+-----------------+-----------------+-----------------+---------------+
91 | Level | Native | Simple | Stream In | Stream Out |
96 | Level | Native | Simple | Stream In | Stream Out |
92 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
97 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
93 +=======+=================+=================+=================+===============+
98 +=======+=================+=================+=================+===============+
94 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
99 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
95 +-------+-----------------+-----------------+-----------------+---------------+
100 +-------+-----------------+-----------------+-----------------+---------------+
96 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
101 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
97 +-------+-----------------+-----------------+-----------------+---------------+
102 +-------+-----------------+-----------------+-----------------+---------------+
98 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
103 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
99 +-------+-----------------+-----------------+-----------------+---------------+
104 +-------+-----------------+-----------------+-----------------+---------------+
100 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
105 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
101 +-------+-----------------+-----------------+-----------------+---------------+
106 +-------+-----------------+-----------------+-----------------+---------------+
102
107
103 Again, these are very unscientific. But it shows that Python is capable of
108 Again, these are very unscientific. But it shows that Python is capable of
104 compressing at several hundred MB/s and decompressing at over 1 GB/s.
109 compressing at several hundred MB/s and decompressing at over 1 GB/s.
105
110
106 Comparison to Other Python Bindings
111 Comparison to Other Python Bindings
107 ===================================
112 ===================================
108
113
109 https://pypi.python.org/pypi/zstd is an alternative Python binding to
114 https://pypi.python.org/pypi/zstd is an alternate Python binding to
110 Zstandard. At the time this was written, the latest release of that
115 Zstandard. At the time this was written, the latest release of that
111 package (1.0.0.2) had the following significant differences from this package:
116 package (1.1.2) only exposed the simple APIs for compression and decompression.
112
117 This package exposes much more of the zstd API, including streaming and
113 * It only exposes the simple API for compression and decompression operations.
118 dictionary compression. This package also has CFFI support.
114 This extension exposes the streaming API, dictionary training, and more.
115 * It adds a custom framing header to compressed data and there is no way to
116 disable it. This means that data produced with that module cannot be used by
117 other Zstandard implementations.
118
119
119 Bundling of Zstandard Source Code
120 Bundling of Zstandard Source Code
120 =================================
121 =================================
121
122
122 The source repository for this project contains a vendored copy of the
123 The source repository for this project contains a vendored copy of the
123 Zstandard source code. This is done for a few reasons.
124 Zstandard source code. This is done for a few reasons.
124
125
125 First, Zstandard is relatively new and not yet widely available as a system
126 First, Zstandard is relatively new and not yet widely available as a system
126 package. Providing a copy of the source code enables the Python C extension
127 package. Providing a copy of the source code enables the Python C extension
127 to be compiled without requiring the user to obtain the Zstandard source code
128 to be compiled without requiring the user to obtain the Zstandard source code
128 separately.
129 separately.
129
130
130 Second, Zstandard has both a stable *public* API and an *experimental* API.
131 Second, Zstandard has both a stable *public* API and an *experimental* API.
131 The *experimental* API is actually quite useful (contains functionality for
132 The *experimental* API is actually quite useful (contains functionality for
132 training dictionaries for example), so it is something we wish to expose to
133 training dictionaries for example), so it is something we wish to expose to
133 Python. However, the *experimental* API is only available via static linking.
134 Python. However, the *experimental* API is only available via static linking.
134 Furthermore, the *experimental* API can change at any time. So, control over
135 Furthermore, the *experimental* API can change at any time. So, control over
135 the exact version of the Zstandard library linked against is important to
136 the exact version of the Zstandard library linked against is important to
136 ensure known behavior.
137 ensure known behavior.
137
138
138 Instructions for Building and Testing
139 Instructions for Building and Testing
139 =====================================
140 =====================================
140
141
141 Once you have the source code, the extension can be built via setup.py::
142 Once you have the source code, the extension can be built via setup.py::
142
143
143 $ python setup.py build_ext
144 $ python setup.py build_ext
144
145
145 We recommend testing with ``nose``::
146 We recommend testing with ``nose``::
146
147
147 $ nosetests
148 $ nosetests
148
149
149 A Tox configuration is present to test against multiple Python versions::
150 A Tox configuration is present to test against multiple Python versions::
150
151
151 $ tox
152 $ tox
152
153
153 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
154 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
154 don't have it, those tests won't run.
155 don't have it, those tests won't run.
155
156
156 There is also an experimental CFFI module. You need the ``cffi`` Python
157 There is also an experimental CFFI module. You need the ``cffi`` Python
157 package installed to build and test that.
158 package installed to build and test that.
158
159
159 To create a virtualenv with all development dependencies, do something
160 To create a virtualenv with all development dependencies, do something
160 like the following::
161 like the following::
161
162
162 # Python 2
163 # Python 2
163 $ virtualenv venv
164 $ virtualenv venv
164
165
165 # Python 3
166 # Python 3
166 $ python3 -m venv venv
167 $ python3 -m venv venv
167
168
168 $ source venv/bin/activate
169 $ source venv/bin/activate
169 $ pip install cffi hypothesis nose tox
170 $ pip install cffi hypothesis nose tox
170
171
171 API
172 API
172 ===
173 ===
173
174
174 The compiled C extension provides a ``zstd`` Python module. This module
175 The compiled C extension provides a ``zstd`` Python module. This module
175 exposes the following interfaces.
176 exposes the following interfaces.
176
177
177 ZstdCompressor
178 ZstdCompressor
178 --------------
179 --------------
179
180
180 The ``ZstdCompressor`` class provides an interface for performing
181 The ``ZstdCompressor`` class provides an interface for performing
181 compression operations.
182 compression operations.
182
183
183 Each instance is associated with parameters that control compression
184 Each instance is associated with parameters that control compression
184 behavior. These come from the following named arguments (all optional):
185 behavior. These come from the following named arguments (all optional):
185
186
186 level
187 level
187 Integer compression level. Valid values are between 1 and 22.
188 Integer compression level. Valid values are between 1 and 22.
188 dict_data
189 dict_data
189 Compression dictionary to use.
190 Compression dictionary to use.
190
191
191 Note: When using dictionary data and ``compress()`` is called multiple
192 Note: When using dictionary data and ``compress()`` is called multiple
192 times, the ``CompressionParameters`` derived from an integer compression
193 times, the ``CompressionParameters`` derived from an integer compression
193 ``level`` and the first compressed data's size will be reused for all
194 ``level`` and the first compressed data's size will be reused for all
194 subsequent operations. This may not be desirable if source data size
195 subsequent operations. This may not be desirable if source data size
195 varies significantly.
196 varies significantly.
196 compression_params
197 compression_params
197 A ``CompressionParameters`` instance (overrides the ``level`` value).
198 A ``CompressionParameters`` instance (overrides the ``level`` value).
198 write_checksum
199 write_checksum
199 Whether a 4 byte checksum should be written with the compressed data.
200 Whether a 4 byte checksum should be written with the compressed data.
200 Defaults to False. If True, the decompressor can verify that decompressed
201 Defaults to False. If True, the decompressor can verify that decompressed
201 data matches the original input data.
202 data matches the original input data.
202 write_content_size
203 write_content_size
203 Whether the size of the uncompressed data will be written into the
204 Whether the size of the uncompressed data will be written into the
204 header of compressed data. Defaults to False. The data will only be
205 header of compressed data. Defaults to False. The data will only be
205 written if the compressor knows the size of the input data. This is
206 written if the compressor knows the size of the input data. This is
206 likely not true for streaming compression.
207 likely not true for streaming compression.
207 write_dict_id
208 write_dict_id
208 Whether to write the dictionary ID into the compressed data.
209 Whether to write the dictionary ID into the compressed data.
209 Defaults to True. The dictionary ID is only written if a dictionary
210 Defaults to True. The dictionary ID is only written if a dictionary
210 is being used.
211 is being used.
211
212
212 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
213 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
213 instances can be called from multiple Python threads simultaneously. In other
214 instances can be called from multiple Python threads simultaneously. In other
214 words, assume instances are not thread safe unless stated otherwise.
215 words, assume instances are not thread safe unless stated otherwise.
215
216
216 Simple API
217 Simple API
217 ^^^^^^^^^^
218 ^^^^^^^^^^
218
219
219 ``compress(data)`` compresses and returns data as a one-shot operation.::
220 ``compress(data)`` compresses and returns data as a one-shot operation.::
220
221
221 cctx = zstd.ZstdCompressor()
222 cctx = zstd.ZstdCompressor()
222 compressed = cctx.compress(b'data to compress')
223 compressed = cctx.compress(b'data to compress')
223
224
224 Unless ``compression_params`` or ``dict_data`` are passed to the
225 Unless ``compression_params`` or ``dict_data`` are passed to the
225 ``ZstdCompressor``, each invocation of ``compress()`` will calculate the
226 ``ZstdCompressor``, each invocation of ``compress()`` will calculate the
226 optimal compression parameters for the configured compression ``level`` and
227 optimal compression parameters for the configured compression ``level`` and
227 input data size (some parameters are fine-tuned for small input sizes).
228 input data size (some parameters are fine-tuned for small input sizes).
228
229
229 If a compression dictionary is being used, the compression parameters
230 If a compression dictionary is being used, the compression parameters
230 determined from the first input's size will be reused for subsequent
231 determined from the first input's size will be reused for subsequent
231 operations.
232 operations.
232
233
233 There is currently a deficiency in zstd's C APIs that makes it difficult
234 There is currently a deficiency in zstd's C APIs that makes it difficult
234 to round trip empty inputs when ``write_content_size=True``. Attempting
235 to round trip empty inputs when ``write_content_size=True``. Attempting
235 this will raise a ``ValueError`` unless ``allow_empty=True`` is passed
236 this will raise a ``ValueError`` unless ``allow_empty=True`` is passed
236 to ``compress()``.
237 to ``compress()``.
237
238
238 Streaming Input API
239 Streaming Input API
239 ^^^^^^^^^^^^^^^^^^^
240 ^^^^^^^^^^^^^^^^^^^
240
241
241 ``write_to(fh)`` (which behaves as a context manager) allows you to *stream*
242 ``write_to(fh)`` (which behaves as a context manager) allows you to *stream*
242 data into a compressor.::
243 data into a compressor.::
243
244
244 cctx = zstd.ZstdCompressor(level=10)
245 cctx = zstd.ZstdCompressor(level=10)
245 with cctx.write_to(fh) as compressor:
246 with cctx.write_to(fh) as compressor:
246 compressor.write(b'chunk 0')
247 compressor.write(b'chunk 0')
247 compressor.write(b'chunk 1')
248 compressor.write(b'chunk 1')
248 ...
249 ...
249
250
250 The argument to ``write_to()`` must have a ``write(data)`` method. As
251 The argument to ``write_to()`` must have a ``write(data)`` method. As
251 compressed data is available, ``write()`` will be called with the compressed
252 compressed data is available, ``write()`` will be called with the compressed
252 data as its argument. Many common Python types implement ``write()``, including
253 data as its argument. Many common Python types implement ``write()``, including
253 open file handles and ``io.BytesIO``.
254 open file handles and ``io.BytesIO``.
254
255
255 ``write_to()`` returns an object representing a streaming compressor instance.
256 ``write_to()`` returns an object representing a streaming compressor instance.
256 It **must** be used as a context manager. That object's ``write(data)`` method
257 It **must** be used as a context manager. That object's ``write(data)`` method
257 is used to feed data into the compressor.
258 is used to feed data into the compressor.
258
259
259 A ``flush()`` method can be called to evict whatever data remains within the
260 A ``flush()`` method can be called to evict whatever data remains within the
260 compressor's internal state into the output object. This may result in 0 or
261 compressor's internal state into the output object. This may result in 0 or
261 more ``write()`` calls to the output object.
262 more ``write()`` calls to the output object.
262
263
264 Both ``write()`` and ``flush()`` return the number of bytes written to the
265 object's ``write()``. In many cases, small inputs do not accumulate enough
266 data to cause a write and ``write()`` will return ``0``.
267
263 If the size of the data being fed to this streaming compressor is known,
268 If the size of the data being fed to this streaming compressor is known,
264 you can declare it before compression begins::
269 you can declare it before compression begins::
265
270
266 cctx = zstd.ZstdCompressor()
271 cctx = zstd.ZstdCompressor()
267 with cctx.write_to(fh, size=data_len) as compressor:
272 with cctx.write_to(fh, size=data_len) as compressor:
268 compressor.write(chunk0)
273 compressor.write(chunk0)
269 compressor.write(chunk1)
274 compressor.write(chunk1)
270 ...
275 ...
271
276
272 Declaring the size of the source data allows compression parameters to
277 Declaring the size of the source data allows compression parameters to
273 be tuned. And if ``write_content_size`` is used, it also results in the
278 be tuned. And if ``write_content_size`` is used, it also results in the
274 content size being written into the frame header of the output data.
279 content size being written into the frame header of the output data.
275
280
276 The size of chunks being ``write()`` to the destination can be specified::
281 The size of chunks being ``write()`` to the destination can be specified::
277
282
278 cctx = zstd.ZstdCompressor()
283 cctx = zstd.ZstdCompressor()
279 with cctx.write_to(fh, write_size=32768) as compressor:
284 with cctx.write_to(fh, write_size=32768) as compressor:
280 ...
285 ...
281
286
282 To see how much memory is being used by the streaming compressor::
287 To see how much memory is being used by the streaming compressor::
283
288
284 cctx = zstd.ZstdCompressor()
289 cctx = zstd.ZstdCompressor()
285 with cctx.write_to(fh) as compressor:
290 with cctx.write_to(fh) as compressor:
286 ...
291 ...
287 byte_size = compressor.memory_size()
292 byte_size = compressor.memory_size()
288
293
289 Streaming Output API
294 Streaming Output API
290 ^^^^^^^^^^^^^^^^^^^^
295 ^^^^^^^^^^^^^^^^^^^^
291
296
292 ``read_from(reader)`` provides a mechanism to stream data out of a compressor
297 ``read_from(reader)`` provides a mechanism to stream data out of a compressor
293 as an iterator of data chunks.::
298 as an iterator of data chunks.::
294
299
295 cctx = zstd.ZstdCompressor()
300 cctx = zstd.ZstdCompressor()
296 for chunk in cctx.read_from(fh):
301 for chunk in cctx.read_from(fh):
297 # Do something with emitted data.
302 # Do something with emitted data.
298
303
299 ``read_from()`` accepts an object that has a ``read(size)`` method or conforms
304 ``read_from()`` accepts an object that has a ``read(size)`` method or conforms
300 to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that
305 to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that
301 provide the buffer protocol.)
306 provide the buffer protocol.)
302
307
303 Uncompressed data is fetched from the source either by calling ``read(size)``
308 Uncompressed data is fetched from the source either by calling ``read(size)``
304 or by fetching a slice of data from the object directly (in the case where
309 or by fetching a slice of data from the object directly (in the case where
305 the buffer protocol is being used). The returned iterator consists of chunks
310 the buffer protocol is being used). The returned iterator consists of chunks
306 of compressed data.
311 of compressed data.
307
312
308 If reading from the source via ``read()``, ``read()`` will be called until
313 If reading from the source via ``read()``, ``read()`` will be called until
309 it raises or returns an empty bytes (``b''``). It is perfectly valid for
314 it raises or returns an empty bytes (``b''``). It is perfectly valid for
310 the source to deliver fewer bytes than were requested by ``read(size)``.
315 the source to deliver fewer bytes than were requested by ``read(size)``.
311
316
312 Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument
317 Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument
313 declaring the size of the input stream::
318 declaring the size of the input stream::
314
319
315 cctx = zstd.ZstdCompressor()
320 cctx = zstd.ZstdCompressor()
316 for chunk in cctx.read_from(fh, size=some_int):
321 for chunk in cctx.read_from(fh, size=some_int):
317 pass
322 pass
318
323
319 You can also control the size that data is ``read()`` from the source and
324 You can also control the size that data is ``read()`` from the source and
320 the ideal size of output chunks::
325 the ideal size of output chunks::
321
326
322 cctx = zstd.ZstdCompressor()
327 cctx = zstd.ZstdCompressor()
323 for chunk in cctx.read_from(fh, read_size=16384, write_size=8192):
328 for chunk in cctx.read_from(fh, read_size=16384, write_size=8192):
324 pass
329 pass
325
330
326 Unlike ``write_to()``, ``read_from()`` does not give direct control over the
331 Unlike ``write_to()``, ``read_from()`` does not give direct control over the
327 sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever
332 sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever
328 the object being read from delivers. These will often be of a uniform size.
333 the object being read from delivers. These will often be of a uniform size.
329
334
330 Stream Copying API
335 Stream Copying API
331 ^^^^^^^^^^^^^^^^^^
336 ^^^^^^^^^^^^^^^^^^
332
337
333 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
338 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
334 compressing it.::
339 compressing it.::
335
340
336 cctx = zstd.ZstdCompressor()
341 cctx = zstd.ZstdCompressor()
337 cctx.copy_stream(ifh, ofh)
342 cctx.copy_stream(ifh, ofh)
338
343
339 For example, say you wish to compress a file::
344 For example, say you wish to compress a file::
340
345
341 cctx = zstd.ZstdCompressor()
346 cctx = zstd.ZstdCompressor()
342 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
347 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
343 cctx.copy_stream(ifh, ofh)
348 cctx.copy_stream(ifh, ofh)
344
349
345 It is also possible to declare the size of the source stream::
350 It is also possible to declare the size of the source stream::
346
351
347 cctx = zstd.ZstdCompressor()
352 cctx = zstd.ZstdCompressor()
348 cctx.copy_stream(ifh, ofh, size=len_of_input)
353 cctx.copy_stream(ifh, ofh, size=len_of_input)
349
354
350 You can also specify the size of the chunks that are ``read()`` and ``write()``
355 You can also specify the size of the chunks that are ``read()`` and ``write()``
351 from and to the streams::
356 from and to the streams::
352
357
353 cctx = zstd.ZstdCompressor()
358 cctx = zstd.ZstdCompressor()
354 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
359 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
355
360
356 The stream copier returns a 2-tuple of bytes read and written::
361 The stream copier returns a 2-tuple of bytes read and written::
357
362
358 cctx = zstd.ZstdCompressor()
363 cctx = zstd.ZstdCompressor()
359 read_count, write_count = cctx.copy_stream(ifh, ofh)
364 read_count, write_count = cctx.copy_stream(ifh, ofh)
360
365
361 Compressor API
366 Compressor API
362 ^^^^^^^^^^^^^^
367 ^^^^^^^^^^^^^^
363
368
364 ``compressobj()`` returns an object that exposes ``compress(data)`` and
369 ``compressobj()`` returns an object that exposes ``compress(data)`` and
365 ``flush()`` methods. Each returns compressed data or an empty bytes.
370 ``flush()`` methods. Each returns compressed data or an empty bytes.
366
371
367 The purpose of ``compressobj()`` is to provide an API-compatible interface
372 The purpose of ``compressobj()`` is to provide an API-compatible interface
368 with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to
373 with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to
369 swap in different compressor objects while using the same API.
374 swap in different compressor objects while using the same API.
370
375
371 ``flush()`` accepts an optional argument indicating how to end the stream.
376 ``flush()`` accepts an optional argument indicating how to end the stream.
372 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
377 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
373 Once this type of flush is performed, ``compress()`` and ``flush()`` can
378 Once this type of flush is performed, ``compress()`` and ``flush()`` can
374 no longer be called. This type of flush **must** be called to end the
379 no longer be called. This type of flush **must** be called to end the
375 compression context. If not called, returned data may be incomplete.
380 compression context. If not called, returned data may be incomplete.
376
381
377 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
382 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
378 zstd block. Flushes of this type can be performed multiple times. The next
383 zstd block. Flushes of this type can be performed multiple times. The next
379 call to ``compress()`` will begin a new zstd block.
384 call to ``compress()`` will begin a new zstd block.
380
385
381 Here is how this API should be used::
386 Here is how this API should be used::
382
387
383 cctx = zstd.ZstdCompressor()
388 cctx = zstd.ZstdCompressor()
384 cobj = cctx.compressobj()
389 cobj = cctx.compressobj()
385 data = cobj.compress(b'raw input 0')
390 data = cobj.compress(b'raw input 0')
386 data = cobj.compress(b'raw input 1')
391 data = cobj.compress(b'raw input 1')
387 data = cobj.flush()
392 data = cobj.flush()
388
393
389 Or to flush blocks::
394 Or to flush blocks::
390
395
391 cctx = zstd.ZstdCompressor()
396 cctx = zstd.ZstdCompressor()
392 cobj = cctx.compressobj()
397 cobj = cctx.compressobj()
393 data = cobj.compress(b'chunk in first block')
398 data = cobj.compress(b'chunk in first block')
394 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
399 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
395 data = cobj.compress(b'chunk in second block')
400 data = cobj.compress(b'chunk in second block')
396 data = cobj.flush()
401 data = cobj.flush()
397
402
398 For best performance results, keep input chunks under 256KB. This avoids
403 For best performance results, keep input chunks under 256KB. This avoids
399 extra allocations for a large output object.
404 extra allocations for a large output object.
400
405
401 It is possible to declare the input size of the data that will be fed into
406 It is possible to declare the input size of the data that will be fed into
402 the compressor::
407 the compressor::
403
408
404 cctx = zstd.ZstdCompressor()
409 cctx = zstd.ZstdCompressor()
405 cobj = cctx.compressobj(size=6)
410 cobj = cctx.compressobj(size=6)
406 data = cobj.compress(b'foobar')
411 data = cobj.compress(b'foobar')
407 data = cobj.flush()
412 data = cobj.flush()
408
413
409 ZstdDecompressor
414 ZstdDecompressor
410 ----------------
415 ----------------
411
416
412 The ``ZstdDecompressor`` class provides an interface for performing
417 The ``ZstdDecompressor`` class provides an interface for performing
413 decompression.
418 decompression.
414
419
415 Each instance is associated with parameters that control decompression. These
420 Each instance is associated with parameters that control decompression. These
416 come from the following named arguments (all optional):
421 come from the following named arguments (all optional):
417
422
418 dict_data
423 dict_data
419 Compression dictionary to use.
424 Compression dictionary to use.
420
425
421 The interface of this class is very similar to ``ZstdCompressor`` (by design).
426 The interface of this class is very similar to ``ZstdCompressor`` (by design).
422
427
423 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
428 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
424 instances can be called from multiple Python threads simultaneously. In other
429 instances can be called from multiple Python threads simultaneously. In other
425 words, assume instances are not thread safe unless stated otherwise.
430 words, assume instances are not thread safe unless stated otherwise.
426
431
427 Simple API
432 Simple API
428 ^^^^^^^^^^
433 ^^^^^^^^^^
429
434
430 ``decompress(data)`` can be used to decompress an entire compressed zstd
435 ``decompress(data)`` can be used to decompress an entire compressed zstd
431 frame in a single operation.::
436 frame in a single operation.::
432
437
433 dctx = zstd.ZstdDecompressor()
438 dctx = zstd.ZstdDecompressor()
434 decompressed = dctx.decompress(data)
439 decompressed = dctx.decompress(data)
435
440
436 By default, ``decompress(data)`` will only work on data written with the content
441 By default, ``decompress(data)`` will only work on data written with the content
437 size encoded in its header. This can be achieved by creating a
442 size encoded in its header. This can be achieved by creating a
438 ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without
443 ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without
439 an embedded content size is seen, ``zstd.ZstdError`` will be raised.
444 an embedded content size is seen, ``zstd.ZstdError`` will be raised.
440
445
441 If the compressed data doesn't have its content size embedded within it,
446 If the compressed data doesn't have its content size embedded within it,
442 decompression can be attempted by specifying the ``max_output_size``
447 decompression can be attempted by specifying the ``max_output_size``
443 argument.::
448 argument.::
444
449
445 dctx = zstd.ZstdDecompressor()
450 dctx = zstd.ZstdDecompressor()
446 uncompressed = dctx.decompress(data, max_output_size=1048576)
451 uncompressed = dctx.decompress(data, max_output_size=1048576)
447
452
448 Ideally, ``max_output_size`` will be identical to the decompressed output
453 Ideally, ``max_output_size`` will be identical to the decompressed output
449 size.
454 size.
450
455
451 If ``max_output_size`` is too small to hold the decompressed data,
456 If ``max_output_size`` is too small to hold the decompressed data,
452 ``zstd.ZstdError`` will be raised.
457 ``zstd.ZstdError`` will be raised.
453
458
454 If ``max_output_size`` is larger than the decompressed data, the allocated
459 If ``max_output_size`` is larger than the decompressed data, the allocated
455 output buffer will be resized to only use the space required.
460 output buffer will be resized to only use the space required.
456
461
457 Please note that an allocation of the requested ``max_output_size`` will be
462 Please note that an allocation of the requested ``max_output_size`` will be
458 performed every time the method is called. Setting to a very large value could
463 performed every time the method is called. Setting to a very large value could
459 result in a lot of work for the memory allocator and may result in
464 result in a lot of work for the memory allocator and may result in
460 ``MemoryError`` being raised if the allocation fails.
465 ``MemoryError`` being raised if the allocation fails.
461
466
462 If the exact size of decompressed data is unknown, it is **strongly**
467 If the exact size of decompressed data is unknown, it is **strongly**
463 recommended to use a streaming API.
468 recommended to use a streaming API.
464
469
465 Streaming Input API
470 Streaming Input API
466 ^^^^^^^^^^^^^^^^^^^
471 ^^^^^^^^^^^^^^^^^^^
467
472
468 ``write_to(fh)`` can be used to incrementally send compressed data to a
473 ``write_to(fh)`` can be used to incrementally send compressed data to a
469 decompressor.::
474 decompressor.::
470
475
471 dctx = zstd.ZstdDecompressor()
476 dctx = zstd.ZstdDecompressor()
472 with dctx.write_to(fh) as decompressor:
477 with dctx.write_to(fh) as decompressor:
473 decompressor.write(compressed_data)
478 decompressor.write(compressed_data)
474
479
475 This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
480 This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
476 the decompressor by calling ``write(data)`` and decompressed output is written
481 the decompressor by calling ``write(data)`` and decompressed output is written
477 to the output object by calling its ``write(data)`` method.
482 to the output object by calling its ``write(data)`` method.
478
483
484 Calls to ``write()`` will return the number of bytes written to the output
485 object. Not all inputs will result in bytes being written, so return values
486 of ``0`` are possible.
487
479 The size of chunks being ``write()`` to the destination can be specified::
488 The size of chunks being ``write()`` to the destination can be specified::
480
489
481 dctx = zstd.ZstdDecompressor()
490 dctx = zstd.ZstdDecompressor()
482 with dctx.write_to(fh, write_size=16384) as decompressor:
491 with dctx.write_to(fh, write_size=16384) as decompressor:
483 pass
492 pass
484
493
485 You can see how much memory is being used by the decompressor::
494 You can see how much memory is being used by the decompressor::
486
495
487 dctx = zstd.ZstdDecompressor()
496 dctx = zstd.ZstdDecompressor()
488 with dctx.write_to(fh) as decompressor:
497 with dctx.write_to(fh) as decompressor:
489 byte_size = decompressor.memory_size()
498 byte_size = decompressor.memory_size()
490
499
491 Streaming Output API
500 Streaming Output API
492 ^^^^^^^^^^^^^^^^^^^^
501 ^^^^^^^^^^^^^^^^^^^^
493
502
494 ``read_from(fh)`` provides a mechanism to stream decompressed data out of a
503 ``read_from(fh)`` provides a mechanism to stream decompressed data out of a
495 compressed source as an iterator of data chunks.::
504 compressed source as an iterator of data chunks.::
496
505
497 dctx = zstd.ZstdDecompressor()
506 dctx = zstd.ZstdDecompressor()
498 for chunk in dctx.read_from(fh):
507 for chunk in dctx.read_from(fh):
499 # Do something with original data.
508 # Do something with original data.
500
509
501 ``read_from()`` accepts a) an object with a ``read(size)`` method that will
510 ``read_from()`` accepts a) an object with a ``read(size)`` method that will
502 return compressed bytes b) an object conforming to the buffer protocol that
511 return compressed bytes b) an object conforming to the buffer protocol that
503 can expose its data as a contiguous range of bytes. The ``bytes`` and
512 can expose its data as a contiguous range of bytes. The ``bytes`` and
504 ``memoryview`` types expose this buffer protocol.
513 ``memoryview`` types expose this buffer protocol.
505
514
506 ``read_from()`` returns an iterator whose elements are chunks of the
515 ``read_from()`` returns an iterator whose elements are chunks of the
507 decompressed data.
516 decompressed data.
508
517
509 The size of requested ``read()`` from the source can be specified::
518 The size of requested ``read()`` from the source can be specified::
510
519
511 dctx = zstd.ZstdDecompressor()
520 dctx = zstd.ZstdDecompressor()
512 for chunk in dctx.read_from(fh, read_size=16384):
521 for chunk in dctx.read_from(fh, read_size=16384):
513 pass
522 pass
514
523
515 It is also possible to skip leading bytes in the input data::
524 It is also possible to skip leading bytes in the input data::
516
525
517 dctx = zstd.ZstdDecompressor()
526 dctx = zstd.ZstdDecompressor()
518 for chunk in dctx.read_from(fh, skip_bytes=1):
527 for chunk in dctx.read_from(fh, skip_bytes=1):
519 pass
528 pass
520
529
521 Skipping leading bytes is useful if the source data contains extra
530 Skipping leading bytes is useful if the source data contains extra
522 *header* data but you want to avoid the overhead of making a buffer copy
531 *header* data but you want to avoid the overhead of making a buffer copy
523 or allocating a new ``memoryview`` object in order to decompress the data.
532 or allocating a new ``memoryview`` object in order to decompress the data.
524
533
525 Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator
534 Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator
526 controls when data is decompressed. If the iterator isn't consumed,
535 controls when data is decompressed. If the iterator isn't consumed,
527 decompression is put on hold.
536 decompression is put on hold.
528
537
529 When ``read_from()`` is passed an object conforming to the buffer protocol,
538 When ``read_from()`` is passed an object conforming to the buffer protocol,
530 the behavior may seem similar to what occurs when the simple decompression
539 the behavior may seem similar to what occurs when the simple decompression
531 API is used. However, this API works when the decompressed size is unknown.
540 API is used. However, this API works when the decompressed size is unknown.
532 Furthermore, if feeding large inputs, the decompressor will work in chunks
541 Furthermore, if feeding large inputs, the decompressor will work in chunks
533 instead of performing a single operation.
542 instead of performing a single operation.
534
543
535 Stream Copying API
544 Stream Copying API
536 ^^^^^^^^^^^^^^^^^^
545 ^^^^^^^^^^^^^^^^^^
537
546
538 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
547 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
539 performing decompression.::
548 performing decompression.::
540
549
541 dctx = zstd.ZstdDecompressor()
550 dctx = zstd.ZstdDecompressor()
542 dctx.copy_stream(ifh, ofh)
551 dctx.copy_stream(ifh, ofh)
543
552
544 e.g. to decompress a file to another file::
553 e.g. to decompress a file to another file::
545
554
546 dctx = zstd.ZstdDecompressor()
555 dctx = zstd.ZstdDecompressor()
547 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
556 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
548 dctx.copy_stream(ifh, ofh)
557 dctx.copy_stream(ifh, ofh)
549
558
550 The size of chunks being ``read()`` and ``write()`` from and to the streams
559 The size of chunks being ``read()`` and ``write()`` from and to the streams
551 can be specified::
560 can be specified::
552
561
553 dctx = zstd.ZstdDecompressor()
562 dctx = zstd.ZstdDecompressor()
554 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
563 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
555
564
556 Decompressor API
565 Decompressor API
557 ^^^^^^^^^^^^^^^^
566 ^^^^^^^^^^^^^^^^
558
567
559 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
568 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
560 method. Compressed data chunks are fed into ``decompress(data)`` and
569 method. Compressed data chunks are fed into ``decompress(data)`` and
561 uncompressed output (or an empty bytes) is returned. Output from subsequent
570 uncompressed output (or an empty bytes) is returned. Output from subsequent
562 calls needs to be concatenated to reassemble the full decompressed byte
571 calls needs to be concatenated to reassemble the full decompressed byte
563 sequence.
572 sequence.
564
573
565 The purpose of ``decompressobj()`` is to provide an API-compatible interface
574 The purpose of ``decompressobj()`` is to provide an API-compatible interface
566 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
575 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
567 to swap in different decompressor objects while using the same API.
576 to swap in different decompressor objects while using the same API.
568
577
569 Each object is single use: once an input frame is decoded, ``decompress()``
578 Each object is single use: once an input frame is decoded, ``decompress()``
570 can no longer be called.
579 can no longer be called.
571
580
572 Here is how this API should be used::
581 Here is how this API should be used::
573
582
574 dctx = zstd.ZstdDecompressor()
583 dctx = zstd.ZstdDecompressor()
575 dobj = dctx.decompressobj()
584 dobj = dctx.decompressobj()
576 data = dobj.decompress(compressed_chunk_0)
585 data = dobj.decompress(compressed_chunk_0)
577 data = dobj.decompress(compressed_chunk_1)
586 data = dobj.decompress(compressed_chunk_1)
578
587
588 Content-Only Dictionary Chain Decompression
589 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
590
591 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
592 zstd frames produced using chained *content-only* dictionary compression. Such
593 a list of frames is produced by compressing discrete inputs where each
594 non-initial input is compressed with a *content-only* dictionary consisting
595 of the content of the previous input.
596
597 For example, say you have the following inputs::
598
599 inputs = [b'input 1', b'input 2', b'input 3']
600
601 The zstd frame chain consists of:
602
603 1. ``b'input 1'`` compressed in standalone/discrete mode
604 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
605 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary
606
607 Each zstd frame **must** have the content size written.
608
609 The following Python code can be used to produce a *content-only dictionary
610 chain*::
611
612 def make_chain(inputs):
613 frames = []
614
615 # First frame is compressed in standalone/discrete mode.
616 zctx = zstd.ZstdCompressor(write_content_size=True)
617 frames.append(zctx.compress(inputs[0]))
618
619 # Subsequent frames use the previous fulltext as a content-only dictionary
620 for i, raw in enumerate(inputs[1:]):
621 dict_data = zstd.ZstdCompressionDict(inputs[i])
622 zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
623 frames.append(zctx.compress(raw))
624
625 return frames
626
627 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
628 element in the input chain.
629
630 It is possible to implement *content-only dictionary chain* decompression
631 on top of other Python APIs. However, this function will likely be significantly
632 faster, especially for long input chains, as it avoids the overhead of
633 instantiating and passing around intermediate objects between C and Python.
634
579 Choosing an API
635 Choosing an API
580 ---------------
636 ---------------
581
637
582 Various forms of compression and decompression APIs are provided because each
638 Various forms of compression and decompression APIs are provided because each
583 are suitable for different use cases.
639 are suitable for different use cases.
584
640
585 The simple/one-shot APIs are useful for small data, when the decompressed
641 The simple/one-shot APIs are useful for small data, when the decompressed
586 data size is known (either recorded in the zstd frame header via
642 data size is known (either recorded in the zstd frame header via
587 ``write_content_size`` or known via an out-of-band mechanism, such as a file
643 ``write_content_size`` or known via an out-of-band mechanism, such as a file
588 size).
644 size).
589
645
590 A limitation of the simple APIs is that input or output data must fit in memory.
646 A limitation of the simple APIs is that input or output data must fit in memory.
591 And unless using advanced tricks with Python *buffer objects*, both input and
647 And unless using advanced tricks with Python *buffer objects*, both input and
592 output must fit in memory simultaneously.
648 output must fit in memory simultaneously.
593
649
594 Another limitation is that compression or decompression is performed as a single
650 Another limitation is that compression or decompression is performed as a single
595 operation. So if you feed large input, it could take a long time for the
651 operation. So if you feed large input, it could take a long time for the
596 function to return.
652 function to return.
597
653
598 The streaming APIs do not have the limitations of the simple API. The cost to
654 The streaming APIs do not have the limitations of the simple API. The cost to
599 this is they are more complex to use than a single function call.
655 this is they are more complex to use than a single function call.
600
656
601 The streaming APIs put the caller in control of compression and decompression
657 The streaming APIs put the caller in control of compression and decompression
602 behavior by allowing them to directly control either the input or output side
658 behavior by allowing them to directly control either the input or output side
603 of the operation.
659 of the operation.
604
660
605 With the streaming input APIs, the caller feeds data into the compressor or
661 With the streaming input APIs, the caller feeds data into the compressor or
606 decompressor as they see fit. Output data will only be written after the caller
662 decompressor as they see fit. Output data will only be written after the caller
607 has explicitly written data.
663 has explicitly written data.
608
664
609 With the streaming output APIs, the caller consumes output from the compressor
665 With the streaming output APIs, the caller consumes output from the compressor
610 or decompressor as they see fit. The compressor or decompressor will only
666 or decompressor as they see fit. The compressor or decompressor will only
611 consume data from the source when the caller is ready to receive it.
667 consume data from the source when the caller is ready to receive it.
612
668
613 One end of the streaming APIs involves a file-like object that must
669 One end of the streaming APIs involves a file-like object that must
614 ``write()`` output data or ``read()`` input data. Depending on what the
670 ``write()`` output data or ``read()`` input data. Depending on what the
615 backing storage for these objects is, those operations may not complete quickly.
671 backing storage for these objects is, those operations may not complete quickly.
616 For example, when streaming compressed data to a file, the ``write()`` into
672 For example, when streaming compressed data to a file, the ``write()`` into
617 a streaming compressor could result in a ``write()`` to the filesystem, which
673 a streaming compressor could result in a ``write()`` to the filesystem, which
618 may take a long time to finish due to slow I/O on the filesystem. So, there
674 may take a long time to finish due to slow I/O on the filesystem. So, there
619 may be overhead in streaming APIs beyond the compression and decompression
675 may be overhead in streaming APIs beyond the compression and decompression
620 operations.
676 operations.
621
677
622 Dictionary Creation and Management
678 Dictionary Creation and Management
623 ----------------------------------
679 ----------------------------------
624
680
625 Zstandard allows *dictionaries* to be used when compressing and
681 Zstandard allows *dictionaries* to be used when compressing and
626 decompressing data. The idea is that if you are compressing a lot of similar
682 decompressing data. The idea is that if you are compressing a lot of similar
627 data, you can precompute common properties of that data (such as recurring
683 data, you can precompute common properties of that data (such as recurring
628 byte sequences) to achieve better compression ratios.
684 byte sequences) to achieve better compression ratios.
629
685
630 In Python, compression dictionaries are represented as the
686 In Python, compression dictionaries are represented as the
631 ``ZstdCompressionDict`` type.
687 ``ZstdCompressionDict`` type.
632
688
633 Instances can be constructed from bytes::
689 Instances can be constructed from bytes::
634
690
635 dict_data = zstd.ZstdCompressionDict(data)
691 dict_data = zstd.ZstdCompressionDict(data)
636
692
693 It is possible to construct a dictionary from *any* data. Unless the
694 data begins with a magic header, the dictionary will be treated as
695 *content-only*. *Content-only* dictionaries allow compression operations
696 that follow to reference raw data within the content. For one use of
697 *content-only* dictionaries, see
698 ``ZstdDecompressor.decompress_content_dict_chain()``.
699
637 More interestingly, instances can be created by *training* on sample data::
700 More interestingly, instances can be created by *training* on sample data::
638
701
639 dict_data = zstd.train_dictionary(size, samples)
702 dict_data = zstd.train_dictionary(size, samples)
640
703
641 This takes a list of bytes instances and creates and returns a
704 This takes a list of bytes instances and creates and returns a
642 ``ZstdCompressionDict``.
705 ``ZstdCompressionDict``.
643
706
644 You can see how many bytes are in the dictionary by calling ``len()``::
707 You can see how many bytes are in the dictionary by calling ``len()``::
645
708
646 dict_data = zstd.train_dictionary(size, samples)
709 dict_data = zstd.train_dictionary(size, samples)
647 dict_size = len(dict_data) # will not be larger than ``size``
710 dict_size = len(dict_data) # will not be larger than ``size``
648
711
649 Once you have a dictionary, you can pass it to the objects performing
712 Once you have a dictionary, you can pass it to the objects performing
650 compression and decompression::
713 compression and decompression::
651
714
652 dict_data = zstd.train_dictionary(16384, samples)
715 dict_data = zstd.train_dictionary(16384, samples)
653
716
654 cctx = zstd.ZstdCompressor(dict_data=dict_data)
717 cctx = zstd.ZstdCompressor(dict_data=dict_data)
655 for source_data in input_data:
718 for source_data in input_data:
656 compressed = cctx.compress(source_data)
719 compressed = cctx.compress(source_data)
657 # Do something with compressed data.
720 # Do something with compressed data.
658
721
659 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
722 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
660 for compressed_data in input_data:
723 for compressed_data in input_data:
661 buffer = io.BytesIO()
724 buffer = io.BytesIO()
662 with dctx.write_to(buffer) as decompressor:
725 with dctx.write_to(buffer) as decompressor:
663 decompressor.write(compressed_data)
726 decompressor.write(compressed_data)
664 # Do something with raw data in ``buffer``.
727 # Do something with raw data in ``buffer``.
665
728
666 Dictionaries have unique integer IDs. You can retrieve this ID via::
729 Dictionaries have unique integer IDs. You can retrieve this ID via::
667
730
668 dict_id = zstd.dictionary_id(dict_data)
731 dict_id = zstd.dictionary_id(dict_data)
669
732
670 You can obtain the raw data in the dict (useful for persisting and constructing
733 You can obtain the raw data in the dict (useful for persisting and constructing
671 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
734 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
672
735
673 dict_data = zstd.train_dictionary(size, samples)
736 dict_data = zstd.train_dictionary(size, samples)
674 raw_data = dict_data.as_bytes()
737 raw_data = dict_data.as_bytes()
675
738
676 Explicit Compression Parameters
739 Explicit Compression Parameters
677 -------------------------------
740 -------------------------------
678
741
679 Zstandard's integer compression levels along with the input size and dictionary
742 Zstandard's integer compression levels along with the input size and dictionary
680 size are converted into a data structure defining multiple parameters to tune
743 size are converted into a data structure defining multiple parameters to tune
681 behavior of the compression algorithm. It is possible to define this
744 behavior of the compression algorithm. It is possible to define this
682 data structure explicitly to have lower-level control over compression behavior.
745 data structure explicitly to have lower-level control over compression behavior.
683
746
684 The ``zstd.CompressionParameters`` type represents this data structure.
747 The ``zstd.CompressionParameters`` type represents this data structure.
685 You can see how Zstandard converts compression levels to this data structure
748 You can see how Zstandard converts compression levels to this data structure
686 by calling ``zstd.get_compression_parameters()``. e.g.::
749 by calling ``zstd.get_compression_parameters()``. e.g.::
687
750
688 params = zstd.get_compression_parameters(5)
751 params = zstd.get_compression_parameters(5)
689
752
690 This function also accepts the uncompressed data size and dictionary size
753 This function also accepts the uncompressed data size and dictionary size
691 to adjust parameters::
754 to adjust parameters::
692
755
693 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
756 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
694
757
695 You can also construct compression parameters from their low-level components::
758 You can also construct compression parameters from their low-level components::
696
759
697 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
760 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
698
761
699 You can then configure a compressor to use the custom parameters::
762 You can then configure a compressor to use the custom parameters::
700
763
701 cctx = zstd.ZstdCompressor(compression_params=params)
764 cctx = zstd.ZstdCompressor(compression_params=params)
702
765
703 The members of the ``CompressionParameters`` tuple are as follows::
766 The members/attributes of ``CompressionParameters`` instances are as follows::
704
767
705 * 0 - Window log
768 * window_log
706 * 1 - Chain log
769 * chain_log
707 * 2 - Hash log
770 * hash_log
708 * 3 - Search log
771 * search_log
709 * 4 - Search length
772 * search_length
710 * 5 - Target length
773 * target_length
711 * 6 - Strategy (one of the ``zstd.STRATEGY_`` constants)
774 * strategy
775
776 This is the order the arguments are passed to the constructor if not using
777 named arguments.
712
778
713 You'll need to read the Zstandard documentation for what these parameters
779 You'll need to read the Zstandard documentation for what these parameters
714 do.
780 do.
715
781
782 Frame Inspection
783 ----------------
784
785 Data emitted from zstd compression is encapsulated in a *frame*. This frame
786 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
787 the frame in more detail. For more info, see
788 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
789
790 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
791 instance and returns a ``FrameParameters`` object describing the frame.
792
793 Depending on which fields are present in the frame and their values, the
794 length of the frame parameters varies. If insufficient bytes are passed
795 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
796 frame parameters can be parsed, pass in at least 18 bytes.
797
798 ``FrameParameters`` instances have the following attributes:
799
800 content_size
801 Integer size of original, uncompressed content. This will be ``0`` if the
802 original content size isn't written to the frame (controlled with the
803 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
804 content size was ``0``.
805
806 window_size
807 Integer size of maximum back-reference distance in compressed data.
808
809 dict_id
810 Integer of dictionary ID used for compression. ``0`` if no dictionary
811 ID was used or if the dictionary ID was ``0``.
812
813 has_checksum
814 Bool indicating whether a 4 byte content checksum is stored at the end
815 of the frame.
816
716 Misc Functionality
817 Misc Functionality
717 ------------------
818 ------------------
718
819
719 estimate_compression_context_size(CompressionParameters)
820 estimate_compression_context_size(CompressionParameters)
720 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
821 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
721
822
722 Given a ``CompressionParameters`` struct, estimate the memory size required
823 Given a ``CompressionParameters`` struct, estimate the memory size required
723 to perform compression.
824 to perform compression.
724
825
725 estimate_decompression_context_size()
826 estimate_decompression_context_size()
726 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
827 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
727
828
728 Estimate the memory size requirements for a decompressor instance.
829 Estimate the memory size requirements for a decompressor instance.
729
830
730 Constants
831 Constants
731 ---------
832 ---------
732
833
733 The following module constants/attributes are exposed:
834 The following module constants/attributes are exposed:
734
835
735 ZSTD_VERSION
836 ZSTD_VERSION
736 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
837 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
737 ``(1, 0, 0)``
838 ``(1, 0, 0)``
738 MAX_COMPRESSION_LEVEL
839 MAX_COMPRESSION_LEVEL
739 Integer max compression level accepted by compression functions
840 Integer max compression level accepted by compression functions
740 COMPRESSION_RECOMMENDED_INPUT_SIZE
841 COMPRESSION_RECOMMENDED_INPUT_SIZE
741 Recommended chunk size to feed to compressor functions
842 Recommended chunk size to feed to compressor functions
742 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
843 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
743 Recommended chunk size for compression output
844 Recommended chunk size for compression output
744 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
845 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
745 Recommended chunk size to feed into decompresor functions
846 Recommended chunk size to feed into decompresor functions
746 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
847 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
747 Recommended chunk size for decompression output
848 Recommended chunk size for decompression output
748
849
749 FRAME_HEADER
850 FRAME_HEADER
750 bytes containing header of the Zstandard frame
851 bytes containing header of the Zstandard frame
751 MAGIC_NUMBER
852 MAGIC_NUMBER
752 Frame header as an integer
853 Frame header as an integer
753
854
754 WINDOWLOG_MIN
855 WINDOWLOG_MIN
755 Minimum value for compression parameter
856 Minimum value for compression parameter
756 WINDOWLOG_MAX
857 WINDOWLOG_MAX
757 Maximum value for compression parameter
858 Maximum value for compression parameter
758 CHAINLOG_MIN
859 CHAINLOG_MIN
759 Minimum value for compression parameter
860 Minimum value for compression parameter
760 CHAINLOG_MAX
861 CHAINLOG_MAX
761 Maximum value for compression parameter
862 Maximum value for compression parameter
762 HASHLOG_MIN
863 HASHLOG_MIN
763 Minimum value for compression parameter
864 Minimum value for compression parameter
764 HASHLOG_MAX
865 HASHLOG_MAX
765 Maximum value for compression parameter
866 Maximum value for compression parameter
766 SEARCHLOG_MIN
867 SEARCHLOG_MIN
767 Minimum value for compression parameter
868 Minimum value for compression parameter
768 SEARCHLOG_MAX
869 SEARCHLOG_MAX
769 Maximum value for compression parameter
870 Maximum value for compression parameter
770 SEARCHLENGTH_MIN
871 SEARCHLENGTH_MIN
771 Minimum value for compression parameter
872 Minimum value for compression parameter
772 SEARCHLENGTH_MAX
873 SEARCHLENGTH_MAX
773 Maximum value for compression parameter
874 Maximum value for compression parameter
774 TARGETLENGTH_MIN
875 TARGETLENGTH_MIN
775 Minimum value for compression parameter
876 Minimum value for compression parameter
776 TARGETLENGTH_MAX
877 TARGETLENGTH_MAX
777 Maximum value for compression parameter
878 Maximum value for compression parameter
778 STRATEGY_FAST
879 STRATEGY_FAST
779 Compression strategory
880 Compression strategy
780 STRATEGY_DFAST
881 STRATEGY_DFAST
781 Compression strategory
882 Compression strategy
782 STRATEGY_GREEDY
883 STRATEGY_GREEDY
783 Compression strategory
884 Compression strategy
784 STRATEGY_LAZY
885 STRATEGY_LAZY
785 Compression strategory
886 Compression strategy
786 STRATEGY_LAZY2
887 STRATEGY_LAZY2
787 Compression strategory
888 Compression strategy
788 STRATEGY_BTLAZY2
889 STRATEGY_BTLAZY2
789 Compression strategory
890 Compression strategy
790 STRATEGY_BTOPT
891 STRATEGY_BTOPT
791 Compression strategory
892 Compression strategy
893
894 Performance Considerations
895 --------------------------
896
897 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
898 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
899 or ``ZstdDecompressor`` instance for multiple operations is faster than
900 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
901 operation. The differences are magnified as the size of data decreases. For
902 example, the difference between *context* reuse and non-reuse for 100,000
903 100 byte inputs will be significant (possiby over 10x faster to reuse contexts)
904 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
905 time spent doing compression dwarfs time spent creating new *contexts*).
792
906
793 Note on Zstandard's *Experimental* API
907 Note on Zstandard's *Experimental* API
794 ======================================
908 ======================================
795
909
796 Many of the Zstandard APIs used by this module are marked as *experimental*
910 Many of the Zstandard APIs used by this module are marked as *experimental*
797 within the Zstandard project. This includes a large number of useful
911 within the Zstandard project. This includes a large number of useful
798 features, such as compression and frame parameters and parts of dictionary
912 features, such as compression and frame parameters and parts of dictionary
799 compression.
913 compression.
800
914
801 It is unclear how Zstandard's C API will evolve over time, especially with
915 It is unclear how Zstandard's C API will evolve over time, especially with
802 regards to this *experimental* functionality. We will try to maintain
916 regards to this *experimental* functionality. We will try to maintain
803 backwards compatibility at the Python API level. However, we cannot
917 backwards compatibility at the Python API level. However, we cannot
804 guarantee this for things not under our control.
918 guarantee this for things not under our control.
805
919
806 Since a copy of the Zstandard source code is distributed with this
920 Since a copy of the Zstandard source code is distributed with this
807 module and since we compile against it, the behavior of a specific
921 module and since we compile against it, the behavior of a specific
808 version of this module should be constant for all of time. So if you
922 version of this module should be constant for all of time. So if you
809 pin the version of this module used in your projects (which is a Python
923 pin the version of this module used in your projects (which is a Python
810 best practice), you should be buffered from unwanted future changes.
924 best practice), you should be buffered from unwanted future changes.
811
925
812 Donate
926 Donate
813 ======
927 ======
814
928
815 A lot of time has been invested into this project by the author.
929 A lot of time has been invested into this project by the author.
816
930
817 If you find this project useful and would like to thank the author for
931 If you find this project useful and would like to thank the author for
818 their work, consider donating some money. Any amount is appreciated.
932 their work, consider donating some money. Any amount is appreciated.
819
933
820 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
934 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
821 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
935 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
822 :alt: Donate via PayPal
936 :alt: Donate via PayPal
823
937
824 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
938 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
825 :target: https://travis-ci.org/indygreg/python-zstandard
939 :target: https://travis-ci.org/indygreg/python-zstandard
826
940
827 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
941 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
828 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
942 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
829 :alt: Windows build status
943 :alt: Windows build status
@@ -1,247 +1,248 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
14 static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
15 size_t capacity;
15 size_t capacity;
16 PyObject* samples;
16 PyObject* samples;
17 Py_ssize_t samplesLen;
17 Py_ssize_t samplesLen;
18 PyObject* parameters = NULL;
18 PyObject* parameters = NULL;
19 ZDICT_params_t zparams;
19 ZDICT_params_t zparams;
20 Py_ssize_t sampleIndex;
20 Py_ssize_t sampleIndex;
21 Py_ssize_t sampleSize;
21 Py_ssize_t sampleSize;
22 PyObject* sampleItem;
22 PyObject* sampleItem;
23 size_t zresult;
23 size_t zresult;
24 void* sampleBuffer;
24 void* sampleBuffer;
25 void* sampleOffset;
25 void* sampleOffset;
26 size_t samplesSize = 0;
26 size_t samplesSize = 0;
27 size_t* sampleSizes;
27 size_t* sampleSizes;
28 void* dict;
28 void* dict;
29 ZstdCompressionDict* result;
29 ZstdCompressionDict* result;
30
30
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary",
32 kwlist,
32 &capacity,
33 &capacity,
33 &PyList_Type, &samples,
34 &PyList_Type, &samples,
34 (PyObject*)&DictParametersType, &parameters)) {
35 (PyObject*)&DictParametersType, &parameters)) {
35 return NULL;
36 return NULL;
36 }
37 }
37
38
38 /* Validate parameters first since it is easiest. */
39 /* Validate parameters first since it is easiest. */
39 zparams.selectivityLevel = 0;
40 zparams.selectivityLevel = 0;
40 zparams.compressionLevel = 0;
41 zparams.compressionLevel = 0;
41 zparams.notificationLevel = 0;
42 zparams.notificationLevel = 0;
42 zparams.dictID = 0;
43 zparams.dictID = 0;
43 zparams.reserved[0] = 0;
44 zparams.reserved[0] = 0;
44 zparams.reserved[1] = 0;
45 zparams.reserved[1] = 0;
45
46
46 if (parameters) {
47 if (parameters) {
47 /* TODO validate data ranges */
48 /* TODO validate data ranges */
48 zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
49 zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
49 zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
50 zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
50 zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
51 zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
51 zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
52 zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
52 }
53 }
53
54
54 /* Figure out the size of the raw samples */
55 /* Figure out the size of the raw samples */
55 samplesLen = PyList_Size(samples);
56 samplesLen = PyList_Size(samples);
56 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
57 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
57 sampleItem = PyList_GetItem(samples, sampleIndex);
58 sampleItem = PyList_GetItem(samples, sampleIndex);
58 if (!PyBytes_Check(sampleItem)) {
59 if (!PyBytes_Check(sampleItem)) {
59 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
60 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
60 /* TODO probably need to perform DECREF here */
61 return NULL;
61 return NULL;
62 }
62 }
63 samplesSize += PyBytes_GET_SIZE(sampleItem);
63 samplesSize += PyBytes_GET_SIZE(sampleItem);
64 }
64 }
65
65
66 /* Now that we know the total size of the raw simples, we can allocate
66 /* Now that we know the total size of the raw simples, we can allocate
67 a buffer for the raw data */
67 a buffer for the raw data */
68 sampleBuffer = PyMem_Malloc(samplesSize);
68 sampleBuffer = PyMem_Malloc(samplesSize);
69 if (!sampleBuffer) {
69 if (!sampleBuffer) {
70 PyErr_NoMemory();
70 PyErr_NoMemory();
71 return NULL;
71 return NULL;
72 }
72 }
73 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
73 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
74 if (!sampleSizes) {
74 if (!sampleSizes) {
75 PyMem_Free(sampleBuffer);
75 PyMem_Free(sampleBuffer);
76 PyErr_NoMemory();
76 PyErr_NoMemory();
77 return NULL;
77 return NULL;
78 }
78 }
79
79
80 sampleOffset = sampleBuffer;
80 sampleOffset = sampleBuffer;
81 /* Now iterate again and assemble the samples in the buffer */
81 /* Now iterate again and assemble the samples in the buffer */
82 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
82 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
83 sampleItem = PyList_GetItem(samples, sampleIndex);
83 sampleItem = PyList_GetItem(samples, sampleIndex);
84 sampleSize = PyBytes_GET_SIZE(sampleItem);
84 sampleSize = PyBytes_GET_SIZE(sampleItem);
85 sampleSizes[sampleIndex] = sampleSize;
85 sampleSizes[sampleIndex] = sampleSize;
86 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
86 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
87 sampleOffset = (char*)sampleOffset + sampleSize;
87 sampleOffset = (char*)sampleOffset + sampleSize;
88 }
88 }
89
89
90 dict = PyMem_Malloc(capacity);
90 dict = PyMem_Malloc(capacity);
91 if (!dict) {
91 if (!dict) {
92 PyMem_Free(sampleSizes);
92 PyMem_Free(sampleSizes);
93 PyMem_Free(sampleBuffer);
93 PyMem_Free(sampleBuffer);
94 PyErr_NoMemory();
94 PyErr_NoMemory();
95 return NULL;
95 return NULL;
96 }
96 }
97
97
98 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
98 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
99 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 zparams);
100 zparams);
101 if (ZDICT_isError(zresult)) {
101 if (ZDICT_isError(zresult)) {
102 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
102 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
103 PyMem_Free(dict);
103 PyMem_Free(dict);
104 PyMem_Free(sampleSizes);
104 PyMem_Free(sampleSizes);
105 PyMem_Free(sampleBuffer);
105 PyMem_Free(sampleBuffer);
106 return NULL;
106 return NULL;
107 }
107 }
108
108
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
110 if (!result) {
110 if (!result) {
111 return NULL;
111 return NULL;
112 }
112 }
113
113
114 result->dictData = dict;
114 result->dictData = dict;
115 result->dictSize = zresult;
115 result->dictSize = zresult;
116 return result;
116 return result;
117 }
117 }
118
118
119
119
120 PyDoc_STRVAR(ZstdCompressionDict__doc__,
120 PyDoc_STRVAR(ZstdCompressionDict__doc__,
121 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
121 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
122 "\n"
122 "\n"
123 "This type holds the results of a computed Zstandard compression dictionary.\n"
123 "This type holds the results of a computed Zstandard compression dictionary.\n"
124 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
124 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
125 "obtained from another source into the constructor.\n"
125 "obtained from another source into the constructor.\n"
126 );
126 );
127
127
128 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
128 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
129 const char* source;
129 const char* source;
130 Py_ssize_t sourceSize;
130 Py_ssize_t sourceSize;
131
131
132 self->dictData = NULL;
132 self->dictData = NULL;
133 self->dictSize = 0;
133 self->dictSize = 0;
134
134
135 #if PY_MAJOR_VERSION >= 3
135 #if PY_MAJOR_VERSION >= 3
136 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
136 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
137 #else
137 #else
138 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
138 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
139 #endif
139 #endif
140 &source, &sourceSize)) {
140 return -1;
141 return -1;
141 }
142 }
142
143
143 self->dictData = PyMem_Malloc(sourceSize);
144 self->dictData = PyMem_Malloc(sourceSize);
144 if (!self->dictData) {
145 if (!self->dictData) {
145 PyErr_NoMemory();
146 PyErr_NoMemory();
146 return -1;
147 return -1;
147 }
148 }
148
149
149 memcpy(self->dictData, source, sourceSize);
150 memcpy(self->dictData, source, sourceSize);
150 self->dictSize = sourceSize;
151 self->dictSize = sourceSize;
151
152
152 return 0;
153 return 0;
153 }
154 }
154
155
155 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
156 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
156 if (self->dictData) {
157 if (self->dictData) {
157 PyMem_Free(self->dictData);
158 PyMem_Free(self->dictData);
158 self->dictData = NULL;
159 self->dictData = NULL;
159 }
160 }
160
161
161 PyObject_Del(self);
162 PyObject_Del(self);
162 }
163 }
163
164
164 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
165 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
165 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
166 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
166
167
167 return PyLong_FromLong(dictID);
168 return PyLong_FromLong(dictID);
168 }
169 }
169
170
170 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
171 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
171 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
172 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
172 }
173 }
173
174
174 static PyMethodDef ZstdCompressionDict_methods[] = {
175 static PyMethodDef ZstdCompressionDict_methods[] = {
175 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
176 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
176 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
177 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
177 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
178 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
178 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
179 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
179 { NULL, NULL }
180 { NULL, NULL }
180 };
181 };
181
182
182 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
183 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
183 return self->dictSize;
184 return self->dictSize;
184 }
185 }
185
186
186 static PySequenceMethods ZstdCompressionDict_sq = {
187 static PySequenceMethods ZstdCompressionDict_sq = {
187 (lenfunc)ZstdCompressionDict_length, /* sq_length */
188 (lenfunc)ZstdCompressionDict_length, /* sq_length */
188 0, /* sq_concat */
189 0, /* sq_concat */
189 0, /* sq_repeat */
190 0, /* sq_repeat */
190 0, /* sq_item */
191 0, /* sq_item */
191 0, /* sq_ass_item */
192 0, /* sq_ass_item */
192 0, /* sq_contains */
193 0, /* sq_contains */
193 0, /* sq_inplace_concat */
194 0, /* sq_inplace_concat */
194 0 /* sq_inplace_repeat */
195 0 /* sq_inplace_repeat */
195 };
196 };
196
197
197 PyTypeObject ZstdCompressionDictType = {
198 PyTypeObject ZstdCompressionDictType = {
198 PyVarObject_HEAD_INIT(NULL, 0)
199 PyVarObject_HEAD_INIT(NULL, 0)
199 "zstd.ZstdCompressionDict", /* tp_name */
200 "zstd.ZstdCompressionDict", /* tp_name */
200 sizeof(ZstdCompressionDict), /* tp_basicsize */
201 sizeof(ZstdCompressionDict), /* tp_basicsize */
201 0, /* tp_itemsize */
202 0, /* tp_itemsize */
202 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
203 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
203 0, /* tp_print */
204 0, /* tp_print */
204 0, /* tp_getattr */
205 0, /* tp_getattr */
205 0, /* tp_setattr */
206 0, /* tp_setattr */
206 0, /* tp_compare */
207 0, /* tp_compare */
207 0, /* tp_repr */
208 0, /* tp_repr */
208 0, /* tp_as_number */
209 0, /* tp_as_number */
209 &ZstdCompressionDict_sq, /* tp_as_sequence */
210 &ZstdCompressionDict_sq, /* tp_as_sequence */
210 0, /* tp_as_mapping */
211 0, /* tp_as_mapping */
211 0, /* tp_hash */
212 0, /* tp_hash */
212 0, /* tp_call */
213 0, /* tp_call */
213 0, /* tp_str */
214 0, /* tp_str */
214 0, /* tp_getattro */
215 0, /* tp_getattro */
215 0, /* tp_setattro */
216 0, /* tp_setattro */
216 0, /* tp_as_buffer */
217 0, /* tp_as_buffer */
217 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
218 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
218 ZstdCompressionDict__doc__, /* tp_doc */
219 ZstdCompressionDict__doc__, /* tp_doc */
219 0, /* tp_traverse */
220 0, /* tp_traverse */
220 0, /* tp_clear */
221 0, /* tp_clear */
221 0, /* tp_richcompare */
222 0, /* tp_richcompare */
222 0, /* tp_weaklistoffset */
223 0, /* tp_weaklistoffset */
223 0, /* tp_iter */
224 0, /* tp_iter */
224 0, /* tp_iternext */
225 0, /* tp_iternext */
225 ZstdCompressionDict_methods, /* tp_methods */
226 ZstdCompressionDict_methods, /* tp_methods */
226 0, /* tp_members */
227 0, /* tp_members */
227 0, /* tp_getset */
228 0, /* tp_getset */
228 0, /* tp_base */
229 0, /* tp_base */
229 0, /* tp_dict */
230 0, /* tp_dict */
230 0, /* tp_descr_get */
231 0, /* tp_descr_get */
231 0, /* tp_descr_set */
232 0, /* tp_descr_set */
232 0, /* tp_dictoffset */
233 0, /* tp_dictoffset */
233 (initproc)ZstdCompressionDict_init, /* tp_init */
234 (initproc)ZstdCompressionDict_init, /* tp_init */
234 0, /* tp_alloc */
235 0, /* tp_alloc */
235 PyType_GenericNew, /* tp_new */
236 PyType_GenericNew, /* tp_new */
236 };
237 };
237
238
238 void compressiondict_module_init(PyObject* mod) {
239 void compressiondict_module_init(PyObject* mod) {
239 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
240 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
240 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
241 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
241 return;
242 return;
242 }
243 }
243
244
244 Py_INCREF((PyObject*)&ZstdCompressionDictType);
245 Py_INCREF((PyObject*)&ZstdCompressionDictType);
245 PyModule_AddObject(mod, "ZstdCompressionDict",
246 PyModule_AddObject(mod, "ZstdCompressionDict",
246 (PyObject*)&ZstdCompressionDictType);
247 (PyObject*)&ZstdCompressionDictType);
247 }
248 }
@@ -1,226 +1,220 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
12 zparams->windowLog = params->windowLog;
12 zparams->windowLog = params->windowLog;
13 zparams->chainLog = params->chainLog;
13 zparams->chainLog = params->chainLog;
14 zparams->hashLog = params->hashLog;
14 zparams->hashLog = params->hashLog;
15 zparams->searchLog = params->searchLog;
15 zparams->searchLog = params->searchLog;
16 zparams->searchLength = params->searchLength;
16 zparams->searchLength = params->searchLength;
17 zparams->targetLength = params->targetLength;
17 zparams->targetLength = params->targetLength;
18 zparams->strategy = params->strategy;
18 zparams->strategy = params->strategy;
19 }
19 }
20
20
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
22 int compressionLevel;
22 int compressionLevel;
23 unsigned PY_LONG_LONG sourceSize = 0;
23 unsigned PY_LONG_LONG sourceSize = 0;
24 Py_ssize_t dictSize = 0;
24 Py_ssize_t dictSize = 0;
25 ZSTD_compressionParameters params;
25 ZSTD_compressionParameters params;
26 CompressionParametersObject* result;
26 CompressionParametersObject* result;
27
27
28 if (!PyArg_ParseTuple(args, "i|Kn", &compressionLevel, &sourceSize, &dictSize)) {
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
29 return NULL;
30 return NULL;
30 }
31 }
31
32
32 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
33 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
33
34
34 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
35 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
35 if (!result) {
36 if (!result) {
36 return NULL;
37 return NULL;
37 }
38 }
38
39
39 result->windowLog = params.windowLog;
40 result->windowLog = params.windowLog;
40 result->chainLog = params.chainLog;
41 result->chainLog = params.chainLog;
41 result->hashLog = params.hashLog;
42 result->hashLog = params.hashLog;
42 result->searchLog = params.searchLog;
43 result->searchLog = params.searchLog;
43 result->searchLength = params.searchLength;
44 result->searchLength = params.searchLength;
44 result->targetLength = params.targetLength;
45 result->targetLength = params.targetLength;
45 result->strategy = params.strategy;
46 result->strategy = params.strategy;
46
47
47 return result;
48 return result;
48 }
49 }
49
50
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
53 "window_log",
54 "chain_log",
55 "hash_log",
56 "search_log",
57 "search_length",
58 "target_length",
59 "strategy",
60 NULL
61 };
62
63 unsigned windowLog;
64 unsigned chainLog;
65 unsigned hashLog;
66 unsigned searchLog;
67 unsigned searchLength;
68 unsigned targetLength;
69 unsigned strategy;
70
71 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
72 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
73 &targetLength, &strategy)) {
74 return -1;
75 }
76
77 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
78 PyErr_SetString(PyExc_ValueError, "invalid window log value");
79 return -1;
80 }
81
82 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
84 return -1;
85 }
86
87 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
89 return -1;
90 }
91
92 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid search log value");
94 return -1;
95 }
96
97 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search length value");
99 return -1;
100 }
101
102 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid target length value");
104 return -1;
105 }
106
107 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
108 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
109 return -1;
110 }
111
112 self->windowLog = windowLog;
113 self->chainLog = chainLog;
114 self->hashLog = hashLog;
115 self->searchLog = searchLog;
116 self->searchLength = searchLength;
117 self->targetLength = targetLength;
118 self->strategy = strategy;
119
120 return 0;
121 }
122
50 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
123 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
51 CompressionParametersObject* params;
124 CompressionParametersObject* params;
52 ZSTD_compressionParameters zparams;
125 ZSTD_compressionParameters zparams;
53 PyObject* result;
126 PyObject* result;
54
127
55 if (!PyArg_ParseTuple(args, "O!", &CompressionParametersType, &params)) {
128 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
129 &CompressionParametersType, &params)) {
56 return NULL;
130 return NULL;
57 }
131 }
58
132
59 ztopy_compression_parameters(params, &zparams);
133 ztopy_compression_parameters(params, &zparams);
60 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
134 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
61 return result;
135 return result;
62 }
136 }
63
137
64 PyDoc_STRVAR(CompressionParameters__doc__,
138 PyDoc_STRVAR(CompressionParameters__doc__,
65 "CompressionParameters: low-level control over zstd compression");
139 "CompressionParameters: low-level control over zstd compression");
66
140
67 static PyObject* CompressionParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
68 CompressionParametersObject* self;
69 unsigned windowLog;
70 unsigned chainLog;
71 unsigned hashLog;
72 unsigned searchLog;
73 unsigned searchLength;
74 unsigned targetLength;
75 unsigned strategy;
76
77 if (!PyArg_ParseTuple(args, "IIIIIII", &windowLog, &chainLog, &hashLog, &searchLog,
78 &searchLength, &targetLength, &strategy)) {
79 return NULL;
80 }
81
82 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid window log value");
84 return NULL;
85 }
86
87 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
89 return NULL;
90 }
91
92 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
94 return NULL;
95 }
96
97 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search log value");
99 return NULL;
100 }
101
102 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid search length value");
104 return NULL;
105 }
106
107 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
108 PyErr_SetString(PyExc_ValueError, "invalid target length value");
109 return NULL;
110 }
111
112 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
113 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
114 return NULL;
115 }
116
117 self = (CompressionParametersObject*)subtype->tp_alloc(subtype, 1);
118 if (!self) {
119 return NULL;
120 }
121
122 self->windowLog = windowLog;
123 self->chainLog = chainLog;
124 self->hashLog = hashLog;
125 self->searchLog = searchLog;
126 self->searchLength = searchLength;
127 self->targetLength = targetLength;
128 self->strategy = strategy;
129
130 return (PyObject*)self;
131 }
132
133 static void CompressionParameters_dealloc(PyObject* self) {
141 static void CompressionParameters_dealloc(PyObject* self) {
134 PyObject_Del(self);
142 PyObject_Del(self);
135 }
143 }
136
144
137 static Py_ssize_t CompressionParameters_length(PyObject* self) {
145 static PyMemberDef CompressionParameters_members[] = {
138 return 7;
146 { "window_log", T_UINT,
139 }
147 offsetof(CompressionParametersObject, windowLog), READONLY,
140
148 "window log" },
141 static PyObject* CompressionParameters_item(PyObject* o, Py_ssize_t i) {
149 { "chain_log", T_UINT,
142 CompressionParametersObject* self = (CompressionParametersObject*)o;
150 offsetof(CompressionParametersObject, chainLog), READONLY,
143
151 "chain log" },
144 switch (i) {
152 { "hash_log", T_UINT,
145 case 0:
153 offsetof(CompressionParametersObject, hashLog), READONLY,
146 return PyLong_FromLong(self->windowLog);
154 "hash log" },
147 case 1:
155 { "search_log", T_UINT,
148 return PyLong_FromLong(self->chainLog);
156 offsetof(CompressionParametersObject, searchLog), READONLY,
149 case 2:
157 "search log" },
150 return PyLong_FromLong(self->hashLog);
158 { "search_length", T_UINT,
151 case 3:
159 offsetof(CompressionParametersObject, searchLength), READONLY,
152 return PyLong_FromLong(self->searchLog);
160 "search length" },
153 case 4:
161 { "target_length", T_UINT,
154 return PyLong_FromLong(self->searchLength);
162 offsetof(CompressionParametersObject, targetLength), READONLY,
155 case 5:
163 "target length" },
156 return PyLong_FromLong(self->targetLength);
164 { "strategy", T_INT,
157 case 6:
165 offsetof(CompressionParametersObject, strategy), READONLY,
158 return PyLong_FromLong(self->strategy);
166 "strategy" },
159 default:
167 { NULL }
160 PyErr_SetString(PyExc_IndexError, "index out of range");
161 return NULL;
162 }
163 }
164
165 static PySequenceMethods CompressionParameters_sq = {
166 CompressionParameters_length, /* sq_length */
167 0, /* sq_concat */
168 0, /* sq_repeat */
169 CompressionParameters_item, /* sq_item */
170 0, /* sq_ass_item */
171 0, /* sq_contains */
172 0, /* sq_inplace_concat */
173 0 /* sq_inplace_repeat */
174 };
168 };
175
169
176 PyTypeObject CompressionParametersType = {
170 PyTypeObject CompressionParametersType = {
177 PyVarObject_HEAD_INIT(NULL, 0)
171 PyVarObject_HEAD_INIT(NULL, 0)
178 "CompressionParameters", /* tp_name */
172 "CompressionParameters", /* tp_name */
179 sizeof(CompressionParametersObject), /* tp_basicsize */
173 sizeof(CompressionParametersObject), /* tp_basicsize */
180 0, /* tp_itemsize */
174 0, /* tp_itemsize */
181 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
175 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
182 0, /* tp_print */
176 0, /* tp_print */
183 0, /* tp_getattr */
177 0, /* tp_getattr */
184 0, /* tp_setattr */
178 0, /* tp_setattr */
185 0, /* tp_compare */
179 0, /* tp_compare */
186 0, /* tp_repr */
180 0, /* tp_repr */
187 0, /* tp_as_number */
181 0, /* tp_as_number */
188 &CompressionParameters_sq, /* tp_as_sequence */
182 0, /* tp_as_sequence */
189 0, /* tp_as_mapping */
183 0, /* tp_as_mapping */
190 0, /* tp_hash */
184 0, /* tp_hash */
191 0, /* tp_call */
185 0, /* tp_call */
192 0, /* tp_str */
186 0, /* tp_str */
193 0, /* tp_getattro */
187 0, /* tp_getattro */
194 0, /* tp_setattro */
188 0, /* tp_setattro */
195 0, /* tp_as_buffer */
189 0, /* tp_as_buffer */
196 Py_TPFLAGS_DEFAULT, /* tp_flags */
190 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
197 CompressionParameters__doc__, /* tp_doc */
191 CompressionParameters__doc__, /* tp_doc */
198 0, /* tp_traverse */
192 0, /* tp_traverse */
199 0, /* tp_clear */
193 0, /* tp_clear */
200 0, /* tp_richcompare */
194 0, /* tp_richcompare */
201 0, /* tp_weaklistoffset */
195 0, /* tp_weaklistoffset */
202 0, /* tp_iter */
196 0, /* tp_iter */
203 0, /* tp_iternext */
197 0, /* tp_iternext */
204 0, /* tp_methods */
198 0, /* tp_methods */
205 0, /* tp_members */
199 CompressionParameters_members, /* tp_members */
206 0, /* tp_getset */
200 0, /* tp_getset */
207 0, /* tp_base */
201 0, /* tp_base */
208 0, /* tp_dict */
202 0, /* tp_dict */
209 0, /* tp_descr_get */
203 0, /* tp_descr_get */
210 0, /* tp_descr_set */
204 0, /* tp_descr_set */
211 0, /* tp_dictoffset */
205 0, /* tp_dictoffset */
212 0, /* tp_init */
206 (initproc)CompressionParameters_init, /* tp_init */
213 0, /* tp_alloc */
207 0, /* tp_alloc */
214 CompressionParameters_new, /* tp_new */
208 PyType_GenericNew, /* tp_new */
215 };
209 };
216
210
217 void compressionparams_module_init(PyObject* mod) {
211 void compressionparams_module_init(PyObject* mod) {
218 Py_TYPE(&CompressionParametersType) = &PyType_Type;
212 Py_TYPE(&CompressionParametersType) = &PyType_Type;
219 if (PyType_Ready(&CompressionParametersType) < 0) {
213 if (PyType_Ready(&CompressionParametersType) < 0) {
220 return;
214 return;
221 }
215 }
222
216
223 Py_IncRef((PyObject*)&CompressionParametersType);
217 Py_IncRef((PyObject*)&CompressionParametersType);
224 PyModule_AddObject(mod, "CompressionParameters",
218 PyModule_AddObject(mod, "CompressionParameters",
225 (PyObject*)&CompressionParametersType);
219 (PyObject*)&CompressionParametersType);
226 }
220 }
@@ -1,288 +1,290 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 """A context manager used for writing compressed output to a writer.\n"
14 """A context manager used for writing compressed output to a writer.\n"
15 );
15 );
16
16
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 Py_XDECREF(self->compressor);
18 Py_XDECREF(self->compressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 if (self->cstream) {
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
23 self->cstream = NULL;
24 }
24 }
25
25
26 PyObject_Del(self);
26 PyObject_Del(self);
27 }
27 }
28
28
29 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
29 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
30 if (self->entered) {
30 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
32 return NULL;
33 }
33 }
34
34
35 self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize);
35 self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize);
36 if (!self->cstream) {
36 if (!self->cstream) {
37 return NULL;
37 return NULL;
38 }
38 }
39
39
40 self->entered = 1;
40 self->entered = 1;
41
41
42 Py_INCREF(self);
42 Py_INCREF(self);
43 return (PyObject*)self;
43 return (PyObject*)self;
44 }
44 }
45
45
46 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
46 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
47 PyObject* exc_type;
47 PyObject* exc_type;
48 PyObject* exc_value;
48 PyObject* exc_value;
49 PyObject* exc_tb;
49 PyObject* exc_tb;
50 size_t zresult;
50 size_t zresult;
51
51
52 ZSTD_outBuffer output;
52 ZSTD_outBuffer output;
53 PyObject* res;
53 PyObject* res;
54
54
55 if (!PyArg_ParseTuple(args, "OOO", &exc_type, &exc_value, &exc_tb)) {
55 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 return NULL;
56 return NULL;
57 }
57 }
58
58
59 self->entered = 0;
59 self->entered = 0;
60
60
61 if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
61 if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
62 exc_tb == Py_None) {
62 exc_tb == Py_None) {
63
63
64 output.dst = PyMem_Malloc(self->outSize);
64 output.dst = PyMem_Malloc(self->outSize);
65 if (!output.dst) {
65 if (!output.dst) {
66 return PyErr_NoMemory();
66 return PyErr_NoMemory();
67 }
67 }
68 output.size = self->outSize;
68 output.size = self->outSize;
69 output.pos = 0;
69 output.pos = 0;
70
70
71 while (1) {
71 while (1) {
72 zresult = ZSTD_endStream(self->cstream, &output);
72 zresult = ZSTD_endStream(self->cstream, &output);
73 if (ZSTD_isError(zresult)) {
73 if (ZSTD_isError(zresult)) {
74 PyErr_Format(ZstdError, "error ending compression stream: %s",
74 PyErr_Format(ZstdError, "error ending compression stream: %s",
75 ZSTD_getErrorName(zresult));
75 ZSTD_getErrorName(zresult));
76 PyMem_Free(output.dst);
76 PyMem_Free(output.dst);
77 return NULL;
77 return NULL;
78 }
78 }
79
79
80 if (output.pos) {
80 if (output.pos) {
81 #if PY_MAJOR_VERSION >= 3
81 #if PY_MAJOR_VERSION >= 3
82 res = PyObject_CallMethod(self->writer, "write", "y#",
82 res = PyObject_CallMethod(self->writer, "write", "y#",
83 #else
83 #else
84 res = PyObject_CallMethod(self->writer, "write", "s#",
84 res = PyObject_CallMethod(self->writer, "write", "s#",
85 #endif
85 #endif
86 output.dst, output.pos);
86 output.dst, output.pos);
87 Py_XDECREF(res);
87 Py_XDECREF(res);
88 }
88 }
89
89
90 if (!zresult) {
90 if (!zresult) {
91 break;
91 break;
92 }
92 }
93
93
94 output.pos = 0;
94 output.pos = 0;
95 }
95 }
96
96
97 PyMem_Free(output.dst);
97 PyMem_Free(output.dst);
98 ZSTD_freeCStream(self->cstream);
98 ZSTD_freeCStream(self->cstream);
99 self->cstream = NULL;
99 self->cstream = NULL;
100 }
100 }
101
101
102 Py_RETURN_FALSE;
102 Py_RETURN_FALSE;
103 }
103 }
104
104
105 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
105 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
106 if (!self->cstream) {
106 if (!self->cstream) {
107 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
107 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
108 "call when a context manager is active");
108 "call when a context manager is active");
109 return NULL;
109 return NULL;
110 }
110 }
111
111
112 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream));
112 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream));
113 }
113 }
114
114
115 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
115 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
116 const char* source;
116 const char* source;
117 Py_ssize_t sourceSize;
117 Py_ssize_t sourceSize;
118 size_t zresult;
118 size_t zresult;
119 ZSTD_inBuffer input;
119 ZSTD_inBuffer input;
120 ZSTD_outBuffer output;
120 ZSTD_outBuffer output;
121 PyObject* res;
121 PyObject* res;
122 Py_ssize_t totalWrite = 0;
122
123
123 #if PY_MAJOR_VERSION >= 3
124 #if PY_MAJOR_VERSION >= 3
124 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
125 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
125 #else
126 #else
126 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
127 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
127 #endif
128 #endif
128 return NULL;
129 return NULL;
129 }
130 }
130
131
131 if (!self->entered) {
132 if (!self->entered) {
132 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
133 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
133 return NULL;
134 return NULL;
134 }
135 }
135
136
136 output.dst = PyMem_Malloc(self->outSize);
137 output.dst = PyMem_Malloc(self->outSize);
137 if (!output.dst) {
138 if (!output.dst) {
138 return PyErr_NoMemory();
139 return PyErr_NoMemory();
139 }
140 }
140 output.size = self->outSize;
141 output.size = self->outSize;
141 output.pos = 0;
142 output.pos = 0;
142
143
143 input.src = source;
144 input.src = source;
144 input.size = sourceSize;
145 input.size = sourceSize;
145 input.pos = 0;
146 input.pos = 0;
146
147
147 while ((ssize_t)input.pos < sourceSize) {
148 while ((ssize_t)input.pos < sourceSize) {
148 Py_BEGIN_ALLOW_THREADS
149 Py_BEGIN_ALLOW_THREADS
149 zresult = ZSTD_compressStream(self->cstream, &output, &input);
150 zresult = ZSTD_compressStream(self->cstream, &output, &input);
150 Py_END_ALLOW_THREADS
151 Py_END_ALLOW_THREADS
151
152
152 if (ZSTD_isError(zresult)) {
153 if (ZSTD_isError(zresult)) {
153 PyMem_Free(output.dst);
154 PyMem_Free(output.dst);
154 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
155 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
155 return NULL;
156 return NULL;
156 }
157 }
157
158
158 /* Copy data from output buffer to writer. */
159 /* Copy data from output buffer to writer. */
159 if (output.pos) {
160 if (output.pos) {
160 #if PY_MAJOR_VERSION >= 3
161 #if PY_MAJOR_VERSION >= 3
161 res = PyObject_CallMethod(self->writer, "write", "y#",
162 res = PyObject_CallMethod(self->writer, "write", "y#",
162 #else
163 #else
163 res = PyObject_CallMethod(self->writer, "write", "s#",
164 res = PyObject_CallMethod(self->writer, "write", "s#",
164 #endif
165 #endif
165 output.dst, output.pos);
166 output.dst, output.pos);
166 Py_XDECREF(res);
167 Py_XDECREF(res);
168 totalWrite += output.pos;
167 }
169 }
168 output.pos = 0;
170 output.pos = 0;
169 }
171 }
170
172
171 PyMem_Free(output.dst);
173 PyMem_Free(output.dst);
172
174
173 /* TODO return bytes written */
175 return PyLong_FromSsize_t(totalWrite);
174 Py_RETURN_NONE;
175 }
176 }
176
177
177 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
178 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
178 size_t zresult;
179 size_t zresult;
179 ZSTD_outBuffer output;
180 ZSTD_outBuffer output;
180 PyObject* res;
181 PyObject* res;
182 Py_ssize_t totalWrite = 0;
181
183
182 if (!self->entered) {
184 if (!self->entered) {
183 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
185 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
184 return NULL;
186 return NULL;
185 }
187 }
186
188
187 output.dst = PyMem_Malloc(self->outSize);
189 output.dst = PyMem_Malloc(self->outSize);
188 if (!output.dst) {
190 if (!output.dst) {
189 return PyErr_NoMemory();
191 return PyErr_NoMemory();
190 }
192 }
191 output.size = self->outSize;
193 output.size = self->outSize;
192 output.pos = 0;
194 output.pos = 0;
193
195
194 while (1) {
196 while (1) {
195 Py_BEGIN_ALLOW_THREADS
197 Py_BEGIN_ALLOW_THREADS
196 zresult = ZSTD_flushStream(self->cstream, &output);
198 zresult = ZSTD_flushStream(self->cstream, &output);
197 Py_END_ALLOW_THREADS
199 Py_END_ALLOW_THREADS
198
200
199 if (ZSTD_isError(zresult)) {
201 if (ZSTD_isError(zresult)) {
200 PyMem_Free(output.dst);
202 PyMem_Free(output.dst);
201 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
203 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
202 return NULL;
204 return NULL;
203 }
205 }
204
206
205 if (!output.pos) {
207 if (!output.pos) {
206 break;
208 break;
207 }
209 }
208
210
209 /* Copy data from output buffer to writer. */
211 /* Copy data from output buffer to writer. */
210 if (output.pos) {
212 if (output.pos) {
211 #if PY_MAJOR_VERSION >= 3
213 #if PY_MAJOR_VERSION >= 3
212 res = PyObject_CallMethod(self->writer, "write", "y#",
214 res = PyObject_CallMethod(self->writer, "write", "y#",
213 #else
215 #else
214 res = PyObject_CallMethod(self->writer, "write", "s#",
216 res = PyObject_CallMethod(self->writer, "write", "s#",
215 #endif
217 #endif
216 output.dst, output.pos);
218 output.dst, output.pos);
217 Py_XDECREF(res);
219 Py_XDECREF(res);
220 totalWrite += output.pos;
218 }
221 }
219 output.pos = 0;
222 output.pos = 0;
220 }
223 }
221
224
222 PyMem_Free(output.dst);
225 PyMem_Free(output.dst);
223
226
224 /* TODO return bytes written */
227 return PyLong_FromSsize_t(totalWrite);
225 Py_RETURN_NONE;
226 }
228 }
227
229
228 static PyMethodDef ZstdCompressionWriter_methods[] = {
230 static PyMethodDef ZstdCompressionWriter_methods[] = {
229 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
231 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
230 PyDoc_STR("Enter a compression context.") },
232 PyDoc_STR("Enter a compression context.") },
231 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
233 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
232 PyDoc_STR("Exit a compression context.") },
234 PyDoc_STR("Exit a compression context.") },
233 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
235 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
234 PyDoc_STR("Obtain the memory size of the underlying compressor") },
236 PyDoc_STR("Obtain the memory size of the underlying compressor") },
235 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
237 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
236 PyDoc_STR("Compress data") },
238 PyDoc_STR("Compress data") },
237 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
239 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
238 PyDoc_STR("Flush data and finish a zstd frame") },
240 PyDoc_STR("Flush data and finish a zstd frame") },
239 { NULL, NULL }
241 { NULL, NULL }
240 };
242 };
241
243
242 PyTypeObject ZstdCompressionWriterType = {
244 PyTypeObject ZstdCompressionWriterType = {
243 PyVarObject_HEAD_INIT(NULL, 0)
245 PyVarObject_HEAD_INIT(NULL, 0)
244 "zstd.ZstdCompressionWriter", /* tp_name */
246 "zstd.ZstdCompressionWriter", /* tp_name */
245 sizeof(ZstdCompressionWriter), /* tp_basicsize */
247 sizeof(ZstdCompressionWriter), /* tp_basicsize */
246 0, /* tp_itemsize */
248 0, /* tp_itemsize */
247 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
249 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
248 0, /* tp_print */
250 0, /* tp_print */
249 0, /* tp_getattr */
251 0, /* tp_getattr */
250 0, /* tp_setattr */
252 0, /* tp_setattr */
251 0, /* tp_compare */
253 0, /* tp_compare */
252 0, /* tp_repr */
254 0, /* tp_repr */
253 0, /* tp_as_number */
255 0, /* tp_as_number */
254 0, /* tp_as_sequence */
256 0, /* tp_as_sequence */
255 0, /* tp_as_mapping */
257 0, /* tp_as_mapping */
256 0, /* tp_hash */
258 0, /* tp_hash */
257 0, /* tp_call */
259 0, /* tp_call */
258 0, /* tp_str */
260 0, /* tp_str */
259 0, /* tp_getattro */
261 0, /* tp_getattro */
260 0, /* tp_setattro */
262 0, /* tp_setattro */
261 0, /* tp_as_buffer */
263 0, /* tp_as_buffer */
262 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
264 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
263 ZstdCompresssionWriter__doc__, /* tp_doc */
265 ZstdCompresssionWriter__doc__, /* tp_doc */
264 0, /* tp_traverse */
266 0, /* tp_traverse */
265 0, /* tp_clear */
267 0, /* tp_clear */
266 0, /* tp_richcompare */
268 0, /* tp_richcompare */
267 0, /* tp_weaklistoffset */
269 0, /* tp_weaklistoffset */
268 0, /* tp_iter */
270 0, /* tp_iter */
269 0, /* tp_iternext */
271 0, /* tp_iternext */
270 ZstdCompressionWriter_methods, /* tp_methods */
272 ZstdCompressionWriter_methods, /* tp_methods */
271 0, /* tp_members */
273 0, /* tp_members */
272 0, /* tp_getset */
274 0, /* tp_getset */
273 0, /* tp_base */
275 0, /* tp_base */
274 0, /* tp_dict */
276 0, /* tp_dict */
275 0, /* tp_descr_get */
277 0, /* tp_descr_get */
276 0, /* tp_descr_set */
278 0, /* tp_descr_set */
277 0, /* tp_dictoffset */
279 0, /* tp_dictoffset */
278 0, /* tp_init */
280 0, /* tp_init */
279 0, /* tp_alloc */
281 0, /* tp_alloc */
280 PyType_GenericNew, /* tp_new */
282 PyType_GenericNew, /* tp_new */
281 };
283 };
282
284
283 void compressionwriter_module_init(PyObject* mod) {
285 void compressionwriter_module_init(PyObject* mod) {
284 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
286 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
285 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
287 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
286 return;
288 return;
287 }
289 }
288 }
290 }
@@ -1,250 +1,250 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 "Perform compression using a standard library compatible API.\n"
14 "Perform compression using a standard library compatible API.\n"
15 );
15 );
16
16
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 PyMem_Free(self->output.dst);
18 PyMem_Free(self->output.dst);
19 self->output.dst = NULL;
19 self->output.dst = NULL;
20
20
21 if (self->cstream) {
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
23 self->cstream = NULL;
24 }
24 }
25
25
26 Py_XDECREF(self->compressor);
26 Py_XDECREF(self->compressor);
27
27
28 PyObject_Del(self);
28 PyObject_Del(self);
29 }
29 }
30
30
31 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
31 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
32 const char* source;
32 const char* source;
33 Py_ssize_t sourceSize;
33 Py_ssize_t sourceSize;
34 ZSTD_inBuffer input;
34 ZSTD_inBuffer input;
35 size_t zresult;
35 size_t zresult;
36 PyObject* result = NULL;
36 PyObject* result = NULL;
37 Py_ssize_t resultSize = 0;
37 Py_ssize_t resultSize = 0;
38
38
39 if (self->finished) {
39 if (self->finished) {
40 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
40 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
41 return NULL;
41 return NULL;
42 }
42 }
43
43
44 #if PY_MAJOR_VERSION >= 3
44 #if PY_MAJOR_VERSION >= 3
45 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
45 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
46 #else
46 #else
47 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
47 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
48 #endif
48 #endif
49 return NULL;
49 return NULL;
50 }
50 }
51
51
52 input.src = source;
52 input.src = source;
53 input.size = sourceSize;
53 input.size = sourceSize;
54 input.pos = 0;
54 input.pos = 0;
55
55
56 while ((ssize_t)input.pos < sourceSize) {
56 while ((ssize_t)input.pos < sourceSize) {
57 Py_BEGIN_ALLOW_THREADS
57 Py_BEGIN_ALLOW_THREADS
58 zresult = ZSTD_compressStream(self->cstream, &self->output, &input);
58 zresult = ZSTD_compressStream(self->cstream, &self->output, &input);
59 Py_END_ALLOW_THREADS
59 Py_END_ALLOW_THREADS
60
60
61 if (ZSTD_isError(zresult)) {
61 if (ZSTD_isError(zresult)) {
62 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
62 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
63 return NULL;
63 return NULL;
64 }
64 }
65
65
66 if (self->output.pos) {
66 if (self->output.pos) {
67 if (result) {
67 if (result) {
68 resultSize = PyBytes_GET_SIZE(result);
68 resultSize = PyBytes_GET_SIZE(result);
69 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
69 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
70 return NULL;
70 return NULL;
71 }
71 }
72
72
73 memcpy(PyBytes_AS_STRING(result) + resultSize,
73 memcpy(PyBytes_AS_STRING(result) + resultSize,
74 self->output.dst, self->output.pos);
74 self->output.dst, self->output.pos);
75 }
75 }
76 else {
76 else {
77 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
77 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
78 if (!result) {
78 if (!result) {
79 return NULL;
79 return NULL;
80 }
80 }
81 }
81 }
82
82
83 self->output.pos = 0;
83 self->output.pos = 0;
84 }
84 }
85 }
85 }
86
86
87 if (result) {
87 if (result) {
88 return result;
88 return result;
89 }
89 }
90 else {
90 else {
91 return PyBytes_FromString("");
91 return PyBytes_FromString("");
92 }
92 }
93 }
93 }
94
94
95 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
95 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
96 int flushMode = compressorobj_flush_finish;
96 int flushMode = compressorobj_flush_finish;
97 size_t zresult;
97 size_t zresult;
98 PyObject* result = NULL;
98 PyObject* result = NULL;
99 Py_ssize_t resultSize = 0;
99 Py_ssize_t resultSize = 0;
100
100
101 if (!PyArg_ParseTuple(args, "|i", &flushMode)) {
101 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 return NULL;
102 return NULL;
103 }
103 }
104
104
105 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
105 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
106 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
106 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
107 return NULL;
107 return NULL;
108 }
108 }
109
109
110 if (self->finished) {
110 if (self->finished) {
111 PyErr_SetString(ZstdError, "compressor object already finished");
111 PyErr_SetString(ZstdError, "compressor object already finished");
112 return NULL;
112 return NULL;
113 }
113 }
114
114
115 assert(self->output.pos == 0);
115 assert(self->output.pos == 0);
116
116
117 if (flushMode == compressorobj_flush_block) {
117 if (flushMode == compressorobj_flush_block) {
118 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
118 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 guaranteed to hold a full block. */
119 guaranteed to hold a full block. */
120 Py_BEGIN_ALLOW_THREADS
120 Py_BEGIN_ALLOW_THREADS
121 zresult = ZSTD_flushStream(self->cstream, &self->output);
121 zresult = ZSTD_flushStream(self->cstream, &self->output);
122 Py_END_ALLOW_THREADS
122 Py_END_ALLOW_THREADS
123
123
124 if (ZSTD_isError(zresult)) {
124 if (ZSTD_isError(zresult)) {
125 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
125 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
126 return NULL;
126 return NULL;
127 }
127 }
128
128
129 /* Output buffer is guaranteed to hold full block. */
129 /* Output buffer is guaranteed to hold full block. */
130 assert(zresult == 0);
130 assert(zresult == 0);
131
131
132 if (self->output.pos) {
132 if (self->output.pos) {
133 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
133 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
134 if (!result) {
134 if (!result) {
135 return NULL;
135 return NULL;
136 }
136 }
137 }
137 }
138
138
139 self->output.pos = 0;
139 self->output.pos = 0;
140
140
141 if (result) {
141 if (result) {
142 return result;
142 return result;
143 }
143 }
144 else {
144 else {
145 return PyBytes_FromString("");
145 return PyBytes_FromString("");
146 }
146 }
147 }
147 }
148
148
149 assert(flushMode == compressorobj_flush_finish);
149 assert(flushMode == compressorobj_flush_finish);
150 self->finished = 1;
150 self->finished = 1;
151
151
152 while (1) {
152 while (1) {
153 zresult = ZSTD_endStream(self->cstream, &self->output);
153 zresult = ZSTD_endStream(self->cstream, &self->output);
154 if (ZSTD_isError(zresult)) {
154 if (ZSTD_isError(zresult)) {
155 PyErr_Format(ZstdError, "error ending compression stream: %s",
155 PyErr_Format(ZstdError, "error ending compression stream: %s",
156 ZSTD_getErrorName(zresult));
156 ZSTD_getErrorName(zresult));
157 return NULL;
157 return NULL;
158 }
158 }
159
159
160 if (self->output.pos) {
160 if (self->output.pos) {
161 if (result) {
161 if (result) {
162 resultSize = PyBytes_GET_SIZE(result);
162 resultSize = PyBytes_GET_SIZE(result);
163 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
163 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
164 return NULL;
164 return NULL;
165 }
165 }
166
166
167 memcpy(PyBytes_AS_STRING(result) + resultSize,
167 memcpy(PyBytes_AS_STRING(result) + resultSize,
168 self->output.dst, self->output.pos);
168 self->output.dst, self->output.pos);
169 }
169 }
170 else {
170 else {
171 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
171 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
172 if (!result) {
172 if (!result) {
173 return NULL;
173 return NULL;
174 }
174 }
175 }
175 }
176
176
177 self->output.pos = 0;
177 self->output.pos = 0;
178 }
178 }
179
179
180 if (!zresult) {
180 if (!zresult) {
181 break;
181 break;
182 }
182 }
183 }
183 }
184
184
185 ZSTD_freeCStream(self->cstream);
185 ZSTD_freeCStream(self->cstream);
186 self->cstream = NULL;
186 self->cstream = NULL;
187
187
188 if (result) {
188 if (result) {
189 return result;
189 return result;
190 }
190 }
191 else {
191 else {
192 return PyBytes_FromString("");
192 return PyBytes_FromString("");
193 }
193 }
194 }
194 }
195
195
196 static PyMethodDef ZstdCompressionObj_methods[] = {
196 static PyMethodDef ZstdCompressionObj_methods[] = {
197 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
197 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
198 PyDoc_STR("compress data") },
198 PyDoc_STR("compress data") },
199 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
199 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
200 PyDoc_STR("finish compression operation") },
200 PyDoc_STR("finish compression operation") },
201 { NULL, NULL }
201 { NULL, NULL }
202 };
202 };
203
203
204 PyTypeObject ZstdCompressionObjType = {
204 PyTypeObject ZstdCompressionObjType = {
205 PyVarObject_HEAD_INIT(NULL, 0)
205 PyVarObject_HEAD_INIT(NULL, 0)
206 "zstd.ZstdCompressionObj", /* tp_name */
206 "zstd.ZstdCompressionObj", /* tp_name */
207 sizeof(ZstdCompressionObj), /* tp_basicsize */
207 sizeof(ZstdCompressionObj), /* tp_basicsize */
208 0, /* tp_itemsize */
208 0, /* tp_itemsize */
209 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
209 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
210 0, /* tp_print */
210 0, /* tp_print */
211 0, /* tp_getattr */
211 0, /* tp_getattr */
212 0, /* tp_setattr */
212 0, /* tp_setattr */
213 0, /* tp_compare */
213 0, /* tp_compare */
214 0, /* tp_repr */
214 0, /* tp_repr */
215 0, /* tp_as_number */
215 0, /* tp_as_number */
216 0, /* tp_as_sequence */
216 0, /* tp_as_sequence */
217 0, /* tp_as_mapping */
217 0, /* tp_as_mapping */
218 0, /* tp_hash */
218 0, /* tp_hash */
219 0, /* tp_call */
219 0, /* tp_call */
220 0, /* tp_str */
220 0, /* tp_str */
221 0, /* tp_getattro */
221 0, /* tp_getattro */
222 0, /* tp_setattro */
222 0, /* tp_setattro */
223 0, /* tp_as_buffer */
223 0, /* tp_as_buffer */
224 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
224 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
225 ZstdCompressionObj__doc__, /* tp_doc */
225 ZstdCompressionObj__doc__, /* tp_doc */
226 0, /* tp_traverse */
226 0, /* tp_traverse */
227 0, /* tp_clear */
227 0, /* tp_clear */
228 0, /* tp_richcompare */
228 0, /* tp_richcompare */
229 0, /* tp_weaklistoffset */
229 0, /* tp_weaklistoffset */
230 0, /* tp_iter */
230 0, /* tp_iter */
231 0, /* tp_iternext */
231 0, /* tp_iternext */
232 ZstdCompressionObj_methods, /* tp_methods */
232 ZstdCompressionObj_methods, /* tp_methods */
233 0, /* tp_members */
233 0, /* tp_members */
234 0, /* tp_getset */
234 0, /* tp_getset */
235 0, /* tp_base */
235 0, /* tp_base */
236 0, /* tp_dict */
236 0, /* tp_dict */
237 0, /* tp_descr_get */
237 0, /* tp_descr_get */
238 0, /* tp_descr_set */
238 0, /* tp_descr_set */
239 0, /* tp_dictoffset */
239 0, /* tp_dictoffset */
240 0, /* tp_init */
240 0, /* tp_init */
241 0, /* tp_alloc */
241 0, /* tp_alloc */
242 PyType_GenericNew, /* tp_new */
242 PyType_GenericNew, /* tp_new */
243 };
243 };
244
244
245 void compressobj_module_init(PyObject* module) {
245 void compressobj_module_init(PyObject* module) {
246 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
246 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
247 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
247 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
248 return;
248 return;
249 }
249 }
250 }
250 }
@@ -1,791 +1,791 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
14 ZSTD_customMem zmem;
14 ZSTD_customMem zmem;
15 assert(!compressor->cdict);
15 assert(!compressor->cdict);
16 Py_BEGIN_ALLOW_THREADS
16 Py_BEGIN_ALLOW_THREADS
17 memset(&zmem, 0, sizeof(zmem));
17 memset(&zmem, 0, sizeof(zmem));
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, *zparams, zmem);
19 compressor->dict->dictSize, 1, *zparams, zmem);
20 Py_END_ALLOW_THREADS
20 Py_END_ALLOW_THREADS
21
21
22 if (!compressor->cdict) {
22 if (!compressor->cdict) {
23 PyErr_SetString(ZstdError, "could not create compression dictionary");
23 PyErr_SetString(ZstdError, "could not create compression dictionary");
24 return 1;
24 return 1;
25 }
25 }
26
26
27 return 0;
27 return 0;
28 }
28 }
29
29
30 /**
30 /**
31 * Initialize a zstd CStream from a ZstdCompressor instance.
31 * Initialize a zstd CStream from a ZstdCompressor instance.
32 *
32 *
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
34 * exception will be set.
34 * exception will be set.
35 */
35 */
36 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
36 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
37 ZSTD_CStream* cstream;
37 ZSTD_CStream* cstream;
38 ZSTD_parameters zparams;
38 ZSTD_parameters zparams;
39 void* dictData = NULL;
39 void* dictData = NULL;
40 size_t dictSize = 0;
40 size_t dictSize = 0;
41 size_t zresult;
41 size_t zresult;
42
42
43 cstream = ZSTD_createCStream();
43 cstream = ZSTD_createCStream();
44 if (!cstream) {
44 if (!cstream) {
45 PyErr_SetString(ZstdError, "cannot create CStream");
45 PyErr_SetString(ZstdError, "cannot create CStream");
46 return NULL;
46 return NULL;
47 }
47 }
48
48
49 if (compressor->dict) {
49 if (compressor->dict) {
50 dictData = compressor->dict->dictData;
50 dictData = compressor->dict->dictData;
51 dictSize = compressor->dict->dictSize;
51 dictSize = compressor->dict->dictSize;
52 }
52 }
53
53
54 memset(&zparams, 0, sizeof(zparams));
54 memset(&zparams, 0, sizeof(zparams));
55 if (compressor->cparams) {
55 if (compressor->cparams) {
56 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
56 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
57 /* Do NOT call ZSTD_adjustCParams() here because the compression params
57 /* Do NOT call ZSTD_adjustCParams() here because the compression params
58 come from the user. */
58 come from the user. */
59 }
59 }
60 else {
60 else {
61 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
61 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
62 }
62 }
63
63
64 zparams.fParams = compressor->fparams;
64 zparams.fParams = compressor->fparams;
65
65
66 zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, zparams, sourceSize);
66 zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, zparams, sourceSize);
67
67
68 if (ZSTD_isError(zresult)) {
68 if (ZSTD_isError(zresult)) {
69 ZSTD_freeCStream(cstream);
69 ZSTD_freeCStream(cstream);
70 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
70 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
71 return NULL;
71 return NULL;
72 }
72 }
73
73
74 return cstream;
74 return cstream;
75 }
75 }
76
76
77 PyDoc_STRVAR(ZstdCompressor__doc__,
77 PyDoc_STRVAR(ZstdCompressor__doc__,
78 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
78 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
79 "\n"
79 "\n"
80 "Create an object used to perform Zstandard compression.\n"
80 "Create an object used to perform Zstandard compression.\n"
81 "\n"
81 "\n"
82 "An instance can compress data various ways. Instances can be used multiple\n"
82 "An instance can compress data various ways. Instances can be used multiple\n"
83 "times. Each compression operation will use the compression parameters\n"
83 "times. Each compression operation will use the compression parameters\n"
84 "defined at construction time.\n"
84 "defined at construction time.\n"
85 "\n"
85 "\n"
86 "Compression can be configured via the following names arguments:\n"
86 "Compression can be configured via the following names arguments:\n"
87 "\n"
87 "\n"
88 "level\n"
88 "level\n"
89 " Integer compression level.\n"
89 " Integer compression level.\n"
90 "dict_data\n"
90 "dict_data\n"
91 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
91 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
92 "compression_params\n"
92 "compression_params\n"
93 " A ``CompressionParameters`` instance defining low-level compression"
93 " A ``CompressionParameters`` instance defining low-level compression"
94 " parameters. If defined, this will overwrite the ``level`` argument.\n"
94 " parameters. If defined, this will overwrite the ``level`` argument.\n"
95 "write_checksum\n"
95 "write_checksum\n"
96 " If True, a 4 byte content checksum will be written with the compressed\n"
96 " If True, a 4 byte content checksum will be written with the compressed\n"
97 " data, allowing the decompressor to perform content verification.\n"
97 " data, allowing the decompressor to perform content verification.\n"
98 "write_content_size\n"
98 "write_content_size\n"
99 " If True, the decompressed content size will be included in the header of\n"
99 " If True, the decompressed content size will be included in the header of\n"
100 " the compressed data. This data will only be written if the compressor\n"
100 " the compressed data. This data will only be written if the compressor\n"
101 " knows the size of the input data.\n"
101 " knows the size of the input data.\n"
102 "write_dict_id\n"
102 "write_dict_id\n"
103 " Determines whether the dictionary ID will be written into the compressed\n"
103 " Determines whether the dictionary ID will be written into the compressed\n"
104 " data. Defaults to True. Only adds content to the compressed data if\n"
104 " data. Defaults to True. Only adds content to the compressed data if\n"
105 " a dictionary is being used.\n"
105 " a dictionary is being used.\n"
106 );
106 );
107
107
108 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
108 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
109 static char* kwlist[] = {
109 static char* kwlist[] = {
110 "level",
110 "level",
111 "dict_data",
111 "dict_data",
112 "compression_params",
112 "compression_params",
113 "write_checksum",
113 "write_checksum",
114 "write_content_size",
114 "write_content_size",
115 "write_dict_id",
115 "write_dict_id",
116 NULL
116 NULL
117 };
117 };
118
118
119 int level = 3;
119 int level = 3;
120 ZstdCompressionDict* dict = NULL;
120 ZstdCompressionDict* dict = NULL;
121 CompressionParametersObject* params = NULL;
121 CompressionParametersObject* params = NULL;
122 PyObject* writeChecksum = NULL;
122 PyObject* writeChecksum = NULL;
123 PyObject* writeContentSize = NULL;
123 PyObject* writeContentSize = NULL;
124 PyObject* writeDictID = NULL;
124 PyObject* writeDictID = NULL;
125
125
126 self->cctx = NULL;
126 self->cctx = NULL;
127 self->dict = NULL;
127 self->dict = NULL;
128 self->cparams = NULL;
128 self->cparams = NULL;
129 self->cdict = NULL;
129 self->cdict = NULL;
130
130
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist,
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor",
132 &level, &ZstdCompressionDictType, &dict,
132 kwlist, &level, &ZstdCompressionDictType, &dict,
133 &CompressionParametersType, &params,
133 &CompressionParametersType, &params,
134 &writeChecksum, &writeContentSize, &writeDictID)) {
134 &writeChecksum, &writeContentSize, &writeDictID)) {
135 return -1;
135 return -1;
136 }
136 }
137
137
138 if (level < 1) {
138 if (level < 1) {
139 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
139 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
140 return -1;
140 return -1;
141 }
141 }
142
142
143 if (level > ZSTD_maxCLevel()) {
143 if (level > ZSTD_maxCLevel()) {
144 PyErr_Format(PyExc_ValueError, "level must be less than %d",
144 PyErr_Format(PyExc_ValueError, "level must be less than %d",
145 ZSTD_maxCLevel() + 1);
145 ZSTD_maxCLevel() + 1);
146 return -1;
146 return -1;
147 }
147 }
148
148
149 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
149 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
150 overhead of each compression operation. */
150 overhead of each compression operation. */
151 self->cctx = ZSTD_createCCtx();
151 self->cctx = ZSTD_createCCtx();
152 if (!self->cctx) {
152 if (!self->cctx) {
153 PyErr_NoMemory();
153 PyErr_NoMemory();
154 return -1;
154 return -1;
155 }
155 }
156
156
157 self->compressionLevel = level;
157 self->compressionLevel = level;
158
158
159 if (dict) {
159 if (dict) {
160 self->dict = dict;
160 self->dict = dict;
161 Py_INCREF(dict);
161 Py_INCREF(dict);
162 }
162 }
163
163
164 if (params) {
164 if (params) {
165 self->cparams = params;
165 self->cparams = params;
166 Py_INCREF(params);
166 Py_INCREF(params);
167 }
167 }
168
168
169 memset(&self->fparams, 0, sizeof(self->fparams));
169 memset(&self->fparams, 0, sizeof(self->fparams));
170
170
171 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
171 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
172 self->fparams.checksumFlag = 1;
172 self->fparams.checksumFlag = 1;
173 }
173 }
174 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
174 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
175 self->fparams.contentSizeFlag = 1;
175 self->fparams.contentSizeFlag = 1;
176 }
176 }
177 if (writeDictID && PyObject_Not(writeDictID)) {
177 if (writeDictID && PyObject_Not(writeDictID)) {
178 self->fparams.noDictIDFlag = 1;
178 self->fparams.noDictIDFlag = 1;
179 }
179 }
180
180
181 return 0;
181 return 0;
182 }
182 }
183
183
184 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
184 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
185 Py_XDECREF(self->cparams);
185 Py_XDECREF(self->cparams);
186 Py_XDECREF(self->dict);
186 Py_XDECREF(self->dict);
187
187
188 if (self->cdict) {
188 if (self->cdict) {
189 ZSTD_freeCDict(self->cdict);
189 ZSTD_freeCDict(self->cdict);
190 self->cdict = NULL;
190 self->cdict = NULL;
191 }
191 }
192
192
193 if (self->cctx) {
193 if (self->cctx) {
194 ZSTD_freeCCtx(self->cctx);
194 ZSTD_freeCCtx(self->cctx);
195 self->cctx = NULL;
195 self->cctx = NULL;
196 }
196 }
197
197
198 PyObject_Del(self);
198 PyObject_Del(self);
199 }
199 }
200
200
201 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
201 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
202 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
202 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
203 "compress data between streams\n"
203 "compress data between streams\n"
204 "\n"
204 "\n"
205 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
205 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
206 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
206 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
207 "method.\n"
207 "method.\n"
208 "\n"
208 "\n"
209 "An optional ``size`` argument specifies the size of the source stream.\n"
209 "An optional ``size`` argument specifies the size of the source stream.\n"
210 "If defined, compression parameters will be tuned based on the size.\n"
210 "If defined, compression parameters will be tuned based on the size.\n"
211 "\n"
211 "\n"
212 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
212 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
213 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
213 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
214 "the default compression stream input and output sizes, respectively.\n"
214 "the default compression stream input and output sizes, respectively.\n"
215 );
215 );
216
216
217 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
217 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
218 static char* kwlist[] = {
218 static char* kwlist[] = {
219 "ifh",
219 "ifh",
220 "ofh",
220 "ofh",
221 "size",
221 "size",
222 "read_size",
222 "read_size",
223 "write_size",
223 "write_size",
224 NULL
224 NULL
225 };
225 };
226
226
227 PyObject* source;
227 PyObject* source;
228 PyObject* dest;
228 PyObject* dest;
229 Py_ssize_t sourceSize = 0;
229 Py_ssize_t sourceSize = 0;
230 size_t inSize = ZSTD_CStreamInSize();
230 size_t inSize = ZSTD_CStreamInSize();
231 size_t outSize = ZSTD_CStreamOutSize();
231 size_t outSize = ZSTD_CStreamOutSize();
232 ZSTD_CStream* cstream;
232 ZSTD_CStream* cstream;
233 ZSTD_inBuffer input;
233 ZSTD_inBuffer input;
234 ZSTD_outBuffer output;
234 ZSTD_outBuffer output;
235 Py_ssize_t totalRead = 0;
235 Py_ssize_t totalRead = 0;
236 Py_ssize_t totalWrite = 0;
236 Py_ssize_t totalWrite = 0;
237 char* readBuffer;
237 char* readBuffer;
238 Py_ssize_t readSize;
238 Py_ssize_t readSize;
239 PyObject* readResult;
239 PyObject* readResult;
240 PyObject* res = NULL;
240 PyObject* res = NULL;
241 size_t zresult;
241 size_t zresult;
242 PyObject* writeResult;
242 PyObject* writeResult;
243 PyObject* totalReadPy;
243 PyObject* totalReadPy;
244 PyObject* totalWritePy;
244 PyObject* totalWritePy;
245
245
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk", kwlist, &source, &dest, &sourceSize,
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
247 &inSize, &outSize)) {
247 &source, &dest, &sourceSize, &inSize, &outSize)) {
248 return NULL;
248 return NULL;
249 }
249 }
250
250
251 if (!PyObject_HasAttrString(source, "read")) {
251 if (!PyObject_HasAttrString(source, "read")) {
252 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
252 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
253 return NULL;
253 return NULL;
254 }
254 }
255
255
256 if (!PyObject_HasAttrString(dest, "write")) {
256 if (!PyObject_HasAttrString(dest, "write")) {
257 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
257 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
258 return NULL;
258 return NULL;
259 }
259 }
260
260
261 /* Prevent free on uninitialized memory in finally. */
261 /* Prevent free on uninitialized memory in finally. */
262 output.dst = NULL;
262 output.dst = NULL;
263
263
264 cstream = CStream_from_ZstdCompressor(self, sourceSize);
264 cstream = CStream_from_ZstdCompressor(self, sourceSize);
265 if (!cstream) {
265 if (!cstream) {
266 res = NULL;
266 res = NULL;
267 goto finally;
267 goto finally;
268 }
268 }
269
269
270 output.dst = PyMem_Malloc(outSize);
270 output.dst = PyMem_Malloc(outSize);
271 if (!output.dst) {
271 if (!output.dst) {
272 PyErr_NoMemory();
272 PyErr_NoMemory();
273 res = NULL;
273 res = NULL;
274 goto finally;
274 goto finally;
275 }
275 }
276 output.size = outSize;
276 output.size = outSize;
277 output.pos = 0;
277 output.pos = 0;
278
278
279 while (1) {
279 while (1) {
280 /* Try to read from source stream. */
280 /* Try to read from source stream. */
281 readResult = PyObject_CallMethod(source, "read", "n", inSize);
281 readResult = PyObject_CallMethod(source, "read", "n", inSize);
282 if (!readResult) {
282 if (!readResult) {
283 PyErr_SetString(ZstdError, "could not read() from source");
283 PyErr_SetString(ZstdError, "could not read() from source");
284 goto finally;
284 goto finally;
285 }
285 }
286
286
287 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
287 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
288
288
289 /* If no data was read, we're at EOF. */
289 /* If no data was read, we're at EOF. */
290 if (0 == readSize) {
290 if (0 == readSize) {
291 break;
291 break;
292 }
292 }
293
293
294 totalRead += readSize;
294 totalRead += readSize;
295
295
296 /* Send data to compressor */
296 /* Send data to compressor */
297 input.src = readBuffer;
297 input.src = readBuffer;
298 input.size = readSize;
298 input.size = readSize;
299 input.pos = 0;
299 input.pos = 0;
300
300
301 while (input.pos < input.size) {
301 while (input.pos < input.size) {
302 Py_BEGIN_ALLOW_THREADS
302 Py_BEGIN_ALLOW_THREADS
303 zresult = ZSTD_compressStream(cstream, &output, &input);
303 zresult = ZSTD_compressStream(cstream, &output, &input);
304 Py_END_ALLOW_THREADS
304 Py_END_ALLOW_THREADS
305
305
306 if (ZSTD_isError(zresult)) {
306 if (ZSTD_isError(zresult)) {
307 res = NULL;
307 res = NULL;
308 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
308 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
309 goto finally;
309 goto finally;
310 }
310 }
311
311
312 if (output.pos) {
312 if (output.pos) {
313 #if PY_MAJOR_VERSION >= 3
313 #if PY_MAJOR_VERSION >= 3
314 writeResult = PyObject_CallMethod(dest, "write", "y#",
314 writeResult = PyObject_CallMethod(dest, "write", "y#",
315 #else
315 #else
316 writeResult = PyObject_CallMethod(dest, "write", "s#",
316 writeResult = PyObject_CallMethod(dest, "write", "s#",
317 #endif
317 #endif
318 output.dst, output.pos);
318 output.dst, output.pos);
319 Py_XDECREF(writeResult);
319 Py_XDECREF(writeResult);
320 totalWrite += output.pos;
320 totalWrite += output.pos;
321 output.pos = 0;
321 output.pos = 0;
322 }
322 }
323 }
323 }
324 }
324 }
325
325
326 /* We've finished reading. Now flush the compressor stream. */
326 /* We've finished reading. Now flush the compressor stream. */
327 while (1) {
327 while (1) {
328 zresult = ZSTD_endStream(cstream, &output);
328 zresult = ZSTD_endStream(cstream, &output);
329 if (ZSTD_isError(zresult)) {
329 if (ZSTD_isError(zresult)) {
330 PyErr_Format(ZstdError, "error ending compression stream: %s",
330 PyErr_Format(ZstdError, "error ending compression stream: %s",
331 ZSTD_getErrorName(zresult));
331 ZSTD_getErrorName(zresult));
332 res = NULL;
332 res = NULL;
333 goto finally;
333 goto finally;
334 }
334 }
335
335
336 if (output.pos) {
336 if (output.pos) {
337 #if PY_MAJOR_VERSION >= 3
337 #if PY_MAJOR_VERSION >= 3
338 writeResult = PyObject_CallMethod(dest, "write", "y#",
338 writeResult = PyObject_CallMethod(dest, "write", "y#",
339 #else
339 #else
340 writeResult = PyObject_CallMethod(dest, "write", "s#",
340 writeResult = PyObject_CallMethod(dest, "write", "s#",
341 #endif
341 #endif
342 output.dst, output.pos);
342 output.dst, output.pos);
343 totalWrite += output.pos;
343 totalWrite += output.pos;
344 Py_XDECREF(writeResult);
344 Py_XDECREF(writeResult);
345 output.pos = 0;
345 output.pos = 0;
346 }
346 }
347
347
348 if (!zresult) {
348 if (!zresult) {
349 break;
349 break;
350 }
350 }
351 }
351 }
352
352
353 ZSTD_freeCStream(cstream);
353 ZSTD_freeCStream(cstream);
354 cstream = NULL;
354 cstream = NULL;
355
355
356 totalReadPy = PyLong_FromSsize_t(totalRead);
356 totalReadPy = PyLong_FromSsize_t(totalRead);
357 totalWritePy = PyLong_FromSsize_t(totalWrite);
357 totalWritePy = PyLong_FromSsize_t(totalWrite);
358 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
358 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
359 Py_DecRef(totalReadPy);
359 Py_DecRef(totalReadPy);
360 Py_DecRef(totalWritePy);
360 Py_DecRef(totalWritePy);
361
361
362 finally:
362 finally:
363 if (output.dst) {
363 if (output.dst) {
364 PyMem_Free(output.dst);
364 PyMem_Free(output.dst);
365 }
365 }
366
366
367 if (cstream) {
367 if (cstream) {
368 ZSTD_freeCStream(cstream);
368 ZSTD_freeCStream(cstream);
369 }
369 }
370
370
371 return res;
371 return res;
372 }
372 }
373
373
374 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
374 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
375 "compress(data, allow_empty=False)\n"
375 "compress(data, allow_empty=False)\n"
376 "\n"
376 "\n"
377 "Compress data in a single operation.\n"
377 "Compress data in a single operation.\n"
378 "\n"
378 "\n"
379 "This is the simplest mechanism to perform compression: simply pass in a\n"
379 "This is the simplest mechanism to perform compression: simply pass in a\n"
380 "value and get a compressed value back. It is almost the most prone to abuse.\n"
380 "value and get a compressed value back. It is almost the most prone to abuse.\n"
381 "The input and output values must fit in memory, so passing in very large\n"
381 "The input and output values must fit in memory, so passing in very large\n"
382 "values can result in excessive memory usage. For this reason, one of the\n"
382 "values can result in excessive memory usage. For this reason, one of the\n"
383 "streaming based APIs is preferred for larger values.\n"
383 "streaming based APIs is preferred for larger values.\n"
384 );
384 );
385
385
386 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
386 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
387 static char* kwlist[] = {
387 static char* kwlist[] = {
388 "data",
388 "data",
389 "allow_empty",
389 "allow_empty",
390 NULL
390 NULL
391 };
391 };
392
392
393 const char* source;
393 const char* source;
394 Py_ssize_t sourceSize;
394 Py_ssize_t sourceSize;
395 PyObject* allowEmpty = NULL;
395 PyObject* allowEmpty = NULL;
396 size_t destSize;
396 size_t destSize;
397 PyObject* output;
397 PyObject* output;
398 char* dest;
398 char* dest;
399 void* dictData = NULL;
399 void* dictData = NULL;
400 size_t dictSize = 0;
400 size_t dictSize = 0;
401 size_t zresult;
401 size_t zresult;
402 ZSTD_parameters zparams;
402 ZSTD_parameters zparams;
403
403
404 #if PY_MAJOR_VERSION >= 3
404 #if PY_MAJOR_VERSION >= 3
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
406 #else
406 #else
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
408 #endif
408 #endif
409 kwlist, &source, &sourceSize, &allowEmpty)) {
409 kwlist, &source, &sourceSize, &allowEmpty)) {
410 return NULL;
410 return NULL;
411 }
411 }
412
412
413 /* Limitation in zstd C API doesn't let decompression side distinguish
413 /* Limitation in zstd C API doesn't let decompression side distinguish
414 between content size of 0 and unknown content size. This can make round
414 between content size of 0 and unknown content size. This can make round
415 tripping via Python difficult. Until this is fixed, require a flag
415 tripping via Python difficult. Until this is fixed, require a flag
416 to fire the footgun.
416 to fire the footgun.
417 https://github.com/indygreg/python-zstandard/issues/11 */
417 https://github.com/indygreg/python-zstandard/issues/11 */
418 if (0 == sourceSize && self->fparams.contentSizeFlag
418 if (0 == sourceSize && self->fparams.contentSizeFlag
419 && (!allowEmpty || PyObject_Not(allowEmpty))) {
419 && (!allowEmpty || PyObject_Not(allowEmpty))) {
420 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
420 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
421 return NULL;
421 return NULL;
422 }
422 }
423
423
424 destSize = ZSTD_compressBound(sourceSize);
424 destSize = ZSTD_compressBound(sourceSize);
425 output = PyBytes_FromStringAndSize(NULL, destSize);
425 output = PyBytes_FromStringAndSize(NULL, destSize);
426 if (!output) {
426 if (!output) {
427 return NULL;
427 return NULL;
428 }
428 }
429
429
430 dest = PyBytes_AsString(output);
430 dest = PyBytes_AsString(output);
431
431
432 if (self->dict) {
432 if (self->dict) {
433 dictData = self->dict->dictData;
433 dictData = self->dict->dictData;
434 dictSize = self->dict->dictSize;
434 dictSize = self->dict->dictSize;
435 }
435 }
436
436
437 memset(&zparams, 0, sizeof(zparams));
437 memset(&zparams, 0, sizeof(zparams));
438 if (!self->cparams) {
438 if (!self->cparams) {
439 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
439 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
440 }
440 }
441 else {
441 else {
442 ztopy_compression_parameters(self->cparams, &zparams.cParams);
442 ztopy_compression_parameters(self->cparams, &zparams.cParams);
443 /* Do NOT call ZSTD_adjustCParams() here because the compression params
443 /* Do NOT call ZSTD_adjustCParams() here because the compression params
444 come from the user. */
444 come from the user. */
445 }
445 }
446
446
447 zparams.fParams = self->fparams;
447 zparams.fParams = self->fparams;
448
448
449 /* The raw dict data has to be processed before it can be used. Since this
449 /* The raw dict data has to be processed before it can be used. Since this
450 adds overhead - especially if multiple dictionary compression operations
450 adds overhead - especially if multiple dictionary compression operations
451 are performed on the same ZstdCompressor instance - we create a
451 are performed on the same ZstdCompressor instance - we create a
452 ZSTD_CDict once and reuse it for all operations.
452 ZSTD_CDict once and reuse it for all operations.
453
453
454 Note: the compression parameters used for the first invocation (possibly
454 Note: the compression parameters used for the first invocation (possibly
455 derived from the source size) will be reused on all subsequent invocations.
455 derived from the source size) will be reused on all subsequent invocations.
456 https://github.com/facebook/zstd/issues/358 contains more info. We could
456 https://github.com/facebook/zstd/issues/358 contains more info. We could
457 potentially add an argument somewhere to control this behavior.
457 potentially add an argument somewhere to control this behavior.
458 */
458 */
459 if (dictData && !self->cdict) {
459 if (dictData && !self->cdict) {
460 if (populate_cdict(self, dictData, dictSize, &zparams)) {
460 if (populate_cdict(self, dictData, dictSize, &zparams)) {
461 Py_DECREF(output);
461 Py_DECREF(output);
462 return NULL;
462 return NULL;
463 }
463 }
464 }
464 }
465
465
466 Py_BEGIN_ALLOW_THREADS
466 Py_BEGIN_ALLOW_THREADS
467 /* By avoiding ZSTD_compress(), we don't necessarily write out content
467 /* By avoiding ZSTD_compress(), we don't necessarily write out content
468 size. This means the argument to ZstdCompressor to control frame
468 size. This means the argument to ZstdCompressor to control frame
469 parameters is honored. */
469 parameters is honored. */
470 if (self->cdict) {
470 if (self->cdict) {
471 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
471 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
472 source, sourceSize, self->cdict);
472 source, sourceSize, self->cdict);
473 }
473 }
474 else {
474 else {
475 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
475 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
476 source, sourceSize, dictData, dictSize, zparams);
476 source, sourceSize, dictData, dictSize, zparams);
477 }
477 }
478 Py_END_ALLOW_THREADS
478 Py_END_ALLOW_THREADS
479
479
480 if (ZSTD_isError(zresult)) {
480 if (ZSTD_isError(zresult)) {
481 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
481 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
482 Py_CLEAR(output);
482 Py_CLEAR(output);
483 return NULL;
483 return NULL;
484 }
484 }
485 else {
485 else {
486 Py_SIZE(output) = zresult;
486 Py_SIZE(output) = zresult;
487 }
487 }
488
488
489 return output;
489 return output;
490 }
490 }
491
491
492 PyDoc_STRVAR(ZstdCompressionObj__doc__,
492 PyDoc_STRVAR(ZstdCompressionObj__doc__,
493 "compressobj()\n"
493 "compressobj()\n"
494 "\n"
494 "\n"
495 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
495 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
496 "\n"
496 "\n"
497 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
497 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
498 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
498 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
499 "without changing how compression is performed.\n"
499 "without changing how compression is performed.\n"
500 );
500 );
501
501
502 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
502 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
503 static char* kwlist[] = {
503 static char* kwlist[] = {
504 "size",
504 "size",
505 NULL
505 NULL
506 };
506 };
507
507
508 Py_ssize_t inSize = 0;
508 Py_ssize_t inSize = 0;
509 size_t outSize = ZSTD_CStreamOutSize();
509 size_t outSize = ZSTD_CStreamOutSize();
510 ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType);
510 ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType);
511 if (!result) {
511 if (!result) {
512 return NULL;
512 return NULL;
513 }
513 }
514
514
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &inSize)) {
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
516 return NULL;
516 return NULL;
517 }
517 }
518
518
519 result->cstream = CStream_from_ZstdCompressor(self, inSize);
519 result->cstream = CStream_from_ZstdCompressor(self, inSize);
520 if (!result->cstream) {
520 if (!result->cstream) {
521 Py_DECREF(result);
521 Py_DECREF(result);
522 return NULL;
522 return NULL;
523 }
523 }
524
524
525 result->output.dst = PyMem_Malloc(outSize);
525 result->output.dst = PyMem_Malloc(outSize);
526 if (!result->output.dst) {
526 if (!result->output.dst) {
527 PyErr_NoMemory();
527 PyErr_NoMemory();
528 Py_DECREF(result);
528 Py_DECREF(result);
529 return NULL;
529 return NULL;
530 }
530 }
531 result->output.size = outSize;
531 result->output.size = outSize;
532 result->output.pos = 0;
532 result->output.pos = 0;
533
533
534 result->compressor = self;
534 result->compressor = self;
535 Py_INCREF(result->compressor);
535 Py_INCREF(result->compressor);
536
536
537 result->finished = 0;
537 result->finished = 0;
538
538
539 return result;
539 return result;
540 }
540 }
541
541
542 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
542 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
543 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
543 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
544 "Read uncompress data from a reader and return an iterator\n"
544 "Read uncompress data from a reader and return an iterator\n"
545 "\n"
545 "\n"
546 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
546 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
547 "\n"
547 "\n"
548 "Uncompressed data will be obtained from ``reader`` by calling the\n"
548 "Uncompressed data will be obtained from ``reader`` by calling the\n"
549 "``read(size)`` method of it. The source data will be streamed into a\n"
549 "``read(size)`` method of it. The source data will be streamed into a\n"
550 "compressor. As compressed data is available, it will be exposed to the\n"
550 "compressor. As compressed data is available, it will be exposed to the\n"
551 "iterator.\n"
551 "iterator.\n"
552 "\n"
552 "\n"
553 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
553 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
554 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
554 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
555 "and output defaults, respectively.\n"
555 "and output defaults, respectively.\n"
556 "\n"
556 "\n"
557 "The caller is partially in control of how fast data is fed into the\n"
557 "The caller is partially in control of how fast data is fed into the\n"
558 "compressor by how it consumes the returned iterator. The compressor will\n"
558 "compressor by how it consumes the returned iterator. The compressor will\n"
559 "not consume from the reader unless the caller consumes from the iterator.\n"
559 "not consume from the reader unless the caller consumes from the iterator.\n"
560 );
560 );
561
561
562 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
562 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
563 static char* kwlist[] = {
563 static char* kwlist[] = {
564 "reader",
564 "reader",
565 "size",
565 "size",
566 "read_size",
566 "read_size",
567 "write_size",
567 "write_size",
568 NULL
568 NULL
569 };
569 };
570
570
571 PyObject* reader;
571 PyObject* reader;
572 Py_ssize_t sourceSize = 0;
572 Py_ssize_t sourceSize = 0;
573 size_t inSize = ZSTD_CStreamInSize();
573 size_t inSize = ZSTD_CStreamInSize();
574 size_t outSize = ZSTD_CStreamOutSize();
574 size_t outSize = ZSTD_CStreamOutSize();
575 ZstdCompressorIterator* result;
575 ZstdCompressorIterator* result;
576
576
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk", kwlist, &reader, &sourceSize,
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
578 &inSize, &outSize)) {
578 &reader, &sourceSize, &inSize, &outSize)) {
579 return NULL;
579 return NULL;
580 }
580 }
581
581
582 result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType);
582 result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType);
583 if (!result) {
583 if (!result) {
584 return NULL;
584 return NULL;
585 }
585 }
586
586
587 result->compressor = NULL;
587 result->compressor = NULL;
588 result->reader = NULL;
588 result->reader = NULL;
589 result->buffer = NULL;
589 result->buffer = NULL;
590 result->cstream = NULL;
590 result->cstream = NULL;
591 result->input.src = NULL;
591 result->input.src = NULL;
592 result->output.dst = NULL;
592 result->output.dst = NULL;
593 result->readResult = NULL;
593 result->readResult = NULL;
594
594
595 if (PyObject_HasAttrString(reader, "read")) {
595 if (PyObject_HasAttrString(reader, "read")) {
596 result->reader = reader;
596 result->reader = reader;
597 Py_INCREF(result->reader);
597 Py_INCREF(result->reader);
598 }
598 }
599 else if (1 == PyObject_CheckBuffer(reader)) {
599 else if (1 == PyObject_CheckBuffer(reader)) {
600 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
600 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
601 if (!result->buffer) {
601 if (!result->buffer) {
602 goto except;
602 goto except;
603 }
603 }
604
604
605 memset(result->buffer, 0, sizeof(Py_buffer));
605 memset(result->buffer, 0, sizeof(Py_buffer));
606
606
607 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
607 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
608 goto except;
608 goto except;
609 }
609 }
610
610
611 result->bufferOffset = 0;
611 result->bufferOffset = 0;
612 sourceSize = result->buffer->len;
612 sourceSize = result->buffer->len;
613 }
613 }
614 else {
614 else {
615 PyErr_SetString(PyExc_ValueError,
615 PyErr_SetString(PyExc_ValueError,
616 "must pass an object with a read() method or conforms to buffer protocol");
616 "must pass an object with a read() method or conforms to buffer protocol");
617 goto except;
617 goto except;
618 }
618 }
619
619
620 result->compressor = self;
620 result->compressor = self;
621 Py_INCREF(result->compressor);
621 Py_INCREF(result->compressor);
622
622
623 result->sourceSize = sourceSize;
623 result->sourceSize = sourceSize;
624 result->cstream = CStream_from_ZstdCompressor(self, sourceSize);
624 result->cstream = CStream_from_ZstdCompressor(self, sourceSize);
625 if (!result->cstream) {
625 if (!result->cstream) {
626 goto except;
626 goto except;
627 }
627 }
628
628
629 result->inSize = inSize;
629 result->inSize = inSize;
630 result->outSize = outSize;
630 result->outSize = outSize;
631
631
632 result->output.dst = PyMem_Malloc(outSize);
632 result->output.dst = PyMem_Malloc(outSize);
633 if (!result->output.dst) {
633 if (!result->output.dst) {
634 PyErr_NoMemory();
634 PyErr_NoMemory();
635 goto except;
635 goto except;
636 }
636 }
637 result->output.size = outSize;
637 result->output.size = outSize;
638 result->output.pos = 0;
638 result->output.pos = 0;
639
639
640 result->input.src = NULL;
640 result->input.src = NULL;
641 result->input.size = 0;
641 result->input.size = 0;
642 result->input.pos = 0;
642 result->input.pos = 0;
643
643
644 result->finishedInput = 0;
644 result->finishedInput = 0;
645 result->finishedOutput = 0;
645 result->finishedOutput = 0;
646
646
647 goto finally;
647 goto finally;
648
648
649 except:
649 except:
650 if (result->cstream) {
650 if (result->cstream) {
651 ZSTD_freeCStream(result->cstream);
651 ZSTD_freeCStream(result->cstream);
652 result->cstream = NULL;
652 result->cstream = NULL;
653 }
653 }
654
654
655 Py_DecRef((PyObject*)result->compressor);
655 Py_DecRef((PyObject*)result->compressor);
656 Py_DecRef(result->reader);
656 Py_DecRef(result->reader);
657
657
658 Py_DECREF(result);
658 Py_DECREF(result);
659 result = NULL;
659 result = NULL;
660
660
661 finally:
661 finally:
662 return result;
662 return result;
663 }
663 }
664
664
665 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
665 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
666 "Create a context manager to write compressed data to an object.\n"
666 "Create a context manager to write compressed data to an object.\n"
667 "\n"
667 "\n"
668 "The passed object must have a ``write()`` method.\n"
668 "The passed object must have a ``write()`` method.\n"
669 "\n"
669 "\n"
670 "The caller feeds input data to the object by calling ``compress(data)``.\n"
670 "The caller feeds input data to the object by calling ``compress(data)``.\n"
671 "Compressed data is written to the argument given to this function.\n"
671 "Compressed data is written to the argument given to this function.\n"
672 "\n"
672 "\n"
673 "The function takes an optional ``size`` argument indicating the total size\n"
673 "The function takes an optional ``size`` argument indicating the total size\n"
674 "of the eventual input. If specified, the size will influence compression\n"
674 "of the eventual input. If specified, the size will influence compression\n"
675 "parameter tuning and could result in the size being written into the\n"
675 "parameter tuning and could result in the size being written into the\n"
676 "header of the compressed data.\n"
676 "header of the compressed data.\n"
677 "\n"
677 "\n"
678 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
678 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
679 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
679 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
680 "for a compressor output stream.\n"
680 "for a compressor output stream.\n"
681 );
681 );
682
682
683 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
683 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
684 static char* kwlist[] = {
684 static char* kwlist[] = {
685 "writer",
685 "writer",
686 "size",
686 "size",
687 "write_size",
687 "write_size",
688 NULL
688 NULL
689 };
689 };
690
690
691 PyObject* writer;
691 PyObject* writer;
692 ZstdCompressionWriter* result;
692 ZstdCompressionWriter* result;
693 Py_ssize_t sourceSize = 0;
693 Py_ssize_t sourceSize = 0;
694 size_t outSize = ZSTD_CStreamOutSize();
694 size_t outSize = ZSTD_CStreamOutSize();
695
695
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk", kwlist, &writer, &sourceSize,
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
697 &outSize)) {
697 &writer, &sourceSize, &outSize)) {
698 return NULL;
698 return NULL;
699 }
699 }
700
700
701 if (!PyObject_HasAttrString(writer, "write")) {
701 if (!PyObject_HasAttrString(writer, "write")) {
702 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
702 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
703 return NULL;
703 return NULL;
704 }
704 }
705
705
706 result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType);
706 result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType);
707 if (!result) {
707 if (!result) {
708 return NULL;
708 return NULL;
709 }
709 }
710
710
711 result->compressor = self;
711 result->compressor = self;
712 Py_INCREF(result->compressor);
712 Py_INCREF(result->compressor);
713
713
714 result->writer = writer;
714 result->writer = writer;
715 Py_INCREF(result->writer);
715 Py_INCREF(result->writer);
716
716
717 result->sourceSize = sourceSize;
717 result->sourceSize = sourceSize;
718
718
719 result->outSize = outSize;
719 result->outSize = outSize;
720
720
721 result->entered = 0;
721 result->entered = 0;
722 result->cstream = NULL;
722 result->cstream = NULL;
723
723
724 return result;
724 return result;
725 }
725 }
726
726
727 static PyMethodDef ZstdCompressor_methods[] = {
727 static PyMethodDef ZstdCompressor_methods[] = {
728 { "compress", (PyCFunction)ZstdCompressor_compress,
728 { "compress", (PyCFunction)ZstdCompressor_compress,
729 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
729 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
730 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
730 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
731 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
731 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
732 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
732 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
733 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
733 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
734 { "read_from", (PyCFunction)ZstdCompressor_read_from,
734 { "read_from", (PyCFunction)ZstdCompressor_read_from,
735 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
735 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
736 { "write_to", (PyCFunction)ZstdCompressor_write_to,
736 { "write_to", (PyCFunction)ZstdCompressor_write_to,
737 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
737 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
738 { NULL, NULL }
738 { NULL, NULL }
739 };
739 };
740
740
741 PyTypeObject ZstdCompressorType = {
741 PyTypeObject ZstdCompressorType = {
742 PyVarObject_HEAD_INIT(NULL, 0)
742 PyVarObject_HEAD_INIT(NULL, 0)
743 "zstd.ZstdCompressor", /* tp_name */
743 "zstd.ZstdCompressor", /* tp_name */
744 sizeof(ZstdCompressor), /* tp_basicsize */
744 sizeof(ZstdCompressor), /* tp_basicsize */
745 0, /* tp_itemsize */
745 0, /* tp_itemsize */
746 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
746 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
747 0, /* tp_print */
747 0, /* tp_print */
748 0, /* tp_getattr */
748 0, /* tp_getattr */
749 0, /* tp_setattr */
749 0, /* tp_setattr */
750 0, /* tp_compare */
750 0, /* tp_compare */
751 0, /* tp_repr */
751 0, /* tp_repr */
752 0, /* tp_as_number */
752 0, /* tp_as_number */
753 0, /* tp_as_sequence */
753 0, /* tp_as_sequence */
754 0, /* tp_as_mapping */
754 0, /* tp_as_mapping */
755 0, /* tp_hash */
755 0, /* tp_hash */
756 0, /* tp_call */
756 0, /* tp_call */
757 0, /* tp_str */
757 0, /* tp_str */
758 0, /* tp_getattro */
758 0, /* tp_getattro */
759 0, /* tp_setattro */
759 0, /* tp_setattro */
760 0, /* tp_as_buffer */
760 0, /* tp_as_buffer */
761 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
761 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
762 ZstdCompressor__doc__, /* tp_doc */
762 ZstdCompressor__doc__, /* tp_doc */
763 0, /* tp_traverse */
763 0, /* tp_traverse */
764 0, /* tp_clear */
764 0, /* tp_clear */
765 0, /* tp_richcompare */
765 0, /* tp_richcompare */
766 0, /* tp_weaklistoffset */
766 0, /* tp_weaklistoffset */
767 0, /* tp_iter */
767 0, /* tp_iter */
768 0, /* tp_iternext */
768 0, /* tp_iternext */
769 ZstdCompressor_methods, /* tp_methods */
769 ZstdCompressor_methods, /* tp_methods */
770 0, /* tp_members */
770 0, /* tp_members */
771 0, /* tp_getset */
771 0, /* tp_getset */
772 0, /* tp_base */
772 0, /* tp_base */
773 0, /* tp_dict */
773 0, /* tp_dict */
774 0, /* tp_descr_get */
774 0, /* tp_descr_get */
775 0, /* tp_descr_set */
775 0, /* tp_descr_set */
776 0, /* tp_dictoffset */
776 0, /* tp_dictoffset */
777 (initproc)ZstdCompressor_init, /* tp_init */
777 (initproc)ZstdCompressor_init, /* tp_init */
778 0, /* tp_alloc */
778 0, /* tp_alloc */
779 PyType_GenericNew, /* tp_new */
779 PyType_GenericNew, /* tp_new */
780 };
780 };
781
781
782 void compressor_module_init(PyObject* mod) {
782 void compressor_module_init(PyObject* mod) {
783 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
783 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
784 if (PyType_Ready(&ZstdCompressorType) < 0) {
784 if (PyType_Ready(&ZstdCompressorType) < 0) {
785 return;
785 return;
786 }
786 }
787
787
788 Py_INCREF((PyObject*)&ZstdCompressorType);
788 Py_INCREF((PyObject*)&ZstdCompressorType);
789 PyModule_AddObject(mod, "ZstdCompressor",
789 PyModule_AddObject(mod, "ZstdCompressor",
790 (PyObject*)&ZstdCompressorType);
790 (PyObject*)&ZstdCompressorType);
791 }
791 }
@@ -1,187 +1,188 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
14 """A context manager used for writing decompressed output.\n"
14 """A context manager used for writing decompressed output.\n"
15 );
15 );
16
16
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
18 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 if (self->dstream) {
21 if (self->dstream) {
22 ZSTD_freeDStream(self->dstream);
22 ZSTD_freeDStream(self->dstream);
23 self->dstream = NULL;
23 self->dstream = NULL;
24 }
24 }
25
25
26 PyObject_Del(self);
26 PyObject_Del(self);
27 }
27 }
28
28
29 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
29 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
30 if (self->entered) {
30 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
32 return NULL;
33 }
33 }
34
34
35 self->dstream = DStream_from_ZstdDecompressor(self->decompressor);
35 self->dstream = DStream_from_ZstdDecompressor(self->decompressor);
36 if (!self->dstream) {
36 if (!self->dstream) {
37 return NULL;
37 return NULL;
38 }
38 }
39
39
40 self->entered = 1;
40 self->entered = 1;
41
41
42 Py_INCREF(self);
42 Py_INCREF(self);
43 return (PyObject*)self;
43 return (PyObject*)self;
44 }
44 }
45
45
46 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
46 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
47 self->entered = 0;
47 self->entered = 0;
48
48
49 if (self->dstream) {
49 if (self->dstream) {
50 ZSTD_freeDStream(self->dstream);
50 ZSTD_freeDStream(self->dstream);
51 self->dstream = NULL;
51 self->dstream = NULL;
52 }
52 }
53
53
54 Py_RETURN_FALSE;
54 Py_RETURN_FALSE;
55 }
55 }
56
56
57 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
57 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
58 if (!self->dstream) {
58 if (!self->dstream) {
59 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
59 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
60 "call when context manager is active");
60 "call when context manager is active");
61 return NULL;
61 return NULL;
62 }
62 }
63
63
64 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream));
64 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream));
65 }
65 }
66
66
67 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
67 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
68 const char* source;
68 const char* source;
69 Py_ssize_t sourceSize;
69 Py_ssize_t sourceSize;
70 size_t zresult = 0;
70 size_t zresult = 0;
71 ZSTD_inBuffer input;
71 ZSTD_inBuffer input;
72 ZSTD_outBuffer output;
72 ZSTD_outBuffer output;
73 PyObject* res;
73 PyObject* res;
74 Py_ssize_t totalWrite = 0;
74
75
75 #if PY_MAJOR_VERSION >= 3
76 #if PY_MAJOR_VERSION >= 3
76 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
77 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
77 #else
78 #else
78 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
79 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
79 #endif
80 #endif
80 return NULL;
81 return NULL;
81 }
82 }
82
83
83 if (!self->entered) {
84 if (!self->entered) {
84 PyErr_SetString(ZstdError, "write must be called from an active context manager");
85 PyErr_SetString(ZstdError, "write must be called from an active context manager");
85 return NULL;
86 return NULL;
86 }
87 }
87
88
88 output.dst = PyMem_Malloc(self->outSize);
89 output.dst = PyMem_Malloc(self->outSize);
89 if (!output.dst) {
90 if (!output.dst) {
90 return PyErr_NoMemory();
91 return PyErr_NoMemory();
91 }
92 }
92 output.size = self->outSize;
93 output.size = self->outSize;
93 output.pos = 0;
94 output.pos = 0;
94
95
95 input.src = source;
96 input.src = source;
96 input.size = sourceSize;
97 input.size = sourceSize;
97 input.pos = 0;
98 input.pos = 0;
98
99
99 while ((ssize_t)input.pos < sourceSize) {
100 while ((ssize_t)input.pos < sourceSize) {
100 Py_BEGIN_ALLOW_THREADS
101 Py_BEGIN_ALLOW_THREADS
101 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
102 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
102 Py_END_ALLOW_THREADS
103 Py_END_ALLOW_THREADS
103
104
104 if (ZSTD_isError(zresult)) {
105 if (ZSTD_isError(zresult)) {
105 PyMem_Free(output.dst);
106 PyMem_Free(output.dst);
106 PyErr_Format(ZstdError, "zstd decompress error: %s",
107 PyErr_Format(ZstdError, "zstd decompress error: %s",
107 ZSTD_getErrorName(zresult));
108 ZSTD_getErrorName(zresult));
108 return NULL;
109 return NULL;
109 }
110 }
110
111
111 if (output.pos) {
112 if (output.pos) {
112 #if PY_MAJOR_VERSION >= 3
113 #if PY_MAJOR_VERSION >= 3
113 res = PyObject_CallMethod(self->writer, "write", "y#",
114 res = PyObject_CallMethod(self->writer, "write", "y#",
114 #else
115 #else
115 res = PyObject_CallMethod(self->writer, "write", "s#",
116 res = PyObject_CallMethod(self->writer, "write", "s#",
116 #endif
117 #endif
117 output.dst, output.pos);
118 output.dst, output.pos);
118 Py_XDECREF(res);
119 Py_XDECREF(res);
120 totalWrite += output.pos;
119 output.pos = 0;
121 output.pos = 0;
120 }
122 }
121 }
123 }
122
124
123 PyMem_Free(output.dst);
125 PyMem_Free(output.dst);
124
126
125 /* TODO return bytes written */
127 return PyLong_FromSsize_t(totalWrite);
126 Py_RETURN_NONE;
128 }
127 }
128
129
129 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
131 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
131 PyDoc_STR("Enter a decompression context.") },
132 PyDoc_STR("Enter a decompression context.") },
132 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
133 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
133 PyDoc_STR("Exit a decompression context.") },
134 PyDoc_STR("Exit a decompression context.") },
134 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
135 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
135 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
136 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
136 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
137 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
137 PyDoc_STR("Compress data") },
138 PyDoc_STR("Compress data") },
138 { NULL, NULL }
139 { NULL, NULL }
139 };
140 };
140
141
141 PyTypeObject ZstdDecompressionWriterType = {
142 PyTypeObject ZstdDecompressionWriterType = {
142 PyVarObject_HEAD_INIT(NULL, 0)
143 PyVarObject_HEAD_INIT(NULL, 0)
143 "zstd.ZstdDecompressionWriter", /* tp_name */
144 "zstd.ZstdDecompressionWriter", /* tp_name */
144 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
145 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
145 0, /* tp_itemsize */
146 0, /* tp_itemsize */
146 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
147 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
147 0, /* tp_print */
148 0, /* tp_print */
148 0, /* tp_getattr */
149 0, /* tp_getattr */
149 0, /* tp_setattr */
150 0, /* tp_setattr */
150 0, /* tp_compare */
151 0, /* tp_compare */
151 0, /* tp_repr */
152 0, /* tp_repr */
152 0, /* tp_as_number */
153 0, /* tp_as_number */
153 0, /* tp_as_sequence */
154 0, /* tp_as_sequence */
154 0, /* tp_as_mapping */
155 0, /* tp_as_mapping */
155 0, /* tp_hash */
156 0, /* tp_hash */
156 0, /* tp_call */
157 0, /* tp_call */
157 0, /* tp_str */
158 0, /* tp_str */
158 0, /* tp_getattro */
159 0, /* tp_getattro */
159 0, /* tp_setattro */
160 0, /* tp_setattro */
160 0, /* tp_as_buffer */
161 0, /* tp_as_buffer */
161 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
162 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
162 ZstdDecompressionWriter__doc, /* tp_doc */
163 ZstdDecompressionWriter__doc, /* tp_doc */
163 0, /* tp_traverse */
164 0, /* tp_traverse */
164 0, /* tp_clear */
165 0, /* tp_clear */
165 0, /* tp_richcompare */
166 0, /* tp_richcompare */
166 0, /* tp_weaklistoffset */
167 0, /* tp_weaklistoffset */
167 0, /* tp_iter */
168 0, /* tp_iter */
168 0, /* tp_iternext */
169 0, /* tp_iternext */
169 ZstdDecompressionWriter_methods,/* tp_methods */
170 ZstdDecompressionWriter_methods,/* tp_methods */
170 0, /* tp_members */
171 0, /* tp_members */
171 0, /* tp_getset */
172 0, /* tp_getset */
172 0, /* tp_base */
173 0, /* tp_base */
173 0, /* tp_dict */
174 0, /* tp_dict */
174 0, /* tp_descr_get */
175 0, /* tp_descr_get */
175 0, /* tp_descr_set */
176 0, /* tp_descr_set */
176 0, /* tp_dictoffset */
177 0, /* tp_dictoffset */
177 0, /* tp_init */
178 0, /* tp_init */
178 0, /* tp_alloc */
179 0, /* tp_alloc */
179 PyType_GenericNew, /* tp_new */
180 PyType_GenericNew, /* tp_new */
180 };
181 };
181
182
182 void decompressionwriter_module_init(PyObject* mod) {
183 void decompressionwriter_module_init(PyObject* mod) {
183 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
184 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
184 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
185 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
185 return;
186 return;
186 }
187 }
187 }
188 }
@@ -1,170 +1,170 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(DecompressionObj__doc__,
13 PyDoc_STRVAR(DecompressionObj__doc__,
14 "Perform decompression using a standard library compatible API.\n"
14 "Perform decompression using a standard library compatible API.\n"
15 );
15 );
16
16
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 if (self->dstream) {
18 if (self->dstream) {
19 ZSTD_freeDStream(self->dstream);
19 ZSTD_freeDStream(self->dstream);
20 self->dstream = NULL;
20 self->dstream = NULL;
21 }
21 }
22
22
23 Py_XDECREF(self->decompressor);
23 Py_XDECREF(self->decompressor);
24
24
25 PyObject_Del(self);
25 PyObject_Del(self);
26 }
26 }
27
27
28 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
28 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
29 const char* source;
29 const char* source;
30 Py_ssize_t sourceSize;
30 Py_ssize_t sourceSize;
31 size_t zresult;
31 size_t zresult;
32 ZSTD_inBuffer input;
32 ZSTD_inBuffer input;
33 ZSTD_outBuffer output;
33 ZSTD_outBuffer output;
34 size_t outSize = ZSTD_DStreamOutSize();
34 size_t outSize = ZSTD_DStreamOutSize();
35 PyObject* result = NULL;
35 PyObject* result = NULL;
36 Py_ssize_t resultSize = 0;
36 Py_ssize_t resultSize = 0;
37
37
38 if (self->finished) {
38 if (self->finished) {
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 return NULL;
40 return NULL;
41 }
41 }
42
42
43 #if PY_MAJOR_VERSION >= 3
43 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTuple(args, "y#",
44 if (!PyArg_ParseTuple(args, "y#:decompress",
45 #else
45 #else
46 if (!PyArg_ParseTuple(args, "s#",
46 if (!PyArg_ParseTuple(args, "s#:decompress",
47 #endif
47 #endif
48 &source, &sourceSize)) {
48 &source, &sourceSize)) {
49 return NULL;
49 return NULL;
50 }
50 }
51
51
52 input.src = source;
52 input.src = source;
53 input.size = sourceSize;
53 input.size = sourceSize;
54 input.pos = 0;
54 input.pos = 0;
55
55
56 output.dst = PyMem_Malloc(outSize);
56 output.dst = PyMem_Malloc(outSize);
57 if (!output.dst) {
57 if (!output.dst) {
58 PyErr_NoMemory();
58 PyErr_NoMemory();
59 return NULL;
59 return NULL;
60 }
60 }
61 output.size = outSize;
61 output.size = outSize;
62 output.pos = 0;
62 output.pos = 0;
63
63
64 /* Read input until exhausted. */
64 /* Read input until exhausted. */
65 while (input.pos < input.size) {
65 while (input.pos < input.size) {
66 Py_BEGIN_ALLOW_THREADS
66 Py_BEGIN_ALLOW_THREADS
67 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
67 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
68 Py_END_ALLOW_THREADS
68 Py_END_ALLOW_THREADS
69
69
70 if (ZSTD_isError(zresult)) {
70 if (ZSTD_isError(zresult)) {
71 PyErr_Format(ZstdError, "zstd decompressor error: %s",
71 PyErr_Format(ZstdError, "zstd decompressor error: %s",
72 ZSTD_getErrorName(zresult));
72 ZSTD_getErrorName(zresult));
73 result = NULL;
73 result = NULL;
74 goto finally;
74 goto finally;
75 }
75 }
76
76
77 if (0 == zresult) {
77 if (0 == zresult) {
78 self->finished = 1;
78 self->finished = 1;
79 }
79 }
80
80
81 if (output.pos) {
81 if (output.pos) {
82 if (result) {
82 if (result) {
83 resultSize = PyBytes_GET_SIZE(result);
83 resultSize = PyBytes_GET_SIZE(result);
84 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
84 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
85 goto except;
85 goto except;
86 }
86 }
87
87
88 memcpy(PyBytes_AS_STRING(result) + resultSize,
88 memcpy(PyBytes_AS_STRING(result) + resultSize,
89 output.dst, output.pos);
89 output.dst, output.pos);
90 }
90 }
91 else {
91 else {
92 result = PyBytes_FromStringAndSize(output.dst, output.pos);
92 result = PyBytes_FromStringAndSize(output.dst, output.pos);
93 if (!result) {
93 if (!result) {
94 goto except;
94 goto except;
95 }
95 }
96 }
96 }
97
97
98 output.pos = 0;
98 output.pos = 0;
99 }
99 }
100 }
100 }
101
101
102 if (!result) {
102 if (!result) {
103 result = PyBytes_FromString("");
103 result = PyBytes_FromString("");
104 }
104 }
105
105
106 goto finally;
106 goto finally;
107
107
108 except:
108 except:
109 Py_DecRef(result);
109 Py_DecRef(result);
110 result = NULL;
110 result = NULL;
111
111
112 finally:
112 finally:
113 PyMem_Free(output.dst);
113 PyMem_Free(output.dst);
114
114
115 return result;
115 return result;
116 }
116 }
117
117
118 static PyMethodDef DecompressionObj_methods[] = {
118 static PyMethodDef DecompressionObj_methods[] = {
119 { "decompress", (PyCFunction)DecompressionObj_decompress,
119 { "decompress", (PyCFunction)DecompressionObj_decompress,
120 METH_VARARGS, PyDoc_STR("decompress data") },
120 METH_VARARGS, PyDoc_STR("decompress data") },
121 { NULL, NULL }
121 { NULL, NULL }
122 };
122 };
123
123
124 PyTypeObject ZstdDecompressionObjType = {
124 PyTypeObject ZstdDecompressionObjType = {
125 PyVarObject_HEAD_INIT(NULL, 0)
125 PyVarObject_HEAD_INIT(NULL, 0)
126 "zstd.ZstdDecompressionObj", /* tp_name */
126 "zstd.ZstdDecompressionObj", /* tp_name */
127 sizeof(ZstdDecompressionObj), /* tp_basicsize */
127 sizeof(ZstdDecompressionObj), /* tp_basicsize */
128 0, /* tp_itemsize */
128 0, /* tp_itemsize */
129 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
129 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
130 0, /* tp_print */
130 0, /* tp_print */
131 0, /* tp_getattr */
131 0, /* tp_getattr */
132 0, /* tp_setattr */
132 0, /* tp_setattr */
133 0, /* tp_compare */
133 0, /* tp_compare */
134 0, /* tp_repr */
134 0, /* tp_repr */
135 0, /* tp_as_number */
135 0, /* tp_as_number */
136 0, /* tp_as_sequence */
136 0, /* tp_as_sequence */
137 0, /* tp_as_mapping */
137 0, /* tp_as_mapping */
138 0, /* tp_hash */
138 0, /* tp_hash */
139 0, /* tp_call */
139 0, /* tp_call */
140 0, /* tp_str */
140 0, /* tp_str */
141 0, /* tp_getattro */
141 0, /* tp_getattro */
142 0, /* tp_setattro */
142 0, /* tp_setattro */
143 0, /* tp_as_buffer */
143 0, /* tp_as_buffer */
144 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
144 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
145 DecompressionObj__doc__, /* tp_doc */
145 DecompressionObj__doc__, /* tp_doc */
146 0, /* tp_traverse */
146 0, /* tp_traverse */
147 0, /* tp_clear */
147 0, /* tp_clear */
148 0, /* tp_richcompare */
148 0, /* tp_richcompare */
149 0, /* tp_weaklistoffset */
149 0, /* tp_weaklistoffset */
150 0, /* tp_iter */
150 0, /* tp_iter */
151 0, /* tp_iternext */
151 0, /* tp_iternext */
152 DecompressionObj_methods, /* tp_methods */
152 DecompressionObj_methods, /* tp_methods */
153 0, /* tp_members */
153 0, /* tp_members */
154 0, /* tp_getset */
154 0, /* tp_getset */
155 0, /* tp_base */
155 0, /* tp_base */
156 0, /* tp_dict */
156 0, /* tp_dict */
157 0, /* tp_descr_get */
157 0, /* tp_descr_get */
158 0, /* tp_descr_set */
158 0, /* tp_descr_set */
159 0, /* tp_dictoffset */
159 0, /* tp_dictoffset */
160 0, /* tp_init */
160 0, /* tp_init */
161 0, /* tp_alloc */
161 0, /* tp_alloc */
162 PyType_GenericNew, /* tp_new */
162 PyType_GenericNew, /* tp_new */
163 };
163 };
164
164
165 void decompressobj_module_init(PyObject* module) {
165 void decompressobj_module_init(PyObject* module) {
166 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
166 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
167 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
167 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
168 return;
168 return;
169 }
169 }
170 }
170 }
@@ -1,672 +1,845 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
13 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
14 ZSTD_DStream* dstream;
14 ZSTD_DStream* dstream;
15 void* dictData = NULL;
15 void* dictData = NULL;
16 size_t dictSize = 0;
16 size_t dictSize = 0;
17 size_t zresult;
17 size_t zresult;
18
18
19 dstream = ZSTD_createDStream();
19 dstream = ZSTD_createDStream();
20 if (!dstream) {
20 if (!dstream) {
21 PyErr_SetString(ZstdError, "could not create DStream");
21 PyErr_SetString(ZstdError, "could not create DStream");
22 return NULL;
22 return NULL;
23 }
23 }
24
24
25 if (decompressor->dict) {
25 if (decompressor->dict) {
26 dictData = decompressor->dict->dictData;
26 dictData = decompressor->dict->dictData;
27 dictSize = decompressor->dict->dictSize;
27 dictSize = decompressor->dict->dictSize;
28 }
28 }
29
29
30 if (dictData) {
30 if (dictData) {
31 zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
31 zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
32 }
32 }
33 else {
33 else {
34 zresult = ZSTD_initDStream(dstream);
34 zresult = ZSTD_initDStream(dstream);
35 }
35 }
36
36
37 if (ZSTD_isError(zresult)) {
37 if (ZSTD_isError(zresult)) {
38 PyErr_Format(ZstdError, "could not initialize DStream: %s",
38 PyErr_Format(ZstdError, "could not initialize DStream: %s",
39 ZSTD_getErrorName(zresult));
39 ZSTD_getErrorName(zresult));
40 return NULL;
40 return NULL;
41 }
41 }
42
42
43 return dstream;
43 return dstream;
44 }
44 }
45
45
46 PyDoc_STRVAR(Decompressor__doc__,
46 PyDoc_STRVAR(Decompressor__doc__,
47 "ZstdDecompressor(dict_data=None)\n"
47 "ZstdDecompressor(dict_data=None)\n"
48 "\n"
48 "\n"
49 "Create an object used to perform Zstandard decompression.\n"
49 "Create an object used to perform Zstandard decompression.\n"
50 "\n"
50 "\n"
51 "An instance can perform multiple decompression operations."
51 "An instance can perform multiple decompression operations."
52 );
52 );
53
53
54 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
54 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
55 static char* kwlist[] = {
55 static char* kwlist[] = {
56 "dict_data",
56 "dict_data",
57 NULL
57 NULL
58 };
58 };
59
59
60 ZstdCompressionDict* dict = NULL;
60 ZstdCompressionDict* dict = NULL;
61
61
62 self->refdctx = NULL;
62 self->dctx = NULL;
63 self->dict = NULL;
63 self->dict = NULL;
64 self->ddict = NULL;
64 self->ddict = NULL;
65
65
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!", kwlist,
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
67 &ZstdCompressionDictType, &dict)) {
67 &ZstdCompressionDictType, &dict)) {
68 return -1;
68 return -1;
69 }
69 }
70
70
71 /* Instead of creating a ZSTD_DCtx for every decompression operation,
72 we create an instance at object creation time and recycle it via
73 ZSTD_copyDCTx() on each use. This means each use is a malloc+memcpy
74 instead of a malloc+init. */
75 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
71 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
76 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
72 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
77 self->refdctx = ZSTD_createDCtx();
73 self->dctx = ZSTD_createDCtx();
78 if (!self->refdctx) {
74 if (!self->dctx) {
79 PyErr_NoMemory();
75 PyErr_NoMemory();
80 goto except;
76 goto except;
81 }
77 }
82
78
83 if (dict) {
79 if (dict) {
84 self->dict = dict;
80 self->dict = dict;
85 Py_INCREF(dict);
81 Py_INCREF(dict);
86 }
82 }
87
83
88 return 0;
84 return 0;
89
85
90 except:
86 except:
91 if (self->refdctx) {
87 if (self->dctx) {
92 ZSTD_freeDCtx(self->refdctx);
88 ZSTD_freeDCtx(self->dctx);
93 self->refdctx = NULL;
89 self->dctx = NULL;
94 }
90 }
95
91
96 return -1;
92 return -1;
97 }
93 }
98
94
99 static void Decompressor_dealloc(ZstdDecompressor* self) {
95 static void Decompressor_dealloc(ZstdDecompressor* self) {
100 if (self->refdctx) {
96 if (self->dctx) {
101 ZSTD_freeDCtx(self->refdctx);
97 ZSTD_freeDCtx(self->dctx);
102 }
98 }
103
99
104 Py_XDECREF(self->dict);
100 Py_XDECREF(self->dict);
105
101
106 if (self->ddict) {
102 if (self->ddict) {
107 ZSTD_freeDDict(self->ddict);
103 ZSTD_freeDDict(self->ddict);
108 self->ddict = NULL;
104 self->ddict = NULL;
109 }
105 }
110
106
111 PyObject_Del(self);
107 PyObject_Del(self);
112 }
108 }
113
109
114 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
110 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
115 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
111 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
116 "\n"
112 "\n"
117 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
113 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
118 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
114 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
119 "``write(data)`` method.\n"
115 "``write(data)`` method.\n"
120 "\n"
116 "\n"
121 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
117 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
122 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
118 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
123 "to the default input and output sizes of zstd decompressor streams.\n"
119 "to the default input and output sizes of zstd decompressor streams.\n"
124 );
120 );
125
121
126 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
122 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
127 static char* kwlist[] = {
123 static char* kwlist[] = {
128 "ifh",
124 "ifh",
129 "ofh",
125 "ofh",
130 "read_size",
126 "read_size",
131 "write_size",
127 "write_size",
132 NULL
128 NULL
133 };
129 };
134
130
135 PyObject* source;
131 PyObject* source;
136 PyObject* dest;
132 PyObject* dest;
137 size_t inSize = ZSTD_DStreamInSize();
133 size_t inSize = ZSTD_DStreamInSize();
138 size_t outSize = ZSTD_DStreamOutSize();
134 size_t outSize = ZSTD_DStreamOutSize();
139 ZSTD_DStream* dstream;
135 ZSTD_DStream* dstream;
140 ZSTD_inBuffer input;
136 ZSTD_inBuffer input;
141 ZSTD_outBuffer output;
137 ZSTD_outBuffer output;
142 Py_ssize_t totalRead = 0;
138 Py_ssize_t totalRead = 0;
143 Py_ssize_t totalWrite = 0;
139 Py_ssize_t totalWrite = 0;
144 char* readBuffer;
140 char* readBuffer;
145 Py_ssize_t readSize;
141 Py_ssize_t readSize;
146 PyObject* readResult;
142 PyObject* readResult;
147 PyObject* res = NULL;
143 PyObject* res = NULL;
148 size_t zresult = 0;
144 size_t zresult = 0;
149 PyObject* writeResult;
145 PyObject* writeResult;
150 PyObject* totalReadPy;
146 PyObject* totalReadPy;
151 PyObject* totalWritePy;
147 PyObject* totalWritePy;
152
148
153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk", kwlist, &source,
149 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
154 &dest, &inSize, &outSize)) {
150 &source, &dest, &inSize, &outSize)) {
155 return NULL;
151 return NULL;
156 }
152 }
157
153
158 if (!PyObject_HasAttrString(source, "read")) {
154 if (!PyObject_HasAttrString(source, "read")) {
159 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
155 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
160 return NULL;
156 return NULL;
161 }
157 }
162
158
163 if (!PyObject_HasAttrString(dest, "write")) {
159 if (!PyObject_HasAttrString(dest, "write")) {
164 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
160 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
165 return NULL;
161 return NULL;
166 }
162 }
167
163
168 /* Prevent free on uninitialized memory in finally. */
164 /* Prevent free on uninitialized memory in finally. */
169 output.dst = NULL;
165 output.dst = NULL;
170
166
171 dstream = DStream_from_ZstdDecompressor(self);
167 dstream = DStream_from_ZstdDecompressor(self);
172 if (!dstream) {
168 if (!dstream) {
173 res = NULL;
169 res = NULL;
174 goto finally;
170 goto finally;
175 }
171 }
176
172
177 output.dst = PyMem_Malloc(outSize);
173 output.dst = PyMem_Malloc(outSize);
178 if (!output.dst) {
174 if (!output.dst) {
179 PyErr_NoMemory();
175 PyErr_NoMemory();
180 res = NULL;
176 res = NULL;
181 goto finally;
177 goto finally;
182 }
178 }
183 output.size = outSize;
179 output.size = outSize;
184 output.pos = 0;
180 output.pos = 0;
185
181
186 /* Read source stream until EOF */
182 /* Read source stream until EOF */
187 while (1) {
183 while (1) {
188 readResult = PyObject_CallMethod(source, "read", "n", inSize);
184 readResult = PyObject_CallMethod(source, "read", "n", inSize);
189 if (!readResult) {
185 if (!readResult) {
190 PyErr_SetString(ZstdError, "could not read() from source");
186 PyErr_SetString(ZstdError, "could not read() from source");
191 goto finally;
187 goto finally;
192 }
188 }
193
189
194 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
190 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
195
191
196 /* If no data was read, we're at EOF. */
192 /* If no data was read, we're at EOF. */
197 if (0 == readSize) {
193 if (0 == readSize) {
198 break;
194 break;
199 }
195 }
200
196
201 totalRead += readSize;
197 totalRead += readSize;
202
198
203 /* Send data to decompressor */
199 /* Send data to decompressor */
204 input.src = readBuffer;
200 input.src = readBuffer;
205 input.size = readSize;
201 input.size = readSize;
206 input.pos = 0;
202 input.pos = 0;
207
203
208 while (input.pos < input.size) {
204 while (input.pos < input.size) {
209 Py_BEGIN_ALLOW_THREADS
205 Py_BEGIN_ALLOW_THREADS
210 zresult = ZSTD_decompressStream(dstream, &output, &input);
206 zresult = ZSTD_decompressStream(dstream, &output, &input);
211 Py_END_ALLOW_THREADS
207 Py_END_ALLOW_THREADS
212
208
213 if (ZSTD_isError(zresult)) {
209 if (ZSTD_isError(zresult)) {
214 PyErr_Format(ZstdError, "zstd decompressor error: %s",
210 PyErr_Format(ZstdError, "zstd decompressor error: %s",
215 ZSTD_getErrorName(zresult));
211 ZSTD_getErrorName(zresult));
216 res = NULL;
212 res = NULL;
217 goto finally;
213 goto finally;
218 }
214 }
219
215
220 if (output.pos) {
216 if (output.pos) {
221 #if PY_MAJOR_VERSION >= 3
217 #if PY_MAJOR_VERSION >= 3
222 writeResult = PyObject_CallMethod(dest, "write", "y#",
218 writeResult = PyObject_CallMethod(dest, "write", "y#",
223 #else
219 #else
224 writeResult = PyObject_CallMethod(dest, "write", "s#",
220 writeResult = PyObject_CallMethod(dest, "write", "s#",
225 #endif
221 #endif
226 output.dst, output.pos);
222 output.dst, output.pos);
227
223
228 Py_XDECREF(writeResult);
224 Py_XDECREF(writeResult);
229 totalWrite += output.pos;
225 totalWrite += output.pos;
230 output.pos = 0;
226 output.pos = 0;
231 }
227 }
232 }
228 }
233 }
229 }
234
230
235 /* Source stream is exhausted. Finish up. */
231 /* Source stream is exhausted. Finish up. */
236
232
237 ZSTD_freeDStream(dstream);
233 ZSTD_freeDStream(dstream);
238 dstream = NULL;
234 dstream = NULL;
239
235
240 totalReadPy = PyLong_FromSsize_t(totalRead);
236 totalReadPy = PyLong_FromSsize_t(totalRead);
241 totalWritePy = PyLong_FromSsize_t(totalWrite);
237 totalWritePy = PyLong_FromSsize_t(totalWrite);
242 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
238 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
243 Py_DecRef(totalReadPy);
239 Py_DecRef(totalReadPy);
244 Py_DecRef(totalWritePy);
240 Py_DecRef(totalWritePy);
245
241
246 finally:
242 finally:
247 if (output.dst) {
243 if (output.dst) {
248 PyMem_Free(output.dst);
244 PyMem_Free(output.dst);
249 }
245 }
250
246
251 if (dstream) {
247 if (dstream) {
252 ZSTD_freeDStream(dstream);
248 ZSTD_freeDStream(dstream);
253 }
249 }
254
250
255 return res;
251 return res;
256 }
252 }
257
253
258 PyDoc_STRVAR(Decompressor_decompress__doc__,
254 PyDoc_STRVAR(Decompressor_decompress__doc__,
259 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
255 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
260 "\n"
256 "\n"
261 "This method will decompress the entirety of the argument and return the\n"
257 "This method will decompress the entirety of the argument and return the\n"
262 "result.\n"
258 "result.\n"
263 "\n"
259 "\n"
264 "The input bytes are expected to contain a full Zstandard frame (something\n"
260 "The input bytes are expected to contain a full Zstandard frame (something\n"
265 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
261 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
266 "not contain a full frame, an exception will be raised.\n"
262 "not contain a full frame, an exception will be raised.\n"
267 "\n"
263 "\n"
268 "If the frame header of the compressed data does not contain the content size\n"
264 "If the frame header of the compressed data does not contain the content size\n"
269 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
265 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
270 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
266 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
271 "be made to perform decompression into that buffer. If the buffer is too\n"
267 "be made to perform decompression into that buffer. If the buffer is too\n"
272 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
268 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
273 "be resized if it is too large.\n"
269 "be resized if it is too large.\n"
274 "\n"
270 "\n"
275 "Uncompressed data could be much larger than compressed data. As a result,\n"
271 "Uncompressed data could be much larger than compressed data. As a result,\n"
276 "calling this function could result in a very large memory allocation being\n"
272 "calling this function could result in a very large memory allocation being\n"
277 "performed to hold the uncompressed data. Therefore it is **highly**\n"
273 "performed to hold the uncompressed data. Therefore it is **highly**\n"
278 "recommended to use a streaming decompression method instead of this one.\n"
274 "recommended to use a streaming decompression method instead of this one.\n"
279 );
275 );
280
276
281 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
277 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
282 static char* kwlist[] = {
278 static char* kwlist[] = {
283 "data",
279 "data",
284 "max_output_size",
280 "max_output_size",
285 NULL
281 NULL
286 };
282 };
287
283
288 const char* source;
284 const char* source;
289 Py_ssize_t sourceSize;
285 Py_ssize_t sourceSize;
290 Py_ssize_t maxOutputSize = 0;
286 Py_ssize_t maxOutputSize = 0;
291 unsigned long long decompressedSize;
287 unsigned long long decompressedSize;
292 size_t destCapacity;
288 size_t destCapacity;
293 PyObject* result = NULL;
289 PyObject* result = NULL;
294 ZSTD_DCtx* dctx = NULL;
295 void* dictData = NULL;
290 void* dictData = NULL;
296 size_t dictSize = 0;
291 size_t dictSize = 0;
297 size_t zresult;
292 size_t zresult;
298
293
299 #if PY_MAJOR_VERSION >= 3
294 #if PY_MAJOR_VERSION >= 3
300 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", kwlist,
295 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
301 #else
296 #else
302 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", kwlist,
297 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
303 #endif
298 #endif
304 &source, &sourceSize, &maxOutputSize)) {
299 kwlist, &source, &sourceSize, &maxOutputSize)) {
305 return NULL;
300 return NULL;
306 }
301 }
307
302
308 dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx));
309 if (!dctx) {
310 PyErr_NoMemory();
311 return NULL;
312 }
313
314 ZSTD_copyDCtx(dctx, self->refdctx);
315
316 if (self->dict) {
303 if (self->dict) {
317 dictData = self->dict->dictData;
304 dictData = self->dict->dictData;
318 dictSize = self->dict->dictSize;
305 dictSize = self->dict->dictSize;
319 }
306 }
320
307
321 if (dictData && !self->ddict) {
308 if (dictData && !self->ddict) {
322 Py_BEGIN_ALLOW_THREADS
309 Py_BEGIN_ALLOW_THREADS
323 self->ddict = ZSTD_createDDict(dictData, dictSize);
310 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
324 Py_END_ALLOW_THREADS
311 Py_END_ALLOW_THREADS
325
312
326 if (!self->ddict) {
313 if (!self->ddict) {
327 PyErr_SetString(ZstdError, "could not create decompression dict");
314 PyErr_SetString(ZstdError, "could not create decompression dict");
328 goto except;
315 return NULL;
329 }
316 }
330 }
317 }
331
318
332 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
319 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
333 /* 0 returned if content size not in the zstd frame header */
320 /* 0 returned if content size not in the zstd frame header */
334 if (0 == decompressedSize) {
321 if (0 == decompressedSize) {
335 if (0 == maxOutputSize) {
322 if (0 == maxOutputSize) {
336 PyErr_SetString(ZstdError, "input data invalid or missing content size "
323 PyErr_SetString(ZstdError, "input data invalid or missing content size "
337 "in frame header");
324 "in frame header");
338 goto except;
325 return NULL;
339 }
326 }
340 else {
327 else {
341 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
328 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
342 destCapacity = maxOutputSize;
329 destCapacity = maxOutputSize;
343 }
330 }
344 }
331 }
345 else {
332 else {
346 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
333 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
347 destCapacity = decompressedSize;
334 destCapacity = decompressedSize;
348 }
335 }
349
336
350 if (!result) {
337 if (!result) {
351 goto except;
338 return NULL;
352 }
339 }
353
340
354 Py_BEGIN_ALLOW_THREADS
341 Py_BEGIN_ALLOW_THREADS
355 if (self->ddict) {
342 if (self->ddict) {
356 zresult = ZSTD_decompress_usingDDict(dctx, PyBytes_AsString(result), destCapacity,
343 zresult = ZSTD_decompress_usingDDict(self->dctx,
344 PyBytes_AsString(result), destCapacity,
357 source, sourceSize, self->ddict);
345 source, sourceSize, self->ddict);
358 }
346 }
359 else {
347 else {
360 zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize);
348 zresult = ZSTD_decompressDCtx(self->dctx,
349 PyBytes_AsString(result), destCapacity, source, sourceSize);
361 }
350 }
362 Py_END_ALLOW_THREADS
351 Py_END_ALLOW_THREADS
363
352
364 if (ZSTD_isError(zresult)) {
353 if (ZSTD_isError(zresult)) {
365 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
354 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
366 goto except;
355 Py_DecRef(result);
356 return NULL;
367 }
357 }
368 else if (decompressedSize && zresult != decompressedSize) {
358 else if (decompressedSize && zresult != decompressedSize) {
369 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
359 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
370 zresult, decompressedSize);
360 zresult, decompressedSize);
371 goto except;
361 Py_DecRef(result);
362 return NULL;
372 }
363 }
373 else if (zresult < destCapacity) {
364 else if (zresult < destCapacity) {
374 if (_PyBytes_Resize(&result, zresult)) {
365 if (_PyBytes_Resize(&result, zresult)) {
375 goto except;
366 Py_DecRef(result);
367 return NULL;
376 }
368 }
377 }
369 }
378
370
379 goto finally;
380
381 except:
382 Py_DecRef(result);
383 result = NULL;
384
385 finally:
386 if (dctx) {
387 PyMem_FREE(dctx);
388 }
389
390 return result;
371 return result;
391 }
372 }
392
373
393 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
374 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
394 "decompressobj()\n"
375 "decompressobj()\n"
395 "\n"
376 "\n"
396 "Incrementally feed data into a decompressor.\n"
377 "Incrementally feed data into a decompressor.\n"
397 "\n"
378 "\n"
398 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
379 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
399 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
380 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
400 "callers can swap in the zstd decompressor while using the same API.\n"
381 "callers can swap in the zstd decompressor while using the same API.\n"
401 );
382 );
402
383
403 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
384 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
404 ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
385 ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
405 if (!result) {
386 if (!result) {
406 return NULL;
387 return NULL;
407 }
388 }
408
389
409 result->dstream = DStream_from_ZstdDecompressor(self);
390 result->dstream = DStream_from_ZstdDecompressor(self);
410 if (!result->dstream) {
391 if (!result->dstream) {
411 Py_DecRef((PyObject*)result);
392 Py_DecRef((PyObject*)result);
412 return NULL;
393 return NULL;
413 }
394 }
414
395
415 result->decompressor = self;
396 result->decompressor = self;
416 Py_INCREF(result->decompressor);
397 Py_INCREF(result->decompressor);
417
398
418 result->finished = 0;
399 result->finished = 0;
419
400
420 return result;
401 return result;
421 }
402 }
422
403
423 PyDoc_STRVAR(Decompressor_read_from__doc__,
404 PyDoc_STRVAR(Decompressor_read_from__doc__,
424 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
405 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
425 "Read compressed data and return an iterator\n"
406 "Read compressed data and return an iterator\n"
426 "\n"
407 "\n"
427 "Returns an iterator of decompressed data chunks produced from reading from\n"
408 "Returns an iterator of decompressed data chunks produced from reading from\n"
428 "the ``reader``.\n"
409 "the ``reader``.\n"
429 "\n"
410 "\n"
430 "Compressed data will be obtained from ``reader`` by calling the\n"
411 "Compressed data will be obtained from ``reader`` by calling the\n"
431 "``read(size)`` method of it. The source data will be streamed into a\n"
412 "``read(size)`` method of it. The source data will be streamed into a\n"
432 "decompressor. As decompressed data is available, it will be exposed to the\n"
413 "decompressor. As decompressed data is available, it will be exposed to the\n"
433 "returned iterator.\n"
414 "returned iterator.\n"
434 "\n"
415 "\n"
435 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
416 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
436 "iterator in chunks of size ``write_size``. The default values are the input\n"
417 "iterator in chunks of size ``write_size``. The default values are the input\n"
437 "and output sizes for a zstd streaming decompressor.\n"
418 "and output sizes for a zstd streaming decompressor.\n"
438 "\n"
419 "\n"
439 "There is also support for skipping the first ``skip_bytes`` of data from\n"
420 "There is also support for skipping the first ``skip_bytes`` of data from\n"
440 "the source.\n"
421 "the source.\n"
441 );
422 );
442
423
443 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
424 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
444 static char* kwlist[] = {
425 static char* kwlist[] = {
445 "reader",
426 "reader",
446 "read_size",
427 "read_size",
447 "write_size",
428 "write_size",
448 "skip_bytes",
429 "skip_bytes",
449 NULL
430 NULL
450 };
431 };
451
432
452 PyObject* reader;
433 PyObject* reader;
453 size_t inSize = ZSTD_DStreamInSize();
434 size_t inSize = ZSTD_DStreamInSize();
454 size_t outSize = ZSTD_DStreamOutSize();
435 size_t outSize = ZSTD_DStreamOutSize();
455 ZstdDecompressorIterator* result;
436 ZstdDecompressorIterator* result;
456 size_t skipBytes = 0;
437 size_t skipBytes = 0;
457
438
458 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk", kwlist, &reader,
439 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
459 &inSize, &outSize, &skipBytes)) {
440 &reader, &inSize, &outSize, &skipBytes)) {
460 return NULL;
441 return NULL;
461 }
442 }
462
443
463 if (skipBytes >= inSize) {
444 if (skipBytes >= inSize) {
464 PyErr_SetString(PyExc_ValueError,
445 PyErr_SetString(PyExc_ValueError,
465 "skip_bytes must be smaller than read_size");
446 "skip_bytes must be smaller than read_size");
466 return NULL;
447 return NULL;
467 }
448 }
468
449
469 result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
450 result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
470 if (!result) {
451 if (!result) {
471 return NULL;
452 return NULL;
472 }
453 }
473
454
474 result->decompressor = NULL;
455 result->decompressor = NULL;
475 result->reader = NULL;
456 result->reader = NULL;
476 result->buffer = NULL;
457 result->buffer = NULL;
477 result->dstream = NULL;
458 result->dstream = NULL;
478 result->input.src = NULL;
459 result->input.src = NULL;
479 result->output.dst = NULL;
460 result->output.dst = NULL;
480
461
481 if (PyObject_HasAttrString(reader, "read")) {
462 if (PyObject_HasAttrString(reader, "read")) {
482 result->reader = reader;
463 result->reader = reader;
483 Py_INCREF(result->reader);
464 Py_INCREF(result->reader);
484 }
465 }
485 else if (1 == PyObject_CheckBuffer(reader)) {
466 else if (1 == PyObject_CheckBuffer(reader)) {
486 /* Object claims it is a buffer. Try to get a handle to it. */
467 /* Object claims it is a buffer. Try to get a handle to it. */
487 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
468 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
488 if (!result->buffer) {
469 if (!result->buffer) {
489 goto except;
470 goto except;
490 }
471 }
491
472
492 memset(result->buffer, 0, sizeof(Py_buffer));
473 memset(result->buffer, 0, sizeof(Py_buffer));
493
474
494 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
475 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
495 goto except;
476 goto except;
496 }
477 }
497
478
498 result->bufferOffset = 0;
479 result->bufferOffset = 0;
499 }
480 }
500 else {
481 else {
501 PyErr_SetString(PyExc_ValueError,
482 PyErr_SetString(PyExc_ValueError,
502 "must pass an object with a read() method or conforms to buffer protocol");
483 "must pass an object with a read() method or conforms to buffer protocol");
503 goto except;
484 goto except;
504 }
485 }
505
486
506 result->decompressor = self;
487 result->decompressor = self;
507 Py_INCREF(result->decompressor);
488 Py_INCREF(result->decompressor);
508
489
509 result->inSize = inSize;
490 result->inSize = inSize;
510 result->outSize = outSize;
491 result->outSize = outSize;
511 result->skipBytes = skipBytes;
492 result->skipBytes = skipBytes;
512
493
513 result->dstream = DStream_from_ZstdDecompressor(self);
494 result->dstream = DStream_from_ZstdDecompressor(self);
514 if (!result->dstream) {
495 if (!result->dstream) {
515 goto except;
496 goto except;
516 }
497 }
517
498
518 result->input.src = PyMem_Malloc(inSize);
499 result->input.src = PyMem_Malloc(inSize);
519 if (!result->input.src) {
500 if (!result->input.src) {
520 PyErr_NoMemory();
501 PyErr_NoMemory();
521 goto except;
502 goto except;
522 }
503 }
523 result->input.size = 0;
504 result->input.size = 0;
524 result->input.pos = 0;
505 result->input.pos = 0;
525
506
526 result->output.dst = NULL;
507 result->output.dst = NULL;
527 result->output.size = 0;
508 result->output.size = 0;
528 result->output.pos = 0;
509 result->output.pos = 0;
529
510
530 result->readCount = 0;
511 result->readCount = 0;
531 result->finishedInput = 0;
512 result->finishedInput = 0;
532 result->finishedOutput = 0;
513 result->finishedOutput = 0;
533
514
534 goto finally;
515 goto finally;
535
516
536 except:
517 except:
537 if (result->reader) {
518 Py_CLEAR(result->reader);
538 Py_DECREF(result->reader);
539 result->reader = NULL;
540 }
541
519
542 if (result->buffer) {
520 if (result->buffer) {
543 PyBuffer_Release(result->buffer);
521 PyBuffer_Release(result->buffer);
544 Py_DECREF(result->buffer);
522 Py_CLEAR(result->buffer);
545 result->buffer = NULL;
546 }
523 }
547
524
548 Py_DECREF(result);
525 Py_CLEAR(result);
549 result = NULL;
550
526
551 finally:
527 finally:
552
528
553 return result;
529 return result;
554 }
530 }
555
531
556 PyDoc_STRVAR(Decompressor_write_to__doc__,
532 PyDoc_STRVAR(Decompressor_write_to__doc__,
557 "Create a context manager to write decompressed data to an object.\n"
533 "Create a context manager to write decompressed data to an object.\n"
558 "\n"
534 "\n"
559 "The passed object must have a ``write()`` method.\n"
535 "The passed object must have a ``write()`` method.\n"
560 "\n"
536 "\n"
561 "The caller feeds intput data to the object by calling ``write(data)``.\n"
537 "The caller feeds intput data to the object by calling ``write(data)``.\n"
562 "Decompressed data is written to the argument given as it is decompressed.\n"
538 "Decompressed data is written to the argument given as it is decompressed.\n"
563 "\n"
539 "\n"
564 "An optional ``write_size`` argument defines the size of chunks to\n"
540 "An optional ``write_size`` argument defines the size of chunks to\n"
565 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
541 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
566 "streaming decompressor.\n"
542 "streaming decompressor.\n"
567 );
543 );
568
544
569 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
545 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
570 static char* kwlist[] = {
546 static char* kwlist[] = {
571 "writer",
547 "writer",
572 "write_size",
548 "write_size",
573 NULL
549 NULL
574 };
550 };
575
551
576 PyObject* writer;
552 PyObject* writer;
577 size_t outSize = ZSTD_DStreamOutSize();
553 size_t outSize = ZSTD_DStreamOutSize();
578 ZstdDecompressionWriter* result;
554 ZstdDecompressionWriter* result;
579
555
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k", kwlist, &writer, &outSize)) {
556 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
557 &writer, &outSize)) {
581 return NULL;
558 return NULL;
582 }
559 }
583
560
584 if (!PyObject_HasAttrString(writer, "write")) {
561 if (!PyObject_HasAttrString(writer, "write")) {
585 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
562 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
586 return NULL;
563 return NULL;
587 }
564 }
588
565
589 result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
566 result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
590 if (!result) {
567 if (!result) {
591 return NULL;
568 return NULL;
592 }
569 }
593
570
594 result->decompressor = self;
571 result->decompressor = self;
595 Py_INCREF(result->decompressor);
572 Py_INCREF(result->decompressor);
596
573
597 result->writer = writer;
574 result->writer = writer;
598 Py_INCREF(result->writer);
575 Py_INCREF(result->writer);
599
576
600 result->outSize = outSize;
577 result->outSize = outSize;
601
578
602 result->entered = 0;
579 result->entered = 0;
603 result->dstream = NULL;
580 result->dstream = NULL;
604
581
605 return result;
582 return result;
606 }
583 }
607
584
585 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
586 "Decompress a series of chunks using the content dictionary chaining technique\n"
587 );
588
589 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
590 static char* kwlist[] = {
591 "frames",
592 NULL
593 };
594
595 PyObject* chunks;
596 Py_ssize_t chunksLen;
597 Py_ssize_t chunkIndex;
598 char parity = 0;
599 PyObject* chunk;
600 char* chunkData;
601 Py_ssize_t chunkSize;
602 ZSTD_DCtx* dctx = NULL;
603 size_t zresult;
604 ZSTD_frameParams frameParams;
605 void* buffer1 = NULL;
606 size_t buffer1Size = 0;
607 size_t buffer1ContentSize = 0;
608 void* buffer2 = NULL;
609 size_t buffer2Size = 0;
610 size_t buffer2ContentSize = 0;
611 void* destBuffer = NULL;
612 PyObject* result = NULL;
613
614 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
615 kwlist, &PyList_Type, &chunks)) {
616 return NULL;
617 }
618
619 chunksLen = PyList_Size(chunks);
620 if (!chunksLen) {
621 PyErr_SetString(PyExc_ValueError, "empty input chain");
622 return NULL;
623 }
624
625 /* The first chunk should not be using a dictionary. We handle it specially. */
626 chunk = PyList_GetItem(chunks, 0);
627 if (!PyBytes_Check(chunk)) {
628 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
629 return NULL;
630 }
631
632 /* We require that all chunks be zstd frames and that they have content size set. */
633 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
634 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
635 if (ZSTD_isError(zresult)) {
636 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
637 return NULL;
638 }
639 else if (zresult) {
640 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
641 return NULL;
642 }
643
644 if (0 == frameParams.frameContentSize) {
645 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
646 return NULL;
647 }
648
649 dctx = ZSTD_createDCtx();
650 if (!dctx) {
651 PyErr_NoMemory();
652 goto finally;
653 }
654
655 buffer1Size = frameParams.frameContentSize;
656 buffer1 = PyMem_Malloc(buffer1Size);
657 if (!buffer1) {
658 goto finally;
659 }
660
661 Py_BEGIN_ALLOW_THREADS
662 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
663 Py_END_ALLOW_THREADS
664 if (ZSTD_isError(zresult)) {
665 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
666 goto finally;
667 }
668
669 buffer1ContentSize = zresult;
670
671 /* Special case of a simple chain. */
672 if (1 == chunksLen) {
673 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
674 goto finally;
675 }
676
677 /* This should ideally look at next chunk. But this is slightly simpler. */
678 buffer2Size = frameParams.frameContentSize;
679 buffer2 = PyMem_Malloc(buffer2Size);
680 if (!buffer2) {
681 goto finally;
682 }
683
684 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
685 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
686 used as a content dictionary) and the other holds the new fulltext. The
687 buffers grow when needed but never decrease in size. This limits the
688 memory allocator overhead.
689 */
690 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
691 chunk = PyList_GetItem(chunks, chunkIndex);
692 if (!PyBytes_Check(chunk)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
694 goto finally;
695 }
696
697 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
698 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
699 if (ZSTD_isError(zresult)) {
700 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
701 goto finally;
702 }
703 else if (zresult) {
704 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
705 goto finally;
706 }
707
708 if (0 == frameParams.frameContentSize) {
709 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
710 goto finally;
711 }
712
713 parity = chunkIndex % 2;
714
715 /* This could definitely be abstracted to reduce code duplication. */
716 if (parity) {
717 /* Resize destination buffer to hold larger content. */
718 if (buffer2Size < frameParams.frameContentSize) {
719 buffer2Size = frameParams.frameContentSize;
720 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
721 if (!destBuffer) {
722 goto finally;
723 }
724 buffer2 = destBuffer;
725 }
726
727 Py_BEGIN_ALLOW_THREADS
728 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
729 chunkData, chunkSize, buffer1, buffer1ContentSize);
730 Py_END_ALLOW_THREADS
731 if (ZSTD_isError(zresult)) {
732 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
733 chunkIndex, ZSTD_getErrorName(zresult));
734 goto finally;
735 }
736 buffer2ContentSize = zresult;
737 }
738 else {
739 if (buffer1Size < frameParams.frameContentSize) {
740 buffer1Size = frameParams.frameContentSize;
741 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
742 if (!destBuffer) {
743 goto finally;
744 }
745 buffer1 = destBuffer;
746 }
747
748 Py_BEGIN_ALLOW_THREADS
749 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
750 chunkData, chunkSize, buffer2, buffer2ContentSize);
751 Py_END_ALLOW_THREADS
752 if (ZSTD_isError(zresult)) {
753 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
754 chunkIndex, ZSTD_getErrorName(zresult));
755 goto finally;
756 }
757 buffer1ContentSize = zresult;
758 }
759 }
760
761 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
762 parity ? buffer2ContentSize : buffer1ContentSize);
763
764 finally:
765 if (buffer2) {
766 PyMem_Free(buffer2);
767 }
768 if (buffer1) {
769 PyMem_Free(buffer1);
770 }
771
772 if (dctx) {
773 ZSTD_freeDCtx(dctx);
774 }
775
776 return result;
777 }
778
608 static PyMethodDef Decompressor_methods[] = {
779 static PyMethodDef Decompressor_methods[] = {
609 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
780 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
610 Decompressor_copy_stream__doc__ },
781 Decompressor_copy_stream__doc__ },
611 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
782 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
612 Decompressor_decompress__doc__ },
783 Decompressor_decompress__doc__ },
613 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
784 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
614 Decompressor_decompressobj__doc__ },
785 Decompressor_decompressobj__doc__ },
615 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
786 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
616 Decompressor_read_from__doc__ },
787 Decompressor_read_from__doc__ },
617 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
788 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
618 Decompressor_write_to__doc__ },
789 Decompressor_write_to__doc__ },
790 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
791 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
619 { NULL, NULL }
792 { NULL, NULL }
620 };
793 };
621
794
622 PyTypeObject ZstdDecompressorType = {
795 PyTypeObject ZstdDecompressorType = {
623 PyVarObject_HEAD_INIT(NULL, 0)
796 PyVarObject_HEAD_INIT(NULL, 0)
624 "zstd.ZstdDecompressor", /* tp_name */
797 "zstd.ZstdDecompressor", /* tp_name */
625 sizeof(ZstdDecompressor), /* tp_basicsize */
798 sizeof(ZstdDecompressor), /* tp_basicsize */
626 0, /* tp_itemsize */
799 0, /* tp_itemsize */
627 (destructor)Decompressor_dealloc, /* tp_dealloc */
800 (destructor)Decompressor_dealloc, /* tp_dealloc */
628 0, /* tp_print */
801 0, /* tp_print */
629 0, /* tp_getattr */
802 0, /* tp_getattr */
630 0, /* tp_setattr */
803 0, /* tp_setattr */
631 0, /* tp_compare */
804 0, /* tp_compare */
632 0, /* tp_repr */
805 0, /* tp_repr */
633 0, /* tp_as_number */
806 0, /* tp_as_number */
634 0, /* tp_as_sequence */
807 0, /* tp_as_sequence */
635 0, /* tp_as_mapping */
808 0, /* tp_as_mapping */
636 0, /* tp_hash */
809 0, /* tp_hash */
637 0, /* tp_call */
810 0, /* tp_call */
638 0, /* tp_str */
811 0, /* tp_str */
639 0, /* tp_getattro */
812 0, /* tp_getattro */
640 0, /* tp_setattro */
813 0, /* tp_setattro */
641 0, /* tp_as_buffer */
814 0, /* tp_as_buffer */
642 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
815 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
643 Decompressor__doc__, /* tp_doc */
816 Decompressor__doc__, /* tp_doc */
644 0, /* tp_traverse */
817 0, /* tp_traverse */
645 0, /* tp_clear */
818 0, /* tp_clear */
646 0, /* tp_richcompare */
819 0, /* tp_richcompare */
647 0, /* tp_weaklistoffset */
820 0, /* tp_weaklistoffset */
648 0, /* tp_iter */
821 0, /* tp_iter */
649 0, /* tp_iternext */
822 0, /* tp_iternext */
650 Decompressor_methods, /* tp_methods */
823 Decompressor_methods, /* tp_methods */
651 0, /* tp_members */
824 0, /* tp_members */
652 0, /* tp_getset */
825 0, /* tp_getset */
653 0, /* tp_base */
826 0, /* tp_base */
654 0, /* tp_dict */
827 0, /* tp_dict */
655 0, /* tp_descr_get */
828 0, /* tp_descr_get */
656 0, /* tp_descr_set */
829 0, /* tp_descr_set */
657 0, /* tp_dictoffset */
830 0, /* tp_dictoffset */
658 (initproc)Decompressor_init, /* tp_init */
831 (initproc)Decompressor_init, /* tp_init */
659 0, /* tp_alloc */
832 0, /* tp_alloc */
660 PyType_GenericNew, /* tp_new */
833 PyType_GenericNew, /* tp_new */
661 };
834 };
662
835
663 void decompressor_module_init(PyObject* mod) {
836 void decompressor_module_init(PyObject* mod) {
664 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
837 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
665 if (PyType_Ready(&ZstdDecompressorType) < 0) {
838 if (PyType_Ready(&ZstdDecompressorType) < 0) {
666 return;
839 return;
667 }
840 }
668
841
669 Py_INCREF((PyObject*)&ZstdDecompressorType);
842 Py_INCREF((PyObject*)&ZstdDecompressorType);
670 PyModule_AddObject(mod, "ZstdDecompressor",
843 PyModule_AddObject(mod, "ZstdDecompressor",
671 (PyObject*)&ZstdDecompressorType);
844 (PyObject*)&ZstdDecompressorType);
672 }
845 }
@@ -1,125 +1,141 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 PyDoc_STRVAR(DictParameters__doc__,
11 PyDoc_STRVAR(DictParameters__doc__,
12 "DictParameters: low-level control over dictionary generation");
12 "DictParameters: low-level control over dictionary generation");
13
13
14 static PyObject* DictParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
14 static PyObject* DictParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
15 DictParametersObject* self;
15 DictParametersObject* self;
16 unsigned selectivityLevel;
16 unsigned selectivityLevel;
17 int compressionLevel;
17 int compressionLevel;
18 unsigned notificationLevel;
18 unsigned notificationLevel;
19 unsigned dictID;
19 unsigned dictID;
20
20
21 if (!PyArg_ParseTuple(args, "IiII", &selectivityLevel, &compressionLevel,
21 if (!PyArg_ParseTuple(args, "IiII:DictParameters",
22 &notificationLevel, &dictID)) {
22 &selectivityLevel, &compressionLevel, &notificationLevel, &dictID)) {
23 return NULL;
23 return NULL;
24 }
24 }
25
25
26 self = (DictParametersObject*)subtype->tp_alloc(subtype, 1);
26 self = (DictParametersObject*)subtype->tp_alloc(subtype, 1);
27 if (!self) {
27 if (!self) {
28 return NULL;
28 return NULL;
29 }
29 }
30
30
31 self->selectivityLevel = selectivityLevel;
31 self->selectivityLevel = selectivityLevel;
32 self->compressionLevel = compressionLevel;
32 self->compressionLevel = compressionLevel;
33 self->notificationLevel = notificationLevel;
33 self->notificationLevel = notificationLevel;
34 self->dictID = dictID;
34 self->dictID = dictID;
35
35
36 return (PyObject*)self;
36 return (PyObject*)self;
37 }
37 }
38
38
39 static void DictParameters_dealloc(PyObject* self) {
39 static void DictParameters_dealloc(PyObject* self) {
40 PyObject_Del(self);
40 PyObject_Del(self);
41 }
41 }
42
42
43 static PyMemberDef DictParameters_members[] = {
44 { "selectivity_level", T_UINT,
45 offsetof(DictParametersObject, selectivityLevel), READONLY,
46 "selectivity level" },
47 { "compression_level", T_INT,
48 offsetof(DictParametersObject, compressionLevel), READONLY,
49 "compression level" },
50 { "notification_level", T_UINT,
51 offsetof(DictParametersObject, notificationLevel), READONLY,
52 "notification level" },
53 { "dict_id", T_UINT,
54 offsetof(DictParametersObject, dictID), READONLY,
55 "dictionary ID" },
56 { NULL }
57 };
58
43 static Py_ssize_t DictParameters_length(PyObject* self) {
59 static Py_ssize_t DictParameters_length(PyObject* self) {
44 return 4;
60 return 4;
45 }
61 }
46
62
47 static PyObject* DictParameters_item(PyObject* o, Py_ssize_t i) {
63 static PyObject* DictParameters_item(PyObject* o, Py_ssize_t i) {
48 DictParametersObject* self = (DictParametersObject*)o;
64 DictParametersObject* self = (DictParametersObject*)o;
49
65
50 switch (i) {
66 switch (i) {
51 case 0:
67 case 0:
52 return PyLong_FromLong(self->selectivityLevel);
68 return PyLong_FromLong(self->selectivityLevel);
53 case 1:
69 case 1:
54 return PyLong_FromLong(self->compressionLevel);
70 return PyLong_FromLong(self->compressionLevel);
55 case 2:
71 case 2:
56 return PyLong_FromLong(self->notificationLevel);
72 return PyLong_FromLong(self->notificationLevel);
57 case 3:
73 case 3:
58 return PyLong_FromLong(self->dictID);
74 return PyLong_FromLong(self->dictID);
59 default:
75 default:
60 PyErr_SetString(PyExc_IndexError, "index out of range");
76 PyErr_SetString(PyExc_IndexError, "index out of range");
61 return NULL;
77 return NULL;
62 }
78 }
63 }
79 }
64
80
65 static PySequenceMethods DictParameters_sq = {
81 static PySequenceMethods DictParameters_sq = {
66 DictParameters_length, /* sq_length */
82 DictParameters_length, /* sq_length */
67 0, /* sq_concat */
83 0, /* sq_concat */
68 0, /* sq_repeat */
84 0, /* sq_repeat */
69 DictParameters_item, /* sq_item */
85 DictParameters_item, /* sq_item */
70 0, /* sq_ass_item */
86 0, /* sq_ass_item */
71 0, /* sq_contains */
87 0, /* sq_contains */
72 0, /* sq_inplace_concat */
88 0, /* sq_inplace_concat */
73 0 /* sq_inplace_repeat */
89 0 /* sq_inplace_repeat */
74 };
90 };
75
91
76 PyTypeObject DictParametersType = {
92 PyTypeObject DictParametersType = {
77 PyVarObject_HEAD_INIT(NULL, 0)
93 PyVarObject_HEAD_INIT(NULL, 0)
78 "DictParameters", /* tp_name */
94 "DictParameters", /* tp_name */
79 sizeof(DictParametersObject), /* tp_basicsize */
95 sizeof(DictParametersObject), /* tp_basicsize */
80 0, /* tp_itemsize */
96 0, /* tp_itemsize */
81 (destructor)DictParameters_dealloc, /* tp_dealloc */
97 (destructor)DictParameters_dealloc, /* tp_dealloc */
82 0, /* tp_print */
98 0, /* tp_print */
83 0, /* tp_getattr */
99 0, /* tp_getattr */
84 0, /* tp_setattr */
100 0, /* tp_setattr */
85 0, /* tp_compare */
101 0, /* tp_compare */
86 0, /* tp_repr */
102 0, /* tp_repr */
87 0, /* tp_as_number */
103 0, /* tp_as_number */
88 &DictParameters_sq, /* tp_as_sequence */
104 &DictParameters_sq, /* tp_as_sequence */
89 0, /* tp_as_mapping */
105 0, /* tp_as_mapping */
90 0, /* tp_hash */
106 0, /* tp_hash */
91 0, /* tp_call */
107 0, /* tp_call */
92 0, /* tp_str */
108 0, /* tp_str */
93 0, /* tp_getattro */
109 0, /* tp_getattro */
94 0, /* tp_setattro */
110 0, /* tp_setattro */
95 0, /* tp_as_buffer */
111 0, /* tp_as_buffer */
96 Py_TPFLAGS_DEFAULT, /* tp_flags */
112 Py_TPFLAGS_DEFAULT, /* tp_flags */
97 DictParameters__doc__, /* tp_doc */
113 DictParameters__doc__, /* tp_doc */
98 0, /* tp_traverse */
114 0, /* tp_traverse */
99 0, /* tp_clear */
115 0, /* tp_clear */
100 0, /* tp_richcompare */
116 0, /* tp_richcompare */
101 0, /* tp_weaklistoffset */
117 0, /* tp_weaklistoffset */
102 0, /* tp_iter */
118 0, /* tp_iter */
103 0, /* tp_iternext */
119 0, /* tp_iternext */
104 0, /* tp_methods */
120 0, /* tp_methods */
105 0, /* tp_members */
121 DictParameters_members, /* tp_members */
106 0, /* tp_getset */
122 0, /* tp_getset */
107 0, /* tp_base */
123 0, /* tp_base */
108 0, /* tp_dict */
124 0, /* tp_dict */
109 0, /* tp_descr_get */
125 0, /* tp_descr_get */
110 0, /* tp_descr_set */
126 0, /* tp_descr_set */
111 0, /* tp_dictoffset */
127 0, /* tp_dictoffset */
112 0, /* tp_init */
128 0, /* tp_init */
113 0, /* tp_alloc */
129 0, /* tp_alloc */
114 DictParameters_new, /* tp_new */
130 DictParameters_new, /* tp_new */
115 };
131 };
116
132
117 void dictparams_module_init(PyObject* mod) {
133 void dictparams_module_init(PyObject* mod) {
118 Py_TYPE(&DictParametersType) = &PyType_Type;
134 Py_TYPE(&DictParametersType) = &PyType_Type;
119 if (PyType_Ready(&DictParametersType) < 0) {
135 if (PyType_Ready(&DictParametersType) < 0) {
120 return;
136 return;
121 }
137 }
122
138
123 Py_IncRef((PyObject*)&DictParametersType);
139 Py_IncRef((PyObject*)&DictParametersType);
124 PyModule_AddObject(mod, "DictParameters", (PyObject*)&DictParametersType);
140 PyModule_AddObject(mod, "DictParameters", (PyObject*)&DictParametersType);
125 }
141 }
@@ -1,178 +1,190 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11
12
12 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
14 #include "mem.h"
15 #include "mem.h"
15 #include "zstd.h"
16 #include "zstd.h"
16 #include "zdict.h"
17 #include "zdict.h"
17
18
18 #define PYTHON_ZSTANDARD_VERSION "0.6.0"
19 #define PYTHON_ZSTANDARD_VERSION "0.7.0"
19
20
20 typedef enum {
21 typedef enum {
21 compressorobj_flush_finish,
22 compressorobj_flush_finish,
22 compressorobj_flush_block,
23 compressorobj_flush_block,
23 } CompressorObj_Flush;
24 } CompressorObj_Flush;
24
25
25 typedef struct {
26 typedef struct {
26 PyObject_HEAD
27 PyObject_HEAD
27 unsigned windowLog;
28 unsigned windowLog;
28 unsigned chainLog;
29 unsigned chainLog;
29 unsigned hashLog;
30 unsigned hashLog;
30 unsigned searchLog;
31 unsigned searchLog;
31 unsigned searchLength;
32 unsigned searchLength;
32 unsigned targetLength;
33 unsigned targetLength;
33 ZSTD_strategy strategy;
34 ZSTD_strategy strategy;
34 } CompressionParametersObject;
35 } CompressionParametersObject;
35
36
36 extern PyTypeObject CompressionParametersType;
37 extern PyTypeObject CompressionParametersType;
37
38
38 typedef struct {
39 typedef struct {
39 PyObject_HEAD
40 PyObject_HEAD
41 unsigned long long frameContentSize;
42 unsigned windowSize;
43 unsigned dictID;
44 char checksumFlag;
45 } FrameParametersObject;
46
47 extern PyTypeObject FrameParametersType;
48
49 typedef struct {
50 PyObject_HEAD
40 unsigned selectivityLevel;
51 unsigned selectivityLevel;
41 int compressionLevel;
52 int compressionLevel;
42 unsigned notificationLevel;
53 unsigned notificationLevel;
43 unsigned dictID;
54 unsigned dictID;
44 } DictParametersObject;
55 } DictParametersObject;
45
56
46 extern PyTypeObject DictParametersType;
57 extern PyTypeObject DictParametersType;
47
58
48 typedef struct {
59 typedef struct {
49 PyObject_HEAD
60 PyObject_HEAD
50
61
51 void* dictData;
62 void* dictData;
52 size_t dictSize;
63 size_t dictSize;
53 } ZstdCompressionDict;
64 } ZstdCompressionDict;
54
65
55 extern PyTypeObject ZstdCompressionDictType;
66 extern PyTypeObject ZstdCompressionDictType;
56
67
57 typedef struct {
68 typedef struct {
58 PyObject_HEAD
69 PyObject_HEAD
59
70
60 int compressionLevel;
71 int compressionLevel;
61 ZstdCompressionDict* dict;
72 ZstdCompressionDict* dict;
62 ZSTD_CCtx* cctx;
73 ZSTD_CCtx* cctx;
63 ZSTD_CDict* cdict;
74 ZSTD_CDict* cdict;
64 CompressionParametersObject* cparams;
75 CompressionParametersObject* cparams;
65 ZSTD_frameParameters fparams;
76 ZSTD_frameParameters fparams;
66 } ZstdCompressor;
77 } ZstdCompressor;
67
78
68 extern PyTypeObject ZstdCompressorType;
79 extern PyTypeObject ZstdCompressorType;
69
80
70 typedef struct {
81 typedef struct {
71 PyObject_HEAD
82 PyObject_HEAD
72
83
73 ZstdCompressor* compressor;
84 ZstdCompressor* compressor;
74 ZSTD_CStream* cstream;
85 ZSTD_CStream* cstream;
75 ZSTD_outBuffer output;
86 ZSTD_outBuffer output;
76 int finished;
87 int finished;
77 } ZstdCompressionObj;
88 } ZstdCompressionObj;
78
89
79 extern PyTypeObject ZstdCompressionObjType;
90 extern PyTypeObject ZstdCompressionObjType;
80
91
81 typedef struct {
92 typedef struct {
82 PyObject_HEAD
93 PyObject_HEAD
83
94
84 ZstdCompressor* compressor;
95 ZstdCompressor* compressor;
85 PyObject* writer;
96 PyObject* writer;
86 Py_ssize_t sourceSize;
97 Py_ssize_t sourceSize;
87 size_t outSize;
98 size_t outSize;
88 ZSTD_CStream* cstream;
99 ZSTD_CStream* cstream;
89 int entered;
100 int entered;
90 } ZstdCompressionWriter;
101 } ZstdCompressionWriter;
91
102
92 extern PyTypeObject ZstdCompressionWriterType;
103 extern PyTypeObject ZstdCompressionWriterType;
93
104
94 typedef struct {
105 typedef struct {
95 PyObject_HEAD
106 PyObject_HEAD
96
107
97 ZstdCompressor* compressor;
108 ZstdCompressor* compressor;
98 PyObject* reader;
109 PyObject* reader;
99 Py_buffer* buffer;
110 Py_buffer* buffer;
100 Py_ssize_t bufferOffset;
111 Py_ssize_t bufferOffset;
101 Py_ssize_t sourceSize;
112 Py_ssize_t sourceSize;
102 size_t inSize;
113 size_t inSize;
103 size_t outSize;
114 size_t outSize;
104
115
105 ZSTD_CStream* cstream;
116 ZSTD_CStream* cstream;
106 ZSTD_inBuffer input;
117 ZSTD_inBuffer input;
107 ZSTD_outBuffer output;
118 ZSTD_outBuffer output;
108 int finishedOutput;
119 int finishedOutput;
109 int finishedInput;
120 int finishedInput;
110 PyObject* readResult;
121 PyObject* readResult;
111 } ZstdCompressorIterator;
122 } ZstdCompressorIterator;
112
123
113 extern PyTypeObject ZstdCompressorIteratorType;
124 extern PyTypeObject ZstdCompressorIteratorType;
114
125
115 typedef struct {
126 typedef struct {
116 PyObject_HEAD
127 PyObject_HEAD
117
128
118 ZSTD_DCtx* refdctx;
129 ZSTD_DCtx* dctx;
119
130
120 ZstdCompressionDict* dict;
131 ZstdCompressionDict* dict;
121 ZSTD_DDict* ddict;
132 ZSTD_DDict* ddict;
122 } ZstdDecompressor;
133 } ZstdDecompressor;
123
134
124 extern PyTypeObject ZstdDecompressorType;
135 extern PyTypeObject ZstdDecompressorType;
125
136
126 typedef struct {
137 typedef struct {
127 PyObject_HEAD
138 PyObject_HEAD
128
139
129 ZstdDecompressor* decompressor;
140 ZstdDecompressor* decompressor;
130 ZSTD_DStream* dstream;
141 ZSTD_DStream* dstream;
131 int finished;
142 int finished;
132 } ZstdDecompressionObj;
143 } ZstdDecompressionObj;
133
144
134 extern PyTypeObject ZstdDecompressionObjType;
145 extern PyTypeObject ZstdDecompressionObjType;
135
146
136 typedef struct {
147 typedef struct {
137 PyObject_HEAD
148 PyObject_HEAD
138
149
139 ZstdDecompressor* decompressor;
150 ZstdDecompressor* decompressor;
140 PyObject* writer;
151 PyObject* writer;
141 size_t outSize;
152 size_t outSize;
142 ZSTD_DStream* dstream;
153 ZSTD_DStream* dstream;
143 int entered;
154 int entered;
144 } ZstdDecompressionWriter;
155 } ZstdDecompressionWriter;
145
156
146 extern PyTypeObject ZstdDecompressionWriterType;
157 extern PyTypeObject ZstdDecompressionWriterType;
147
158
148 typedef struct {
159 typedef struct {
149 PyObject_HEAD
160 PyObject_HEAD
150
161
151 ZstdDecompressor* decompressor;
162 ZstdDecompressor* decompressor;
152 PyObject* reader;
163 PyObject* reader;
153 Py_buffer* buffer;
164 Py_buffer* buffer;
154 Py_ssize_t bufferOffset;
165 Py_ssize_t bufferOffset;
155 size_t inSize;
166 size_t inSize;
156 size_t outSize;
167 size_t outSize;
157 size_t skipBytes;
168 size_t skipBytes;
158 ZSTD_DStream* dstream;
169 ZSTD_DStream* dstream;
159 ZSTD_inBuffer input;
170 ZSTD_inBuffer input;
160 ZSTD_outBuffer output;
171 ZSTD_outBuffer output;
161 Py_ssize_t readCount;
172 Py_ssize_t readCount;
162 int finishedInput;
173 int finishedInput;
163 int finishedOutput;
174 int finishedOutput;
164 } ZstdDecompressorIterator;
175 } ZstdDecompressorIterator;
165
176
166 extern PyTypeObject ZstdDecompressorIteratorType;
177 extern PyTypeObject ZstdDecompressorIteratorType;
167
178
168 typedef struct {
179 typedef struct {
169 int errored;
180 int errored;
170 PyObject* chunk;
181 PyObject* chunk;
171 } DecompressorIteratorResult;
182 } DecompressorIteratorResult;
172
183
173 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
184 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
174 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
185 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
186 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
175 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
187 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
176 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
188 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
177 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
189 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
178 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
190 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
@@ -1,108 +1,154 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import cffi
9 import cffi
10 import distutils.ccompiler
10 import distutils.ccompiler
11 import os
11 import os
12 import re
12 import subprocess
13 import subprocess
13 import tempfile
14 import tempfile
14
15
15
16
16 HERE = os.path.abspath(os.path.dirname(__file__))
17 HERE = os.path.abspath(os.path.dirname(__file__))
17
18
18 SOURCES = ['zstd/%s' % p for p in (
19 SOURCES = ['zstd/%s' % p for p in (
19 'common/entropy_common.c',
20 'common/entropy_common.c',
20 'common/error_private.c',
21 'common/error_private.c',
21 'common/fse_decompress.c',
22 'common/fse_decompress.c',
23 'common/pool.c',
24 'common/threading.c',
22 'common/xxhash.c',
25 'common/xxhash.c',
23 'common/zstd_common.c',
26 'common/zstd_common.c',
24 'compress/fse_compress.c',
27 'compress/fse_compress.c',
25 'compress/huf_compress.c',
28 'compress/huf_compress.c',
26 'compress/zstd_compress.c',
29 'compress/zstd_compress.c',
27 'decompress/huf_decompress.c',
30 'decompress/huf_decompress.c',
28 'decompress/zstd_decompress.c',
31 'decompress/zstd_decompress.c',
32 'dictBuilder/cover.c',
29 'dictBuilder/divsufsort.c',
33 'dictBuilder/divsufsort.c',
30 'dictBuilder/zdict.c',
34 'dictBuilder/zdict.c',
31 )]
35 )]
32
36
37 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
38 ('zstd.h',),
39 ('common', 'pool.h'),
40 ('dictBuilder', 'zdict.h'),
41 )]
42
33 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
43 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
34 'zstd',
44 'zstd',
35 'zstd/common',
45 'zstd/common',
36 'zstd/compress',
46 'zstd/compress',
37 'zstd/decompress',
47 'zstd/decompress',
38 'zstd/dictBuilder',
48 'zstd/dictBuilder',
39 )]
49 )]
40
50
41 # cffi can't parse some of the primitives in zstd.h. So we invoke the
51 # cffi can't parse some of the primitives in zstd.h. So we invoke the
42 # preprocessor and feed its output into cffi.
52 # preprocessor and feed its output into cffi.
43 compiler = distutils.ccompiler.new_compiler()
53 compiler = distutils.ccompiler.new_compiler()
44
54
45 # Needed for MSVC.
55 # Needed for MSVC.
46 if hasattr(compiler, 'initialize'):
56 if hasattr(compiler, 'initialize'):
47 compiler.initialize()
57 compiler.initialize()
48
58
49 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
59 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
50 # manually.
60 # manually.
51 if compiler.compiler_type == 'unix':
61 if compiler.compiler_type == 'unix':
52 args = list(compiler.executables['compiler'])
62 args = list(compiler.executables['compiler'])
53 args.extend([
63 args.extend([
54 '-E',
64 '-E',
55 '-DZSTD_STATIC_LINKING_ONLY',
65 '-DZSTD_STATIC_LINKING_ONLY',
66 '-DZDICT_STATIC_LINKING_ONLY',
56 ])
67 ])
57 elif compiler.compiler_type == 'msvc':
68 elif compiler.compiler_type == 'msvc':
58 args = [compiler.cc]
69 args = [compiler.cc]
59 args.extend([
70 args.extend([
60 '/EP',
71 '/EP',
61 '/DZSTD_STATIC_LINKING_ONLY',
72 '/DZSTD_STATIC_LINKING_ONLY',
73 '/DZDICT_STATIC_LINKING_ONLY',
62 ])
74 ])
63 else:
75 else:
64 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
76 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
65
77
66 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
78 def preprocess(path):
67 # This can lead to duplicate declarations. So we strip this include from the
79 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
68 # preprocessor invocation.
80 # This can lead to duplicate declarations. So we strip this include from the
81 # preprocessor invocation.
82 with open(path, 'rb') as fh:
83 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
69
84
70 with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh:
85 fd, input_file = tempfile.mkstemp(suffix='.h')
71 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
86 os.write(fd, b''.join(lines))
72
87 os.close(fd)
73 fd, input_file = tempfile.mkstemp(suffix='.h')
74 os.write(fd, b''.join(lines))
75 os.close(fd)
76
88
77 args.append(input_file)
89 try:
90 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
91 output = process.communicate()[0]
92 ret = process.poll()
93 if ret:
94 raise Exception('preprocessor exited with error')
78
95
79 try:
96 return output
80 process = subprocess.Popen(args, stdout=subprocess.PIPE)
97 finally:
81 output = process.communicate()[0]
98 os.unlink(input_file)
82 ret = process.poll()
83 if ret:
84 raise Exception('preprocessor exited with error')
85 finally:
86 os.unlink(input_file)
87
99
88 def normalize_output():
100
101 def normalize_output(output):
89 lines = []
102 lines = []
90 for line in output.splitlines():
103 for line in output.splitlines():
91 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
104 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
92 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
105 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
93 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
106 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
94
107
108 if line.startswith(b'__attribute__((deprecated('):
109 continue
110 elif b'__declspec(deprecated(' in line:
111 continue
112
95 lines.append(line)
113 lines.append(line)
96
114
97 return b'\n'.join(lines)
115 return b'\n'.join(lines)
98
116
117
99 ffi = cffi.FFI()
118 ffi = cffi.FFI()
100 ffi.set_source('_zstd_cffi', '''
119 ffi.set_source('_zstd_cffi', '''
120 #include "mem.h"
101 #define ZSTD_STATIC_LINKING_ONLY
121 #define ZSTD_STATIC_LINKING_ONLY
102 #include "zstd.h"
122 #include "zstd.h"
123 #define ZDICT_STATIC_LINKING_ONLY
124 #include "pool.h"
125 #include "zdict.h"
103 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
126 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
104
127
105 ffi.cdef(normalize_output().decode('latin1'))
128 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
129
130 sources = []
131
132 for header in HEADERS:
133 preprocessed = preprocess(header)
134 sources.append(normalize_output(preprocessed))
135
136 # Do another pass over source and find constants that were preprocessed
137 # away.
138 with open(header, 'rb') as fh:
139 for line in fh:
140 line = line.strip()
141 m = DEFINE.match(line)
142 if not m:
143 continue
144
145 # The parser doesn't like some constants with complex values.
146 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
147 continue
148
149 sources.append(m.group(0) + b' ...')
150
151 ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources))
106
152
107 if __name__ == '__main__':
153 if __name__ == '__main__':
108 ffi.compile()
154 ffi.compile()
@@ -1,69 +1,70 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # Copyright (c) 2016-present, Gregory Szorc
2 # Copyright (c) 2016-present, Gregory Szorc
3 # All rights reserved.
3 # All rights reserved.
4 #
4 #
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 import sys
8 import sys
9 from setuptools import setup
9 from setuptools import setup
10
10
11 try:
11 try:
12 import cffi
12 import cffi
13 except ImportError:
13 except ImportError:
14 cffi = None
14 cffi = None
15
15
16 import setup_zstd
16 import setup_zstd
17
17
18 SUPPORT_LEGACY = False
18 SUPPORT_LEGACY = False
19
19
20 if "--legacy" in sys.argv:
20 if "--legacy" in sys.argv:
21 SUPPORT_LEGACY = True
21 SUPPORT_LEGACY = True
22 sys.argv.remove("--legacy")
22 sys.argv.remove("--legacy")
23
23
24 # Code for obtaining the Extension instance is in its own module to
24 # Code for obtaining the Extension instance is in its own module to
25 # facilitate reuse in other projects.
25 # facilitate reuse in other projects.
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
27
27
28 if cffi:
28 if cffi:
29 import make_cffi
29 import make_cffi
30 extensions.append(make_cffi.ffi.distutils_extension())
30 extensions.append(make_cffi.ffi.distutils_extension())
31
31
32 version = None
32 version = None
33
33
34 with open('c-ext/python-zstandard.h', 'r') as fh:
34 with open('c-ext/python-zstandard.h', 'r') as fh:
35 for line in fh:
35 for line in fh:
36 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
36 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
37 continue
37 continue
38
38
39 version = line.split()[2][1:-1]
39 version = line.split()[2][1:-1]
40 break
40 break
41
41
42 if not version:
42 if not version:
43 raise Exception('could not resolve package version; '
43 raise Exception('could not resolve package version; '
44 'this should never happen')
44 'this should never happen')
45
45
46 setup(
46 setup(
47 name='zstandard',
47 name='zstandard',
48 version=version,
48 version=version,
49 description='Zstandard bindings for Python',
49 description='Zstandard bindings for Python',
50 long_description=open('README.rst', 'r').read(),
50 long_description=open('README.rst', 'r').read(),
51 url='https://github.com/indygreg/python-zstandard',
51 url='https://github.com/indygreg/python-zstandard',
52 author='Gregory Szorc',
52 author='Gregory Szorc',
53 author_email='gregory.szorc@gmail.com',
53 author_email='gregory.szorc@gmail.com',
54 license='BSD',
54 license='BSD',
55 classifiers=[
55 classifiers=[
56 'Development Status :: 4 - Beta',
56 'Development Status :: 4 - Beta',
57 'Intended Audience :: Developers',
57 'Intended Audience :: Developers',
58 'License :: OSI Approved :: BSD License',
58 'License :: OSI Approved :: BSD License',
59 'Programming Language :: C',
59 'Programming Language :: C',
60 'Programming Language :: Python :: 2.6',
60 'Programming Language :: Python :: 2.6',
61 'Programming Language :: Python :: 2.7',
61 'Programming Language :: Python :: 2.7',
62 'Programming Language :: Python :: 3.3',
62 'Programming Language :: Python :: 3.3',
63 'Programming Language :: Python :: 3.4',
63 'Programming Language :: Python :: 3.4',
64 'Programming Language :: Python :: 3.5',
64 'Programming Language :: Python :: 3.5',
65 'Programming Language :: Python :: 3.6',
65 ],
66 ],
66 keywords='zstandard zstd compression',
67 keywords='zstandard zstd compression',
67 ext_modules=extensions,
68 ext_modules=extensions,
68 test_suite='tests',
69 test_suite='tests',
69 )
70 )
@@ -1,91 +1,96 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 import os
7 import os
8 from distutils.extension import Extension
8 from distutils.extension import Extension
9
9
10
10
11 zstd_sources = ['zstd/%s' % p for p in (
11 zstd_sources = ['zstd/%s' % p for p in (
12 'common/entropy_common.c',
12 'common/entropy_common.c',
13 'common/error_private.c',
13 'common/error_private.c',
14 'common/fse_decompress.c',
14 'common/fse_decompress.c',
15 'common/pool.c',
16 'common/threading.c',
15 'common/xxhash.c',
17 'common/xxhash.c',
16 'common/zstd_common.c',
18 'common/zstd_common.c',
17 'compress/fse_compress.c',
19 'compress/fse_compress.c',
18 'compress/huf_compress.c',
20 'compress/huf_compress.c',
19 'compress/zstd_compress.c',
21 'compress/zstd_compress.c',
20 'decompress/huf_decompress.c',
22 'decompress/huf_decompress.c',
21 'decompress/zstd_decompress.c',
23 'decompress/zstd_decompress.c',
24 'dictBuilder/cover.c',
22 'dictBuilder/divsufsort.c',
25 'dictBuilder/divsufsort.c',
23 'dictBuilder/zdict.c',
26 'dictBuilder/zdict.c',
24 )]
27 )]
25
28
26 zstd_sources_legacy = ['zstd/%s' % p for p in (
29 zstd_sources_legacy = ['zstd/%s' % p for p in (
30 'deprecated/zbuff_common.c',
27 'deprecated/zbuff_compress.c',
31 'deprecated/zbuff_compress.c',
28 'deprecated/zbuff_decompress.c',
32 'deprecated/zbuff_decompress.c',
29 'legacy/zstd_v01.c',
33 'legacy/zstd_v01.c',
30 'legacy/zstd_v02.c',
34 'legacy/zstd_v02.c',
31 'legacy/zstd_v03.c',
35 'legacy/zstd_v03.c',
32 'legacy/zstd_v04.c',
36 'legacy/zstd_v04.c',
33 'legacy/zstd_v05.c',
37 'legacy/zstd_v05.c',
34 'legacy/zstd_v06.c',
38 'legacy/zstd_v06.c',
35 'legacy/zstd_v07.c'
39 'legacy/zstd_v07.c'
36 )]
40 )]
37
41
38 zstd_includes = [
42 zstd_includes = [
39 'c-ext',
43 'c-ext',
40 'zstd',
44 'zstd',
41 'zstd/common',
45 'zstd/common',
42 'zstd/compress',
46 'zstd/compress',
43 'zstd/decompress',
47 'zstd/decompress',
44 'zstd/dictBuilder',
48 'zstd/dictBuilder',
45 ]
49 ]
46
50
47 zstd_includes_legacy = [
51 zstd_includes_legacy = [
48 'zstd/deprecated',
52 'zstd/deprecated',
49 'zstd/legacy',
53 'zstd/legacy',
50 ]
54 ]
51
55
52 ext_sources = [
56 ext_sources = [
53 'zstd.c',
57 'zstd.c',
54 'c-ext/compressiondict.c',
58 'c-ext/compressiondict.c',
55 'c-ext/compressobj.c',
59 'c-ext/compressobj.c',
56 'c-ext/compressor.c',
60 'c-ext/compressor.c',
57 'c-ext/compressoriterator.c',
61 'c-ext/compressoriterator.c',
58 'c-ext/compressionparams.c',
62 'c-ext/compressionparams.c',
59 'c-ext/compressionwriter.c',
63 'c-ext/compressionwriter.c',
60 'c-ext/constants.c',
64 'c-ext/constants.c',
61 'c-ext/decompressobj.c',
65 'c-ext/decompressobj.c',
62 'c-ext/decompressor.c',
66 'c-ext/decompressor.c',
63 'c-ext/decompressoriterator.c',
67 'c-ext/decompressoriterator.c',
64 'c-ext/decompressionwriter.c',
68 'c-ext/decompressionwriter.c',
65 'c-ext/dictparams.c',
69 'c-ext/dictparams.c',
70 'c-ext/frameparams.c',
66 ]
71 ]
67
72
68 zstd_depends = [
73 zstd_depends = [
69 'c-ext/python-zstandard.h',
74 'c-ext/python-zstandard.h',
70 ]
75 ]
71
76
72
77
73 def get_c_extension(support_legacy=False, name='zstd'):
78 def get_c_extension(support_legacy=False, name='zstd'):
74 """Obtain a distutils.extension.Extension for the C extension."""
79 """Obtain a distutils.extension.Extension for the C extension."""
75 root = os.path.abspath(os.path.dirname(__file__))
80 root = os.path.abspath(os.path.dirname(__file__))
76
81
77 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
82 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
78 if support_legacy:
83 if support_legacy:
79 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
84 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
80
85
81 include_dirs = [os.path.join(root, d) for d in zstd_includes]
86 include_dirs = [os.path.join(root, d) for d in zstd_includes]
82 if support_legacy:
87 if support_legacy:
83 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
88 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
84
89
85 depends = [os.path.join(root, p) for p in zstd_depends]
90 depends = [os.path.join(root, p) for p in zstd_depends]
86
91
87 # TODO compile with optimizations.
92 # TODO compile with optimizations.
88 return Extension(name, sources,
93 return Extension(name, sources,
89 include_dirs=include_dirs,
94 include_dirs=include_dirs,
90 depends=depends,
95 depends=depends,
91 extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
96 extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
@@ -1,15 +1,61 b''
1 import inspect
1 import io
2 import io
3 import types
4
5
6 def make_cffi(cls):
7 """Decorator to add CFFI versions of each test method."""
8
9 try:
10 import zstd_cffi
11 except ImportError:
12 return cls
13
14 # If CFFI version is available, dynamically construct test methods
15 # that use it.
16
17 for attr in dir(cls):
18 fn = getattr(cls, attr)
19 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
20 continue
21
22 if not fn.__name__.startswith('test_'):
23 continue
24
25 name = '%s_cffi' % fn.__name__
26
27 # Replace the "zstd" symbol with the CFFI module instance. Then copy
28 # the function object and install it in a new attribute.
29 if isinstance(fn, types.FunctionType):
30 globs = dict(fn.__globals__)
31 globs['zstd'] = zstd_cffi
32 new_fn = types.FunctionType(fn.__code__, globs, name,
33 fn.__defaults__, fn.__closure__)
34 new_method = new_fn
35 else:
36 globs = dict(fn.__func__.func_globals)
37 globs['zstd'] = zstd_cffi
38 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
39 fn.__func__.func_defaults,
40 fn.__func__.func_closure)
41 new_method = types.UnboundMethodType(new_fn, fn.im_self,
42 fn.im_class)
43
44 setattr(cls, name, new_method)
45
46 return cls
47
2
48
3 class OpCountingBytesIO(io.BytesIO):
49 class OpCountingBytesIO(io.BytesIO):
4 def __init__(self, *args, **kwargs):
50 def __init__(self, *args, **kwargs):
5 self._read_count = 0
51 self._read_count = 0
6 self._write_count = 0
52 self._write_count = 0
7 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
53 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
8
54
9 def read(self, *args):
55 def read(self, *args):
10 self._read_count += 1
56 self._read_count += 1
11 return super(OpCountingBytesIO, self).read(*args)
57 return super(OpCountingBytesIO, self).read(*args)
12
58
13 def write(self, data):
59 def write(self, data):
14 self._write_count += 1
60 self._write_count += 1
15 return super(OpCountingBytesIO, self).write(data)
61 return super(OpCountingBytesIO, self).write(data)
@@ -1,536 +1,675 b''
1 import hashlib
1 import hashlib
2 import io
2 import io
3 import struct
3 import struct
4 import sys
4 import sys
5
5
6 try:
6 try:
7 import unittest2 as unittest
7 import unittest2 as unittest
8 except ImportError:
8 except ImportError:
9 import unittest
9 import unittest
10
10
11 import zstd
11 import zstd
12
12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14
17
15
18
16 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
17 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
18 else:
21 else:
19 next = lambda it: it.next()
22 next = lambda it: it.next()
20
23
21
24
25 @make_cffi
22 class TestCompressor(unittest.TestCase):
26 class TestCompressor(unittest.TestCase):
23 def test_level_bounds(self):
27 def test_level_bounds(self):
24 with self.assertRaises(ValueError):
28 with self.assertRaises(ValueError):
25 zstd.ZstdCompressor(level=0)
29 zstd.ZstdCompressor(level=0)
26
30
27 with self.assertRaises(ValueError):
31 with self.assertRaises(ValueError):
28 zstd.ZstdCompressor(level=23)
32 zstd.ZstdCompressor(level=23)
29
33
30
34
35 @make_cffi
31 class TestCompressor_compress(unittest.TestCase):
36 class TestCompressor_compress(unittest.TestCase):
32 def test_compress_empty(self):
37 def test_compress_empty(self):
33 cctx = zstd.ZstdCompressor(level=1)
38 cctx = zstd.ZstdCompressor(level=1)
34 cctx.compress(b'')
39 result = cctx.compress(b'')
35
40 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
36 cctx = zstd.ZstdCompressor(level=22)
41 params = zstd.get_frame_parameters(result)
37 cctx.compress(b'')
42 self.assertEqual(params.content_size, 0)
38
43 self.assertEqual(params.window_size, 524288)
39 def test_compress_empty(self):
44 self.assertEqual(params.dict_id, 0)
40 cctx = zstd.ZstdCompressor(level=1)
45 self.assertFalse(params.has_checksum, 0)
41 self.assertEqual(cctx.compress(b''),
42 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
43
46
44 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
47 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
45 # is fixed.
48 # is fixed.
46 cctx = zstd.ZstdCompressor(write_content_size=True)
49 cctx = zstd.ZstdCompressor(write_content_size=True)
47 with self.assertRaises(ValueError):
50 with self.assertRaises(ValueError):
48 cctx.compress(b'')
51 cctx.compress(b'')
49
52
50 cctx.compress(b'', allow_empty=True)
53 cctx.compress(b'', allow_empty=True)
51
54
52 def test_compress_large(self):
55 def test_compress_large(self):
53 chunks = []
56 chunks = []
54 for i in range(255):
57 for i in range(255):
55 chunks.append(struct.Struct('>B').pack(i) * 16384)
58 chunks.append(struct.Struct('>B').pack(i) * 16384)
56
59
57 cctx = zstd.ZstdCompressor(level=3)
60 cctx = zstd.ZstdCompressor(level=3)
58 result = cctx.compress(b''.join(chunks))
61 result = cctx.compress(b''.join(chunks))
59 self.assertEqual(len(result), 999)
62 self.assertEqual(len(result), 999)
60 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
63 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
61
64
65 # This matches the test for read_from() below.
66 cctx = zstd.ZstdCompressor(level=1)
67 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
68 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
69 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
70 b'\x02\x09\x00\x00\x6f')
71
62 def test_write_checksum(self):
72 def test_write_checksum(self):
63 cctx = zstd.ZstdCompressor(level=1)
73 cctx = zstd.ZstdCompressor(level=1)
64 no_checksum = cctx.compress(b'foobar')
74 no_checksum = cctx.compress(b'foobar')
65 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
75 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
66 with_checksum = cctx.compress(b'foobar')
76 with_checksum = cctx.compress(b'foobar')
67
77
68 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
78 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
69
79
80 no_params = zstd.get_frame_parameters(no_checksum)
81 with_params = zstd.get_frame_parameters(with_checksum)
82
83 self.assertFalse(no_params.has_checksum)
84 self.assertTrue(with_params.has_checksum)
85
70 def test_write_content_size(self):
86 def test_write_content_size(self):
71 cctx = zstd.ZstdCompressor(level=1)
87 cctx = zstd.ZstdCompressor(level=1)
72 no_size = cctx.compress(b'foobar' * 256)
88 no_size = cctx.compress(b'foobar' * 256)
73 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
89 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
74 with_size = cctx.compress(b'foobar' * 256)
90 with_size = cctx.compress(b'foobar' * 256)
75
91
76 self.assertEqual(len(with_size), len(no_size) + 1)
92 self.assertEqual(len(with_size), len(no_size) + 1)
77
93
94 no_params = zstd.get_frame_parameters(no_size)
95 with_params = zstd.get_frame_parameters(with_size)
96 self.assertEqual(no_params.content_size, 0)
97 self.assertEqual(with_params.content_size, 1536)
98
78 def test_no_dict_id(self):
99 def test_no_dict_id(self):
79 samples = []
100 samples = []
80 for i in range(128):
101 for i in range(128):
81 samples.append(b'foo' * 64)
102 samples.append(b'foo' * 64)
82 samples.append(b'bar' * 64)
103 samples.append(b'bar' * 64)
83 samples.append(b'foobar' * 64)
104 samples.append(b'foobar' * 64)
84
105
85 d = zstd.train_dictionary(1024, samples)
106 d = zstd.train_dictionary(1024, samples)
86
107
87 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
108 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
88 with_dict_id = cctx.compress(b'foobarfoobar')
109 with_dict_id = cctx.compress(b'foobarfoobar')
89
110
90 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
111 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
91 no_dict_id = cctx.compress(b'foobarfoobar')
112 no_dict_id = cctx.compress(b'foobarfoobar')
92
113
93 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
114 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
94
115
116 no_params = zstd.get_frame_parameters(no_dict_id)
117 with_params = zstd.get_frame_parameters(with_dict_id)
118 self.assertEqual(no_params.dict_id, 0)
119 self.assertEqual(with_params.dict_id, 1584102229)
120
95 def test_compress_dict_multiple(self):
121 def test_compress_dict_multiple(self):
96 samples = []
122 samples = []
97 for i in range(128):
123 for i in range(128):
98 samples.append(b'foo' * 64)
124 samples.append(b'foo' * 64)
99 samples.append(b'bar' * 64)
125 samples.append(b'bar' * 64)
100 samples.append(b'foobar' * 64)
126 samples.append(b'foobar' * 64)
101
127
102 d = zstd.train_dictionary(8192, samples)
128 d = zstd.train_dictionary(8192, samples)
103
129
104 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
130 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
105
131
106 for i in range(32):
132 for i in range(32):
107 cctx.compress(b'foo bar foobar foo bar foobar')
133 cctx.compress(b'foo bar foobar foo bar foobar')
108
134
109
135
136 @make_cffi
110 class TestCompressor_compressobj(unittest.TestCase):
137 class TestCompressor_compressobj(unittest.TestCase):
111 def test_compressobj_empty(self):
138 def test_compressobj_empty(self):
112 cctx = zstd.ZstdCompressor(level=1)
139 cctx = zstd.ZstdCompressor(level=1)
113 cobj = cctx.compressobj()
140 cobj = cctx.compressobj()
114 self.assertEqual(cobj.compress(b''), b'')
141 self.assertEqual(cobj.compress(b''), b'')
115 self.assertEqual(cobj.flush(),
142 self.assertEqual(cobj.flush(),
116 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
143 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
117
144
118 def test_compressobj_large(self):
145 def test_compressobj_large(self):
119 chunks = []
146 chunks = []
120 for i in range(255):
147 for i in range(255):
121 chunks.append(struct.Struct('>B').pack(i) * 16384)
148 chunks.append(struct.Struct('>B').pack(i) * 16384)
122
149
123 cctx = zstd.ZstdCompressor(level=3)
150 cctx = zstd.ZstdCompressor(level=3)
124 cobj = cctx.compressobj()
151 cobj = cctx.compressobj()
125
152
126 result = cobj.compress(b''.join(chunks)) + cobj.flush()
153 result = cobj.compress(b''.join(chunks)) + cobj.flush()
127 self.assertEqual(len(result), 999)
154 self.assertEqual(len(result), 999)
128 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
155 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
129
156
157 params = zstd.get_frame_parameters(result)
158 self.assertEqual(params.content_size, 0)
159 self.assertEqual(params.window_size, 1048576)
160 self.assertEqual(params.dict_id, 0)
161 self.assertFalse(params.has_checksum)
162
130 def test_write_checksum(self):
163 def test_write_checksum(self):
131 cctx = zstd.ZstdCompressor(level=1)
164 cctx = zstd.ZstdCompressor(level=1)
132 cobj = cctx.compressobj()
165 cobj = cctx.compressobj()
133 no_checksum = cobj.compress(b'foobar') + cobj.flush()
166 no_checksum = cobj.compress(b'foobar') + cobj.flush()
134 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
167 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
135 cobj = cctx.compressobj()
168 cobj = cctx.compressobj()
136 with_checksum = cobj.compress(b'foobar') + cobj.flush()
169 with_checksum = cobj.compress(b'foobar') + cobj.flush()
137
170
171 no_params = zstd.get_frame_parameters(no_checksum)
172 with_params = zstd.get_frame_parameters(with_checksum)
173 self.assertEqual(no_params.content_size, 0)
174 self.assertEqual(with_params.content_size, 0)
175 self.assertEqual(no_params.dict_id, 0)
176 self.assertEqual(with_params.dict_id, 0)
177 self.assertFalse(no_params.has_checksum)
178 self.assertTrue(with_params.has_checksum)
179
138 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
180 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
139
181
140 def test_write_content_size(self):
182 def test_write_content_size(self):
141 cctx = zstd.ZstdCompressor(level=1)
183 cctx = zstd.ZstdCompressor(level=1)
142 cobj = cctx.compressobj(size=len(b'foobar' * 256))
184 cobj = cctx.compressobj(size=len(b'foobar' * 256))
143 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
185 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
144 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
186 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
145 cobj = cctx.compressobj(size=len(b'foobar' * 256))
187 cobj = cctx.compressobj(size=len(b'foobar' * 256))
146 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
188 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
147
189
190 no_params = zstd.get_frame_parameters(no_size)
191 with_params = zstd.get_frame_parameters(with_size)
192 self.assertEqual(no_params.content_size, 0)
193 self.assertEqual(with_params.content_size, 1536)
194 self.assertEqual(no_params.dict_id, 0)
195 self.assertEqual(with_params.dict_id, 0)
196 self.assertFalse(no_params.has_checksum)
197 self.assertFalse(with_params.has_checksum)
198
148 self.assertEqual(len(with_size), len(no_size) + 1)
199 self.assertEqual(len(with_size), len(no_size) + 1)
149
200
150 def test_compress_after_finished(self):
201 def test_compress_after_finished(self):
151 cctx = zstd.ZstdCompressor()
202 cctx = zstd.ZstdCompressor()
152 cobj = cctx.compressobj()
203 cobj = cctx.compressobj()
153
204
154 cobj.compress(b'foo')
205 cobj.compress(b'foo')
155 cobj.flush()
206 cobj.flush()
156
207
157 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'):
208 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'):
158 cobj.compress(b'foo')
209 cobj.compress(b'foo')
159
210
160 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
211 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
161 cobj.flush()
212 cobj.flush()
162
213
163 def test_flush_block_repeated(self):
214 def test_flush_block_repeated(self):
164 cctx = zstd.ZstdCompressor(level=1)
215 cctx = zstd.ZstdCompressor(level=1)
165 cobj = cctx.compressobj()
216 cobj = cctx.compressobj()
166
217
167 self.assertEqual(cobj.compress(b'foo'), b'')
218 self.assertEqual(cobj.compress(b'foo'), b'')
168 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
219 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
169 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
220 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
170 self.assertEqual(cobj.compress(b'bar'), b'')
221 self.assertEqual(cobj.compress(b'bar'), b'')
171 # 3 byte header plus content.
222 # 3 byte header plus content.
172 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
223 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
173
224
174 def test_flush_empty_block(self):
225 def test_flush_empty_block(self):
175 cctx = zstd.ZstdCompressor(write_checksum=True)
226 cctx = zstd.ZstdCompressor(write_checksum=True)
176 cobj = cctx.compressobj()
227 cobj = cctx.compressobj()
177
228
178 cobj.compress(b'foobar')
229 cobj.compress(b'foobar')
179 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
230 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
180 # No-op if no block is active (this is internal to zstd).
231 # No-op if no block is active (this is internal to zstd).
181 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
232 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
182
233
183 trailing = cobj.flush()
234 trailing = cobj.flush()
184 # 3 bytes block header + 4 bytes frame checksum
235 # 3 bytes block header + 4 bytes frame checksum
185 self.assertEqual(len(trailing), 7)
236 self.assertEqual(len(trailing), 7)
186 header = trailing[0:3]
237 header = trailing[0:3]
187 self.assertEqual(header, b'\x01\x00\x00')
238 self.assertEqual(header, b'\x01\x00\x00')
188
239
189
240
241 @make_cffi
190 class TestCompressor_copy_stream(unittest.TestCase):
242 class TestCompressor_copy_stream(unittest.TestCase):
191 def test_no_read(self):
243 def test_no_read(self):
192 source = object()
244 source = object()
193 dest = io.BytesIO()
245 dest = io.BytesIO()
194
246
195 cctx = zstd.ZstdCompressor()
247 cctx = zstd.ZstdCompressor()
196 with self.assertRaises(ValueError):
248 with self.assertRaises(ValueError):
197 cctx.copy_stream(source, dest)
249 cctx.copy_stream(source, dest)
198
250
199 def test_no_write(self):
251 def test_no_write(self):
200 source = io.BytesIO()
252 source = io.BytesIO()
201 dest = object()
253 dest = object()
202
254
203 cctx = zstd.ZstdCompressor()
255 cctx = zstd.ZstdCompressor()
204 with self.assertRaises(ValueError):
256 with self.assertRaises(ValueError):
205 cctx.copy_stream(source, dest)
257 cctx.copy_stream(source, dest)
206
258
207 def test_empty(self):
259 def test_empty(self):
208 source = io.BytesIO()
260 source = io.BytesIO()
209 dest = io.BytesIO()
261 dest = io.BytesIO()
210
262
211 cctx = zstd.ZstdCompressor(level=1)
263 cctx = zstd.ZstdCompressor(level=1)
212 r, w = cctx.copy_stream(source, dest)
264 r, w = cctx.copy_stream(source, dest)
213 self.assertEqual(int(r), 0)
265 self.assertEqual(int(r), 0)
214 self.assertEqual(w, 9)
266 self.assertEqual(w, 9)
215
267
216 self.assertEqual(dest.getvalue(),
268 self.assertEqual(dest.getvalue(),
217 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
269 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
218
270
219 def test_large_data(self):
271 def test_large_data(self):
220 source = io.BytesIO()
272 source = io.BytesIO()
221 for i in range(255):
273 for i in range(255):
222 source.write(struct.Struct('>B').pack(i) * 16384)
274 source.write(struct.Struct('>B').pack(i) * 16384)
223 source.seek(0)
275 source.seek(0)
224
276
225 dest = io.BytesIO()
277 dest = io.BytesIO()
226 cctx = zstd.ZstdCompressor()
278 cctx = zstd.ZstdCompressor()
227 r, w = cctx.copy_stream(source, dest)
279 r, w = cctx.copy_stream(source, dest)
228
280
229 self.assertEqual(r, 255 * 16384)
281 self.assertEqual(r, 255 * 16384)
230 self.assertEqual(w, 999)
282 self.assertEqual(w, 999)
231
283
284 params = zstd.get_frame_parameters(dest.getvalue())
285 self.assertEqual(params.content_size, 0)
286 self.assertEqual(params.window_size, 1048576)
287 self.assertEqual(params.dict_id, 0)
288 self.assertFalse(params.has_checksum)
289
232 def test_write_checksum(self):
290 def test_write_checksum(self):
233 source = io.BytesIO(b'foobar')
291 source = io.BytesIO(b'foobar')
234 no_checksum = io.BytesIO()
292 no_checksum = io.BytesIO()
235
293
236 cctx = zstd.ZstdCompressor(level=1)
294 cctx = zstd.ZstdCompressor(level=1)
237 cctx.copy_stream(source, no_checksum)
295 cctx.copy_stream(source, no_checksum)
238
296
239 source.seek(0)
297 source.seek(0)
240 with_checksum = io.BytesIO()
298 with_checksum = io.BytesIO()
241 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
299 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
242 cctx.copy_stream(source, with_checksum)
300 cctx.copy_stream(source, with_checksum)
243
301
244 self.assertEqual(len(with_checksum.getvalue()),
302 self.assertEqual(len(with_checksum.getvalue()),
245 len(no_checksum.getvalue()) + 4)
303 len(no_checksum.getvalue()) + 4)
246
304
305 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
306 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
307 self.assertEqual(no_params.content_size, 0)
308 self.assertEqual(with_params.content_size, 0)
309 self.assertEqual(no_params.dict_id, 0)
310 self.assertEqual(with_params.dict_id, 0)
311 self.assertFalse(no_params.has_checksum)
312 self.assertTrue(with_params.has_checksum)
313
247 def test_write_content_size(self):
314 def test_write_content_size(self):
248 source = io.BytesIO(b'foobar' * 256)
315 source = io.BytesIO(b'foobar' * 256)
249 no_size = io.BytesIO()
316 no_size = io.BytesIO()
250
317
251 cctx = zstd.ZstdCompressor(level=1)
318 cctx = zstd.ZstdCompressor(level=1)
252 cctx.copy_stream(source, no_size)
319 cctx.copy_stream(source, no_size)
253
320
254 source.seek(0)
321 source.seek(0)
255 with_size = io.BytesIO()
322 with_size = io.BytesIO()
256 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
323 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
257 cctx.copy_stream(source, with_size)
324 cctx.copy_stream(source, with_size)
258
325
259 # Source content size is unknown, so no content size written.
326 # Source content size is unknown, so no content size written.
260 self.assertEqual(len(with_size.getvalue()),
327 self.assertEqual(len(with_size.getvalue()),
261 len(no_size.getvalue()))
328 len(no_size.getvalue()))
262
329
263 source.seek(0)
330 source.seek(0)
264 with_size = io.BytesIO()
331 with_size = io.BytesIO()
265 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
332 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
266
333
267 # We specified source size, so content size header is present.
334 # We specified source size, so content size header is present.
268 self.assertEqual(len(with_size.getvalue()),
335 self.assertEqual(len(with_size.getvalue()),
269 len(no_size.getvalue()) + 1)
336 len(no_size.getvalue()) + 1)
270
337
338 no_params = zstd.get_frame_parameters(no_size.getvalue())
339 with_params = zstd.get_frame_parameters(with_size.getvalue())
340 self.assertEqual(no_params.content_size, 0)
341 self.assertEqual(with_params.content_size, 1536)
342 self.assertEqual(no_params.dict_id, 0)
343 self.assertEqual(with_params.dict_id, 0)
344 self.assertFalse(no_params.has_checksum)
345 self.assertFalse(with_params.has_checksum)
346
271 def test_read_write_size(self):
347 def test_read_write_size(self):
272 source = OpCountingBytesIO(b'foobarfoobar')
348 source = OpCountingBytesIO(b'foobarfoobar')
273 dest = OpCountingBytesIO()
349 dest = OpCountingBytesIO()
274 cctx = zstd.ZstdCompressor()
350 cctx = zstd.ZstdCompressor()
275 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
351 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
276
352
277 self.assertEqual(r, len(source.getvalue()))
353 self.assertEqual(r, len(source.getvalue()))
278 self.assertEqual(w, 21)
354 self.assertEqual(w, 21)
279 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
355 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
280 self.assertEqual(dest._write_count, len(dest.getvalue()))
356 self.assertEqual(dest._write_count, len(dest.getvalue()))
281
357
282
358
283 def compress(data, level):
359 def compress(data, level):
284 buffer = io.BytesIO()
360 buffer = io.BytesIO()
285 cctx = zstd.ZstdCompressor(level=level)
361 cctx = zstd.ZstdCompressor(level=level)
286 with cctx.write_to(buffer) as compressor:
362 with cctx.write_to(buffer) as compressor:
287 compressor.write(data)
363 compressor.write(data)
288 return buffer.getvalue()
364 return buffer.getvalue()
289
365
290
366
367 @make_cffi
291 class TestCompressor_write_to(unittest.TestCase):
368 class TestCompressor_write_to(unittest.TestCase):
292 def test_empty(self):
369 def test_empty(self):
293 self.assertEqual(compress(b'', 1),
370 result = compress(b'', 1)
294 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
371 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
372
373 params = zstd.get_frame_parameters(result)
374 self.assertEqual(params.content_size, 0)
375 self.assertEqual(params.window_size, 524288)
376 self.assertEqual(params.dict_id, 0)
377 self.assertFalse(params.has_checksum)
295
378
296 def test_multiple_compress(self):
379 def test_multiple_compress(self):
297 buffer = io.BytesIO()
380 buffer = io.BytesIO()
298 cctx = zstd.ZstdCompressor(level=5)
381 cctx = zstd.ZstdCompressor(level=5)
299 with cctx.write_to(buffer) as compressor:
382 with cctx.write_to(buffer) as compressor:
300 compressor.write(b'foo')
383 self.assertEqual(compressor.write(b'foo'), 0)
301 compressor.write(b'bar')
384 self.assertEqual(compressor.write(b'bar'), 0)
302 compressor.write(b'x' * 8192)
385 self.assertEqual(compressor.write(b'x' * 8192), 0)
303
386
304 result = buffer.getvalue()
387 result = buffer.getvalue()
305 self.assertEqual(result,
388 self.assertEqual(result,
306 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
389 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
307 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
390 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
308
391
309 def test_dictionary(self):
392 def test_dictionary(self):
310 samples = []
393 samples = []
311 for i in range(128):
394 for i in range(128):
312 samples.append(b'foo' * 64)
395 samples.append(b'foo' * 64)
313 samples.append(b'bar' * 64)
396 samples.append(b'bar' * 64)
314 samples.append(b'foobar' * 64)
397 samples.append(b'foobar' * 64)
315
398
316 d = zstd.train_dictionary(8192, samples)
399 d = zstd.train_dictionary(8192, samples)
317
400
318 buffer = io.BytesIO()
401 buffer = io.BytesIO()
319 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
402 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
320 with cctx.write_to(buffer) as compressor:
403 with cctx.write_to(buffer) as compressor:
321 compressor.write(b'foo')
404 self.assertEqual(compressor.write(b'foo'), 0)
322 compressor.write(b'bar')
405 self.assertEqual(compressor.write(b'bar'), 0)
323 compressor.write(b'foo' * 16384)
406 self.assertEqual(compressor.write(b'foo' * 16384), 634)
324
407
325 compressed = buffer.getvalue()
408 compressed = buffer.getvalue()
409
410 params = zstd.get_frame_parameters(compressed)
411 self.assertEqual(params.content_size, 0)
412 self.assertEqual(params.window_size, 1024)
413 self.assertEqual(params.dict_id, d.dict_id())
414 self.assertFalse(params.has_checksum)
415
416 self.assertEqual(compressed[0:32],
417 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
418 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
419 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
420
326 h = hashlib.sha1(compressed).hexdigest()
421 h = hashlib.sha1(compressed).hexdigest()
327 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
422 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
328
423
329 def test_compression_params(self):
424 def test_compression_params(self):
330 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
425 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
331
426
332 buffer = io.BytesIO()
427 buffer = io.BytesIO()
333 cctx = zstd.ZstdCompressor(compression_params=params)
428 cctx = zstd.ZstdCompressor(compression_params=params)
334 with cctx.write_to(buffer) as compressor:
429 with cctx.write_to(buffer) as compressor:
335 compressor.write(b'foo')
430 self.assertEqual(compressor.write(b'foo'), 0)
336 compressor.write(b'bar')
431 self.assertEqual(compressor.write(b'bar'), 0)
337 compressor.write(b'foobar' * 16384)
432 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
338
433
339 compressed = buffer.getvalue()
434 compressed = buffer.getvalue()
435
436 params = zstd.get_frame_parameters(compressed)
437 self.assertEqual(params.content_size, 0)
438 self.assertEqual(params.window_size, 1048576)
439 self.assertEqual(params.dict_id, 0)
440 self.assertFalse(params.has_checksum)
441
340 h = hashlib.sha1(compressed).hexdigest()
442 h = hashlib.sha1(compressed).hexdigest()
341 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
443 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
342
444
343 def test_write_checksum(self):
445 def test_write_checksum(self):
344 no_checksum = io.BytesIO()
446 no_checksum = io.BytesIO()
345 cctx = zstd.ZstdCompressor(level=1)
447 cctx = zstd.ZstdCompressor(level=1)
346 with cctx.write_to(no_checksum) as compressor:
448 with cctx.write_to(no_checksum) as compressor:
347 compressor.write(b'foobar')
449 self.assertEqual(compressor.write(b'foobar'), 0)
348
450
349 with_checksum = io.BytesIO()
451 with_checksum = io.BytesIO()
350 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
452 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
351 with cctx.write_to(with_checksum) as compressor:
453 with cctx.write_to(with_checksum) as compressor:
352 compressor.write(b'foobar')
454 self.assertEqual(compressor.write(b'foobar'), 0)
455
456 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
457 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
458 self.assertEqual(no_params.content_size, 0)
459 self.assertEqual(with_params.content_size, 0)
460 self.assertEqual(no_params.dict_id, 0)
461 self.assertEqual(with_params.dict_id, 0)
462 self.assertFalse(no_params.has_checksum)
463 self.assertTrue(with_params.has_checksum)
353
464
354 self.assertEqual(len(with_checksum.getvalue()),
465 self.assertEqual(len(with_checksum.getvalue()),
355 len(no_checksum.getvalue()) + 4)
466 len(no_checksum.getvalue()) + 4)
356
467
357 def test_write_content_size(self):
468 def test_write_content_size(self):
358 no_size = io.BytesIO()
469 no_size = io.BytesIO()
359 cctx = zstd.ZstdCompressor(level=1)
470 cctx = zstd.ZstdCompressor(level=1)
360 with cctx.write_to(no_size) as compressor:
471 with cctx.write_to(no_size) as compressor:
361 compressor.write(b'foobar' * 256)
472 self.assertEqual(compressor.write(b'foobar' * 256), 0)
362
473
363 with_size = io.BytesIO()
474 with_size = io.BytesIO()
364 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
475 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
365 with cctx.write_to(with_size) as compressor:
476 with cctx.write_to(with_size) as compressor:
366 compressor.write(b'foobar' * 256)
477 self.assertEqual(compressor.write(b'foobar' * 256), 0)
367
478
368 # Source size is not known in streaming mode, so header not
479 # Source size is not known in streaming mode, so header not
369 # written.
480 # written.
370 self.assertEqual(len(with_size.getvalue()),
481 self.assertEqual(len(with_size.getvalue()),
371 len(no_size.getvalue()))
482 len(no_size.getvalue()))
372
483
373 # Declaring size will write the header.
484 # Declaring size will write the header.
374 with_size = io.BytesIO()
485 with_size = io.BytesIO()
375 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
486 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
376 compressor.write(b'foobar' * 256)
487 self.assertEqual(compressor.write(b'foobar' * 256), 0)
488
489 no_params = zstd.get_frame_parameters(no_size.getvalue())
490 with_params = zstd.get_frame_parameters(with_size.getvalue())
491 self.assertEqual(no_params.content_size, 0)
492 self.assertEqual(with_params.content_size, 1536)
493 self.assertEqual(no_params.dict_id, 0)
494 self.assertEqual(with_params.dict_id, 0)
495 self.assertFalse(no_params.has_checksum)
496 self.assertFalse(with_params.has_checksum)
377
497
378 self.assertEqual(len(with_size.getvalue()),
498 self.assertEqual(len(with_size.getvalue()),
379 len(no_size.getvalue()) + 1)
499 len(no_size.getvalue()) + 1)
380
500
381 def test_no_dict_id(self):
501 def test_no_dict_id(self):
382 samples = []
502 samples = []
383 for i in range(128):
503 for i in range(128):
384 samples.append(b'foo' * 64)
504 samples.append(b'foo' * 64)
385 samples.append(b'bar' * 64)
505 samples.append(b'bar' * 64)
386 samples.append(b'foobar' * 64)
506 samples.append(b'foobar' * 64)
387
507
388 d = zstd.train_dictionary(1024, samples)
508 d = zstd.train_dictionary(1024, samples)
389
509
390 with_dict_id = io.BytesIO()
510 with_dict_id = io.BytesIO()
391 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
511 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
392 with cctx.write_to(with_dict_id) as compressor:
512 with cctx.write_to(with_dict_id) as compressor:
393 compressor.write(b'foobarfoobar')
513 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
394
514
395 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
515 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
396 no_dict_id = io.BytesIO()
516 no_dict_id = io.BytesIO()
397 with cctx.write_to(no_dict_id) as compressor:
517 with cctx.write_to(no_dict_id) as compressor:
398 compressor.write(b'foobarfoobar')
518 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
519
520 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
521 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
522 self.assertEqual(no_params.content_size, 0)
523 self.assertEqual(with_params.content_size, 0)
524 self.assertEqual(no_params.dict_id, 0)
525 self.assertEqual(with_params.dict_id, d.dict_id())
526 self.assertFalse(no_params.has_checksum)
527 self.assertFalse(with_params.has_checksum)
399
528
400 self.assertEqual(len(with_dict_id.getvalue()),
529 self.assertEqual(len(with_dict_id.getvalue()),
401 len(no_dict_id.getvalue()) + 4)
530 len(no_dict_id.getvalue()) + 4)
402
531
403 def test_memory_size(self):
532 def test_memory_size(self):
404 cctx = zstd.ZstdCompressor(level=3)
533 cctx = zstd.ZstdCompressor(level=3)
405 buffer = io.BytesIO()
534 buffer = io.BytesIO()
406 with cctx.write_to(buffer) as compressor:
535 with cctx.write_to(buffer) as compressor:
407 size = compressor.memory_size()
536 size = compressor.memory_size()
408
537
409 self.assertGreater(size, 100000)
538 self.assertGreater(size, 100000)
410
539
411 def test_write_size(self):
540 def test_write_size(self):
412 cctx = zstd.ZstdCompressor(level=3)
541 cctx = zstd.ZstdCompressor(level=3)
413 dest = OpCountingBytesIO()
542 dest = OpCountingBytesIO()
414 with cctx.write_to(dest, write_size=1) as compressor:
543 with cctx.write_to(dest, write_size=1) as compressor:
415 compressor.write(b'foo')
544 self.assertEqual(compressor.write(b'foo'), 0)
416 compressor.write(b'bar')
545 self.assertEqual(compressor.write(b'bar'), 0)
417 compressor.write(b'foobar')
546 self.assertEqual(compressor.write(b'foobar'), 0)
418
547
419 self.assertEqual(len(dest.getvalue()), dest._write_count)
548 self.assertEqual(len(dest.getvalue()), dest._write_count)
420
549
421 def test_flush_repeated(self):
550 def test_flush_repeated(self):
422 cctx = zstd.ZstdCompressor(level=3)
551 cctx = zstd.ZstdCompressor(level=3)
423 dest = OpCountingBytesIO()
552 dest = OpCountingBytesIO()
424 with cctx.write_to(dest) as compressor:
553 with cctx.write_to(dest) as compressor:
425 compressor.write(b'foo')
554 self.assertEqual(compressor.write(b'foo'), 0)
426 self.assertEqual(dest._write_count, 0)
555 self.assertEqual(dest._write_count, 0)
427 compressor.flush()
556 self.assertEqual(compressor.flush(), 12)
428 self.assertEqual(dest._write_count, 1)
557 self.assertEqual(dest._write_count, 1)
429 compressor.write(b'bar')
558 self.assertEqual(compressor.write(b'bar'), 0)
430 self.assertEqual(dest._write_count, 1)
559 self.assertEqual(dest._write_count, 1)
431 compressor.flush()
560 self.assertEqual(compressor.flush(), 6)
432 self.assertEqual(dest._write_count, 2)
561 self.assertEqual(dest._write_count, 2)
433 compressor.write(b'baz')
562 self.assertEqual(compressor.write(b'baz'), 0)
434
563
435 self.assertEqual(dest._write_count, 3)
564 self.assertEqual(dest._write_count, 3)
436
565
437 def test_flush_empty_block(self):
566 def test_flush_empty_block(self):
438 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
567 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
439 dest = OpCountingBytesIO()
568 dest = OpCountingBytesIO()
440 with cctx.write_to(dest) as compressor:
569 with cctx.write_to(dest) as compressor:
441 compressor.write(b'foobar' * 8192)
570 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
442 count = dest._write_count
571 count = dest._write_count
443 offset = dest.tell()
572 offset = dest.tell()
444 compressor.flush()
573 self.assertEqual(compressor.flush(), 23)
445 self.assertGreater(dest._write_count, count)
574 self.assertGreater(dest._write_count, count)
446 self.assertGreater(dest.tell(), offset)
575 self.assertGreater(dest.tell(), offset)
447 offset = dest.tell()
576 offset = dest.tell()
448 # Ending the write here should cause an empty block to be written
577 # Ending the write here should cause an empty block to be written
449 # to denote end of frame.
578 # to denote end of frame.
450
579
451 trailing = dest.getvalue()[offset:]
580 trailing = dest.getvalue()[offset:]
452 # 3 bytes block header + 4 bytes frame checksum
581 # 3 bytes block header + 4 bytes frame checksum
453 self.assertEqual(len(trailing), 7)
582 self.assertEqual(len(trailing), 7)
454
583
455 header = trailing[0:3]
584 header = trailing[0:3]
456 self.assertEqual(header, b'\x01\x00\x00')
585 self.assertEqual(header, b'\x01\x00\x00')
457
586
458
587
588 @make_cffi
459 class TestCompressor_read_from(unittest.TestCase):
589 class TestCompressor_read_from(unittest.TestCase):
460 def test_type_validation(self):
590 def test_type_validation(self):
461 cctx = zstd.ZstdCompressor()
591 cctx = zstd.ZstdCompressor()
462
592
463 # Object with read() works.
593 # Object with read() works.
464 cctx.read_from(io.BytesIO())
594 for chunk in cctx.read_from(io.BytesIO()):
595 pass
465
596
466 # Buffer protocol works.
597 # Buffer protocol works.
467 cctx.read_from(b'foobar')
598 for chunk in cctx.read_from(b'foobar'):
599 pass
468
600
469 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
601 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
470 cctx.read_from(True)
602 for chunk in cctx.read_from(True):
603 pass
471
604
472 def test_read_empty(self):
605 def test_read_empty(self):
473 cctx = zstd.ZstdCompressor(level=1)
606 cctx = zstd.ZstdCompressor(level=1)
474
607
475 source = io.BytesIO()
608 source = io.BytesIO()
476 it = cctx.read_from(source)
609 it = cctx.read_from(source)
477 chunks = list(it)
610 chunks = list(it)
478 self.assertEqual(len(chunks), 1)
611 self.assertEqual(len(chunks), 1)
479 compressed = b''.join(chunks)
612 compressed = b''.join(chunks)
480 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
613 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
481
614
482 # And again with the buffer protocol.
615 # And again with the buffer protocol.
483 it = cctx.read_from(b'')
616 it = cctx.read_from(b'')
484 chunks = list(it)
617 chunks = list(it)
485 self.assertEqual(len(chunks), 1)
618 self.assertEqual(len(chunks), 1)
486 compressed2 = b''.join(chunks)
619 compressed2 = b''.join(chunks)
487 self.assertEqual(compressed2, compressed)
620 self.assertEqual(compressed2, compressed)
488
621
489 def test_read_large(self):
622 def test_read_large(self):
490 cctx = zstd.ZstdCompressor(level=1)
623 cctx = zstd.ZstdCompressor(level=1)
491
624
492 source = io.BytesIO()
625 source = io.BytesIO()
493 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
626 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
494 source.write(b'o')
627 source.write(b'o')
495 source.seek(0)
628 source.seek(0)
496
629
497 # Creating an iterator should not perform any compression until
630 # Creating an iterator should not perform any compression until
498 # first read.
631 # first read.
499 it = cctx.read_from(source, size=len(source.getvalue()))
632 it = cctx.read_from(source, size=len(source.getvalue()))
500 self.assertEqual(source.tell(), 0)
633 self.assertEqual(source.tell(), 0)
501
634
502 # We should have exactly 2 output chunks.
635 # We should have exactly 2 output chunks.
503 chunks = []
636 chunks = []
504 chunk = next(it)
637 chunk = next(it)
505 self.assertIsNotNone(chunk)
638 self.assertIsNotNone(chunk)
506 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
639 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
507 chunks.append(chunk)
640 chunks.append(chunk)
508 chunk = next(it)
641 chunk = next(it)
509 self.assertIsNotNone(chunk)
642 self.assertIsNotNone(chunk)
510 chunks.append(chunk)
643 chunks.append(chunk)
511
644
512 self.assertEqual(source.tell(), len(source.getvalue()))
645 self.assertEqual(source.tell(), len(source.getvalue()))
513
646
514 with self.assertRaises(StopIteration):
647 with self.assertRaises(StopIteration):
515 next(it)
648 next(it)
516
649
517 # And again for good measure.
650 # And again for good measure.
518 with self.assertRaises(StopIteration):
651 with self.assertRaises(StopIteration):
519 next(it)
652 next(it)
520
653
521 # We should get the same output as the one-shot compression mechanism.
654 # We should get the same output as the one-shot compression mechanism.
522 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
655 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
523
656
657 params = zstd.get_frame_parameters(b''.join(chunks))
658 self.assertEqual(params.content_size, 0)
659 self.assertEqual(params.window_size, 262144)
660 self.assertEqual(params.dict_id, 0)
661 self.assertFalse(params.has_checksum)
662
524 # Now check the buffer protocol.
663 # Now check the buffer protocol.
525 it = cctx.read_from(source.getvalue())
664 it = cctx.read_from(source.getvalue())
526 chunks = list(it)
665 chunks = list(it)
527 self.assertEqual(len(chunks), 2)
666 self.assertEqual(len(chunks), 2)
528 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
667 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
529
668
530 def test_read_write_size(self):
669 def test_read_write_size(self):
531 source = OpCountingBytesIO(b'foobarfoobar')
670 source = OpCountingBytesIO(b'foobarfoobar')
532 cctx = zstd.ZstdCompressor(level=3)
671 cctx = zstd.ZstdCompressor(level=3)
533 for chunk in cctx.read_from(source, read_size=1, write_size=1):
672 for chunk in cctx.read_from(source, read_size=1, write_size=1):
534 self.assertEqual(len(chunk), 1)
673 self.assertEqual(len(chunk), 1)
535
674
536 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
675 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
@@ -1,107 +1,186 b''
1 import io
1 import io
2
2
3 try:
3 try:
4 import unittest2 as unittest
4 import unittest2 as unittest
5 except ImportError:
5 except ImportError:
6 import unittest
6 import unittest
7
7
8 try:
8 try:
9 import hypothesis
9 import hypothesis
10 import hypothesis.strategies as strategies
10 import hypothesis.strategies as strategies
11 except ImportError:
11 except ImportError:
12 hypothesis = None
12 hypothesis = None
13
13
14 import zstd
14 import zstd
15
15
16 from . common import (
17 make_cffi,
18 )
19
20
21 @make_cffi
16 class TestCompressionParameters(unittest.TestCase):
22 class TestCompressionParameters(unittest.TestCase):
17 def test_init_bad_arg_type(self):
23 def test_init_bad_arg_type(self):
18 with self.assertRaises(TypeError):
24 with self.assertRaises(TypeError):
19 zstd.CompressionParameters()
25 zstd.CompressionParameters()
20
26
21 with self.assertRaises(TypeError):
27 with self.assertRaises(TypeError):
22 zstd.CompressionParameters(0, 1)
28 zstd.CompressionParameters(0, 1)
23
29
24 def test_bounds(self):
30 def test_bounds(self):
25 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
31 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
26 zstd.CHAINLOG_MIN,
32 zstd.CHAINLOG_MIN,
27 zstd.HASHLOG_MIN,
33 zstd.HASHLOG_MIN,
28 zstd.SEARCHLOG_MIN,
34 zstd.SEARCHLOG_MIN,
29 zstd.SEARCHLENGTH_MIN,
35 zstd.SEARCHLENGTH_MIN,
30 zstd.TARGETLENGTH_MIN,
36 zstd.TARGETLENGTH_MIN,
31 zstd.STRATEGY_FAST)
37 zstd.STRATEGY_FAST)
32
38
33 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
39 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
34 zstd.CHAINLOG_MAX,
40 zstd.CHAINLOG_MAX,
35 zstd.HASHLOG_MAX,
41 zstd.HASHLOG_MAX,
36 zstd.SEARCHLOG_MAX,
42 zstd.SEARCHLOG_MAX,
37 zstd.SEARCHLENGTH_MAX,
43 zstd.SEARCHLENGTH_MAX,
38 zstd.TARGETLENGTH_MAX,
44 zstd.TARGETLENGTH_MAX,
39 zstd.STRATEGY_BTOPT)
45 zstd.STRATEGY_BTOPT)
40
46
41 def test_get_compression_parameters(self):
47 def test_get_compression_parameters(self):
42 p = zstd.get_compression_parameters(1)
48 p = zstd.get_compression_parameters(1)
43 self.assertIsInstance(p, zstd.CompressionParameters)
49 self.assertIsInstance(p, zstd.CompressionParameters)
44
50
45 self.assertEqual(p[0], 19)
51 self.assertEqual(p.window_log, 19)
52
53 def test_members(self):
54 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
55 self.assertEqual(p.window_log, 10)
56 self.assertEqual(p.chain_log, 6)
57 self.assertEqual(p.hash_log, 7)
58 self.assertEqual(p.search_log, 4)
59 self.assertEqual(p.search_length, 5)
60 self.assertEqual(p.target_length, 8)
61 self.assertEqual(p.strategy, 1)
62
63
64 @make_cffi
65 class TestFrameParameters(unittest.TestCase):
66 def test_invalid_type(self):
67 with self.assertRaises(TypeError):
68 zstd.get_frame_parameters(None)
69
70 with self.assertRaises(TypeError):
71 zstd.get_frame_parameters(u'foobarbaz')
72
73 def test_invalid_input_sizes(self):
74 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
75 zstd.get_frame_parameters(b'')
76
77 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
78 zstd.get_frame_parameters(zstd.FRAME_HEADER)
79
80 def test_invalid_frame(self):
81 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
82 zstd.get_frame_parameters(b'foobarbaz')
83
84 def test_attributes(self):
85 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
86 self.assertEqual(params.content_size, 0)
87 self.assertEqual(params.window_size, 1024)
88 self.assertEqual(params.dict_id, 0)
89 self.assertFalse(params.has_checksum)
90
91 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
92 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
93 self.assertEqual(params.content_size, 0)
94 self.assertEqual(params.window_size, 1024)
95 self.assertEqual(params.dict_id, 255)
96 self.assertFalse(params.has_checksum)
97
98 # Lowest 3rd bit indicates if checksum is present.
99 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
100 self.assertEqual(params.content_size, 0)
101 self.assertEqual(params.window_size, 1024)
102 self.assertEqual(params.dict_id, 0)
103 self.assertTrue(params.has_checksum)
104
105 # Upper 2 bits indicate content size.
106 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
107 self.assertEqual(params.content_size, 511)
108 self.assertEqual(params.window_size, 1024)
109 self.assertEqual(params.dict_id, 0)
110 self.assertFalse(params.has_checksum)
111
112 # Window descriptor is 2nd byte after frame header.
113 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
114 self.assertEqual(params.content_size, 0)
115 self.assertEqual(params.window_size, 262144)
116 self.assertEqual(params.dict_id, 0)
117 self.assertFalse(params.has_checksum)
118
119 # Set multiple things.
120 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
121 self.assertEqual(params.content_size, 272)
122 self.assertEqual(params.window_size, 262144)
123 self.assertEqual(params.dict_id, 15)
124 self.assertTrue(params.has_checksum)
125
46
126
47 if hypothesis:
127 if hypothesis:
48 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
128 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
49 max_value=zstd.WINDOWLOG_MAX)
129 max_value=zstd.WINDOWLOG_MAX)
50 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
130 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
51 max_value=zstd.CHAINLOG_MAX)
131 max_value=zstd.CHAINLOG_MAX)
52 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
132 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
53 max_value=zstd.HASHLOG_MAX)
133 max_value=zstd.HASHLOG_MAX)
54 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
134 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
55 max_value=zstd.SEARCHLOG_MAX)
135 max_value=zstd.SEARCHLOG_MAX)
56 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
136 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
57 max_value=zstd.SEARCHLENGTH_MAX)
137 max_value=zstd.SEARCHLENGTH_MAX)
58 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
138 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
59 max_value=zstd.TARGETLENGTH_MAX)
139 max_value=zstd.TARGETLENGTH_MAX)
60 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
140 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
61 zstd.STRATEGY_DFAST,
141 zstd.STRATEGY_DFAST,
62 zstd.STRATEGY_GREEDY,
142 zstd.STRATEGY_GREEDY,
63 zstd.STRATEGY_LAZY,
143 zstd.STRATEGY_LAZY,
64 zstd.STRATEGY_LAZY2,
144 zstd.STRATEGY_LAZY2,
65 zstd.STRATEGY_BTLAZY2,
145 zstd.STRATEGY_BTLAZY2,
66 zstd.STRATEGY_BTOPT))
146 zstd.STRATEGY_BTOPT))
67
147
148
149 @make_cffi
68 class TestCompressionParametersHypothesis(unittest.TestCase):
150 class TestCompressionParametersHypothesis(unittest.TestCase):
69 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
151 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
70 s_searchlength, s_targetlength, s_strategy)
152 s_searchlength, s_targetlength, s_strategy)
71 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
153 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
72 searchlength, targetlength, strategy):
154 searchlength, targetlength, strategy):
73 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
155 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
74 searchlog, searchlength,
156 searchlog, searchlength,
75 targetlength, strategy)
157 targetlength, strategy)
76 self.assertEqual(tuple(p),
77 (windowlog, chainlog, hashlog, searchlog,
78 searchlength, targetlength, strategy))
79
158
80 # Verify we can instantiate a compressor with the supplied values.
159 # Verify we can instantiate a compressor with the supplied values.
81 # ZSTD_checkCParams moves the goal posts on us from what's advertised
160 # ZSTD_checkCParams moves the goal posts on us from what's advertised
82 # in the constants. So move along with them.
161 # in the constants. So move along with them.
83 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
162 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
84 searchlength += 1
163 searchlength += 1
85 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
164 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
86 searchlog, searchlength,
165 searchlog, searchlength,
87 targetlength, strategy)
166 targetlength, strategy)
88 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
167 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
89 searchlength -= 1
168 searchlength -= 1
90 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
169 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
91 searchlog, searchlength,
170 searchlog, searchlength,
92 targetlength, strategy)
171 targetlength, strategy)
93
172
94 cctx = zstd.ZstdCompressor(compression_params=p)
173 cctx = zstd.ZstdCompressor(compression_params=p)
95 with cctx.write_to(io.BytesIO()):
174 with cctx.write_to(io.BytesIO()):
96 pass
175 pass
97
176
98 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
177 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
99 s_searchlength, s_targetlength, s_strategy)
178 s_searchlength, s_targetlength, s_strategy)
100 def test_estimate_compression_context_size(self, windowlog, chainlog,
179 def test_estimate_compression_context_size(self, windowlog, chainlog,
101 hashlog, searchlog,
180 hashlog, searchlog,
102 searchlength, targetlength,
181 searchlength, targetlength,
103 strategy):
182 strategy):
104 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
183 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
105 searchlog, searchlength,
184 searchlog, searchlength,
106 targetlength, strategy)
185 targetlength, strategy)
107 size = zstd.estimate_compression_context_size(p)
186 size = zstd.estimate_compression_context_size(p)
@@ -1,478 +1,577 b''
1 import io
1 import io
2 import random
2 import random
3 import struct
3 import struct
4 import sys
4 import sys
5
5
6 try:
6 try:
7 import unittest2 as unittest
7 import unittest2 as unittest
8 except ImportError:
8 except ImportError:
9 import unittest
9 import unittest
10
10
11 import zstd
11 import zstd
12
12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14
17
15
18
16 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
17 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
18 else:
21 else:
19 next = lambda it: it.next()
22 next = lambda it: it.next()
20
23
21
24
25 @make_cffi
22 class TestDecompressor_decompress(unittest.TestCase):
26 class TestDecompressor_decompress(unittest.TestCase):
23 def test_empty_input(self):
27 def test_empty_input(self):
24 dctx = zstd.ZstdDecompressor()
28 dctx = zstd.ZstdDecompressor()
25
29
26 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
30 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
27 dctx.decompress(b'')
31 dctx.decompress(b'')
28
32
29 def test_invalid_input(self):
33 def test_invalid_input(self):
30 dctx = zstd.ZstdDecompressor()
34 dctx = zstd.ZstdDecompressor()
31
35
32 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
36 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
33 dctx.decompress(b'foobar')
37 dctx.decompress(b'foobar')
34
38
35 def test_no_content_size_in_frame(self):
39 def test_no_content_size_in_frame(self):
36 cctx = zstd.ZstdCompressor(write_content_size=False)
40 cctx = zstd.ZstdCompressor(write_content_size=False)
37 compressed = cctx.compress(b'foobar')
41 compressed = cctx.compress(b'foobar')
38
42
39 dctx = zstd.ZstdDecompressor()
43 dctx = zstd.ZstdDecompressor()
40 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
44 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
41 dctx.decompress(compressed)
45 dctx.decompress(compressed)
42
46
43 def test_content_size_present(self):
47 def test_content_size_present(self):
44 cctx = zstd.ZstdCompressor(write_content_size=True)
48 cctx = zstd.ZstdCompressor(write_content_size=True)
45 compressed = cctx.compress(b'foobar')
49 compressed = cctx.compress(b'foobar')
46
50
47 dctx = zstd.ZstdDecompressor()
51 dctx = zstd.ZstdDecompressor()
48 decompressed = dctx.decompress(compressed)
52 decompressed = dctx.decompress(compressed)
49 self.assertEqual(decompressed, b'foobar')
53 self.assertEqual(decompressed, b'foobar')
50
54
51 def test_max_output_size(self):
55 def test_max_output_size(self):
52 cctx = zstd.ZstdCompressor(write_content_size=False)
56 cctx = zstd.ZstdCompressor(write_content_size=False)
53 source = b'foobar' * 256
57 source = b'foobar' * 256
54 compressed = cctx.compress(source)
58 compressed = cctx.compress(source)
55
59
56 dctx = zstd.ZstdDecompressor()
60 dctx = zstd.ZstdDecompressor()
57 # Will fit into buffer exactly the size of input.
61 # Will fit into buffer exactly the size of input.
58 decompressed = dctx.decompress(compressed, max_output_size=len(source))
62 decompressed = dctx.decompress(compressed, max_output_size=len(source))
59 self.assertEqual(decompressed, source)
63 self.assertEqual(decompressed, source)
60
64
61 # Input size - 1 fails
65 # Input size - 1 fails
62 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
66 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
63 dctx.decompress(compressed, max_output_size=len(source) - 1)
67 dctx.decompress(compressed, max_output_size=len(source) - 1)
64
68
65 # Input size + 1 works
69 # Input size + 1 works
66 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
70 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
67 self.assertEqual(decompressed, source)
71 self.assertEqual(decompressed, source)
68
72
69 # A much larger buffer works.
73 # A much larger buffer works.
70 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
74 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
71 self.assertEqual(decompressed, source)
75 self.assertEqual(decompressed, source)
72
76
73 def test_stupidly_large_output_buffer(self):
77 def test_stupidly_large_output_buffer(self):
74 cctx = zstd.ZstdCompressor(write_content_size=False)
78 cctx = zstd.ZstdCompressor(write_content_size=False)
75 compressed = cctx.compress(b'foobar' * 256)
79 compressed = cctx.compress(b'foobar' * 256)
76 dctx = zstd.ZstdDecompressor()
80 dctx = zstd.ZstdDecompressor()
77
81
78 # Will get OverflowError on some Python distributions that can't
82 # Will get OverflowError on some Python distributions that can't
79 # handle really large integers.
83 # handle really large integers.
80 with self.assertRaises((MemoryError, OverflowError)):
84 with self.assertRaises((MemoryError, OverflowError)):
81 dctx.decompress(compressed, max_output_size=2**62)
85 dctx.decompress(compressed, max_output_size=2**62)
82
86
83 def test_dictionary(self):
87 def test_dictionary(self):
84 samples = []
88 samples = []
85 for i in range(128):
89 for i in range(128):
86 samples.append(b'foo' * 64)
90 samples.append(b'foo' * 64)
87 samples.append(b'bar' * 64)
91 samples.append(b'bar' * 64)
88 samples.append(b'foobar' * 64)
92 samples.append(b'foobar' * 64)
89
93
90 d = zstd.train_dictionary(8192, samples)
94 d = zstd.train_dictionary(8192, samples)
91
95
92 orig = b'foobar' * 16384
96 orig = b'foobar' * 16384
93 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
97 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
94 compressed = cctx.compress(orig)
98 compressed = cctx.compress(orig)
95
99
96 dctx = zstd.ZstdDecompressor(dict_data=d)
100 dctx = zstd.ZstdDecompressor(dict_data=d)
97 decompressed = dctx.decompress(compressed)
101 decompressed = dctx.decompress(compressed)
98
102
99 self.assertEqual(decompressed, orig)
103 self.assertEqual(decompressed, orig)
100
104
101 def test_dictionary_multiple(self):
105 def test_dictionary_multiple(self):
102 samples = []
106 samples = []
103 for i in range(128):
107 for i in range(128):
104 samples.append(b'foo' * 64)
108 samples.append(b'foo' * 64)
105 samples.append(b'bar' * 64)
109 samples.append(b'bar' * 64)
106 samples.append(b'foobar' * 64)
110 samples.append(b'foobar' * 64)
107
111
108 d = zstd.train_dictionary(8192, samples)
112 d = zstd.train_dictionary(8192, samples)
109
113
110 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
114 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
111 compressed = []
115 compressed = []
112 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
116 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
113 for source in sources:
117 for source in sources:
114 compressed.append(cctx.compress(source))
118 compressed.append(cctx.compress(source))
115
119
116 dctx = zstd.ZstdDecompressor(dict_data=d)
120 dctx = zstd.ZstdDecompressor(dict_data=d)
117 for i in range(len(sources)):
121 for i in range(len(sources)):
118 decompressed = dctx.decompress(compressed[i])
122 decompressed = dctx.decompress(compressed[i])
119 self.assertEqual(decompressed, sources[i])
123 self.assertEqual(decompressed, sources[i])
120
124
121
125
126 @make_cffi
122 class TestDecompressor_copy_stream(unittest.TestCase):
127 class TestDecompressor_copy_stream(unittest.TestCase):
123 def test_no_read(self):
128 def test_no_read(self):
124 source = object()
129 source = object()
125 dest = io.BytesIO()
130 dest = io.BytesIO()
126
131
127 dctx = zstd.ZstdDecompressor()
132 dctx = zstd.ZstdDecompressor()
128 with self.assertRaises(ValueError):
133 with self.assertRaises(ValueError):
129 dctx.copy_stream(source, dest)
134 dctx.copy_stream(source, dest)
130
135
131 def test_no_write(self):
136 def test_no_write(self):
132 source = io.BytesIO()
137 source = io.BytesIO()
133 dest = object()
138 dest = object()
134
139
135 dctx = zstd.ZstdDecompressor()
140 dctx = zstd.ZstdDecompressor()
136 with self.assertRaises(ValueError):
141 with self.assertRaises(ValueError):
137 dctx.copy_stream(source, dest)
142 dctx.copy_stream(source, dest)
138
143
139 def test_empty(self):
144 def test_empty(self):
140 source = io.BytesIO()
145 source = io.BytesIO()
141 dest = io.BytesIO()
146 dest = io.BytesIO()
142
147
143 dctx = zstd.ZstdDecompressor()
148 dctx = zstd.ZstdDecompressor()
144 # TODO should this raise an error?
149 # TODO should this raise an error?
145 r, w = dctx.copy_stream(source, dest)
150 r, w = dctx.copy_stream(source, dest)
146
151
147 self.assertEqual(r, 0)
152 self.assertEqual(r, 0)
148 self.assertEqual(w, 0)
153 self.assertEqual(w, 0)
149 self.assertEqual(dest.getvalue(), b'')
154 self.assertEqual(dest.getvalue(), b'')
150
155
151 def test_large_data(self):
156 def test_large_data(self):
152 source = io.BytesIO()
157 source = io.BytesIO()
153 for i in range(255):
158 for i in range(255):
154 source.write(struct.Struct('>B').pack(i) * 16384)
159 source.write(struct.Struct('>B').pack(i) * 16384)
155 source.seek(0)
160 source.seek(0)
156
161
157 compressed = io.BytesIO()
162 compressed = io.BytesIO()
158 cctx = zstd.ZstdCompressor()
163 cctx = zstd.ZstdCompressor()
159 cctx.copy_stream(source, compressed)
164 cctx.copy_stream(source, compressed)
160
165
161 compressed.seek(0)
166 compressed.seek(0)
162 dest = io.BytesIO()
167 dest = io.BytesIO()
163 dctx = zstd.ZstdDecompressor()
168 dctx = zstd.ZstdDecompressor()
164 r, w = dctx.copy_stream(compressed, dest)
169 r, w = dctx.copy_stream(compressed, dest)
165
170
166 self.assertEqual(r, len(compressed.getvalue()))
171 self.assertEqual(r, len(compressed.getvalue()))
167 self.assertEqual(w, len(source.getvalue()))
172 self.assertEqual(w, len(source.getvalue()))
168
173
169 def test_read_write_size(self):
174 def test_read_write_size(self):
170 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
175 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
171 b'foobarfoobar'))
176 b'foobarfoobar'))
172
177
173 dest = OpCountingBytesIO()
178 dest = OpCountingBytesIO()
174 dctx = zstd.ZstdDecompressor()
179 dctx = zstd.ZstdDecompressor()
175 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
180 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
176
181
177 self.assertEqual(r, len(source.getvalue()))
182 self.assertEqual(r, len(source.getvalue()))
178 self.assertEqual(w, len(b'foobarfoobar'))
183 self.assertEqual(w, len(b'foobarfoobar'))
179 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
184 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
180 self.assertEqual(dest._write_count, len(dest.getvalue()))
185 self.assertEqual(dest._write_count, len(dest.getvalue()))
181
186
182
187
188 @make_cffi
183 class TestDecompressor_decompressobj(unittest.TestCase):
189 class TestDecompressor_decompressobj(unittest.TestCase):
184 def test_simple(self):
190 def test_simple(self):
185 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
186
192
187 dctx = zstd.ZstdDecompressor()
193 dctx = zstd.ZstdDecompressor()
188 dobj = dctx.decompressobj()
194 dobj = dctx.decompressobj()
189 self.assertEqual(dobj.decompress(data), b'foobar')
195 self.assertEqual(dobj.decompress(data), b'foobar')
190
196
191 def test_reuse(self):
197 def test_reuse(self):
192 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
198 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
193
199
194 dctx = zstd.ZstdDecompressor()
200 dctx = zstd.ZstdDecompressor()
195 dobj = dctx.decompressobj()
201 dobj = dctx.decompressobj()
196 dobj.decompress(data)
202 dobj.decompress(data)
197
203
198 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
204 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
199 dobj.decompress(data)
205 dobj.decompress(data)
200
206
201
207
202 def decompress_via_writer(data):
208 def decompress_via_writer(data):
203 buffer = io.BytesIO()
209 buffer = io.BytesIO()
204 dctx = zstd.ZstdDecompressor()
210 dctx = zstd.ZstdDecompressor()
205 with dctx.write_to(buffer) as decompressor:
211 with dctx.write_to(buffer) as decompressor:
206 decompressor.write(data)
212 decompressor.write(data)
207 return buffer.getvalue()
213 return buffer.getvalue()
208
214
209
215
216 @make_cffi
210 class TestDecompressor_write_to(unittest.TestCase):
217 class TestDecompressor_write_to(unittest.TestCase):
211 def test_empty_roundtrip(self):
218 def test_empty_roundtrip(self):
212 cctx = zstd.ZstdCompressor()
219 cctx = zstd.ZstdCompressor()
213 empty = cctx.compress(b'')
220 empty = cctx.compress(b'')
214 self.assertEqual(decompress_via_writer(empty), b'')
221 self.assertEqual(decompress_via_writer(empty), b'')
215
222
216 def test_large_roundtrip(self):
223 def test_large_roundtrip(self):
217 chunks = []
224 chunks = []
218 for i in range(255):
225 for i in range(255):
219 chunks.append(struct.Struct('>B').pack(i) * 16384)
226 chunks.append(struct.Struct('>B').pack(i) * 16384)
220 orig = b''.join(chunks)
227 orig = b''.join(chunks)
221 cctx = zstd.ZstdCompressor()
228 cctx = zstd.ZstdCompressor()
222 compressed = cctx.compress(orig)
229 compressed = cctx.compress(orig)
223
230
224 self.assertEqual(decompress_via_writer(compressed), orig)
231 self.assertEqual(decompress_via_writer(compressed), orig)
225
232
226 def test_multiple_calls(self):
233 def test_multiple_calls(self):
227 chunks = []
234 chunks = []
228 for i in range(255):
235 for i in range(255):
229 for j in range(255):
236 for j in range(255):
230 chunks.append(struct.Struct('>B').pack(j) * i)
237 chunks.append(struct.Struct('>B').pack(j) * i)
231
238
232 orig = b''.join(chunks)
239 orig = b''.join(chunks)
233 cctx = zstd.ZstdCompressor()
240 cctx = zstd.ZstdCompressor()
234 compressed = cctx.compress(orig)
241 compressed = cctx.compress(orig)
235
242
236 buffer = io.BytesIO()
243 buffer = io.BytesIO()
237 dctx = zstd.ZstdDecompressor()
244 dctx = zstd.ZstdDecompressor()
238 with dctx.write_to(buffer) as decompressor:
245 with dctx.write_to(buffer) as decompressor:
239 pos = 0
246 pos = 0
240 while pos < len(compressed):
247 while pos < len(compressed):
241 pos2 = pos + 8192
248 pos2 = pos + 8192
242 decompressor.write(compressed[pos:pos2])
249 decompressor.write(compressed[pos:pos2])
243 pos += 8192
250 pos += 8192
244 self.assertEqual(buffer.getvalue(), orig)
251 self.assertEqual(buffer.getvalue(), orig)
245
252
246 def test_dictionary(self):
253 def test_dictionary(self):
247 samples = []
254 samples = []
248 for i in range(128):
255 for i in range(128):
249 samples.append(b'foo' * 64)
256 samples.append(b'foo' * 64)
250 samples.append(b'bar' * 64)
257 samples.append(b'bar' * 64)
251 samples.append(b'foobar' * 64)
258 samples.append(b'foobar' * 64)
252
259
253 d = zstd.train_dictionary(8192, samples)
260 d = zstd.train_dictionary(8192, samples)
254
261
255 orig = b'foobar' * 16384
262 orig = b'foobar' * 16384
256 buffer = io.BytesIO()
263 buffer = io.BytesIO()
257 cctx = zstd.ZstdCompressor(dict_data=d)
264 cctx = zstd.ZstdCompressor(dict_data=d)
258 with cctx.write_to(buffer) as compressor:
265 with cctx.write_to(buffer) as compressor:
259 compressor.write(orig)
266 self.assertEqual(compressor.write(orig), 1544)
260
267
261 compressed = buffer.getvalue()
268 compressed = buffer.getvalue()
262 buffer = io.BytesIO()
269 buffer = io.BytesIO()
263
270
264 dctx = zstd.ZstdDecompressor(dict_data=d)
271 dctx = zstd.ZstdDecompressor(dict_data=d)
265 with dctx.write_to(buffer) as decompressor:
272 with dctx.write_to(buffer) as decompressor:
266 decompressor.write(compressed)
273 self.assertEqual(decompressor.write(compressed), len(orig))
267
274
268 self.assertEqual(buffer.getvalue(), orig)
275 self.assertEqual(buffer.getvalue(), orig)
269
276
270 def test_memory_size(self):
277 def test_memory_size(self):
271 dctx = zstd.ZstdDecompressor()
278 dctx = zstd.ZstdDecompressor()
272 buffer = io.BytesIO()
279 buffer = io.BytesIO()
273 with dctx.write_to(buffer) as decompressor:
280 with dctx.write_to(buffer) as decompressor:
274 size = decompressor.memory_size()
281 size = decompressor.memory_size()
275
282
276 self.assertGreater(size, 100000)
283 self.assertGreater(size, 100000)
277
284
278 def test_write_size(self):
285 def test_write_size(self):
279 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
286 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
280 dest = OpCountingBytesIO()
287 dest = OpCountingBytesIO()
281 dctx = zstd.ZstdDecompressor()
288 dctx = zstd.ZstdDecompressor()
282 with dctx.write_to(dest, write_size=1) as decompressor:
289 with dctx.write_to(dest, write_size=1) as decompressor:
283 s = struct.Struct('>B')
290 s = struct.Struct('>B')
284 for c in source:
291 for c in source:
285 if not isinstance(c, str):
292 if not isinstance(c, str):
286 c = s.pack(c)
293 c = s.pack(c)
287 decompressor.write(c)
294 decompressor.write(c)
288
295
289
296
290 self.assertEqual(dest.getvalue(), b'foobarfoobar')
297 self.assertEqual(dest.getvalue(), b'foobarfoobar')
291 self.assertEqual(dest._write_count, len(dest.getvalue()))
298 self.assertEqual(dest._write_count, len(dest.getvalue()))
292
299
293
300
301 @make_cffi
294 class TestDecompressor_read_from(unittest.TestCase):
302 class TestDecompressor_read_from(unittest.TestCase):
295 def test_type_validation(self):
303 def test_type_validation(self):
296 dctx = zstd.ZstdDecompressor()
304 dctx = zstd.ZstdDecompressor()
297
305
298 # Object with read() works.
306 # Object with read() works.
299 dctx.read_from(io.BytesIO())
307 dctx.read_from(io.BytesIO())
300
308
301 # Buffer protocol works.
309 # Buffer protocol works.
302 dctx.read_from(b'foobar')
310 dctx.read_from(b'foobar')
303
311
304 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
312 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
305 dctx.read_from(True)
313 b''.join(dctx.read_from(True))
306
314
307 def test_empty_input(self):
315 def test_empty_input(self):
308 dctx = zstd.ZstdDecompressor()
316 dctx = zstd.ZstdDecompressor()
309
317
310 source = io.BytesIO()
318 source = io.BytesIO()
311 it = dctx.read_from(source)
319 it = dctx.read_from(source)
312 # TODO this is arguably wrong. Should get an error about missing frame foo.
320 # TODO this is arguably wrong. Should get an error about missing frame foo.
313 with self.assertRaises(StopIteration):
321 with self.assertRaises(StopIteration):
314 next(it)
322 next(it)
315
323
316 it = dctx.read_from(b'')
324 it = dctx.read_from(b'')
317 with self.assertRaises(StopIteration):
325 with self.assertRaises(StopIteration):
318 next(it)
326 next(it)
319
327
320 def test_invalid_input(self):
328 def test_invalid_input(self):
321 dctx = zstd.ZstdDecompressor()
329 dctx = zstd.ZstdDecompressor()
322
330
323 source = io.BytesIO(b'foobar')
331 source = io.BytesIO(b'foobar')
324 it = dctx.read_from(source)
332 it = dctx.read_from(source)
325 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
333 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
326 next(it)
334 next(it)
327
335
328 it = dctx.read_from(b'foobar')
336 it = dctx.read_from(b'foobar')
329 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
337 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
330 next(it)
338 next(it)
331
339
332 def test_empty_roundtrip(self):
340 def test_empty_roundtrip(self):
333 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
341 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
334 empty = cctx.compress(b'')
342 empty = cctx.compress(b'')
335
343
336 source = io.BytesIO(empty)
344 source = io.BytesIO(empty)
337 source.seek(0)
345 source.seek(0)
338
346
339 dctx = zstd.ZstdDecompressor()
347 dctx = zstd.ZstdDecompressor()
340 it = dctx.read_from(source)
348 it = dctx.read_from(source)
341
349
342 # No chunks should be emitted since there is no data.
350 # No chunks should be emitted since there is no data.
343 with self.assertRaises(StopIteration):
351 with self.assertRaises(StopIteration):
344 next(it)
352 next(it)
345
353
346 # Again for good measure.
354 # Again for good measure.
347 with self.assertRaises(StopIteration):
355 with self.assertRaises(StopIteration):
348 next(it)
356 next(it)
349
357
350 def test_skip_bytes_too_large(self):
358 def test_skip_bytes_too_large(self):
351 dctx = zstd.ZstdDecompressor()
359 dctx = zstd.ZstdDecompressor()
352
360
353 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
361 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
354 dctx.read_from(b'', skip_bytes=1, read_size=1)
362 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
355
363
356 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
364 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
357 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
365 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
358
366
359 def test_skip_bytes(self):
367 def test_skip_bytes(self):
360 cctx = zstd.ZstdCompressor(write_content_size=False)
368 cctx = zstd.ZstdCompressor(write_content_size=False)
361 compressed = cctx.compress(b'foobar')
369 compressed = cctx.compress(b'foobar')
362
370
363 dctx = zstd.ZstdDecompressor()
371 dctx = zstd.ZstdDecompressor()
364 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
372 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
365 self.assertEqual(output, b'foobar')
373 self.assertEqual(output, b'foobar')
366
374
367 def test_large_output(self):
375 def test_large_output(self):
368 source = io.BytesIO()
376 source = io.BytesIO()
369 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
377 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
370 source.write(b'o')
378 source.write(b'o')
371 source.seek(0)
379 source.seek(0)
372
380
373 cctx = zstd.ZstdCompressor(level=1)
381 cctx = zstd.ZstdCompressor(level=1)
374 compressed = io.BytesIO(cctx.compress(source.getvalue()))
382 compressed = io.BytesIO(cctx.compress(source.getvalue()))
375 compressed.seek(0)
383 compressed.seek(0)
376
384
377 dctx = zstd.ZstdDecompressor()
385 dctx = zstd.ZstdDecompressor()
378 it = dctx.read_from(compressed)
386 it = dctx.read_from(compressed)
379
387
380 chunks = []
388 chunks = []
381 chunks.append(next(it))
389 chunks.append(next(it))
382 chunks.append(next(it))
390 chunks.append(next(it))
383
391
384 with self.assertRaises(StopIteration):
392 with self.assertRaises(StopIteration):
385 next(it)
393 next(it)
386
394
387 decompressed = b''.join(chunks)
395 decompressed = b''.join(chunks)
388 self.assertEqual(decompressed, source.getvalue())
396 self.assertEqual(decompressed, source.getvalue())
389
397
390 # And again with buffer protocol.
398 # And again with buffer protocol.
391 it = dctx.read_from(compressed.getvalue())
399 it = dctx.read_from(compressed.getvalue())
392 chunks = []
400 chunks = []
393 chunks.append(next(it))
401 chunks.append(next(it))
394 chunks.append(next(it))
402 chunks.append(next(it))
395
403
396 with self.assertRaises(StopIteration):
404 with self.assertRaises(StopIteration):
397 next(it)
405 next(it)
398
406
399 decompressed = b''.join(chunks)
407 decompressed = b''.join(chunks)
400 self.assertEqual(decompressed, source.getvalue())
408 self.assertEqual(decompressed, source.getvalue())
401
409
402 def test_large_input(self):
410 def test_large_input(self):
403 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
411 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
404 compressed = io.BytesIO()
412 compressed = io.BytesIO()
405 input_size = 0
413 input_size = 0
406 cctx = zstd.ZstdCompressor(level=1)
414 cctx = zstd.ZstdCompressor(level=1)
407 with cctx.write_to(compressed) as compressor:
415 with cctx.write_to(compressed) as compressor:
408 while True:
416 while True:
409 compressor.write(random.choice(bytes))
417 compressor.write(random.choice(bytes))
410 input_size += 1
418 input_size += 1
411
419
412 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
420 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
413 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
421 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
414 if have_compressed and have_raw:
422 if have_compressed and have_raw:
415 break
423 break
416
424
417 compressed.seek(0)
425 compressed.seek(0)
418 self.assertGreater(len(compressed.getvalue()),
426 self.assertGreater(len(compressed.getvalue()),
419 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
427 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
420
428
421 dctx = zstd.ZstdDecompressor()
429 dctx = zstd.ZstdDecompressor()
422 it = dctx.read_from(compressed)
430 it = dctx.read_from(compressed)
423
431
424 chunks = []
432 chunks = []
425 chunks.append(next(it))
433 chunks.append(next(it))
426 chunks.append(next(it))
434 chunks.append(next(it))
427 chunks.append(next(it))
435 chunks.append(next(it))
428
436
429 with self.assertRaises(StopIteration):
437 with self.assertRaises(StopIteration):
430 next(it)
438 next(it)
431
439
432 decompressed = b''.join(chunks)
440 decompressed = b''.join(chunks)
433 self.assertEqual(len(decompressed), input_size)
441 self.assertEqual(len(decompressed), input_size)
434
442
435 # And again with buffer protocol.
443 # And again with buffer protocol.
436 it = dctx.read_from(compressed.getvalue())
444 it = dctx.read_from(compressed.getvalue())
437
445
438 chunks = []
446 chunks = []
439 chunks.append(next(it))
447 chunks.append(next(it))
440 chunks.append(next(it))
448 chunks.append(next(it))
441 chunks.append(next(it))
449 chunks.append(next(it))
442
450
443 with self.assertRaises(StopIteration):
451 with self.assertRaises(StopIteration):
444 next(it)
452 next(it)
445
453
446 decompressed = b''.join(chunks)
454 decompressed = b''.join(chunks)
447 self.assertEqual(len(decompressed), input_size)
455 self.assertEqual(len(decompressed), input_size)
448
456
449 def test_interesting(self):
457 def test_interesting(self):
450 # Found this edge case via fuzzing.
458 # Found this edge case via fuzzing.
451 cctx = zstd.ZstdCompressor(level=1)
459 cctx = zstd.ZstdCompressor(level=1)
452
460
453 source = io.BytesIO()
461 source = io.BytesIO()
454
462
455 compressed = io.BytesIO()
463 compressed = io.BytesIO()
456 with cctx.write_to(compressed) as compressor:
464 with cctx.write_to(compressed) as compressor:
457 for i in range(256):
465 for i in range(256):
458 chunk = b'\0' * 1024
466 chunk = b'\0' * 1024
459 compressor.write(chunk)
467 compressor.write(chunk)
460 source.write(chunk)
468 source.write(chunk)
461
469
462 dctx = zstd.ZstdDecompressor()
470 dctx = zstd.ZstdDecompressor()
463
471
464 simple = dctx.decompress(compressed.getvalue(),
472 simple = dctx.decompress(compressed.getvalue(),
465 max_output_size=len(source.getvalue()))
473 max_output_size=len(source.getvalue()))
466 self.assertEqual(simple, source.getvalue())
474 self.assertEqual(simple, source.getvalue())
467
475
468 compressed.seek(0)
476 compressed.seek(0)
469 streamed = b''.join(dctx.read_from(compressed))
477 streamed = b''.join(dctx.read_from(compressed))
470 self.assertEqual(streamed, source.getvalue())
478 self.assertEqual(streamed, source.getvalue())
471
479
472 def test_read_write_size(self):
480 def test_read_write_size(self):
473 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
481 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
474 dctx = zstd.ZstdDecompressor()
482 dctx = zstd.ZstdDecompressor()
475 for chunk in dctx.read_from(source, read_size=1, write_size=1):
483 for chunk in dctx.read_from(source, read_size=1, write_size=1):
476 self.assertEqual(len(chunk), 1)
484 self.assertEqual(len(chunk), 1)
477
485
478 self.assertEqual(source._read_count, len(source.getvalue()))
486 self.assertEqual(source._read_count, len(source.getvalue()))
487
488
489 @make_cffi
490 class TestDecompressor_content_dict_chain(unittest.TestCase):
491 def test_bad_inputs_simple(self):
492 dctx = zstd.ZstdDecompressor()
493
494 with self.assertRaises(TypeError):
495 dctx.decompress_content_dict_chain(b'foo')
496
497 with self.assertRaises(TypeError):
498 dctx.decompress_content_dict_chain((b'foo', b'bar'))
499
500 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
501 dctx.decompress_content_dict_chain([])
502
503 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
504 dctx.decompress_content_dict_chain([u'foo'])
505
506 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
507 dctx.decompress_content_dict_chain([True])
508
509 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
510 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
511
512 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
513 dctx.decompress_content_dict_chain([b'foo' * 8])
514
515 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
516
517 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
518 dctx.decompress_content_dict_chain([no_size])
519
520 # Corrupt first frame.
521 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
522 frame = frame[0:12] + frame[15:]
523 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
524 dctx.decompress_content_dict_chain([frame])
525
526 def test_bad_subsequent_input(self):
527 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
528
529 dctx = zstd.ZstdDecompressor()
530
531 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
532 dctx.decompress_content_dict_chain([initial, u'foo'])
533
534 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
535 dctx.decompress_content_dict_chain([initial, None])
536
537 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
538 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
539
540 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
541 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
542
543 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
544
545 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
546 dctx.decompress_content_dict_chain([initial, no_size])
547
548 # Corrupt second frame.
549 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
550 frame = cctx.compress(b'bar' * 64)
551 frame = frame[0:12] + frame[15:]
552
553 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
554 dctx.decompress_content_dict_chain([initial, frame])
555
556 def test_simple(self):
557 original = [
558 b'foo' * 64,
559 b'foobar' * 64,
560 b'baz' * 64,
561 b'foobaz' * 64,
562 b'foobarbaz' * 64,
563 ]
564
565 chunks = []
566 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
567 for i, chunk in enumerate(original[1:]):
568 d = zstd.ZstdCompressionDict(original[i])
569 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
570 chunks.append(cctx.compress(chunk))
571
572 for i in range(1, len(original)):
573 chain = chunks[0:i]
574 expected = original[i - 1]
575 dctx = zstd.ZstdDecompressor()
576 decompressed = dctx.decompress_content_dict_chain(chain)
577 self.assertEqual(decompressed, expected)
@@ -1,17 +1,22 b''
1 try:
1 try:
2 import unittest2 as unittest
2 import unittest2 as unittest
3 except ImportError:
3 except ImportError:
4 import unittest
4 import unittest
5
5
6 import zstd
6 import zstd
7
7
8 from . common import (
9 make_cffi,
10 )
8
11
12
13 @make_cffi
9 class TestSizes(unittest.TestCase):
14 class TestSizes(unittest.TestCase):
10 def test_decompression_size(self):
15 def test_decompression_size(self):
11 size = zstd.estimate_decompression_context_size()
16 size = zstd.estimate_decompression_context_size()
12 self.assertGreater(size, 100000)
17 self.assertGreater(size, 100000)
13
18
14 def test_compression_size(self):
19 def test_compression_size(self):
15 params = zstd.get_compression_parameters(3)
20 params = zstd.get_compression_parameters(3)
16 size = zstd.estimate_compression_context_size(params)
21 size = zstd.estimate_compression_context_size(params)
17 self.assertGreater(size, 100000)
22 self.assertGreater(size, 100000)
@@ -1,48 +1,54 b''
1 from __future__ import unicode_literals
1 from __future__ import unicode_literals
2
2
3 try:
3 try:
4 import unittest2 as unittest
4 import unittest2 as unittest
5 except ImportError:
5 except ImportError:
6 import unittest
6 import unittest
7
7
8 import zstd
8 import zstd
9
9
10 from . common import (
11 make_cffi,
12 )
13
14
15 @make_cffi
10 class TestModuleAttributes(unittest.TestCase):
16 class TestModuleAttributes(unittest.TestCase):
11 def test_version(self):
17 def test_version(self):
12 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 2))
18 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3))
13
19
14 def test_constants(self):
20 def test_constants(self):
15 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
16 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
22 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
17
23
18 def test_hasattr(self):
24 def test_hasattr(self):
19 attrs = (
25 attrs = (
20 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
26 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
21 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
27 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
22 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
28 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
23 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
24 'MAGIC_NUMBER',
30 'MAGIC_NUMBER',
25 'WINDOWLOG_MIN',
31 'WINDOWLOG_MIN',
26 'WINDOWLOG_MAX',
32 'WINDOWLOG_MAX',
27 'CHAINLOG_MIN',
33 'CHAINLOG_MIN',
28 'CHAINLOG_MAX',
34 'CHAINLOG_MAX',
29 'HASHLOG_MIN',
35 'HASHLOG_MIN',
30 'HASHLOG_MAX',
36 'HASHLOG_MAX',
31 'HASHLOG3_MAX',
37 'HASHLOG3_MAX',
32 'SEARCHLOG_MIN',
38 'SEARCHLOG_MIN',
33 'SEARCHLOG_MAX',
39 'SEARCHLOG_MAX',
34 'SEARCHLENGTH_MIN',
40 'SEARCHLENGTH_MIN',
35 'SEARCHLENGTH_MAX',
41 'SEARCHLENGTH_MAX',
36 'TARGETLENGTH_MIN',
42 'TARGETLENGTH_MIN',
37 'TARGETLENGTH_MAX',
43 'TARGETLENGTH_MAX',
38 'STRATEGY_FAST',
44 'STRATEGY_FAST',
39 'STRATEGY_DFAST',
45 'STRATEGY_DFAST',
40 'STRATEGY_GREEDY',
46 'STRATEGY_GREEDY',
41 'STRATEGY_LAZY',
47 'STRATEGY_LAZY',
42 'STRATEGY_LAZY2',
48 'STRATEGY_LAZY2',
43 'STRATEGY_BTLAZY2',
49 'STRATEGY_BTLAZY2',
44 'STRATEGY_BTOPT',
50 'STRATEGY_BTOPT',
45 )
51 )
46
52
47 for a in attrs:
53 for a in attrs:
48 self.assertTrue(hasattr(zstd, a))
54 self.assertTrue(hasattr(zstd, a), a)
@@ -1,64 +1,68 b''
1 import io
1 import io
2
2
3 try:
3 try:
4 import unittest2 as unittest
4 import unittest2 as unittest
5 except ImportError:
5 except ImportError:
6 import unittest
6 import unittest
7
7
8 try:
8 try:
9 import hypothesis
9 import hypothesis
10 import hypothesis.strategies as strategies
10 import hypothesis.strategies as strategies
11 except ImportError:
11 except ImportError:
12 raise unittest.SkipTest('hypothesis not available')
12 raise unittest.SkipTest('hypothesis not available')
13
13
14 import zstd
14 import zstd
15
15
16 from .common import (
17 make_cffi,
18 )
16
19
17 compression_levels = strategies.integers(min_value=1, max_value=22)
20 compression_levels = strategies.integers(min_value=1, max_value=22)
18
21
19
22
23 @make_cffi
20 class TestRoundTrip(unittest.TestCase):
24 class TestRoundTrip(unittest.TestCase):
21 @hypothesis.given(strategies.binary(), compression_levels)
25 @hypothesis.given(strategies.binary(), compression_levels)
22 def test_compress_write_to(self, data, level):
26 def test_compress_write_to(self, data, level):
23 """Random data from compress() roundtrips via write_to."""
27 """Random data from compress() roundtrips via write_to."""
24 cctx = zstd.ZstdCompressor(level=level)
28 cctx = zstd.ZstdCompressor(level=level)
25 compressed = cctx.compress(data)
29 compressed = cctx.compress(data)
26
30
27 buffer = io.BytesIO()
31 buffer = io.BytesIO()
28 dctx = zstd.ZstdDecompressor()
32 dctx = zstd.ZstdDecompressor()
29 with dctx.write_to(buffer) as decompressor:
33 with dctx.write_to(buffer) as decompressor:
30 decompressor.write(compressed)
34 decompressor.write(compressed)
31
35
32 self.assertEqual(buffer.getvalue(), data)
36 self.assertEqual(buffer.getvalue(), data)
33
37
34 @hypothesis.given(strategies.binary(), compression_levels)
38 @hypothesis.given(strategies.binary(), compression_levels)
35 def test_compressor_write_to_decompressor_write_to(self, data, level):
39 def test_compressor_write_to_decompressor_write_to(self, data, level):
36 """Random data from compressor write_to roundtrips via write_to."""
40 """Random data from compressor write_to roundtrips via write_to."""
37 compress_buffer = io.BytesIO()
41 compress_buffer = io.BytesIO()
38 decompressed_buffer = io.BytesIO()
42 decompressed_buffer = io.BytesIO()
39
43
40 cctx = zstd.ZstdCompressor(level=level)
44 cctx = zstd.ZstdCompressor(level=level)
41 with cctx.write_to(compress_buffer) as compressor:
45 with cctx.write_to(compress_buffer) as compressor:
42 compressor.write(data)
46 compressor.write(data)
43
47
44 dctx = zstd.ZstdDecompressor()
48 dctx = zstd.ZstdDecompressor()
45 with dctx.write_to(decompressed_buffer) as decompressor:
49 with dctx.write_to(decompressed_buffer) as decompressor:
46 decompressor.write(compress_buffer.getvalue())
50 decompressor.write(compress_buffer.getvalue())
47
51
48 self.assertEqual(decompressed_buffer.getvalue(), data)
52 self.assertEqual(decompressed_buffer.getvalue(), data)
49
53
50 @hypothesis.given(strategies.binary(average_size=1048576))
54 @hypothesis.given(strategies.binary(average_size=1048576))
51 @hypothesis.settings(perform_health_check=False)
55 @hypothesis.settings(perform_health_check=False)
52 def test_compressor_write_to_decompressor_write_to_larger(self, data):
56 def test_compressor_write_to_decompressor_write_to_larger(self, data):
53 compress_buffer = io.BytesIO()
57 compress_buffer = io.BytesIO()
54 decompressed_buffer = io.BytesIO()
58 decompressed_buffer = io.BytesIO()
55
59
56 cctx = zstd.ZstdCompressor(level=5)
60 cctx = zstd.ZstdCompressor(level=5)
57 with cctx.write_to(compress_buffer) as compressor:
61 with cctx.write_to(compress_buffer) as compressor:
58 compressor.write(data)
62 compressor.write(data)
59
63
60 dctx = zstd.ZstdDecompressor()
64 dctx = zstd.ZstdDecompressor()
61 with dctx.write_to(decompressed_buffer) as decompressor:
65 with dctx.write_to(decompressed_buffer) as decompressor:
62 decompressor.write(compress_buffer.getvalue())
66 decompressor.write(compress_buffer.getvalue())
63
67
64 self.assertEqual(decompressed_buffer.getvalue(), data)
68 self.assertEqual(decompressed_buffer.getvalue(), data)
@@ -1,46 +1,50 b''
1 import sys
1 import sys
2
2
3 try:
3 try:
4 import unittest2 as unittest
4 import unittest2 as unittest
5 except ImportError:
5 except ImportError:
6 import unittest
6 import unittest
7
7
8 import zstd
8 import zstd
9
9
10 from . common import (
11 make_cffi,
12 )
10
13
11 if sys.version_info[0] >= 3:
14 if sys.version_info[0] >= 3:
12 int_type = int
15 int_type = int
13 else:
16 else:
14 int_type = long
17 int_type = long
15
18
16
19
20 @make_cffi
17 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(unittest.TestCase):
18 def test_no_args(self):
22 def test_no_args(self):
19 with self.assertRaises(TypeError):
23 with self.assertRaises(TypeError):
20 zstd.train_dictionary()
24 zstd.train_dictionary()
21
25
22 def test_bad_args(self):
26 def test_bad_args(self):
23 with self.assertRaises(TypeError):
27 with self.assertRaises(TypeError):
24 zstd.train_dictionary(8192, u'foo')
28 zstd.train_dictionary(8192, u'foo')
25
29
26 with self.assertRaises(ValueError):
30 with self.assertRaises(ValueError):
27 zstd.train_dictionary(8192, [u'foo'])
31 zstd.train_dictionary(8192, [u'foo'])
28
32
29 def test_basic(self):
33 def test_basic(self):
30 samples = []
34 samples = []
31 for i in range(128):
35 for i in range(128):
32 samples.append(b'foo' * 64)
36 samples.append(b'foo' * 64)
33 samples.append(b'bar' * 64)
37 samples.append(b'bar' * 64)
34 samples.append(b'foobar' * 64)
38 samples.append(b'foobar' * 64)
35 samples.append(b'baz' * 64)
39 samples.append(b'baz' * 64)
36 samples.append(b'foobaz' * 64)
40 samples.append(b'foobaz' * 64)
37 samples.append(b'bazfoo' * 64)
41 samples.append(b'bazfoo' * 64)
38
42
39 d = zstd.train_dictionary(8192, samples)
43 d = zstd.train_dictionary(8192, samples)
40 self.assertLessEqual(len(d), 8192)
44 self.assertLessEqual(len(d), 8192)
41
45
42 dict_id = d.dict_id()
46 dict_id = d.dict_id()
43 self.assertIsInstance(dict_id, int_type)
47 self.assertIsInstance(dict_id, int_type)
44
48
45 data = d.as_bytes()
49 data = d.as_bytes()
46 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
@@ -1,136 +1,145 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 /* A Python C extension for Zstandard. */
9 /* A Python C extension for Zstandard. */
10
10
11 #include "python-zstandard.h"
11 #include "python-zstandard.h"
12
12
13 PyObject *ZstdError;
13 PyObject *ZstdError;
14
14
15 PyDoc_STRVAR(estimate_compression_context_size__doc__,
15 PyDoc_STRVAR(estimate_compression_context_size__doc__,
16 "estimate_compression_context_size(compression_parameters)\n"
16 "estimate_compression_context_size(compression_parameters)\n"
17 "\n"
17 "\n"
18 "Give the amount of memory allocated for a compression context given a\n"
18 "Give the amount of memory allocated for a compression context given a\n"
19 "CompressionParameters instance");
19 "CompressionParameters instance");
20
20
21 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
21 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
22 "estimate_decompression_context_size()\n"
22 "estimate_decompression_context_size()\n"
23 "\n"
23 "\n"
24 "Estimate the amount of memory allocated to a decompression context.\n"
24 "Estimate the amount of memory allocated to a decompression context.\n"
25 );
25 );
26
26
27 static PyObject* estimate_decompression_context_size(PyObject* self) {
27 static PyObject* estimate_decompression_context_size(PyObject* self) {
28 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
28 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
29 }
29 }
30
30
31 PyDoc_STRVAR(get_compression_parameters__doc__,
31 PyDoc_STRVAR(get_compression_parameters__doc__,
32 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
32 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
33 "\n"
33 "\n"
34 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
34 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
35 "optional input size and dictionary size");
35 "optional input size and dictionary size");
36
36
37 PyDoc_STRVAR(get_frame_parameters__doc__,
38 "get_frame_parameters(data)\n"
39 "\n"
40 "Obtains a ``FrameParameters`` instance by parsing data.\n");
41
37 PyDoc_STRVAR(train_dictionary__doc__,
42 PyDoc_STRVAR(train_dictionary__doc__,
38 "train_dictionary(dict_size, samples)\n"
43 "train_dictionary(dict_size, samples)\n"
39 "\n"
44 "\n"
40 "Train a dictionary from sample data.\n"
45 "Train a dictionary from sample data.\n"
41 "\n"
46 "\n"
42 "A compression dictionary of size ``dict_size`` will be created from the\n"
47 "A compression dictionary of size ``dict_size`` will be created from the\n"
43 "iterable of samples provided by ``samples``.\n"
48 "iterable of samples provided by ``samples``.\n"
44 "\n"
49 "\n"
45 "The raw dictionary content will be returned\n");
50 "The raw dictionary content will be returned\n");
46
51
47 static char zstd_doc[] = "Interface to zstandard";
52 static char zstd_doc[] = "Interface to zstandard";
48
53
49 static PyMethodDef zstd_methods[] = {
54 static PyMethodDef zstd_methods[] = {
50 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
55 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
51 METH_VARARGS, estimate_compression_context_size__doc__ },
56 METH_VARARGS, estimate_compression_context_size__doc__ },
52 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
57 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
53 METH_NOARGS, estimate_decompression_context_size__doc__ },
58 METH_NOARGS, estimate_decompression_context_size__doc__ },
54 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
59 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
55 METH_VARARGS, get_compression_parameters__doc__ },
60 METH_VARARGS, get_compression_parameters__doc__ },
61 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
62 METH_VARARGS, get_frame_parameters__doc__ },
56 { "train_dictionary", (PyCFunction)train_dictionary,
63 { "train_dictionary", (PyCFunction)train_dictionary,
57 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
64 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
58 { NULL, NULL }
65 { NULL, NULL }
59 };
66 };
60
67
61 void compressobj_module_init(PyObject* mod);
68 void compressobj_module_init(PyObject* mod);
62 void compressor_module_init(PyObject* mod);
69 void compressor_module_init(PyObject* mod);
63 void compressionparams_module_init(PyObject* mod);
70 void compressionparams_module_init(PyObject* mod);
64 void constants_module_init(PyObject* mod);
71 void constants_module_init(PyObject* mod);
65 void dictparams_module_init(PyObject* mod);
72 void dictparams_module_init(PyObject* mod);
66 void compressiondict_module_init(PyObject* mod);
73 void compressiondict_module_init(PyObject* mod);
67 void compressionwriter_module_init(PyObject* mod);
74 void compressionwriter_module_init(PyObject* mod);
68 void compressoriterator_module_init(PyObject* mod);
75 void compressoriterator_module_init(PyObject* mod);
69 void decompressor_module_init(PyObject* mod);
76 void decompressor_module_init(PyObject* mod);
70 void decompressobj_module_init(PyObject* mod);
77 void decompressobj_module_init(PyObject* mod);
71 void decompressionwriter_module_init(PyObject* mod);
78 void decompressionwriter_module_init(PyObject* mod);
72 void decompressoriterator_module_init(PyObject* mod);
79 void decompressoriterator_module_init(PyObject* mod);
80 void frameparams_module_init(PyObject* mod);
73
81
74 void zstd_module_init(PyObject* m) {
82 void zstd_module_init(PyObject* m) {
75 /* python-zstandard relies on unstable zstd C API features. This means
83 /* python-zstandard relies on unstable zstd C API features. This means
76 that changes in zstd may break expectations in python-zstandard.
84 that changes in zstd may break expectations in python-zstandard.
77
85
78 python-zstandard is distributed with a copy of the zstd sources.
86 python-zstandard is distributed with a copy of the zstd sources.
79 python-zstandard is only guaranteed to work with the bundled version
87 python-zstandard is only guaranteed to work with the bundled version
80 of zstd.
88 of zstd.
81
89
82 However, downstream redistributors or packagers may unbundle zstd
90 However, downstream redistributors or packagers may unbundle zstd
83 from python-zstandard. This can result in a mismatch between zstd
91 from python-zstandard. This can result in a mismatch between zstd
84 versions and API semantics. This essentially "voids the warranty"
92 versions and API semantics. This essentially "voids the warranty"
85 of python-zstandard and may cause undefined behavior.
93 of python-zstandard and may cause undefined behavior.
86
94
87 We detect this mismatch here and refuse to load the module if this
95 We detect this mismatch here and refuse to load the module if this
88 scenario is detected.
96 scenario is detected.
89 */
97 */
90 if (ZSTD_VERSION_NUMBER != 10102 || ZSTD_versionNumber() != 10102) {
98 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
91 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
99 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
92 return;
100 return;
93 }
101 }
94
102
95 compressionparams_module_init(m);
103 compressionparams_module_init(m);
96 dictparams_module_init(m);
104 dictparams_module_init(m);
97 compressiondict_module_init(m);
105 compressiondict_module_init(m);
98 compressobj_module_init(m);
106 compressobj_module_init(m);
99 compressor_module_init(m);
107 compressor_module_init(m);
100 compressionwriter_module_init(m);
108 compressionwriter_module_init(m);
101 compressoriterator_module_init(m);
109 compressoriterator_module_init(m);
102 constants_module_init(m);
110 constants_module_init(m);
103 decompressor_module_init(m);
111 decompressor_module_init(m);
104 decompressobj_module_init(m);
112 decompressobj_module_init(m);
105 decompressionwriter_module_init(m);
113 decompressionwriter_module_init(m);
106 decompressoriterator_module_init(m);
114 decompressoriterator_module_init(m);
115 frameparams_module_init(m);
107 }
116 }
108
117
109 #if PY_MAJOR_VERSION >= 3
118 #if PY_MAJOR_VERSION >= 3
110 static struct PyModuleDef zstd_module = {
119 static struct PyModuleDef zstd_module = {
111 PyModuleDef_HEAD_INIT,
120 PyModuleDef_HEAD_INIT,
112 "zstd",
121 "zstd",
113 zstd_doc,
122 zstd_doc,
114 -1,
123 -1,
115 zstd_methods
124 zstd_methods
116 };
125 };
117
126
118 PyMODINIT_FUNC PyInit_zstd(void) {
127 PyMODINIT_FUNC PyInit_zstd(void) {
119 PyObject *m = PyModule_Create(&zstd_module);
128 PyObject *m = PyModule_Create(&zstd_module);
120 if (m) {
129 if (m) {
121 zstd_module_init(m);
130 zstd_module_init(m);
122 if (PyErr_Occurred()) {
131 if (PyErr_Occurred()) {
123 Py_DECREF(m);
132 Py_DECREF(m);
124 m = NULL;
133 m = NULL;
125 }
134 }
126 }
135 }
127 return m;
136 return m;
128 }
137 }
129 #else
138 #else
130 PyMODINIT_FUNC initzstd(void) {
139 PyMODINIT_FUNC initzstd(void) {
131 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
140 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
132 if (m) {
141 if (m) {
133 zstd_module_init(m);
142 zstd_module_init(m);
134 }
143 }
135 }
144 }
136 #endif
145 #endif
@@ -1,372 +1,372 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
8 */
9
9
10 #ifndef MEM_H_MODULE
10 #ifndef MEM_H_MODULE
11 #define MEM_H_MODULE
11 #define MEM_H_MODULE
12
12
13 #if defined (__cplusplus)
13 #if defined (__cplusplus)
14 extern "C" {
14 extern "C" {
15 #endif
15 #endif
16
16
17 /*-****************************************
17 /*-****************************************
18 * Dependencies
18 * Dependencies
19 ******************************************/
19 ******************************************/
20 #include <stddef.h> /* size_t, ptrdiff_t */
20 #include <stddef.h> /* size_t, ptrdiff_t */
21 #include <string.h> /* memcpy */
21 #include <string.h> /* memcpy */
22
22
23
23
24 /*-****************************************
24 /*-****************************************
25 * Compiler specifics
25 * Compiler specifics
26 ******************************************/
26 ******************************************/
27 #if defined(_MSC_VER) /* Visual Studio */
27 #if defined(_MSC_VER) /* Visual Studio */
28 # include <stdlib.h> /* _byteswap_ulong */
28 # include <stdlib.h> /* _byteswap_ulong */
29 # include <intrin.h> /* _byteswap_* */
29 # include <intrin.h> /* _byteswap_* */
30 #endif
30 #endif
31 #if defined(__GNUC__)
31 #if defined(__GNUC__)
32 # define MEM_STATIC static __inline __attribute__((unused))
32 # define MEM_STATIC static __inline __attribute__((unused))
33 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
33 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
34 # define MEM_STATIC static inline
34 # define MEM_STATIC static inline
35 #elif defined(_MSC_VER)
35 #elif defined(_MSC_VER)
36 # define MEM_STATIC static __inline
36 # define MEM_STATIC static __inline
37 #else
37 #else
38 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
38 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
39 #endif
39 #endif
40
40
41 /* code only tested on 32 and 64 bits systems */
41 /* code only tested on 32 and 64 bits systems */
42 #define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
42 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
44
44
45
45
46 /*-**************************************************************
46 /*-**************************************************************
47 * Basic Types
47 * Basic Types
48 *****************************************************************/
48 *****************************************************************/
49 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
49 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
50 # include <stdint.h>
50 # include <stdint.h>
51 typedef uint8_t BYTE;
51 typedef uint8_t BYTE;
52 typedef uint16_t U16;
52 typedef uint16_t U16;
53 typedef int16_t S16;
53 typedef int16_t S16;
54 typedef uint32_t U32;
54 typedef uint32_t U32;
55 typedef int32_t S32;
55 typedef int32_t S32;
56 typedef uint64_t U64;
56 typedef uint64_t U64;
57 typedef int64_t S64;
57 typedef int64_t S64;
58 typedef intptr_t iPtrDiff;
58 typedef intptr_t iPtrDiff;
59 #else
59 #else
60 typedef unsigned char BYTE;
60 typedef unsigned char BYTE;
61 typedef unsigned short U16;
61 typedef unsigned short U16;
62 typedef signed short S16;
62 typedef signed short S16;
63 typedef unsigned int U32;
63 typedef unsigned int U32;
64 typedef signed int S32;
64 typedef signed int S32;
65 typedef unsigned long long U64;
65 typedef unsigned long long U64;
66 typedef signed long long S64;
66 typedef signed long long S64;
67 typedef ptrdiff_t iPtrDiff;
67 typedef ptrdiff_t iPtrDiff;
68 #endif
68 #endif
69
69
70
70
71 /*-**************************************************************
71 /*-**************************************************************
72 * Memory I/O
72 * Memory I/O
73 *****************************************************************/
73 *****************************************************************/
74 /* MEM_FORCE_MEMORY_ACCESS :
74 /* MEM_FORCE_MEMORY_ACCESS :
75 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
75 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
76 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
76 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
77 * The below switch allow to select different access method for improved performance.
77 * The below switch allow to select different access method for improved performance.
78 * Method 0 (default) : use `memcpy()`. Safe and portable.
78 * Method 0 (default) : use `memcpy()`. Safe and portable.
79 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
79 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
80 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
80 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
81 * Method 2 : direct access. This method is portable but violate C standard.
81 * Method 2 : direct access. This method is portable but violate C standard.
82 * It can generate buggy code on targets depending on alignment.
82 * It can generate buggy code on targets depending on alignment.
83 * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
83 * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
84 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
84 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
85 * Prefer these methods in priority order (0 > 1 > 2)
85 * Prefer these methods in priority order (0 > 1 > 2)
86 */
86 */
87 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
87 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
88 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
88 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
89 # define MEM_FORCE_MEMORY_ACCESS 2
89 # define MEM_FORCE_MEMORY_ACCESS 2
90 # elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
90 # elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
91 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
91 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
92 # define MEM_FORCE_MEMORY_ACCESS 1
92 # define MEM_FORCE_MEMORY_ACCESS 1
93 # endif
93 # endif
94 #endif
94 #endif
95
95
96 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
96 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
97 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
97 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
98
98
99 MEM_STATIC unsigned MEM_isLittleEndian(void)
99 MEM_STATIC unsigned MEM_isLittleEndian(void)
100 {
100 {
101 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
101 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
102 return one.c[0];
102 return one.c[0];
103 }
103 }
104
104
105 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
105 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
106
106
107 /* violates C standard, by lying on structure alignment.
107 /* violates C standard, by lying on structure alignment.
108 Only use if no other choice to achieve best performance on target platform */
108 Only use if no other choice to achieve best performance on target platform */
109 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
109 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
110 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
110 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
111 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
111 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
112 MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
112 MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
113
113
114 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
114 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
115 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
115 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
116 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
116 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
117
117
118 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
118 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
119
119
120 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
120 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
121 /* currently only defined for gcc and icc */
121 /* currently only defined for gcc and icc */
122 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
122 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
123 __pragma( pack(push, 1) )
123 __pragma( pack(push, 1) )
124 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
124 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
125 __pragma( pack(pop) )
125 __pragma( pack(pop) )
126 #else
126 #else
127 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
127 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
128 #endif
128 #endif
129
129
130 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
130 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
131 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
131 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
132 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
132 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
133 MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
133 MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
134
134
135 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
135 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
136 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
136 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
137 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
137 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
138
138
139 #else
139 #else
140
140
141 /* default method, safe and standard.
141 /* default method, safe and standard.
142 can sometimes prove slower */
142 can sometimes prove slower */
143
143
144 MEM_STATIC U16 MEM_read16(const void* memPtr)
144 MEM_STATIC U16 MEM_read16(const void* memPtr)
145 {
145 {
146 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
146 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
147 }
147 }
148
148
149 MEM_STATIC U32 MEM_read32(const void* memPtr)
149 MEM_STATIC U32 MEM_read32(const void* memPtr)
150 {
150 {
151 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
151 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
152 }
152 }
153
153
154 MEM_STATIC U64 MEM_read64(const void* memPtr)
154 MEM_STATIC U64 MEM_read64(const void* memPtr)
155 {
155 {
156 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
156 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
157 }
157 }
158
158
159 MEM_STATIC size_t MEM_readST(const void* memPtr)
159 MEM_STATIC size_t MEM_readST(const void* memPtr)
160 {
160 {
161 size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
161 size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
162 }
162 }
163
163
164 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
164 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
165 {
165 {
166 memcpy(memPtr, &value, sizeof(value));
166 memcpy(memPtr, &value, sizeof(value));
167 }
167 }
168
168
169 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
169 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
170 {
170 {
171 memcpy(memPtr, &value, sizeof(value));
171 memcpy(memPtr, &value, sizeof(value));
172 }
172 }
173
173
174 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
174 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
175 {
175 {
176 memcpy(memPtr, &value, sizeof(value));
176 memcpy(memPtr, &value, sizeof(value));
177 }
177 }
178
178
179 #endif /* MEM_FORCE_MEMORY_ACCESS */
179 #endif /* MEM_FORCE_MEMORY_ACCESS */
180
180
181 MEM_STATIC U32 MEM_swap32(U32 in)
181 MEM_STATIC U32 MEM_swap32(U32 in)
182 {
182 {
183 #if defined(_MSC_VER) /* Visual Studio */
183 #if defined(_MSC_VER) /* Visual Studio */
184 return _byteswap_ulong(in);
184 return _byteswap_ulong(in);
185 #elif defined (__GNUC__)
185 #elif defined (__GNUC__)
186 return __builtin_bswap32(in);
186 return __builtin_bswap32(in);
187 #else
187 #else
188 return ((in << 24) & 0xff000000 ) |
188 return ((in << 24) & 0xff000000 ) |
189 ((in << 8) & 0x00ff0000 ) |
189 ((in << 8) & 0x00ff0000 ) |
190 ((in >> 8) & 0x0000ff00 ) |
190 ((in >> 8) & 0x0000ff00 ) |
191 ((in >> 24) & 0x000000ff );
191 ((in >> 24) & 0x000000ff );
192 #endif
192 #endif
193 }
193 }
194
194
195 MEM_STATIC U64 MEM_swap64(U64 in)
195 MEM_STATIC U64 MEM_swap64(U64 in)
196 {
196 {
197 #if defined(_MSC_VER) /* Visual Studio */
197 #if defined(_MSC_VER) /* Visual Studio */
198 return _byteswap_uint64(in);
198 return _byteswap_uint64(in);
199 #elif defined (__GNUC__)
199 #elif defined (__GNUC__)
200 return __builtin_bswap64(in);
200 return __builtin_bswap64(in);
201 #else
201 #else
202 return ((in << 56) & 0xff00000000000000ULL) |
202 return ((in << 56) & 0xff00000000000000ULL) |
203 ((in << 40) & 0x00ff000000000000ULL) |
203 ((in << 40) & 0x00ff000000000000ULL) |
204 ((in << 24) & 0x0000ff0000000000ULL) |
204 ((in << 24) & 0x0000ff0000000000ULL) |
205 ((in << 8) & 0x000000ff00000000ULL) |
205 ((in << 8) & 0x000000ff00000000ULL) |
206 ((in >> 8) & 0x00000000ff000000ULL) |
206 ((in >> 8) & 0x00000000ff000000ULL) |
207 ((in >> 24) & 0x0000000000ff0000ULL) |
207 ((in >> 24) & 0x0000000000ff0000ULL) |
208 ((in >> 40) & 0x000000000000ff00ULL) |
208 ((in >> 40) & 0x000000000000ff00ULL) |
209 ((in >> 56) & 0x00000000000000ffULL);
209 ((in >> 56) & 0x00000000000000ffULL);
210 #endif
210 #endif
211 }
211 }
212
212
213 MEM_STATIC size_t MEM_swapST(size_t in)
213 MEM_STATIC size_t MEM_swapST(size_t in)
214 {
214 {
215 if (MEM_32bits())
215 if (MEM_32bits())
216 return (size_t)MEM_swap32((U32)in);
216 return (size_t)MEM_swap32((U32)in);
217 else
217 else
218 return (size_t)MEM_swap64((U64)in);
218 return (size_t)MEM_swap64((U64)in);
219 }
219 }
220
220
221 /*=== Little endian r/w ===*/
221 /*=== Little endian r/w ===*/
222
222
223 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
223 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
224 {
224 {
225 if (MEM_isLittleEndian())
225 if (MEM_isLittleEndian())
226 return MEM_read16(memPtr);
226 return MEM_read16(memPtr);
227 else {
227 else {
228 const BYTE* p = (const BYTE*)memPtr;
228 const BYTE* p = (const BYTE*)memPtr;
229 return (U16)(p[0] + (p[1]<<8));
229 return (U16)(p[0] + (p[1]<<8));
230 }
230 }
231 }
231 }
232
232
233 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
233 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
234 {
234 {
235 if (MEM_isLittleEndian()) {
235 if (MEM_isLittleEndian()) {
236 MEM_write16(memPtr, val);
236 MEM_write16(memPtr, val);
237 } else {
237 } else {
238 BYTE* p = (BYTE*)memPtr;
238 BYTE* p = (BYTE*)memPtr;
239 p[0] = (BYTE)val;
239 p[0] = (BYTE)val;
240 p[1] = (BYTE)(val>>8);
240 p[1] = (BYTE)(val>>8);
241 }
241 }
242 }
242 }
243
243
244 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
244 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
245 {
245 {
246 return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
246 return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
247 }
247 }
248
248
249 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
249 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
250 {
250 {
251 MEM_writeLE16(memPtr, (U16)val);
251 MEM_writeLE16(memPtr, (U16)val);
252 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
252 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
253 }
253 }
254
254
255 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
255 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
256 {
256 {
257 if (MEM_isLittleEndian())
257 if (MEM_isLittleEndian())
258 return MEM_read32(memPtr);
258 return MEM_read32(memPtr);
259 else
259 else
260 return MEM_swap32(MEM_read32(memPtr));
260 return MEM_swap32(MEM_read32(memPtr));
261 }
261 }
262
262
263 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
263 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
264 {
264 {
265 if (MEM_isLittleEndian())
265 if (MEM_isLittleEndian())
266 MEM_write32(memPtr, val32);
266 MEM_write32(memPtr, val32);
267 else
267 else
268 MEM_write32(memPtr, MEM_swap32(val32));
268 MEM_write32(memPtr, MEM_swap32(val32));
269 }
269 }
270
270
271 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
271 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
272 {
272 {
273 if (MEM_isLittleEndian())
273 if (MEM_isLittleEndian())
274 return MEM_read64(memPtr);
274 return MEM_read64(memPtr);
275 else
275 else
276 return MEM_swap64(MEM_read64(memPtr));
276 return MEM_swap64(MEM_read64(memPtr));
277 }
277 }
278
278
279 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
279 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
280 {
280 {
281 if (MEM_isLittleEndian())
281 if (MEM_isLittleEndian())
282 MEM_write64(memPtr, val64);
282 MEM_write64(memPtr, val64);
283 else
283 else
284 MEM_write64(memPtr, MEM_swap64(val64));
284 MEM_write64(memPtr, MEM_swap64(val64));
285 }
285 }
286
286
287 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
287 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
288 {
288 {
289 if (MEM_32bits())
289 if (MEM_32bits())
290 return (size_t)MEM_readLE32(memPtr);
290 return (size_t)MEM_readLE32(memPtr);
291 else
291 else
292 return (size_t)MEM_readLE64(memPtr);
292 return (size_t)MEM_readLE64(memPtr);
293 }
293 }
294
294
295 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
295 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
296 {
296 {
297 if (MEM_32bits())
297 if (MEM_32bits())
298 MEM_writeLE32(memPtr, (U32)val);
298 MEM_writeLE32(memPtr, (U32)val);
299 else
299 else
300 MEM_writeLE64(memPtr, (U64)val);
300 MEM_writeLE64(memPtr, (U64)val);
301 }
301 }
302
302
303 /*=== Big endian r/w ===*/
303 /*=== Big endian r/w ===*/
304
304
305 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
305 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
306 {
306 {
307 if (MEM_isLittleEndian())
307 if (MEM_isLittleEndian())
308 return MEM_swap32(MEM_read32(memPtr));
308 return MEM_swap32(MEM_read32(memPtr));
309 else
309 else
310 return MEM_read32(memPtr);
310 return MEM_read32(memPtr);
311 }
311 }
312
312
313 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
313 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
314 {
314 {
315 if (MEM_isLittleEndian())
315 if (MEM_isLittleEndian())
316 MEM_write32(memPtr, MEM_swap32(val32));
316 MEM_write32(memPtr, MEM_swap32(val32));
317 else
317 else
318 MEM_write32(memPtr, val32);
318 MEM_write32(memPtr, val32);
319 }
319 }
320
320
321 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
321 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
322 {
322 {
323 if (MEM_isLittleEndian())
323 if (MEM_isLittleEndian())
324 return MEM_swap64(MEM_read64(memPtr));
324 return MEM_swap64(MEM_read64(memPtr));
325 else
325 else
326 return MEM_read64(memPtr);
326 return MEM_read64(memPtr);
327 }
327 }
328
328
329 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
329 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
330 {
330 {
331 if (MEM_isLittleEndian())
331 if (MEM_isLittleEndian())
332 MEM_write64(memPtr, MEM_swap64(val64));
332 MEM_write64(memPtr, MEM_swap64(val64));
333 else
333 else
334 MEM_write64(memPtr, val64);
334 MEM_write64(memPtr, val64);
335 }
335 }
336
336
337 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
337 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
338 {
338 {
339 if (MEM_32bits())
339 if (MEM_32bits())
340 return (size_t)MEM_readBE32(memPtr);
340 return (size_t)MEM_readBE32(memPtr);
341 else
341 else
342 return (size_t)MEM_readBE64(memPtr);
342 return (size_t)MEM_readBE64(memPtr);
343 }
343 }
344
344
345 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
345 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
346 {
346 {
347 if (MEM_32bits())
347 if (MEM_32bits())
348 MEM_writeBE32(memPtr, (U32)val);
348 MEM_writeBE32(memPtr, (U32)val);
349 else
349 else
350 MEM_writeBE64(memPtr, (U64)val);
350 MEM_writeBE64(memPtr, (U64)val);
351 }
351 }
352
352
353
353
354 /* function safe only for comparisons */
354 /* function safe only for comparisons */
355 MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
355 MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
356 {
356 {
357 switch (length)
357 switch (length)
358 {
358 {
359 default :
359 default :
360 case 4 : return MEM_read32(memPtr);
360 case 4 : return MEM_read32(memPtr);
361 case 3 : if (MEM_isLittleEndian())
361 case 3 : if (MEM_isLittleEndian())
362 return MEM_read32(memPtr)<<8;
362 return MEM_read32(memPtr)<<8;
363 else
363 else
364 return MEM_read32(memPtr)>>8;
364 return MEM_read32(memPtr)>>8;
365 }
365 }
366 }
366 }
367
367
368 #if defined (__cplusplus)
368 #if defined (__cplusplus)
369 }
369 }
370 #endif
370 #endif
371
371
372 #endif /* MEM_H_MODULE */
372 #endif /* MEM_H_MODULE */
@@ -1,77 +1,73 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
8 */
9
9
10
10
11
11
12 /*-*************************************
12 /*-*************************************
13 * Dependencies
13 * Dependencies
14 ***************************************/
14 ***************************************/
15 #include <stdlib.h> /* malloc */
15 #include <stdlib.h> /* malloc */
16 #include "error_private.h"
16 #include "error_private.h"
17 #define ZSTD_STATIC_LINKING_ONLY
17 #define ZSTD_STATIC_LINKING_ONLY
18 #include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
18 #include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
19
19
20
20
21 /*-****************************************
21 /*-****************************************
22 * Version
22 * Version
23 ******************************************/
23 ******************************************/
24 unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
24 unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
25
25
26
26
27 /*-****************************************
27 /*-****************************************
28 * ZSTD Error Management
28 * ZSTD Error Management
29 ******************************************/
29 ******************************************/
30 /*! ZSTD_isError() :
30 /*! ZSTD_isError() :
31 * tells if a return value is an error code */
31 * tells if a return value is an error code */
32 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
32 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
33
33
34 /*! ZSTD_getErrorName() :
34 /*! ZSTD_getErrorName() :
35 * provides error code string from function result (useful for debugging) */
35 * provides error code string from function result (useful for debugging) */
36 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
36 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
37
37
38 /*! ZSTD_getError() :
38 /*! ZSTD_getError() :
39 * convert a `size_t` function result into a proper ZSTD_errorCode enum */
39 * convert a `size_t` function result into a proper ZSTD_errorCode enum */
40 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
40 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
41
41
42 /*! ZSTD_getErrorString() :
42 /*! ZSTD_getErrorString() :
43 * provides error code string from enum */
43 * provides error code string from enum */
44 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
44 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
45
45
46 /* --- ZBUFF Error Management (deprecated) --- */
47 unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
48 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
49
50
46
51 /*=**************************************************************
47 /*=**************************************************************
52 * Custom allocator
48 * Custom allocator
53 ****************************************************************/
49 ****************************************************************/
54 /* default uses stdlib */
50 /* default uses stdlib */
55 void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
51 void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
56 {
52 {
57 void* address = malloc(size);
53 void* address = malloc(size);
58 (void)opaque;
54 (void)opaque;
59 return address;
55 return address;
60 }
56 }
61
57
62 void ZSTD_defaultFreeFunction(void* opaque, void* address)
58 void ZSTD_defaultFreeFunction(void* opaque, void* address)
63 {
59 {
64 (void)opaque;
60 (void)opaque;
65 free(address);
61 free(address);
66 }
62 }
67
63
68 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
64 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
69 {
65 {
70 return customMem.customAlloc(customMem.opaque, size);
66 return customMem.customAlloc(customMem.opaque, size);
71 }
67 }
72
68
73 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
69 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
74 {
70 {
75 if (ptr!=NULL)
71 if (ptr!=NULL)
76 customMem.customFree(customMem.opaque, ptr);
72 customMem.customFree(customMem.opaque, ptr);
77 }
73 }
@@ -1,60 +1,74 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
8 */
9
9
10 #ifndef ZSTD_ERRORS_H_398273423
10 #ifndef ZSTD_ERRORS_H_398273423
11 #define ZSTD_ERRORS_H_398273423
11 #define ZSTD_ERRORS_H_398273423
12
12
13 #if defined (__cplusplus)
13 #if defined (__cplusplus)
14 extern "C" {
14 extern "C" {
15 #endif
15 #endif
16
16
17 /*===== dependency =====*/
17 /*===== dependency =====*/
18 #include <stddef.h> /* size_t */
18 #include <stddef.h> /* size_t */
19
19
20
20
21 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #else
25 # define ZSTDERRORLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
30 # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
31 #else
32 # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
33 #endif
34
21 /*-****************************************
35 /*-****************************************
22 * error codes list
36 * error codes list
23 ******************************************/
37 ******************************************/
24 typedef enum {
38 typedef enum {
25 ZSTD_error_no_error,
39 ZSTD_error_no_error,
26 ZSTD_error_GENERIC,
40 ZSTD_error_GENERIC,
27 ZSTD_error_prefix_unknown,
41 ZSTD_error_prefix_unknown,
28 ZSTD_error_version_unsupported,
42 ZSTD_error_version_unsupported,
29 ZSTD_error_parameter_unknown,
43 ZSTD_error_parameter_unknown,
30 ZSTD_error_frameParameter_unsupported,
44 ZSTD_error_frameParameter_unsupported,
31 ZSTD_error_frameParameter_unsupportedBy32bits,
45 ZSTD_error_frameParameter_unsupportedBy32bits,
32 ZSTD_error_frameParameter_windowTooLarge,
46 ZSTD_error_frameParameter_windowTooLarge,
33 ZSTD_error_compressionParameter_unsupported,
47 ZSTD_error_compressionParameter_unsupported,
34 ZSTD_error_init_missing,
48 ZSTD_error_init_missing,
35 ZSTD_error_memory_allocation,
49 ZSTD_error_memory_allocation,
36 ZSTD_error_stage_wrong,
50 ZSTD_error_stage_wrong,
37 ZSTD_error_dstSize_tooSmall,
51 ZSTD_error_dstSize_tooSmall,
38 ZSTD_error_srcSize_wrong,
52 ZSTD_error_srcSize_wrong,
39 ZSTD_error_corruption_detected,
53 ZSTD_error_corruption_detected,
40 ZSTD_error_checksum_wrong,
54 ZSTD_error_checksum_wrong,
41 ZSTD_error_tableLog_tooLarge,
55 ZSTD_error_tableLog_tooLarge,
42 ZSTD_error_maxSymbolValue_tooLarge,
56 ZSTD_error_maxSymbolValue_tooLarge,
43 ZSTD_error_maxSymbolValue_tooSmall,
57 ZSTD_error_maxSymbolValue_tooSmall,
44 ZSTD_error_dictionary_corrupted,
58 ZSTD_error_dictionary_corrupted,
45 ZSTD_error_dictionary_wrong,
59 ZSTD_error_dictionary_wrong,
46 ZSTD_error_maxCode
60 ZSTD_error_maxCode
47 } ZSTD_ErrorCode;
61 } ZSTD_ErrorCode;
48
62
49 /*! ZSTD_getErrorCode() :
63 /*! ZSTD_getErrorCode() :
50 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
64 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
51 which can be used to compare directly with enum list published into "error_public.h" */
65 which can be used to compare directly with enum list published into "error_public.h" */
52 ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
66 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
53 const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
67 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
54
68
55
69
56 #if defined (__cplusplus)
70 #if defined (__cplusplus)
57 }
71 }
58 #endif
72 #endif
59
73
60 #endif /* ZSTD_ERRORS_H_398273423 */
74 #endif /* ZSTD_ERRORS_H_398273423 */
@@ -1,270 +1,279 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
8 */
9
9
10 #ifndef ZSTD_CCOMMON_H_MODULE
10 #ifndef ZSTD_CCOMMON_H_MODULE
11 #define ZSTD_CCOMMON_H_MODULE
11 #define ZSTD_CCOMMON_H_MODULE
12
12
13 /*-*******************************************************
13 /*-*******************************************************
14 * Compiler specifics
14 * Compiler specifics
15 *********************************************************/
15 *********************************************************/
16 #ifdef _MSC_VER /* Visual Studio */
16 #ifdef _MSC_VER /* Visual Studio */
17 # define FORCE_INLINE static __forceinline
17 # define FORCE_INLINE static __forceinline
18 # include <intrin.h> /* For Visual 2005 */
18 # include <intrin.h> /* For Visual 2005 */
19 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
19 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
20 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
20 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
21 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
21 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
22 #else
22 #else
23 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
23 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
24 # ifdef __GNUC__
24 # ifdef __GNUC__
25 # define FORCE_INLINE static inline __attribute__((always_inline))
25 # define FORCE_INLINE static inline __attribute__((always_inline))
26 # else
26 # else
27 # define FORCE_INLINE static inline
27 # define FORCE_INLINE static inline
28 # endif
28 # endif
29 # else
29 # else
30 # define FORCE_INLINE static
30 # define FORCE_INLINE static
31 # endif /* __STDC_VERSION__ */
31 # endif /* __STDC_VERSION__ */
32 #endif
32 #endif
33
33
34 #ifdef _MSC_VER
34 #ifdef _MSC_VER
35 # define FORCE_NOINLINE static __declspec(noinline)
35 # define FORCE_NOINLINE static __declspec(noinline)
36 #else
36 #else
37 # ifdef __GNUC__
37 # ifdef __GNUC__
38 # define FORCE_NOINLINE static __attribute__((__noinline__))
38 # define FORCE_NOINLINE static __attribute__((__noinline__))
39 # else
39 # else
40 # define FORCE_NOINLINE static
40 # define FORCE_NOINLINE static
41 # endif
41 # endif
42 #endif
42 #endif
43
43
44
44
45 /*-*************************************
45 /*-*************************************
46 * Dependencies
46 * Dependencies
47 ***************************************/
47 ***************************************/
48 #include "mem.h"
48 #include "mem.h"
49 #include "error_private.h"
49 #include "error_private.h"
50 #define ZSTD_STATIC_LINKING_ONLY
50 #define ZSTD_STATIC_LINKING_ONLY
51 #include "zstd.h"
51 #include "zstd.h"
52
52
53
53
54 /*-*************************************
54 /*-*************************************
55 * shared macros
55 * shared macros
56 ***************************************/
56 ***************************************/
57 #define MIN(a,b) ((a)<(b) ? (a) : (b))
57 #define MIN(a,b) ((a)<(b) ? (a) : (b))
58 #define MAX(a,b) ((a)>(b) ? (a) : (b))
58 #define MAX(a,b) ((a)>(b) ? (a) : (b))
59 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
59 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
60 #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
60 #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
61
61
62
62
63 /*-*************************************
63 /*-*************************************
64 * Common constants
64 * Common constants
65 ***************************************/
65 ***************************************/
66 #define ZSTD_OPT_NUM (1<<12)
66 #define ZSTD_OPT_NUM (1<<12)
67 #define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
67 #define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
68
68
69 #define ZSTD_REP_NUM 3 /* number of repcodes */
69 #define ZSTD_REP_NUM 3 /* number of repcodes */
70 #define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
70 #define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
71 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
71 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
72 #define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
72 #define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
73 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
73 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
74
74
75 #define KB *(1 <<10)
75 #define KB *(1 <<10)
76 #define MB *(1 <<20)
76 #define MB *(1 <<20)
77 #define GB *(1U<<30)
77 #define GB *(1U<<30)
78
78
79 #define BIT7 128
79 #define BIT7 128
80 #define BIT6 64
80 #define BIT6 64
81 #define BIT5 32
81 #define BIT5 32
82 #define BIT4 16
82 #define BIT4 16
83 #define BIT1 2
83 #define BIT1 2
84 #define BIT0 1
84 #define BIT0 1
85
85
86 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
86 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
87 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
87 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
88 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
88 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
89
89
90 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
90 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
91 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
91 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
92 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
92 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
93
93
94 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
94 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
95 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
95 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
96
96
97 #define HufLog 12
97 #define HufLog 12
98 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
98 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
99
99
100 #define LONGNBSEQ 0x7F00
100 #define LONGNBSEQ 0x7F00
101
101
102 #define MINMATCH 3
102 #define MINMATCH 3
103 #define EQUAL_READ32 4
103 #define EQUAL_READ32 4
104
104
105 #define Litbits 8
105 #define Litbits 8
106 #define MaxLit ((1<<Litbits) - 1)
106 #define MaxLit ((1<<Litbits) - 1)
107 #define MaxML 52
107 #define MaxML 52
108 #define MaxLL 35
108 #define MaxLL 35
109 #define MaxOff 28
109 #define MaxOff 28
110 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
110 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
111 #define MLFSELog 9
111 #define MLFSELog 9
112 #define LLFSELog 9
112 #define LLFSELog 9
113 #define OffFSELog 8
113 #define OffFSELog 8
114
114
115 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
115 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
116 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
116 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
117 13,14,15,16 };
117 13,14,15,16 };
118 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
118 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
120 -1,-1,-1,-1 };
120 -1,-1,-1,-1 };
121 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
121 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
122 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
122 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
123
123
124 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
124 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
126 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
127 12,13,14,15,16 };
127 12,13,14,15,16 };
128 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
128 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
131 -1,-1,-1,-1,-1 };
131 -1,-1,-1,-1,-1 };
132 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
132 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
133 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
133 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
134
134
135 static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
135 static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
136 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
137 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
137 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
138 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
138 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
139
139
140
140
141 /*-*******************************************
141 /*-*******************************************
142 * Shared functions to include for inlining
142 * Shared functions to include for inlining
143 *********************************************/
143 *********************************************/
144 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
144 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
145 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
145 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
146
146
147 /*! ZSTD_wildcopy() :
147 /*! ZSTD_wildcopy() :
148 * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
148 * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
149 #define WILDCOPY_OVERLENGTH 8
149 #define WILDCOPY_OVERLENGTH 8
150 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
150 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
151 {
151 {
152 const BYTE* ip = (const BYTE*)src;
152 const BYTE* ip = (const BYTE*)src;
153 BYTE* op = (BYTE*)dst;
153 BYTE* op = (BYTE*)dst;
154 BYTE* const oend = op + length;
154 BYTE* const oend = op + length;
155 do
155 do
156 COPY8(op, ip)
156 COPY8(op, ip)
157 while (op < oend);
157 while (op < oend);
158 }
158 }
159
159
160 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
160 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
161 {
161 {
162 const BYTE* ip = (const BYTE*)src;
162 const BYTE* ip = (const BYTE*)src;
163 BYTE* op = (BYTE*)dst;
163 BYTE* op = (BYTE*)dst;
164 BYTE* const oend = (BYTE*)dstEnd;
164 BYTE* const oend = (BYTE*)dstEnd;
165 do
165 do
166 COPY8(op, ip)
166 COPY8(op, ip)
167 while (op < oend);
167 while (op < oend);
168 }
168 }
169
169
170
170
171 /*-*******************************************
171 /*-*******************************************
172 * Private interfaces
172 * Private interfaces
173 *********************************************/
173 *********************************************/
174 typedef struct ZSTD_stats_s ZSTD_stats_t;
174 typedef struct ZSTD_stats_s ZSTD_stats_t;
175
175
176 typedef struct {
176 typedef struct {
177 U32 off;
177 U32 off;
178 U32 len;
178 U32 len;
179 } ZSTD_match_t;
179 } ZSTD_match_t;
180
180
181 typedef struct {
181 typedef struct {
182 U32 price;
182 U32 price;
183 U32 off;
183 U32 off;
184 U32 mlen;
184 U32 mlen;
185 U32 litlen;
185 U32 litlen;
186 U32 rep[ZSTD_REP_NUM];
186 U32 rep[ZSTD_REP_NUM];
187 } ZSTD_optimal_t;
187 } ZSTD_optimal_t;
188
188
189
189
190 typedef struct seqDef_s {
190 typedef struct seqDef_s {
191 U32 offset;
191 U32 offset;
192 U16 litLength;
192 U16 litLength;
193 U16 matchLength;
193 U16 matchLength;
194 } seqDef;
194 } seqDef;
195
195
196
196
197 typedef struct {
197 typedef struct {
198 seqDef* sequencesStart;
198 seqDef* sequencesStart;
199 seqDef* sequences;
199 seqDef* sequences;
200 BYTE* litStart;
200 BYTE* litStart;
201 BYTE* lit;
201 BYTE* lit;
202 BYTE* llCode;
202 BYTE* llCode;
203 BYTE* mlCode;
203 BYTE* mlCode;
204 BYTE* ofCode;
204 BYTE* ofCode;
205 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
205 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
206 U32 longLengthPos;
206 U32 longLengthPos;
207 /* opt */
207 /* opt */
208 ZSTD_optimal_t* priceTable;
208 ZSTD_optimal_t* priceTable;
209 ZSTD_match_t* matchTable;
209 ZSTD_match_t* matchTable;
210 U32* matchLengthFreq;
210 U32* matchLengthFreq;
211 U32* litLengthFreq;
211 U32* litLengthFreq;
212 U32* litFreq;
212 U32* litFreq;
213 U32* offCodeFreq;
213 U32* offCodeFreq;
214 U32 matchLengthSum;
214 U32 matchLengthSum;
215 U32 matchSum;
215 U32 matchSum;
216 U32 litLengthSum;
216 U32 litLengthSum;
217 U32 litSum;
217 U32 litSum;
218 U32 offCodeSum;
218 U32 offCodeSum;
219 U32 log2matchLengthSum;
219 U32 log2matchLengthSum;
220 U32 log2matchSum;
220 U32 log2matchSum;
221 U32 log2litLengthSum;
221 U32 log2litLengthSum;
222 U32 log2litSum;
222 U32 log2litSum;
223 U32 log2offCodeSum;
223 U32 log2offCodeSum;
224 U32 factor;
224 U32 factor;
225 U32 staticPrices;
225 U32 staticPrices;
226 U32 cachedPrice;
226 U32 cachedPrice;
227 U32 cachedLitLength;
227 U32 cachedLitLength;
228 const BYTE* cachedLiterals;
228 const BYTE* cachedLiterals;
229 } seqStore_t;
229 } seqStore_t;
230
230
231 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
231 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
232 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
232 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
233 int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
233 int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
234
234
235 /* custom memory allocation functions */
235 /* custom memory allocation functions */
236 void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
236 void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
237 void ZSTD_defaultFreeFunction(void* opaque, void* address);
237 void ZSTD_defaultFreeFunction(void* opaque, void* address);
238 #ifndef ZSTD_DLL_IMPORT
238 #ifndef ZSTD_DLL_IMPORT
239 static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
239 static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
240 #endif
240 #endif
241 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
241 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
242 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
242 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
243
243
244
244
245 /*====== common function ======*/
245 /*====== common function ======*/
246
246
247 MEM_STATIC U32 ZSTD_highbit32(U32 val)
247 MEM_STATIC U32 ZSTD_highbit32(U32 val)
248 {
248 {
249 # if defined(_MSC_VER) /* Visual */
249 # if defined(_MSC_VER) /* Visual */
250 unsigned long r=0;
250 unsigned long r=0;
251 _BitScanReverse(&r, val);
251 _BitScanReverse(&r, val);
252 return (unsigned)r;
252 return (unsigned)r;
253 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
253 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
254 return 31 - __builtin_clz(val);
254 return 31 - __builtin_clz(val);
255 # else /* Software version */
255 # else /* Software version */
256 static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
256 static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
257 U32 v = val;
257 U32 v = val;
258 int r;
258 int r;
259 v |= v >> 1;
259 v |= v >> 1;
260 v |= v >> 2;
260 v |= v >> 2;
261 v |= v >> 4;
261 v |= v >> 4;
262 v |= v >> 8;
262 v |= v >> 8;
263 v |= v >> 16;
263 v |= v >> 16;
264 r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
264 r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
265 return r;
265 return r;
266 # endif
266 # endif
267 }
267 }
268
268
269
269
270 /* hidden functions */
271
272 /* ZSTD_invalidateRepCodes() :
273 * ensures next compression will not use repcodes from previous block.
274 * Note : only works with regular variant;
275 * do not use with extDict variant ! */
276 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
277
278
270 #endif /* ZSTD_CCOMMON_H_MODULE */
279 #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -1,3291 +1,3334 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
8 */
9
9
10
10
11 /*-*************************************
11 /*-*************************************
12 * Dependencies
12 * Dependencies
13 ***************************************/
13 ***************************************/
14 #include <string.h> /* memset */
14 #include <string.h> /* memset */
15 #include "mem.h"
15 #include "mem.h"
16 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
16 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17 #include "xxhash.h" /* XXH_reset, update, digest */
17 #include "xxhash.h" /* XXH_reset, update, digest */
18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
19 #include "fse.h"
19 #include "fse.h"
20 #define HUF_STATIC_LINKING_ONLY
20 #define HUF_STATIC_LINKING_ONLY
21 #include "huf.h"
21 #include "huf.h"
22 #include "zstd_internal.h" /* includes zstd.h */
22 #include "zstd_internal.h" /* includes zstd.h */
23
23
24
24
25 /*-*************************************
25 /*-*************************************
26 * Constants
26 * Constants
27 ***************************************/
27 ***************************************/
28 static const U32 g_searchStrength = 8; /* control skip over incompressible data */
28 static const U32 g_searchStrength = 8; /* control skip over incompressible data */
29 #define HASH_READ_SIZE 8
29 #define HASH_READ_SIZE 8
30 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
30 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
31
31
32
32
33 /*-*************************************
33 /*-*************************************
34 * Helper functions
34 * Helper functions
35 ***************************************/
35 ***************************************/
36 #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
36 #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
37 size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
37 size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
38
38
39
39
40 /*-*************************************
40 /*-*************************************
41 * Sequence storage
41 * Sequence storage
42 ***************************************/
42 ***************************************/
43 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
43 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
44 {
44 {
45 ssPtr->lit = ssPtr->litStart;
45 ssPtr->lit = ssPtr->litStart;
46 ssPtr->sequences = ssPtr->sequencesStart;
46 ssPtr->sequences = ssPtr->sequencesStart;
47 ssPtr->longLengthID = 0;
47 ssPtr->longLengthID = 0;
48 }
48 }
49
49
50
50
51 /*-*************************************
51 /*-*************************************
52 * Context memory management
52 * Context memory management
53 ***************************************/
53 ***************************************/
54 struct ZSTD_CCtx_s
54 struct ZSTD_CCtx_s {
55 {
56 const BYTE* nextSrc; /* next block here to continue on current prefix */
55 const BYTE* nextSrc; /* next block here to continue on current prefix */
57 const BYTE* base; /* All regular indexes relative to this position */
56 const BYTE* base; /* All regular indexes relative to this position */
58 const BYTE* dictBase; /* extDict indexes relative to this position */
57 const BYTE* dictBase; /* extDict indexes relative to this position */
59 U32 dictLimit; /* below that point, need extDict */
58 U32 dictLimit; /* below that point, need extDict */
60 U32 lowLimit; /* below that point, no more data */
59 U32 lowLimit; /* below that point, no more data */
61 U32 nextToUpdate; /* index from which to continue dictionary update */
60 U32 nextToUpdate; /* index from which to continue dictionary update */
62 U32 nextToUpdate3; /* index from which to continue dictionary update */
61 U32 nextToUpdate3; /* index from which to continue dictionary update */
63 U32 hashLog3; /* dispatch table : larger == faster, more memory */
62 U32 hashLog3; /* dispatch table : larger == faster, more memory */
64 U32 loadedDictEnd;
63 U32 loadedDictEnd; /* index of end of dictionary */
64 U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
65 ZSTD_compressionStage_e stage;
65 ZSTD_compressionStage_e stage;
66 U32 rep[ZSTD_REP_NUM];
66 U32 rep[ZSTD_REP_NUM];
67 U32 savedRep[ZSTD_REP_NUM];
67 U32 repToConfirm[ZSTD_REP_NUM];
68 U32 dictID;
68 U32 dictID;
69 ZSTD_parameters params;
69 ZSTD_parameters params;
70 void* workSpace;
70 void* workSpace;
71 size_t workSpaceSize;
71 size_t workSpaceSize;
72 size_t blockSize;
72 size_t blockSize;
73 U64 frameContentSize;
73 U64 frameContentSize;
74 XXH64_state_t xxhState;
74 XXH64_state_t xxhState;
75 ZSTD_customMem customMem;
75 ZSTD_customMem customMem;
76
76
77 seqStore_t seqStore; /* sequences storage ptrs */
77 seqStore_t seqStore; /* sequences storage ptrs */
78 U32* hashTable;
78 U32* hashTable;
79 U32* hashTable3;
79 U32* hashTable3;
80 U32* chainTable;
80 U32* chainTable;
81 HUF_CElt* hufTable;
81 HUF_CElt* hufTable;
82 U32 flagStaticTables;
82 U32 flagStaticTables;
83 FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
83 FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
84 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
84 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
85 FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
85 FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
86 unsigned tmpCounters[1024];
86 unsigned tmpCounters[1024];
87 };
87 };
88
88
89 ZSTD_CCtx* ZSTD_createCCtx(void)
89 ZSTD_CCtx* ZSTD_createCCtx(void)
90 {
90 {
91 return ZSTD_createCCtx_advanced(defaultCustomMem);
91 return ZSTD_createCCtx_advanced(defaultCustomMem);
92 }
92 }
93
93
94 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
94 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
95 {
95 {
96 ZSTD_CCtx* cctx;
96 ZSTD_CCtx* cctx;
97
97
98 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
98 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
99 if (!customMem.customAlloc || !customMem.customFree) return NULL;
99 if (!customMem.customAlloc || !customMem.customFree) return NULL;
100
100
101 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
101 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
102 if (!cctx) return NULL;
102 if (!cctx) return NULL;
103 memset(cctx, 0, sizeof(ZSTD_CCtx));
103 memset(cctx, 0, sizeof(ZSTD_CCtx));
104 memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
104 cctx->customMem = customMem;
105 return cctx;
105 return cctx;
106 }
106 }
107
107
108 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
108 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
109 {
109 {
110 if (cctx==NULL) return 0; /* support free on NULL */
110 if (cctx==NULL) return 0; /* support free on NULL */
111 ZSTD_free(cctx->workSpace, cctx->customMem);
111 ZSTD_free(cctx->workSpace, cctx->customMem);
112 ZSTD_free(cctx, cctx->customMem);
112 ZSTD_free(cctx, cctx->customMem);
113 return 0; /* reserved as a potential error code in the future */
113 return 0; /* reserved as a potential error code in the future */
114 }
114 }
115
115
116 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
116 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
117 {
117 {
118 if (cctx==NULL) return 0; /* support sizeof on NULL */
118 if (cctx==NULL) return 0; /* support sizeof on NULL */
119 return sizeof(*cctx) + cctx->workSpaceSize;
119 return sizeof(*cctx) + cctx->workSpaceSize;
120 }
120 }
121
121
122 size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
123 {
124 switch(param)
125 {
126 case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127 default: return ERROR(parameter_unknown);
128 }
129 }
130
122 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
131 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
123 {
132 {
124 return &(ctx->seqStore);
133 return &(ctx->seqStore);
125 }
134 }
126
135
127 static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx)
136 static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx)
128 {
137 {
129 return cctx->params;
138 return cctx->params;
130 }
139 }
131
140
132
141
133 /** ZSTD_checkParams() :
142 /** ZSTD_checkParams() :
134 ensure param values remain within authorized range.
143 ensure param values remain within authorized range.
135 @return : 0, or an error code if one value is beyond authorized range */
144 @return : 0, or an error code if one value is beyond authorized range */
136 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
145 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
137 {
146 {
138 # define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
147 # define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
139 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
148 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
140 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
149 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
141 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
150 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
142 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
151 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
143 { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
152 { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
144 U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
153 U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
145 CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
154 CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
146 CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
155 CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
147 if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported);
156 if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported);
148 return 0;
157 return 0;
149 }
158 }
150
159
151
160
152 /** ZSTD_cycleLog() :
161 /** ZSTD_cycleLog() :
153 * condition for correct operation : hashLog > 1 */
162 * condition for correct operation : hashLog > 1 */
154 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
163 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
155 {
164 {
156 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
165 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
157 return hashLog - btScale;
166 return hashLog - btScale;
158 }
167 }
159
168
160 /** ZSTD_adjustCParams() :
169 /** ZSTD_adjustCParams() :
161 optimize `cPar` for a given input (`srcSize` and `dictSize`).
170 optimize `cPar` for a given input (`srcSize` and `dictSize`).
162 mostly downsizing to reduce memory consumption and initialization.
171 mostly downsizing to reduce memory consumption and initialization.
163 Both `srcSize` and `dictSize` are optional (use 0 if unknown),
172 Both `srcSize` and `dictSize` are optional (use 0 if unknown),
164 but if both are 0, no optimization can be done.
173 but if both are 0, no optimization can be done.
165 Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
174 Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
166 ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
175 ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
167 {
176 {
168 if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */
177 if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */
169
178
170 /* resize params, to use less memory when necessary */
179 /* resize params, to use less memory when necessary */
171 { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
180 { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
172 U64 const rSize = srcSize + dictSize + minSrcSize;
181 U64 const rSize = srcSize + dictSize + minSrcSize;
173 if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
182 if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
174 U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
183 U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
175 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
184 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
176 } }
185 } }
177 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
186 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
178 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
187 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
179 if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
188 if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
180 }
189 }
181
190
182 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
191 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
183
192
184 return cPar;
193 return cPar;
185 }
194 }
186
195
187
196
188 size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
197 size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
189 {
198 {
190 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
199 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
191 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
200 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
192 size_t const maxNbSeq = blockSize / divider;
201 size_t const maxNbSeq = blockSize / divider;
193 size_t const tokenSpace = blockSize + 11*maxNbSeq;
202 size_t const tokenSpace = blockSize + 11*maxNbSeq;
194
203
195 size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
204 size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
196 size_t const hSize = ((size_t)1) << cParams.hashLog;
205 size_t const hSize = ((size_t)1) << cParams.hashLog;
197 U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
206 U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
198 size_t const h3Size = ((size_t)1) << hashLog3;
207 size_t const h3Size = ((size_t)1) << hashLog3;
199 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
208 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
200
209
201 size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
210 size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
202 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
211 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
203 size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
212 size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
204 + (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
213 + (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
205
214
206 return sizeof(ZSTD_CCtx) + neededSpace;
215 return sizeof(ZSTD_CCtx) + neededSpace;
207 }
216 }
208
217
209
218
210 static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
219 static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
211 {
220 {
212 return (param1.cParams.hashLog == param2.cParams.hashLog)
221 return (param1.cParams.hashLog == param2.cParams.hashLog)
213 & (param1.cParams.chainLog == param2.cParams.chainLog)
222 & (param1.cParams.chainLog == param2.cParams.chainLog)
214 & (param1.cParams.strategy == param2.cParams.strategy)
223 & (param1.cParams.strategy == param2.cParams.strategy)
215 & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3));
224 & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3));
216 }
225 }
217
226
218 /*! ZSTD_continueCCtx() :
227 /*! ZSTD_continueCCtx() :
219 reuse CCtx without reset (note : requires no dictionary) */
228 reuse CCtx without reset (note : requires no dictionary) */
220 static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
229 static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
221 {
230 {
222 U32 const end = (U32)(cctx->nextSrc - cctx->base);
231 U32 const end = (U32)(cctx->nextSrc - cctx->base);
223 cctx->params = params;
232 cctx->params = params;
224 cctx->frameContentSize = frameContentSize;
233 cctx->frameContentSize = frameContentSize;
225 cctx->lowLimit = end;
234 cctx->lowLimit = end;
226 cctx->dictLimit = end;
235 cctx->dictLimit = end;
227 cctx->nextToUpdate = end+1;
236 cctx->nextToUpdate = end+1;
228 cctx->stage = ZSTDcs_init;
237 cctx->stage = ZSTDcs_init;
229 cctx->dictID = 0;
238 cctx->dictID = 0;
230 cctx->loadedDictEnd = 0;
239 cctx->loadedDictEnd = 0;
231 { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
240 { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
232 cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
241 cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
233 XXH64_reset(&cctx->xxhState, 0);
242 XXH64_reset(&cctx->xxhState, 0);
234 return 0;
243 return 0;
235 }
244 }
236
245
237 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
246 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
238
247
239 /*! ZSTD_resetCCtx_advanced() :
248 /*! ZSTD_resetCCtx_advanced() :
240 note : 'params' must be validated */
249 note : 'params' must be validated */
241 static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
250 static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
242 ZSTD_parameters params, U64 frameContentSize,
251 ZSTD_parameters params, U64 frameContentSize,
243 ZSTD_compResetPolicy_e const crp)
252 ZSTD_compResetPolicy_e const crp)
244 {
253 {
245 if (crp == ZSTDcrp_continue)
254 if (crp == ZSTDcrp_continue)
246 if (ZSTD_equivalentParams(params, zc->params))
255 if (ZSTD_equivalentParams(params, zc->params))
247 return ZSTD_continueCCtx(zc, params, frameContentSize);
256 return ZSTD_continueCCtx(zc, params, frameContentSize);
248
257
249 { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
258 { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
250 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
259 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
251 size_t const maxNbSeq = blockSize / divider;
260 size_t const maxNbSeq = blockSize / divider;
252 size_t const tokenSpace = blockSize + 11*maxNbSeq;
261 size_t const tokenSpace = blockSize + 11*maxNbSeq;
253 size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
262 size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
254 size_t const hSize = ((size_t)1) << params.cParams.hashLog;
263 size_t const hSize = ((size_t)1) << params.cParams.hashLog;
255 U32 const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
264 U32 const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
256 size_t const h3Size = ((size_t)1) << hashLog3;
265 size_t const h3Size = ((size_t)1) << hashLog3;
257 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
266 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
258 void* ptr;
267 void* ptr;
259
268
260 /* Check if workSpace is large enough, alloc a new one if needed */
269 /* Check if workSpace is large enough, alloc a new one if needed */
261 { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
270 { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
262 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
271 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
263 size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
272 size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
264 + (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
273 + (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
265 if (zc->workSpaceSize < neededSpace) {
274 if (zc->workSpaceSize < neededSpace) {
266 ZSTD_free(zc->workSpace, zc->customMem);
275 ZSTD_free(zc->workSpace, zc->customMem);
267 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
276 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
268 if (zc->workSpace == NULL) return ERROR(memory_allocation);
277 if (zc->workSpace == NULL) return ERROR(memory_allocation);
269 zc->workSpaceSize = neededSpace;
278 zc->workSpaceSize = neededSpace;
270 } }
279 } }
271
280
272 if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace); /* reset tables only */
281 if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace); /* reset tables only */
273 XXH64_reset(&zc->xxhState, 0);
282 XXH64_reset(&zc->xxhState, 0);
274 zc->hashLog3 = hashLog3;
283 zc->hashLog3 = hashLog3;
275 zc->hashTable = (U32*)(zc->workSpace);
284 zc->hashTable = (U32*)(zc->workSpace);
276 zc->chainTable = zc->hashTable + hSize;
285 zc->chainTable = zc->hashTable + hSize;
277 zc->hashTable3 = zc->chainTable + chainSize;
286 zc->hashTable3 = zc->chainTable + chainSize;
278 ptr = zc->hashTable3 + h3Size;
287 ptr = zc->hashTable3 + h3Size;
279 zc->hufTable = (HUF_CElt*)ptr;
288 zc->hufTable = (HUF_CElt*)ptr;
280 zc->flagStaticTables = 0;
289 zc->flagStaticTables = 0;
281 ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
290 ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
282
291
283 zc->nextToUpdate = 1;
292 zc->nextToUpdate = 1;
284 zc->nextSrc = NULL;
293 zc->nextSrc = NULL;
285 zc->base = NULL;
294 zc->base = NULL;
286 zc->dictBase = NULL;
295 zc->dictBase = NULL;
287 zc->dictLimit = 0;
296 zc->dictLimit = 0;
288 zc->lowLimit = 0;
297 zc->lowLimit = 0;
289 zc->params = params;
298 zc->params = params;
290 zc->blockSize = blockSize;
299 zc->blockSize = blockSize;
291 zc->frameContentSize = frameContentSize;
300 zc->frameContentSize = frameContentSize;
292 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
301 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
293
302
294 if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
303 if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
295 zc->seqStore.litFreq = (U32*)ptr;
304 zc->seqStore.litFreq = (U32*)ptr;
296 zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
305 zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
297 zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
306 zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
298 zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
307 zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
299 ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
308 ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
300 zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
309 zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
301 ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
310 ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
302 zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
311 zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
303 ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
312 ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
304 zc->seqStore.litLengthSum = 0;
313 zc->seqStore.litLengthSum = 0;
305 }
314 }
306 zc->seqStore.sequencesStart = (seqDef*)ptr;
315 zc->seqStore.sequencesStart = (seqDef*)ptr;
307 ptr = zc->seqStore.sequencesStart + maxNbSeq;
316 ptr = zc->seqStore.sequencesStart + maxNbSeq;
308 zc->seqStore.llCode = (BYTE*) ptr;
317 zc->seqStore.llCode = (BYTE*) ptr;
309 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
318 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
310 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
319 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
311 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
320 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
312
321
313 zc->stage = ZSTDcs_init;
322 zc->stage = ZSTDcs_init;
314 zc->dictID = 0;
323 zc->dictID = 0;
315 zc->loadedDictEnd = 0;
324 zc->loadedDictEnd = 0;
316
325
317 return 0;
326 return 0;
318 }
327 }
319 }
328 }
320
329
330 /* ZSTD_invalidateRepCodes() :
331 * ensures next compression will not use repcodes from previous block.
332 * Note : only works with regular variant;
333 * do not use with extDict variant ! */
334 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
335 int i;
336 for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
337 }
321
338
/*! ZSTD_copyCCtx() :
*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
*   @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
    if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);

    memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    /* ZSTDcrp_noMemset : no need to clear the workspace tables, they are fully
     * overwritten by the memcpy below */
    ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);

    /* copy tables : hash table, chain table and hash3 table are laid out
     * contiguously at the start of workSpace (see ZSTD_resetCCtx_advanced) */
    { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
      size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
      size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
      size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
      memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
    }

    /* copy dictionary offsets */
    dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
    dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
    dstCCtx->nextSrc      = srcCCtx->nextSrc;
    dstCCtx->base         = srcCCtx->base;
    dstCCtx->dictBase     = srcCCtx->dictBase;
    dstCCtx->dictLimit    = srcCCtx->dictLimit;
    dstCCtx->lowLimit     = srcCCtx->lowLimit;
    dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
    dstCCtx->dictID       = srcCCtx->dictID;

    /* copy entropy tables, only valid when flagStaticTables is set */
    dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
    if (srcCCtx->flagStaticTables) {
        /* 256*4 : hufTable spans 256 U32 cells (HUF_CElt* is an incomplete type,
         * its size is simulated using U32 — see ZSTD_resetCCtx_advanced) */
        memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
    }

    return 0;
}
363
380
364
381
/*! ZSTD_reduceTable() :
*   reduce table indexes by `reducerValue`;
*   entries smaller than `reducerValue` are clamped to 0 (invalidated) */
static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
{
    U32 cellNb;
    for (cellNb = 0; cellNb < size; cellNb++) {
        U32 const index = table[cellNb];
        table[cellNb] = (index < reducerValue) ? 0 : index - reducerValue;
    }
}
375
392
376 /*! ZSTD_reduceIndex() :
393 /*! ZSTD_reduceIndex() :
377 * rescale all indexes to avoid future overflow (indexes are U32) */
394 * rescale all indexes to avoid future overflow (indexes are U32) */
378 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
395 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
379 {
396 {
380 { U32 const hSize = 1 << zc->params.cParams.hashLog;
397 { U32 const hSize = 1 << zc->params.cParams.hashLog;
381 ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
398 ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
382
399
383 { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
400 { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
384 ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
401 ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
385
402
386 { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
403 { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
387 ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
404 ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
388 }
405 }
389
406
390
407
/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See doc/zstd_compression_format.md for detailed format description */
396
413
397 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
414 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
398 {
415 {
399 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
416 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
400 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
417 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
401 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
418 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
402 return ZSTD_blockHeaderSize+srcSize;
419 return ZSTD_blockHeaderSize+srcSize;
403 }
420 }
404
421
405
422
/* Store literals uncompressed : a 1-3 byte literals-section header (set_basic)
 * followed by a raw copy of `src`.
 * Header size grows with srcSize : 1 byte (<=31), 2 bytes (<=4095), 3 bytes otherwise.
 * @return : header size + srcSize, or an error code if `dst` is too small */
static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;
    U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);

    if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);

    /* field-width comments below are : blockType - sizeFormat - regeneratedSize (in bits) */
    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
            break;
        default:   /*note : should not be necessary : flSize is within {1,2,3} */
        case 3: /* 2 - 2 - 20 */
            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
            break;
    }

    memcpy(ostart + flSize, src, srcSize);
    return srcSize + flSize;
}
430
447
/* Store literals as RLE : a 1-3 byte literals-section header (set_rle) followed by
 * the single byte to repeat `srcSize` times on decompression.
 * @return : header size + 1 (never fails : caller guarantees dstCapacity >= 4) */
static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;
    U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);

    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */

    /* field-width comments below are : blockType - sizeFormat - regeneratedSize (in bits) */
    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
            break;
        default:   /*note : should not be necessary : flSize is necessarily within {1,2,3} */
        case 3: /* 2 - 2 - 20 */
            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
            break;
    }

    ostart[flSize] = *(const BYTE*)src;   /* the repeated byte */
    return flSize+1;
}
455
472
456
473
/* Minimum number of bytes a compressed form must save (vs raw storage)
 * for the compressed form to be preferred : srcSize/64 + 2. */
static size_t ZSTD_minGain(size_t srcSize)
{
    return (srcSize >> 6) + 2;
}
458
475
/* Compress the literals section of a block with Huffman (HUF) coding.
 * Falls back to raw storage when input is tiny or compression does not gain enough,
 * and to RLE when the compressed payload collapses to a single byte.
 * @return : total size written (header + compressed literals), or an error code */
static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    size_t const minGain = ZSTD_minGain(srcSize);
    /* literals header is 3, 4 or 5 bytes depending on srcSize */
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE* const ostart = (BYTE*)dst;
    U32 singleStream = srcSize < 256;   /* small inputs use 1 HUF stream instead of 4 */
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;


    /* small ? don't even attempt compression (speed opt) */
#   define LITERAL_NOENTROPY 63
    {   size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
    if (zc->flagStaticTables && (lhSize==3)) {
        /* reuse the previously transmitted HUF table (set_repeat) */
        hType = set_repeat;
        singleStream = 1;
        cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
    } else {
        cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters))
                                : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters));
    }

    /* not compressible, or gain too small : store raw */
    if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    /* compressed to a single byte : RLE is cheaper */
    if (cLitSize==1)
        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);

    /* Build header : field-width comments are
     * blockType - sizeFormat - regeneratedSize - compressedSize (in bits) */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    default:   /* should not be necessary, lhSize is only {3,4,5} */
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);   /* top bits of compressedSize spill into a 5th byte */
            break;
        }
    }
    return lhSize+cLitSize;
}
515
532
/* LL_Code : maps a literal length value (0-63) to its literal-length code.
 * Values > 63 are encoded via ZSTD_highbit32 + LL_deltaCode instead (see ZSTD_seqToCodes). */
static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                   8,  9, 10, 11, 12, 13, 14, 15,
                                  16, 16, 17, 17, 18, 18, 19, 19,
                                  20, 20, 20, 20, 21, 21, 21, 21,
                                  22, 22, 22, 22, 22, 22, 22, 22,
                                  23, 23, 23, 23, 23, 23, 23, 23,
                                  24, 24, 24, 24, 24, 24, 24, 24,
                                  24, 24, 24, 24, 24, 24, 24, 24 };

/* ML_Code : maps a match length value (0-127) to its match-length code.
 * Values > 127 are encoded via ZSTD_highbit32 + ML_deltaCode instead (see ZSTD_seqToCodes). */
static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                  32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                  38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                  41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
533
550
534
551
/* ZSTD_seqToCodes() :
 * Convert each sequence's literal length, offset and match length into the
 * symbol codes used for FSE encoding, filling the llCode/ofCode/mlCode tables.
 * Small lengths map through LL_Code/ML_Code; larger ones use highbit + delta.
 * A "long length" marked by longLengthID is forced to the max code (MaxLL/MaxML). */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    BYTE const LL_deltaCode = 19;   /* code = highbit(llv) + 19 for llv > 63 */
    BYTE const ML_deltaCode = 36;   /* code = highbit(mlv) + 36 for mlv > 127 */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].matchLength;
        llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);   /* offset code is just the highbit */
        mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
    }
    /* longLengthID : 1 means the long length is a literal length, 2 a match length */
    if (seqStorePtr->longLengthID==1)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    if (seqStorePtr->longLengthID==2)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
557
574
558
575
559 size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
576 size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
560 void* dst, size_t dstCapacity,
577 void* dst, size_t dstCapacity,
561 size_t srcSize)
578 size_t srcSize)
562 {
579 {
563 const seqStore_t* seqStorePtr = &(zc->seqStore);
580 const seqStore_t* seqStorePtr = &(zc->seqStore);
564 U32 count[MaxSeq+1];
581 U32 count[MaxSeq+1];
565 S16 norm[MaxSeq+1];
582 S16 norm[MaxSeq+1];
566 FSE_CTable* CTable_LitLength = zc->litlengthCTable;
583 FSE_CTable* CTable_LitLength = zc->litlengthCTable;
567 FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
584 FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
568 FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
585 FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
569 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
586 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
570 const seqDef* const sequences = seqStorePtr->sequencesStart;
587 const seqDef* const sequences = seqStorePtr->sequencesStart;
571 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
588 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
572 const BYTE* const llCodeTable = seqStorePtr->llCode;
589 const BYTE* const llCodeTable = seqStorePtr->llCode;
573 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
590 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
574 BYTE* const ostart = (BYTE*)dst;
591 BYTE* const ostart = (BYTE*)dst;
575 BYTE* const oend = ostart + dstCapacity;
592 BYTE* const oend = ostart + dstCapacity;
576 BYTE* op = ostart;
593 BYTE* op = ostart;
577 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
594 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
578 BYTE* seqHead;
595 BYTE* seqHead;
579 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
596 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
580
597
581 /* Compress literals */
598 /* Compress literals */
582 { const BYTE* const literals = seqStorePtr->litStart;
599 { const BYTE* const literals = seqStorePtr->litStart;
583 size_t const litSize = seqStorePtr->lit - literals;
600 size_t const litSize = seqStorePtr->lit - literals;
584 size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
601 size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
585 if (ZSTD_isError(cSize)) return cSize;
602 if (ZSTD_isError(cSize)) return cSize;
586 op += cSize;
603 op += cSize;
587 }
604 }
588
605
589 /* Sequences Header */
606 /* Sequences Header */
590 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
607 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
591 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
608 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
592 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
609 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
593 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
610 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
594 if (nbSeq==0) goto _check_compressibility;
611 if (nbSeq==0) goto _check_compressibility;
595
612
596 /* seqHead : flags for FSE encoding type */
613 /* seqHead : flags for FSE encoding type */
597 seqHead = op++;
614 seqHead = op++;
598
615
599 #define MIN_SEQ_FOR_DYNAMIC_FSE 64
616 #define MIN_SEQ_FOR_DYNAMIC_FSE 64
600 #define MAX_SEQ_FOR_STATIC_FSE 1000
617 #define MAX_SEQ_FOR_STATIC_FSE 1000
601
618
602 /* convert length/distances into codes */
619 /* convert length/distances into codes */
603 ZSTD_seqToCodes(seqStorePtr);
620 ZSTD_seqToCodes(seqStorePtr);
604
621
605 /* CTable for Literal Lengths */
622 /* CTable for Literal Lengths */
606 { U32 max = MaxLL;
623 { U32 max = MaxLL;
607 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters);
624 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters);
608 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
625 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
609 *op++ = llCodeTable[0];
626 *op++ = llCodeTable[0];
610 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
627 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
611 LLtype = set_rle;
628 LLtype = set_rle;
612 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
629 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
613 LLtype = set_repeat;
630 LLtype = set_repeat;
614 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
631 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
615 FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
632 FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
616 LLtype = set_basic;
633 LLtype = set_basic;
617 } else {
634 } else {
618 size_t nbSeq_1 = nbSeq;
635 size_t nbSeq_1 = nbSeq;
619 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
636 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
620 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
637 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
621 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
638 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
622 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
639 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
623 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
640 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
624 op += NCountSize; }
641 op += NCountSize; }
625 FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
642 FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
626 LLtype = set_compressed;
643 LLtype = set_compressed;
627 } }
644 } }
628
645
629 /* CTable for Offsets */
646 /* CTable for Offsets */
630 { U32 max = MaxOff;
647 { U32 max = MaxOff;
631 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters);
648 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters);
632 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
649 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
633 *op++ = ofCodeTable[0];
650 *op++ = ofCodeTable[0];
634 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
651 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
635 Offtype = set_rle;
652 Offtype = set_rle;
636 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
653 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
637 Offtype = set_repeat;
654 Offtype = set_repeat;
638 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
655 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
639 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
656 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
640 Offtype = set_basic;
657 Offtype = set_basic;
641 } else {
658 } else {
642 size_t nbSeq_1 = nbSeq;
659 size_t nbSeq_1 = nbSeq;
643 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
660 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
644 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
661 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
645 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
662 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
646 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
663 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
647 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
664 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
648 op += NCountSize; }
665 op += NCountSize; }
649 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
666 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
650 Offtype = set_compressed;
667 Offtype = set_compressed;
651 } }
668 } }
652
669
653 /* CTable for MatchLengths */
670 /* CTable for MatchLengths */
654 { U32 max = MaxML;
671 { U32 max = MaxML;
655 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters);
672 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters);
656 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
673 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
657 *op++ = *mlCodeTable;
674 *op++ = *mlCodeTable;
658 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
675 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
659 MLtype = set_rle;
676 MLtype = set_rle;
660 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
677 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
661 MLtype = set_repeat;
678 MLtype = set_repeat;
662 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
679 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
663 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
680 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
664 MLtype = set_basic;
681 MLtype = set_basic;
665 } else {
682 } else {
666 size_t nbSeq_1 = nbSeq;
683 size_t nbSeq_1 = nbSeq;
667 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
684 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
668 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
685 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
669 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
686 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
670 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
687 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
671 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
688 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
672 op += NCountSize; }
689 op += NCountSize; }
673 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
690 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
674 MLtype = set_compressed;
691 MLtype = set_compressed;
675 } }
692 } }
676
693
677 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
694 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
678 zc->flagStaticTables = 0;
695 zc->flagStaticTables = 0;
679
696
680 /* Encoding Sequences */
697 /* Encoding Sequences */
681 { BIT_CStream_t blockStream;
698 { BIT_CStream_t blockStream;
682 FSE_CState_t stateMatchLength;
699 FSE_CState_t stateMatchLength;
683 FSE_CState_t stateOffsetBits;
700 FSE_CState_t stateOffsetBits;
684 FSE_CState_t stateLitLength;
701 FSE_CState_t stateLitLength;
685
702
686 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
703 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
687
704
688 /* first symbols */
705 /* first symbols */
689 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
706 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
690 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
707 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
691 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
708 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
692 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
709 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
693 if (MEM_32bits()) BIT_flushBits(&blockStream);
710 if (MEM_32bits()) BIT_flushBits(&blockStream);
694 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
711 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
695 if (MEM_32bits()) BIT_flushBits(&blockStream);
712 if (MEM_32bits()) BIT_flushBits(&blockStream);
696 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
713 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
697 BIT_flushBits(&blockStream);
714 BIT_flushBits(&blockStream);
698
715
699 { size_t n;
716 { size_t n;
700 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
717 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
701 BYTE const llCode = llCodeTable[n];
718 BYTE const llCode = llCodeTable[n];
702 BYTE const ofCode = ofCodeTable[n];
719 BYTE const ofCode = ofCodeTable[n];
703 BYTE const mlCode = mlCodeTable[n];
720 BYTE const mlCode = mlCodeTable[n];
704 U32 const llBits = LL_bits[llCode];
721 U32 const llBits = LL_bits[llCode];
705 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
722 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
706 U32 const mlBits = ML_bits[mlCode];
723 U32 const mlBits = ML_bits[mlCode];
707 /* (7)*/ /* (7)*/
724 /* (7)*/ /* (7)*/
708 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
725 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
709 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
726 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
710 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
727 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
711 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
728 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
712 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
729 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
713 BIT_flushBits(&blockStream); /* (7)*/
730 BIT_flushBits(&blockStream); /* (7)*/
714 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
731 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
715 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
732 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
716 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
733 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
717 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
734 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
718 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
735 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
719 BIT_flushBits(&blockStream); /* (7)*/
736 BIT_flushBits(&blockStream); /* (7)*/
720 } }
737 } }
721
738
722 FSE_flushCState(&blockStream, &stateMatchLength);
739 FSE_flushCState(&blockStream, &stateMatchLength);
723 FSE_flushCState(&blockStream, &stateOffsetBits);
740 FSE_flushCState(&blockStream, &stateOffsetBits);
724 FSE_flushCState(&blockStream, &stateLitLength);
741 FSE_flushCState(&blockStream, &stateLitLength);
725
742
726 { size_t const streamSize = BIT_closeCStream(&blockStream);
743 { size_t const streamSize = BIT_closeCStream(&blockStream);
727 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
744 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
728 op += streamSize;
745 op += streamSize;
729 } }
746 } }
730
747
731 /* check compressibility */
748 /* check compressibility */
732 _check_compressibility:
749 _check_compressibility:
733 { size_t const minGain = ZSTD_minGain(srcSize);
750 { size_t const minGain = ZSTD_minGain(srcSize);
734 size_t const maxCSize = srcSize - minGain;
751 size_t const maxCSize = srcSize - minGain;
735 if ((size_t)(op-ostart) >= maxCSize) return 0; }
752 if ((size_t)(op-ostart) >= maxCSize) return 0; }
736
753
737 /* confirm repcodes */
754 /* confirm repcodes */
738 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
755 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
739
756
740 return op - ostart;
757 return op - ostart;
741 }
758 }
742
759
743
760
761 #if 0 /* for debug */
762 # define STORESEQ_DEBUG
763 #include <stdio.h> /* fprintf */
764 U32 g_startDebug = 0;
765 const BYTE* g_start = NULL;
766 #endif
767
744 /*! ZSTD_storeSeq() :
768 /*! ZSTD_storeSeq() :
745 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
769 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
746 `offsetCode` : distance to match, or 0 == repCode.
770 `offsetCode` : distance to match, or 0 == repCode.
747 `matchCode` : matchLength - MINMATCH
771 `matchCode` : matchLength - MINMATCH
748 */
772 */
749 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
773 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
750 {
774 {
751 #if 0 /* for debug */
775 #ifdef STORESEQ_DEBUG
752 static const BYTE* g_start = NULL;
776 if (g_startDebug) {
753 const U32 pos = (U32)((const BYTE*)literals - g_start);
777 const U32 pos = (U32)((const BYTE*)literals - g_start);
754 if (g_start==NULL) g_start = (const BYTE*)literals;
778 if (g_start==NULL) g_start = (const BYTE*)literals;
755 //if ((pos > 1) && (pos < 50000))
779 if ((pos > 1895000) && (pos < 1895300))
756 printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
780 fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
757 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
781 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
782 }
758 #endif
783 #endif
759 /* copy Literals */
784 /* copy Literals */
760 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
785 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
761 seqStorePtr->lit += litLength;
786 seqStorePtr->lit += litLength;
762
787
763 /* literal Length */
788 /* literal Length */
764 if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
789 if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
765 seqStorePtr->sequences[0].litLength = (U16)litLength;
790 seqStorePtr->sequences[0].litLength = (U16)litLength;
766
791
767 /* match offset */
792 /* match offset */
768 seqStorePtr->sequences[0].offset = offsetCode + 1;
793 seqStorePtr->sequences[0].offset = offsetCode + 1;
769
794
770 /* match Length */
795 /* match Length */
771 if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
796 if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
772 seqStorePtr->sequences[0].matchLength = (U16)matchCode;
797 seqStorePtr->sequences[0].matchLength = (U16)matchCode;
773
798
774 seqStorePtr->sequences++;
799 seqStorePtr->sequences++;
775 }
800 }
776
801
777
802
778 /*-*************************************
803 /*-*************************************
779 * Match length counter
804 * Match length counter
780 ***************************************/
805 ***************************************/
781 static unsigned ZSTD_NbCommonBytes (register size_t val)
806 static unsigned ZSTD_NbCommonBytes (register size_t val)
782 {
807 {
783 if (MEM_isLittleEndian()) {
808 if (MEM_isLittleEndian()) {
784 if (MEM_64bits()) {
809 if (MEM_64bits()) {
785 # if defined(_MSC_VER) && defined(_WIN64)
810 # if defined(_MSC_VER) && defined(_WIN64)
786 unsigned long r = 0;
811 unsigned long r = 0;
787 _BitScanForward64( &r, (U64)val );
812 _BitScanForward64( &r, (U64)val );
788 return (unsigned)(r>>3);
813 return (unsigned)(r>>3);
789 # elif defined(__GNUC__) && (__GNUC__ >= 3)
814 # elif defined(__GNUC__) && (__GNUC__ >= 3)
790 return (__builtin_ctzll((U64)val) >> 3);
815 return (__builtin_ctzll((U64)val) >> 3);
791 # else
816 # else
792 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
817 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
793 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
818 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
794 # endif
819 # endif
795 } else { /* 32 bits */
820 } else { /* 32 bits */
796 # if defined(_MSC_VER)
821 # if defined(_MSC_VER)
797 unsigned long r=0;
822 unsigned long r=0;
798 _BitScanForward( &r, (U32)val );
823 _BitScanForward( &r, (U32)val );
799 return (unsigned)(r>>3);
824 return (unsigned)(r>>3);
800 # elif defined(__GNUC__) && (__GNUC__ >= 3)
825 # elif defined(__GNUC__) && (__GNUC__ >= 3)
801 return (__builtin_ctz((U32)val) >> 3);
826 return (__builtin_ctz((U32)val) >> 3);
802 # else
827 # else
803 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
828 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
804 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
829 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
805 # endif
830 # endif
806 }
831 }
807 } else { /* Big Endian CPU */
832 } else { /* Big Endian CPU */
808 if (MEM_64bits()) {
833 if (MEM_64bits()) {
809 # if defined(_MSC_VER) && defined(_WIN64)
834 # if defined(_MSC_VER) && defined(_WIN64)
810 unsigned long r = 0;
835 unsigned long r = 0;
811 _BitScanReverse64( &r, val );
836 _BitScanReverse64( &r, val );
812 return (unsigned)(r>>3);
837 return (unsigned)(r>>3);
813 # elif defined(__GNUC__) && (__GNUC__ >= 3)
838 # elif defined(__GNUC__) && (__GNUC__ >= 3)
814 return (__builtin_clzll(val) >> 3);
839 return (__builtin_clzll(val) >> 3);
815 # else
840 # else
816 unsigned r;
841 unsigned r;
817 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
842 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
818 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
843 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
819 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
844 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
820 r += (!val);
845 r += (!val);
821 return r;
846 return r;
822 # endif
847 # endif
823 } else { /* 32 bits */
848 } else { /* 32 bits */
824 # if defined(_MSC_VER)
849 # if defined(_MSC_VER)
825 unsigned long r = 0;
850 unsigned long r = 0;
826 _BitScanReverse( &r, (unsigned long)val );
851 _BitScanReverse( &r, (unsigned long)val );
827 return (unsigned)(r>>3);
852 return (unsigned)(r>>3);
828 # elif defined(__GNUC__) && (__GNUC__ >= 3)
853 # elif defined(__GNUC__) && (__GNUC__ >= 3)
829 return (__builtin_clz((U32)val) >> 3);
854 return (__builtin_clz((U32)val) >> 3);
830 # else
855 # else
831 unsigned r;
856 unsigned r;
832 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
857 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
833 r += (!val);
858 r += (!val);
834 return r;
859 return r;
835 # endif
860 # endif
836 } }
861 } }
837 }
862 }
838
863
839
864
840 static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
865 static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
841 {
866 {
842 const BYTE* const pStart = pIn;
867 const BYTE* const pStart = pIn;
843 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
868 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
844
869
845 while (pIn < pInLoopLimit) {
870 while (pIn < pInLoopLimit) {
846 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
871 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
847 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
872 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
848 pIn += ZSTD_NbCommonBytes(diff);
873 pIn += ZSTD_NbCommonBytes(diff);
849 return (size_t)(pIn - pStart);
874 return (size_t)(pIn - pStart);
850 }
875 }
851 if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
876 if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
852 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
877 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
853 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
878 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
854 return (size_t)(pIn - pStart);
879 return (size_t)(pIn - pStart);
855 }
880 }
856
881
857 /** ZSTD_count_2segments() :
882 /** ZSTD_count_2segments() :
858 * can count match length with `ip` & `match` in 2 different segments.
883 * can count match length with `ip` & `match` in 2 different segments.
859 * convention : on reaching mEnd, match count continue starting from iStart
884 * convention : on reaching mEnd, match count continue starting from iStart
860 */
885 */
861 static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
886 static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
862 {
887 {
863 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
888 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
864 size_t const matchLength = ZSTD_count(ip, match, vEnd);
889 size_t const matchLength = ZSTD_count(ip, match, vEnd);
865 if (match + matchLength != mEnd) return matchLength;
890 if (match + matchLength != mEnd) return matchLength;
866 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
891 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
867 }
892 }
868
893
869
894
870 /*-*************************************
895 /*-*************************************
871 * Hashes
896 * Hashes
872 ***************************************/
897 ***************************************/
873 static const U32 prime3bytes = 506832829U;
898 static const U32 prime3bytes = 506832829U;
874 static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
899 static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
875 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
900 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
876
901
877 static const U32 prime4bytes = 2654435761U;
902 static const U32 prime4bytes = 2654435761U;
878 static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
903 static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
879 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
904 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
880
905
881 static const U64 prime5bytes = 889523592379ULL;
906 static const U64 prime5bytes = 889523592379ULL;
882 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
907 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
883 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
908 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
884
909
885 static const U64 prime6bytes = 227718039650203ULL;
910 static const U64 prime6bytes = 227718039650203ULL;
886 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
911 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
887 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
912 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
888
913
889 static const U64 prime7bytes = 58295818150454627ULL;
914 static const U64 prime7bytes = 58295818150454627ULL;
890 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
915 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
891 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
916 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
892
917
893 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
918 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
894 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
919 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
895 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
920 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
896
921
897 static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
922 static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
898 {
923 {
899 switch(mls)
924 switch(mls)
900 {
925 {
901 default:
926 default:
902 case 4: return ZSTD_hash4Ptr(p, hBits);
927 case 4: return ZSTD_hash4Ptr(p, hBits);
903 case 5: return ZSTD_hash5Ptr(p, hBits);
928 case 5: return ZSTD_hash5Ptr(p, hBits);
904 case 6: return ZSTD_hash6Ptr(p, hBits);
929 case 6: return ZSTD_hash6Ptr(p, hBits);
905 case 7: return ZSTD_hash7Ptr(p, hBits);
930 case 7: return ZSTD_hash7Ptr(p, hBits);
906 case 8: return ZSTD_hash8Ptr(p, hBits);
931 case 8: return ZSTD_hash8Ptr(p, hBits);
907 }
932 }
908 }
933 }
909
934
910
935
911 /*-*************************************
936 /*-*************************************
912 * Fast Scan
937 * Fast Scan
913 ***************************************/
938 ***************************************/
914 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
939 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
915 {
940 {
916 U32* const hashTable = zc->hashTable;
941 U32* const hashTable = zc->hashTable;
917 U32 const hBits = zc->params.cParams.hashLog;
942 U32 const hBits = zc->params.cParams.hashLog;
918 const BYTE* const base = zc->base;
943 const BYTE* const base = zc->base;
919 const BYTE* ip = base + zc->nextToUpdate;
944 const BYTE* ip = base + zc->nextToUpdate;
920 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
945 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
921 const size_t fastHashFillStep = 3;
946 const size_t fastHashFillStep = 3;
922
947
923 while(ip <= iend) {
948 while(ip <= iend) {
924 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
949 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
925 ip += fastHashFillStep;
950 ip += fastHashFillStep;
926 }
951 }
927 }
952 }
928
953
929
954
930 FORCE_INLINE
955 FORCE_INLINE
931 void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
956 void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
932 const void* src, size_t srcSize,
957 const void* src, size_t srcSize,
933 const U32 mls)
958 const U32 mls)
934 {
959 {
935 U32* const hashTable = cctx->hashTable;
960 U32* const hashTable = cctx->hashTable;
936 U32 const hBits = cctx->params.cParams.hashLog;
961 U32 const hBits = cctx->params.cParams.hashLog;
937 seqStore_t* seqStorePtr = &(cctx->seqStore);
962 seqStore_t* seqStorePtr = &(cctx->seqStore);
938 const BYTE* const base = cctx->base;
963 const BYTE* const base = cctx->base;
939 const BYTE* const istart = (const BYTE*)src;
964 const BYTE* const istart = (const BYTE*)src;
940 const BYTE* ip = istart;
965 const BYTE* ip = istart;
941 const BYTE* anchor = istart;
966 const BYTE* anchor = istart;
942 const U32 lowestIndex = cctx->dictLimit;
967 const U32 lowestIndex = cctx->dictLimit;
943 const BYTE* const lowest = base + lowestIndex;
968 const BYTE* const lowest = base + lowestIndex;
944 const BYTE* const iend = istart + srcSize;
969 const BYTE* const iend = istart + srcSize;
945 const BYTE* const ilimit = iend - HASH_READ_SIZE;
970 const BYTE* const ilimit = iend - HASH_READ_SIZE;
946 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
971 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
947 U32 offsetSaved = 0;
972 U32 offsetSaved = 0;
948
973
949 /* init */
974 /* init */
950 ip += (ip==lowest);
975 ip += (ip==lowest);
951 { U32 const maxRep = (U32)(ip-lowest);
976 { U32 const maxRep = (U32)(ip-lowest);
952 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
977 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
953 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
978 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
954 }
979 }
955
980
956 /* Main Search Loop */
981 /* Main Search Loop */
957 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
982 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
958 size_t mLength;
983 size_t mLength;
959 size_t const h = ZSTD_hashPtr(ip, hBits, mls);
984 size_t const h = ZSTD_hashPtr(ip, hBits, mls);
960 U32 const current = (U32)(ip-base);
985 U32 const current = (U32)(ip-base);
961 U32 const matchIndex = hashTable[h];
986 U32 const matchIndex = hashTable[h];
962 const BYTE* match = base + matchIndex;
987 const BYTE* match = base + matchIndex;
963 hashTable[h] = current; /* update hash table */
988 hashTable[h] = current; /* update hash table */
964
989
965 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
990 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
966 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
991 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
967 ip++;
992 ip++;
968 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
993 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
969 } else {
994 } else {
970 U32 offset;
995 U32 offset;
971 if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
996 if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
972 ip += ((ip-anchor) >> g_searchStrength) + 1;
997 ip += ((ip-anchor) >> g_searchStrength) + 1;
973 continue;
998 continue;
974 }
999 }
975 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
1000 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
976 offset = (U32)(ip-match);
1001 offset = (U32)(ip-match);
977 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1002 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
978 offset_2 = offset_1;
1003 offset_2 = offset_1;
979 offset_1 = offset;
1004 offset_1 = offset;
980
1005
981 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1006 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
982 }
1007 }
983
1008
984 /* match found */
1009 /* match found */
985 ip += mLength;
1010 ip += mLength;
986 anchor = ip;
1011 anchor = ip;
987
1012
988 if (ip <= ilimit) {
1013 if (ip <= ilimit) {
989 /* Fill Table */
1014 /* Fill Table */
990 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
1015 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
991 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1016 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
992 /* check immediate repcode */
1017 /* check immediate repcode */
993 while ( (ip <= ilimit)
1018 while ( (ip <= ilimit)
994 && ( (offset_2>0)
1019 && ( (offset_2>0)
995 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1020 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
996 /* store sequence */
1021 /* store sequence */
997 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1022 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
998 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
1023 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
999 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
1024 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
1000 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1025 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1001 ip += rLength;
1026 ip += rLength;
1002 anchor = ip;
1027 anchor = ip;
1003 continue; /* faster when present ... (?) */
1028 continue; /* faster when present ... (?) */
1004 } } }
1029 } } }
1005
1030
1006 /* save reps for next block */
1031 /* save reps for next block */
1007 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1032 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1008 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1033 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1009
1034
1010 /* Last Literals */
1035 /* Last Literals */
1011 { size_t const lastLLSize = iend - anchor;
1036 { size_t const lastLLSize = iend - anchor;
1012 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1037 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1013 seqStorePtr->lit += lastLLSize;
1038 seqStorePtr->lit += lastLLSize;
1014 }
1039 }
1015 }
1040 }
1016
1041
1017
1042
1018 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
1043 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
1019 const void* src, size_t srcSize)
1044 const void* src, size_t srcSize)
1020 {
1045 {
1021 const U32 mls = ctx->params.cParams.searchLength;
1046 const U32 mls = ctx->params.cParams.searchLength;
1022 switch(mls)
1047 switch(mls)
1023 {
1048 {
1024 default:
1049 default:
1025 case 4 :
1050 case 4 :
1026 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
1051 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
1027 case 5 :
1052 case 5 :
1028 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
1053 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
1029 case 6 :
1054 case 6 :
1030 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
1055 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
1031 case 7 :
1056 case 7 :
1032 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
1057 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
1033 }
1058 }
1034 }
1059 }
1035
1060
1036
1061
1037 static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
1062 static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
1038 const void* src, size_t srcSize,
1063 const void* src, size_t srcSize,
1039 const U32 mls)
1064 const U32 mls)
1040 {
1065 {
1041 U32* hashTable = ctx->hashTable;
1066 U32* hashTable = ctx->hashTable;
1042 const U32 hBits = ctx->params.cParams.hashLog;
1067 const U32 hBits = ctx->params.cParams.hashLog;
1043 seqStore_t* seqStorePtr = &(ctx->seqStore);
1068 seqStore_t* seqStorePtr = &(ctx->seqStore);
1044 const BYTE* const base = ctx->base;
1069 const BYTE* const base = ctx->base;
1045 const BYTE* const dictBase = ctx->dictBase;
1070 const BYTE* const dictBase = ctx->dictBase;
1046 const BYTE* const istart = (const BYTE*)src;
1071 const BYTE* const istart = (const BYTE*)src;
1047 const BYTE* ip = istart;
1072 const BYTE* ip = istart;
1048 const BYTE* anchor = istart;
1073 const BYTE* anchor = istart;
1049 const U32 lowestIndex = ctx->lowLimit;
1074 const U32 lowestIndex = ctx->lowLimit;
1050 const BYTE* const dictStart = dictBase + lowestIndex;
1075 const BYTE* const dictStart = dictBase + lowestIndex;
1051 const U32 dictLimit = ctx->dictLimit;
1076 const U32 dictLimit = ctx->dictLimit;
1052 const BYTE* const lowPrefixPtr = base + dictLimit;
1077 const BYTE* const lowPrefixPtr = base + dictLimit;
1053 const BYTE* const dictEnd = dictBase + dictLimit;
1078 const BYTE* const dictEnd = dictBase + dictLimit;
1054 const BYTE* const iend = istart + srcSize;
1079 const BYTE* const iend = istart + srcSize;
1055 const BYTE* const ilimit = iend - 8;
1080 const BYTE* const ilimit = iend - 8;
1056 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1081 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1057
1082
1058 /* Search Loop */
1083 /* Search Loop */
1059 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1084 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1060 const size_t h = ZSTD_hashPtr(ip, hBits, mls);
1085 const size_t h = ZSTD_hashPtr(ip, hBits, mls);
1061 const U32 matchIndex = hashTable[h];
1086 const U32 matchIndex = hashTable[h];
1062 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1087 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1063 const BYTE* match = matchBase + matchIndex;
1088 const BYTE* match = matchBase + matchIndex;
1064 const U32 current = (U32)(ip-base);
1089 const U32 current = (U32)(ip-base);
1065 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1090 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1066 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1091 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1067 const BYTE* repMatch = repBase + repIndex;
1092 const BYTE* repMatch = repBase + repIndex;
1068 size_t mLength;
1093 size_t mLength;
1069 hashTable[h] = current; /* update hash table */
1094 hashTable[h] = current; /* update hash table */
1070
1095
1071 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1096 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1072 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1097 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1073 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1098 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1074 mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
1099 mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
1075 ip++;
1100 ip++;
1076 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1101 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1077 } else {
1102 } else {
1078 if ( (matchIndex < lowestIndex) ||
1103 if ( (matchIndex < lowestIndex) ||
1079 (MEM_read32(match) != MEM_read32(ip)) ) {
1104 (MEM_read32(match) != MEM_read32(ip)) ) {
1080 ip += ((ip-anchor) >> g_searchStrength) + 1;
1105 ip += ((ip-anchor) >> g_searchStrength) + 1;
1081 continue;
1106 continue;
1082 }
1107 }
1083 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1108 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1084 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1109 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1085 U32 offset;
1110 U32 offset;
1086 mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
1111 mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
1087 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1112 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1088 offset = current - matchIndex;
1113 offset = current - matchIndex;
1089 offset_2 = offset_1;
1114 offset_2 = offset_1;
1090 offset_1 = offset;
1115 offset_1 = offset;
1091 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1116 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1092 } }
1117 } }
1093
1118
1094 /* found a match : store it */
1119 /* found a match : store it */
1095 ip += mLength;
1120 ip += mLength;
1096 anchor = ip;
1121 anchor = ip;
1097
1122
1098 if (ip <= ilimit) {
1123 if (ip <= ilimit) {
1099 /* Fill Table */
1124 /* Fill Table */
1100 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
1125 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
1101 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1126 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1102 /* check immediate repcode */
1127 /* check immediate repcode */
1103 while (ip <= ilimit) {
1128 while (ip <= ilimit) {
1104 U32 const current2 = (U32)(ip-base);
1129 U32 const current2 = (U32)(ip-base);
1105 U32 const repIndex2 = current2 - offset_2;
1130 U32 const repIndex2 = current2 - offset_2;
1106 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1131 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1107 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1132 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1108 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1133 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1109 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1134 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1110 size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1135 size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1111 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1136 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1112 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1137 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1113 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
1138 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
1114 ip += repLength2;
1139 ip += repLength2;
1115 anchor = ip;
1140 anchor = ip;
1116 continue;
1141 continue;
1117 }
1142 }
1118 break;
1143 break;
1119 } } }
1144 } } }
1120
1145
1121 /* save reps for next block */
1146 /* save reps for next block */
1122 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1147 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1123
1148
1124 /* Last Literals */
1149 /* Last Literals */
1125 { size_t const lastLLSize = iend - anchor;
1150 { size_t const lastLLSize = iend - anchor;
1126 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1151 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1127 seqStorePtr->lit += lastLLSize;
1152 seqStorePtr->lit += lastLLSize;
1128 }
1153 }
1129 }
1154 }
1130
1155
1131
1156
1132 static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
1157 static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
1133 const void* src, size_t srcSize)
1158 const void* src, size_t srcSize)
1134 {
1159 {
1135 U32 const mls = ctx->params.cParams.searchLength;
1160 U32 const mls = ctx->params.cParams.searchLength;
1136 switch(mls)
1161 switch(mls)
1137 {
1162 {
1138 default:
1163 default:
1139 case 4 :
1164 case 4 :
1140 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
1165 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
1141 case 5 :
1166 case 5 :
1142 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
1167 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
1143 case 6 :
1168 case 6 :
1144 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
1169 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
1145 case 7 :
1170 case 7 :
1146 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
1171 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
1147 }
1172 }
1148 }
1173 }
1149
1174
1150
1175
1151 /*-*************************************
1176 /*-*************************************
1152 * Double Fast
1177 * Double Fast
1153 ***************************************/
1178 ***************************************/
1154 static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
1179 static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
1155 {
1180 {
1156 U32* const hashLarge = cctx->hashTable;
1181 U32* const hashLarge = cctx->hashTable;
1157 U32 const hBitsL = cctx->params.cParams.hashLog;
1182 U32 const hBitsL = cctx->params.cParams.hashLog;
1158 U32* const hashSmall = cctx->chainTable;
1183 U32* const hashSmall = cctx->chainTable;
1159 U32 const hBitsS = cctx->params.cParams.chainLog;
1184 U32 const hBitsS = cctx->params.cParams.chainLog;
1160 const BYTE* const base = cctx->base;
1185 const BYTE* const base = cctx->base;
1161 const BYTE* ip = base + cctx->nextToUpdate;
1186 const BYTE* ip = base + cctx->nextToUpdate;
1162 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
1187 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
1163 const size_t fastHashFillStep = 3;
1188 const size_t fastHashFillStep = 3;
1164
1189
1165 while(ip <= iend) {
1190 while(ip <= iend) {
1166 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
1191 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
1167 hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
1192 hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
1168 ip += fastHashFillStep;
1193 ip += fastHashFillStep;
1169 }
1194 }
1170 }
1195 }
1171
1196
1172
1197
1173 FORCE_INLINE
1198 FORCE_INLINE
1174 void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
1199 void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
1175 const void* src, size_t srcSize,
1200 const void* src, size_t srcSize,
1176 const U32 mls)
1201 const U32 mls)
1177 {
1202 {
1178 U32* const hashLong = cctx->hashTable;
1203 U32* const hashLong = cctx->hashTable;
1179 const U32 hBitsL = cctx->params.cParams.hashLog;
1204 const U32 hBitsL = cctx->params.cParams.hashLog;
1180 U32* const hashSmall = cctx->chainTable;
1205 U32* const hashSmall = cctx->chainTable;
1181 const U32 hBitsS = cctx->params.cParams.chainLog;
1206 const U32 hBitsS = cctx->params.cParams.chainLog;
1182 seqStore_t* seqStorePtr = &(cctx->seqStore);
1207 seqStore_t* seqStorePtr = &(cctx->seqStore);
1183 const BYTE* const base = cctx->base;
1208 const BYTE* const base = cctx->base;
1184 const BYTE* const istart = (const BYTE*)src;
1209 const BYTE* const istart = (const BYTE*)src;
1185 const BYTE* ip = istart;
1210 const BYTE* ip = istart;
1186 const BYTE* anchor = istart;
1211 const BYTE* anchor = istart;
1187 const U32 lowestIndex = cctx->dictLimit;
1212 const U32 lowestIndex = cctx->dictLimit;
1188 const BYTE* const lowest = base + lowestIndex;
1213 const BYTE* const lowest = base + lowestIndex;
1189 const BYTE* const iend = istart + srcSize;
1214 const BYTE* const iend = istart + srcSize;
1190 const BYTE* const ilimit = iend - HASH_READ_SIZE;
1215 const BYTE* const ilimit = iend - HASH_READ_SIZE;
1191 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1216 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1192 U32 offsetSaved = 0;
1217 U32 offsetSaved = 0;
1193
1218
1194 /* init */
1219 /* init */
1195 ip += (ip==lowest);
1220 ip += (ip==lowest);
1196 { U32 const maxRep = (U32)(ip-lowest);
1221 { U32 const maxRep = (U32)(ip-lowest);
1197 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1222 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1198 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
1223 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
1199 }
1224 }
1200
1225
1201 /* Main Search Loop */
1226 /* Main Search Loop */
1202 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
1227 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
1203 size_t mLength;
1228 size_t mLength;
1204 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
1229 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
1205 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
1230 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
1206 U32 const current = (U32)(ip-base);
1231 U32 const current = (U32)(ip-base);
1207 U32 const matchIndexL = hashLong[h2];
1232 U32 const matchIndexL = hashLong[h2];
1208 U32 const matchIndexS = hashSmall[h];
1233 U32 const matchIndexS = hashSmall[h];
1209 const BYTE* matchLong = base + matchIndexL;
1234 const BYTE* matchLong = base + matchIndexL;
1210 const BYTE* match = base + matchIndexS;
1235 const BYTE* match = base + matchIndexS;
1211 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
1236 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
1212
1237
1213 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
1238 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
1214 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
1239 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
1215 ip++;
1240 ip++;
1216 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1241 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1217 } else {
1242 } else {
1218 U32 offset;
1243 U32 offset;
1219 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
1244 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
1220 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
1245 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
1221 offset = (U32)(ip-matchLong);
1246 offset = (U32)(ip-matchLong);
1222 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1247 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1223 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
1248 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
1224 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1249 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1225 U32 const matchIndex3 = hashLong[h3];
1250 U32 const matchIndex3 = hashLong[h3];
1226 const BYTE* match3 = base + matchIndex3;
1251 const BYTE* match3 = base + matchIndex3;
1227 hashLong[h3] = current + 1;
1252 hashLong[h3] = current + 1;
1228 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1253 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1229 mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
1254 mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
1230 ip++;
1255 ip++;
1231 offset = (U32)(ip-match3);
1256 offset = (U32)(ip-match3);
1232 while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1257 while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1233 } else {
1258 } else {
1234 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
1259 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
1235 offset = (U32)(ip-match);
1260 offset = (U32)(ip-match);
1236 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1261 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1237 }
1262 }
1238 } else {
1263 } else {
1239 ip += ((ip-anchor) >> g_searchStrength) + 1;
1264 ip += ((ip-anchor) >> g_searchStrength) + 1;
1240 continue;
1265 continue;
1241 }
1266 }
1242
1267
1243 offset_2 = offset_1;
1268 offset_2 = offset_1;
1244 offset_1 = offset;
1269 offset_1 = offset;
1245
1270
1246 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1271 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1247 }
1272 }
1248
1273
1249 /* match found */
1274 /* match found */
1250 ip += mLength;
1275 ip += mLength;
1251 anchor = ip;
1276 anchor = ip;
1252
1277
1253 if (ip <= ilimit) {
1278 if (ip <= ilimit) {
1254 /* Fill Table */
1279 /* Fill Table */
1255 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
1280 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
1256 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
1281 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
1257 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
1282 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
1258 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1283 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1259
1284
1260 /* check immediate repcode */
1285 /* check immediate repcode */
1261 while ( (ip <= ilimit)
1286 while ( (ip <= ilimit)
1262 && ( (offset_2>0)
1287 && ( (offset_2>0)
1263 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1288 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1264 /* store sequence */
1289 /* store sequence */
1265 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1290 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1266 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
1291 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
1267 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
1292 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
1268 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
1293 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
1269 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1294 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1270 ip += rLength;
1295 ip += rLength;
1271 anchor = ip;
1296 anchor = ip;
1272 continue; /* faster when present ... (?) */
1297 continue; /* faster when present ... (?) */
1273 } } }
1298 } } }
1274
1299
1275 /* save reps for next block */
1300 /* save reps for next block */
1276 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1301 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1277 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1302 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1278
1303
1279 /* Last Literals */
1304 /* Last Literals */
1280 { size_t const lastLLSize = iend - anchor;
1305 { size_t const lastLLSize = iend - anchor;
1281 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1306 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1282 seqStorePtr->lit += lastLLSize;
1307 seqStorePtr->lit += lastLLSize;
1283 }
1308 }
1284 }
1309 }
1285
1310
1286
1311
1287 static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1312 static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1288 {
1313 {
1289 const U32 mls = ctx->params.cParams.searchLength;
1314 const U32 mls = ctx->params.cParams.searchLength;
1290 switch(mls)
1315 switch(mls)
1291 {
1316 {
1292 default:
1317 default:
1293 case 4 :
1318 case 4 :
1294 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
1319 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
1295 case 5 :
1320 case 5 :
1296 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
1321 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
1297 case 6 :
1322 case 6 :
1298 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
1323 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
1299 case 7 :
1324 case 7 :
1300 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
1325 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
1301 }
1326 }
1302 }
1327 }
1303
1328
1304
1329
1305 static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
1330 static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
1306 const void* src, size_t srcSize,
1331 const void* src, size_t srcSize,
1307 const U32 mls)
1332 const U32 mls)
1308 {
1333 {
1309 U32* const hashLong = ctx->hashTable;
1334 U32* const hashLong = ctx->hashTable;
1310 U32 const hBitsL = ctx->params.cParams.hashLog;
1335 U32 const hBitsL = ctx->params.cParams.hashLog;
1311 U32* const hashSmall = ctx->chainTable;
1336 U32* const hashSmall = ctx->chainTable;
1312 U32 const hBitsS = ctx->params.cParams.chainLog;
1337 U32 const hBitsS = ctx->params.cParams.chainLog;
1313 seqStore_t* seqStorePtr = &(ctx->seqStore);
1338 seqStore_t* seqStorePtr = &(ctx->seqStore);
1314 const BYTE* const base = ctx->base;
1339 const BYTE* const base = ctx->base;
1315 const BYTE* const dictBase = ctx->dictBase;
1340 const BYTE* const dictBase = ctx->dictBase;
1316 const BYTE* const istart = (const BYTE*)src;
1341 const BYTE* const istart = (const BYTE*)src;
1317 const BYTE* ip = istart;
1342 const BYTE* ip = istart;
1318 const BYTE* anchor = istart;
1343 const BYTE* anchor = istart;
1319 const U32 lowestIndex = ctx->lowLimit;
1344 const U32 lowestIndex = ctx->lowLimit;
1320 const BYTE* const dictStart = dictBase + lowestIndex;
1345 const BYTE* const dictStart = dictBase + lowestIndex;
1321 const U32 dictLimit = ctx->dictLimit;
1346 const U32 dictLimit = ctx->dictLimit;
1322 const BYTE* const lowPrefixPtr = base + dictLimit;
1347 const BYTE* const lowPrefixPtr = base + dictLimit;
1323 const BYTE* const dictEnd = dictBase + dictLimit;
1348 const BYTE* const dictEnd = dictBase + dictLimit;
1324 const BYTE* const iend = istart + srcSize;
1349 const BYTE* const iend = istart + srcSize;
1325 const BYTE* const ilimit = iend - 8;
1350 const BYTE* const ilimit = iend - 8;
1326 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1351 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1327
1352
1328 /* Search Loop */
1353 /* Search Loop */
1329 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1354 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1330 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
1355 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
1331 const U32 matchIndex = hashSmall[hSmall];
1356 const U32 matchIndex = hashSmall[hSmall];
1332 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1357 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1333 const BYTE* match = matchBase + matchIndex;
1358 const BYTE* match = matchBase + matchIndex;
1334
1359
1335 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
1360 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
1336 const U32 matchLongIndex = hashLong[hLong];
1361 const U32 matchLongIndex = hashLong[hLong];
1337 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
1362 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
1338 const BYTE* matchLong = matchLongBase + matchLongIndex;
1363 const BYTE* matchLong = matchLongBase + matchLongIndex;
1339
1364
1340 const U32 current = (U32)(ip-base);
1365 const U32 current = (U32)(ip-base);
1341 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1366 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1342 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1367 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1343 const BYTE* repMatch = repBase + repIndex;
1368 const BYTE* repMatch = repBase + repIndex;
1344 size_t mLength;
1369 size_t mLength;
1345 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
1370 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
1346
1371
1347 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1372 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1348 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1373 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1349 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1374 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1350 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
1375 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
1351 ip++;
1376 ip++;
1352 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1377 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1353 } else {
1378 } else {
1354 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
1379 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
1355 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
1380 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
1356 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
1381 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
1357 U32 offset;
1382 U32 offset;
1358 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
1383 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
1359 offset = current - matchLongIndex;
1384 offset = current - matchLongIndex;
1360 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1385 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1361 offset_2 = offset_1;
1386 offset_2 = offset_1;
1362 offset_1 = offset;
1387 offset_1 = offset;
1363 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1388 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1364
1389
1365 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
1390 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
1366 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1391 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1367 U32 const matchIndex3 = hashLong[h3];
1392 U32 const matchIndex3 = hashLong[h3];
1368 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
1393 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
1369 const BYTE* match3 = match3Base + matchIndex3;
1394 const BYTE* match3 = match3Base + matchIndex3;
1370 U32 offset;
1395 U32 offset;
1371 hashLong[h3] = current + 1;
1396 hashLong[h3] = current + 1;
1372 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1397 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1373 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
1398 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
1374 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
1399 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
1375 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
1400 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
1376 ip++;
1401 ip++;
1377 offset = current+1 - matchIndex3;
1402 offset = current+1 - matchIndex3;
1378 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1403 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1379 } else {
1404 } else {
1380 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1405 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1381 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1406 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1382 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
1407 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
1383 offset = current - matchIndex;
1408 offset = current - matchIndex;
1384 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1409 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1385 }
1410 }
1386 offset_2 = offset_1;
1411 offset_2 = offset_1;
1387 offset_1 = offset;
1412 offset_1 = offset;
1388 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1413 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1389
1414
1390 } else {
1415 } else {
1391 ip += ((ip-anchor) >> g_searchStrength) + 1;
1416 ip += ((ip-anchor) >> g_searchStrength) + 1;
1392 continue;
1417 continue;
1393 } }
1418 } }
1394
1419
1395 /* found a match : store it */
1420 /* found a match : store it */
1396 ip += mLength;
1421 ip += mLength;
1397 anchor = ip;
1422 anchor = ip;
1398
1423
1399 if (ip <= ilimit) {
1424 if (ip <= ilimit) {
1400 /* Fill Table */
1425 /* Fill Table */
1401 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
1426 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
1402 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
1427 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
1403 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1428 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1404 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
1429 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
1405 /* check immediate repcode */
1430 /* check immediate repcode */
1406 while (ip <= ilimit) {
1431 while (ip <= ilimit) {
1407 U32 const current2 = (U32)(ip-base);
1432 U32 const current2 = (U32)(ip-base);
1408 U32 const repIndex2 = current2 - offset_2;
1433 U32 const repIndex2 = current2 - offset_2;
1409 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1434 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1410 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1435 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1411 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1436 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1412 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1437 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1413 size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1438 size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1414 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1439 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1415 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1440 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1416 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
1441 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
1417 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
1442 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
1418 ip += repLength2;
1443 ip += repLength2;
1419 anchor = ip;
1444 anchor = ip;
1420 continue;
1445 continue;
1421 }
1446 }
1422 break;
1447 break;
1423 } } }
1448 } } }
1424
1449
1425 /* save reps for next block */
1450 /* save reps for next block */
1426 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1451 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1427
1452
1428 /* Last Literals */
1453 /* Last Literals */
1429 { size_t const lastLLSize = iend - anchor;
1454 { size_t const lastLLSize = iend - anchor;
1430 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1455 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1431 seqStorePtr->lit += lastLLSize;
1456 seqStorePtr->lit += lastLLSize;
1432 }
1457 }
1433 }
1458 }
1434
1459
1435
1460
1436 static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
1461 static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
1437 const void* src, size_t srcSize)
1462 const void* src, size_t srcSize)
1438 {
1463 {
1439 U32 const mls = ctx->params.cParams.searchLength;
1464 U32 const mls = ctx->params.cParams.searchLength;
1440 switch(mls)
1465 switch(mls)
1441 {
1466 {
1442 default:
1467 default:
1443 case 4 :
1468 case 4 :
1444 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
1469 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
1445 case 5 :
1470 case 5 :
1446 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
1471 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
1447 case 6 :
1472 case 6 :
1448 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
1473 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
1449 case 7 :
1474 case 7 :
1450 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
1475 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
1451 }
1476 }
1452 }
1477 }
1453
1478
1454
1479
1455 /*-*************************************
1480 /*-*************************************
1456 * Binary Tree search
1481 * Binary Tree search
1457 ***************************************/
1482 ***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
*   Inserts position `ip` into the binary-tree match structure stored in
*   zc->chainTable (two child slots per position: smaller / larger suffix),
*   repairing the suffix ordering while descending from the hash-table head.
*   ip : assumed <= iend-8 .
*   @return : nb of positions added */
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
                          U32 extDict)
{
    U32* const hashTable = zc->hashTable;
    U32 const hashLog = zc->params.cParams.hashLog;
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
    /* the tree is a rolling buffer: 2 U32 slots per position, modulo btMask+1 */
    U32* const bt = zc->chainTable;
    U32 const btLog = zc->params.cParams.chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    /* bytes already proven common with every remaining smaller / larger candidate */
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 current = (U32)(ip-base);
    /* oldest index still representable inside the rolling tree buffer */
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = zc->lowLimit;
    U32 matchEndIdx = current+8;
    size_t bestLength = 8;
#ifdef ZSTD_C_PREDICT
    /* speculation: reuse the children recorded for the previous position
     * to skip the byte comparison when the prediction holds */
    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    hashTable[h] = current;   /* Update Hash Table : current becomes new tree root for h */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif
        /* extend the match against the candidate; the candidate may live in
         * the current prefix or (extDict mode) in the old dictionary segment */
        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength])
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within correct buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;   /* terminate both child chains */
    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
    if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
    return 1;
}
1560
1585
1561
1586
/** ZSTD_insertBtAndFindBestMatch() :
 *  Inserts `ip` into the binary tree (same structure as ZSTD_insertBt1())
 *  and, while descending, tracks the best match candidate. On improvement,
 *  writes ZSTD_REP_MOVE + (current - matchIndex) into *offsetPtr; the
 *  incoming *offsetPtr value takes part in the gain-vs-offset-cost test.
 *  Also advances zc->nextToUpdate past fully-indexed positions.
 *  @return : length of the best match found (0 if none) */
static size_t ZSTD_insertBtAndFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 nbCompares, const U32 mls,
                        U32 extDict)
{
    U32* const hashTable = zc->hashTable;
    U32 const hashLog = zc->params.cParams.hashLog;
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const bt = zc->chainTable;           /* rolling tree buffer: 2 slots per position */
    U32 const btLog = zc->params.cParams.chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    /* bytes already proven common with every remaining smaller / larger candidate */
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;  /* oldest index inside the rolling buffer */
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8;
    U32 dummy32;   /* to be nullified at the end */
    size_t bestLength = 0;

    hashTable[h] = current;   /* Update Hash Table : current becomes the new root for h */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        /* extend the match; candidate may live in the prefix or (extDict mode)
         * in the old dictionary segment */
        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength])
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            /* accept only if the extra length outweighs the (log2) cost of the larger offset */
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;   /* terminate both child chains */

    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
    return bestLength;
}
1639
1664
1640
1665
1641 static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
1666 static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
1642 {
1667 {
1643 const BYTE* const base = zc->base;
1668 const BYTE* const base = zc->base;
1644 const U32 target = (U32)(ip - base);
1669 const U32 target = (U32)(ip - base);
1645 U32 idx = zc->nextToUpdate;
1670 U32 idx = zc->nextToUpdate;
1646
1671
1647 while(idx < target)
1672 while(idx < target)
1648 idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
1673 idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
1649 }
1674 }
1650
1675
1651 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
1676 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
1652 static size_t ZSTD_BtFindBestMatch (
1677 static size_t ZSTD_BtFindBestMatch (
1653 ZSTD_CCtx* zc,
1678 ZSTD_CCtx* zc,
1654 const BYTE* const ip, const BYTE* const iLimit,
1679 const BYTE* const ip, const BYTE* const iLimit,
1655 size_t* offsetPtr,
1680 size_t* offsetPtr,
1656 const U32 maxNbAttempts, const U32 mls)
1681 const U32 maxNbAttempts, const U32 mls)
1657 {
1682 {
1658 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
1683 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
1659 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
1684 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
1660 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
1685 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
1661 }
1686 }
1662
1687
1663
1688
1664 static size_t ZSTD_BtFindBestMatch_selectMLS (
1689 static size_t ZSTD_BtFindBestMatch_selectMLS (
1665 ZSTD_CCtx* zc, /* Index table will be updated */
1690 ZSTD_CCtx* zc, /* Index table will be updated */
1666 const BYTE* ip, const BYTE* const iLimit,
1691 const BYTE* ip, const BYTE* const iLimit,
1667 size_t* offsetPtr,
1692 size_t* offsetPtr,
1668 const U32 maxNbAttempts, const U32 matchLengthSearch)
1693 const U32 maxNbAttempts, const U32 matchLengthSearch)
1669 {
1694 {
1670 switch(matchLengthSearch)
1695 switch(matchLengthSearch)
1671 {
1696 {
1672 default :
1697 default :
1673 case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1698 case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1674 case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1699 case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1675 case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
1700 case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
1676 }
1701 }
1677 }
1702 }
1678
1703
1679
1704
1680 static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
1705 static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
1681 {
1706 {
1682 const BYTE* const base = zc->base;
1707 const BYTE* const base = zc->base;
1683 const U32 target = (U32)(ip - base);
1708 const U32 target = (U32)(ip - base);
1684 U32 idx = zc->nextToUpdate;
1709 U32 idx = zc->nextToUpdate;
1685
1710
1686 while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
1711 while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
1687 }
1712 }
1688
1713
1689
1714
1690 /** Tree updater, providing best match */
1715 /** Tree updater, providing best match */
1691 static size_t ZSTD_BtFindBestMatch_extDict (
1716 static size_t ZSTD_BtFindBestMatch_extDict (
1692 ZSTD_CCtx* zc,
1717 ZSTD_CCtx* zc,
1693 const BYTE* const ip, const BYTE* const iLimit,
1718 const BYTE* const ip, const BYTE* const iLimit,
1694 size_t* offsetPtr,
1719 size_t* offsetPtr,
1695 const U32 maxNbAttempts, const U32 mls)
1720 const U32 maxNbAttempts, const U32 mls)
1696 {
1721 {
1697 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
1722 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
1698 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
1723 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
1699 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
1724 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
1700 }
1725 }
1701
1726
1702
1727
1703 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
1728 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
1704 ZSTD_CCtx* zc, /* Index table will be updated */
1729 ZSTD_CCtx* zc, /* Index table will be updated */
1705 const BYTE* ip, const BYTE* const iLimit,
1730 const BYTE* ip, const BYTE* const iLimit,
1706 size_t* offsetPtr,
1731 size_t* offsetPtr,
1707 const U32 maxNbAttempts, const U32 matchLengthSearch)
1732 const U32 maxNbAttempts, const U32 matchLengthSearch)
1708 {
1733 {
1709 switch(matchLengthSearch)
1734 switch(matchLengthSearch)
1710 {
1735 {
1711 default :
1736 default :
1712 case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1737 case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1713 case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1738 case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1714 case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
1739 case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
1715 }
1740 }
1716 }
1741 }
1717
1742
1718
1743
1719
1744
1720 /* *********************************
1745 /* *********************************
1721 * Hash Chain
1746 * Hash Chain
1722 ***********************************/
1747 ***********************************/
/* Slot of position `d` in the power-of-two-sized hash chain table.
 * Relies on a local `chainTable` being in scope at the expansion site. */
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
1724
1749
1725 /* Update chains up to ip (excluded)
1750 /* Update chains up to ip (excluded)
1726 Assumption : always within prefix (ie. not within extDict) */
1751 Assumption : always within prefix (ie. not within extDict) */
1727 FORCE_INLINE
1752 FORCE_INLINE
1728 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
1753 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
1729 {
1754 {
1730 U32* const hashTable = zc->hashTable;
1755 U32* const hashTable = zc->hashTable;
1731 const U32 hashLog = zc->params.cParams.hashLog;
1756 const U32 hashLog = zc->params.cParams.hashLog;
1732 U32* const chainTable = zc->chainTable;
1757 U32* const chainTable = zc->chainTable;
1733 const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
1758 const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
1734 const BYTE* const base = zc->base;
1759 const BYTE* const base = zc->base;
1735 const U32 target = (U32)(ip - base);
1760 const U32 target = (U32)(ip - base);
1736 U32 idx = zc->nextToUpdate;
1761 U32 idx = zc->nextToUpdate;
1737
1762
1738 while(idx < target) { /* catch up */
1763 while(idx < target) { /* catch up */
1739 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
1764 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
1740 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
1765 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
1741 hashTable[h] = idx;
1766 hashTable[h] = idx;
1742 idx++;
1767 idx++;
1743 }
1768 }
1744
1769
1745 zc->nextToUpdate = target;
1770 zc->nextToUpdate = target;
1746 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
1771 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
1747 }
1772 }
1748
1773
1749
1774
1750
1775
/* Walks the hash chain of `ip`, keeping the longest match found, up to
 * maxNbAttempts candidates. On success writes the ZSTD_REP_MOVE-encoded
 * offset into *offsetPtr. Returns the best match length found; the initial
 * value EQUAL_READ32-1 means "nothing at least EQUAL_READ32 long yet". */
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls, const U32 extDict)
{
    U32* const chainTable = zc->chainTable;
    const U32 chainSize = (1 << zc->params.cParams.chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 lowLimit = zc->lowLimit;
    const U32 current = (U32)(ip-base);
    /* indices at or below minChain fell out of the rolling chain buffer */
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    int nbAttempts=maxNbAttempts;
    size_t ml=EQUAL_READ32-1;

    /* HC4 match finder : catch chains up to ip and fetch its chain head */
    U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);

    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        const BYTE* match;
        size_t currentMl=0;
        if ((!extDict) || matchIndex >= dictLimit) {
            match = base + matchIndex;
            /* cheap reject: candidate can only beat ml if byte at ml matches */
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            match = dictBase + matchIndex;
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
        }

        /* save best solution */
        if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }

        if (matchIndex <= minChain) break;   /* rest of the chain was overwritten */
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    return ml;
}
1797
1822
1798
1823
1799 FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
1824 FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
1800 ZSTD_CCtx* zc,
1825 ZSTD_CCtx* zc,
1801 const BYTE* ip, const BYTE* const iLimit,
1826 const BYTE* ip, const BYTE* const iLimit,
1802 size_t* offsetPtr,
1827 size_t* offsetPtr,
1803 const U32 maxNbAttempts, const U32 matchLengthSearch)
1828 const U32 maxNbAttempts, const U32 matchLengthSearch)
1804 {
1829 {
1805 switch(matchLengthSearch)
1830 switch(matchLengthSearch)
1806 {
1831 {
1807 default :
1832 default :
1808 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
1833 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
1809 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
1834 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
1810 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
1835 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
1811 }
1836 }
1812 }
1837 }
1813
1838
1814
1839
1815 FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
1840 FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
1816 ZSTD_CCtx* zc,
1841 ZSTD_CCtx* zc,
1817 const BYTE* ip, const BYTE* const iLimit,
1842 const BYTE* ip, const BYTE* const iLimit,
1818 size_t* offsetPtr,
1843 size_t* offsetPtr,
1819 const U32 maxNbAttempts, const U32 matchLengthSearch)
1844 const U32 maxNbAttempts, const U32 matchLengthSearch)
1820 {
1845 {
1821 switch(matchLengthSearch)
1846 switch(matchLengthSearch)
1822 {
1847 {
1823 default :
1848 default :
1824 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
1849 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
1825 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
1850 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
1826 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
1851 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
1827 }
1852 }
1828 }
1853 }
1829
1854
1830
1855
1831 /* *******************************
1856 /* *******************************
1832 * Common parser - lazy strategy
1857 * Common parser - lazy strategy
1833 *********************************/
1858 *********************************/
1834 FORCE_INLINE
1859 FORCE_INLINE
1835 void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
1860 void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
1836 const void* src, size_t srcSize,
1861 const void* src, size_t srcSize,
1837 const U32 searchMethod, const U32 depth)
1862 const U32 searchMethod, const U32 depth)
1838 {
1863 {
1839 seqStore_t* seqStorePtr = &(ctx->seqStore);
1864 seqStore_t* seqStorePtr = &(ctx->seqStore);
1840 const BYTE* const istart = (const BYTE*)src;
1865 const BYTE* const istart = (const BYTE*)src;
1841 const BYTE* ip = istart;
1866 const BYTE* ip = istart;
1842 const BYTE* anchor = istart;
1867 const BYTE* anchor = istart;
1843 const BYTE* const iend = istart + srcSize;
1868 const BYTE* const iend = istart + srcSize;
1844 const BYTE* const ilimit = iend - 8;
1869 const BYTE* const ilimit = iend - 8;
1845 const BYTE* const base = ctx->base + ctx->dictLimit;
1870 const BYTE* const base = ctx->base + ctx->dictLimit;
1846
1871
1847 U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
1872 U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
1848 U32 const mls = ctx->params.cParams.searchLength;
1873 U32 const mls = ctx->params.cParams.searchLength;
1849
1874
1850 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
1875 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
1851 size_t* offsetPtr,
1876 size_t* offsetPtr,
1852 U32 maxNbAttempts, U32 matchLengthSearch);
1877 U32 maxNbAttempts, U32 matchLengthSearch);
1853 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
1878 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
1854 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
1879 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
1855
1880
1856 /* init */
1881 /* init */
1857 ip += (ip==base);
1882 ip += (ip==base);
1858 ctx->nextToUpdate3 = ctx->nextToUpdate;
1883 ctx->nextToUpdate3 = ctx->nextToUpdate;
1859 { U32 const maxRep = (U32)(ip-base);
1884 { U32 const maxRep = (U32)(ip-base);
1860 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
1885 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
1861 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1886 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1862 }
1887 }
1863
1888
1864 /* Match Loop */
1889 /* Match Loop */
1865 while (ip < ilimit) {
1890 while (ip < ilimit) {
1866 size_t matchLength=0;
1891 size_t matchLength=0;
1867 size_t offset=0;
1892 size_t offset=0;
1868 const BYTE* start=ip+1;
1893 const BYTE* start=ip+1;
1869
1894
1870 /* check repCode */
1895 /* check repCode */
1871 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
1896 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
1872 /* repcode : we take it */
1897 /* repcode : we take it */
1873 matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1898 matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1874 if (depth==0) goto _storeSequence;
1899 if (depth==0) goto _storeSequence;
1875 }
1900 }
1876
1901
1877 /* first search (depth 0) */
1902 /* first search (depth 0) */
1878 { size_t offsetFound = 99999999;
1903 { size_t offsetFound = 99999999;
1879 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
1904 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
1880 if (ml2 > matchLength)
1905 if (ml2 > matchLength)
1881 matchLength = ml2, start = ip, offset=offsetFound;
1906 matchLength = ml2, start = ip, offset=offsetFound;
1882 }
1907 }
1883
1908
1884 if (matchLength < EQUAL_READ32) {
1909 if (matchLength < EQUAL_READ32) {
1885 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
1910 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
1886 continue;
1911 continue;
1887 }
1912 }
1888
1913
1889 /* let's try to find a better solution */
1914 /* let's try to find a better solution */
1890 if (depth>=1)
1915 if (depth>=1)
1891 while (ip<ilimit) {
1916 while (ip<ilimit) {
1892 ip ++;
1917 ip ++;
1893 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1918 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1894 size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1919 size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1895 int const gain2 = (int)(mlRep * 3);
1920 int const gain2 = (int)(mlRep * 3);
1896 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1921 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1897 if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
1922 if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
1898 matchLength = mlRep, offset = 0, start = ip;
1923 matchLength = mlRep, offset = 0, start = ip;
1899 }
1924 }
1900 { size_t offset2=99999999;
1925 { size_t offset2=99999999;
1901 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1926 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1902 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1927 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1903 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1928 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1904 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
1929 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
1905 matchLength = ml2, offset = offset2, start = ip;
1930 matchLength = ml2, offset = offset2, start = ip;
1906 continue; /* search a better one */
1931 continue; /* search a better one */
1907 } }
1932 } }
1908
1933
1909 /* let's find an even better one */
1934 /* let's find an even better one */
1910 if ((depth==2) && (ip<ilimit)) {
1935 if ((depth==2) && (ip<ilimit)) {
1911 ip ++;
1936 ip ++;
1912 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1937 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1913 size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1938 size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1914 int const gain2 = (int)(ml2 * 4);
1939 int const gain2 = (int)(ml2 * 4);
1915 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1940 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1916 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
1941 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
1917 matchLength = ml2, offset = 0, start = ip;
1942 matchLength = ml2, offset = 0, start = ip;
1918 }
1943 }
1919 { size_t offset2=99999999;
1944 { size_t offset2=99999999;
1920 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1945 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1921 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1946 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1922 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1947 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1923 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
1948 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
1924 matchLength = ml2, offset = offset2, start = ip;
1949 matchLength = ml2, offset = offset2, start = ip;
1925 continue;
1950 continue;
1926 } } }
1951 } } }
1927 break; /* nothing found : store previous solution */
1952 break; /* nothing found : store previous solution */
1928 }
1953 }
1929
1954
1930 /* catch up */
1955 /* catch up */
1931 if (offset) {
1956 if (offset) {
1932 while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
1957 while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
1933 { start--; matchLength++; }
1958 { start--; matchLength++; }
1934 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1959 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1935 }
1960 }
1936
1961
1937 /* store sequence */
1962 /* store sequence */
1938 _storeSequence:
1963 _storeSequence:
1939 { size_t const litLength = start - anchor;
1964 { size_t const litLength = start - anchor;
1940 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1965 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1941 anchor = ip = start + matchLength;
1966 anchor = ip = start + matchLength;
1942 }
1967 }
1943
1968
1944 /* check immediate repcode */
1969 /* check immediate repcode */
1945 while ( (ip <= ilimit)
1970 while ( (ip <= ilimit)
1946 && ((offset_2>0)
1971 && ((offset_2>0)
1947 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1972 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1948 /* store sequence */
1973 /* store sequence */
1949 matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
1974 matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
1950 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
1975 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
1951 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
1976 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
1952 ip += matchLength;
1977 ip += matchLength;
1953 anchor = ip;
1978 anchor = ip;
1954 continue; /* faster when present ... (?) */
1979 continue; /* faster when present ... (?) */
1955 } }
1980 } }
1956
1981
1957 /* Save reps for next block */
1982 /* Save reps for next block */
1958 ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
1983 ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
1959 ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
1984 ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
1960
1985
1961 /* Last Literals */
1986 /* Last Literals */
1962 { size_t const lastLLSize = iend - anchor;
1987 { size_t const lastLLSize = iend - anchor;
1963 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1988 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1964 seqStorePtr->lit += lastLLSize;
1989 seqStorePtr->lit += lastLLSize;
1965 }
1990 }
1966 }
1991 }
1967
1992
1968
1993
1969 static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1994 static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1970 {
1995 {
1971 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
1996 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
1972 }
1997 }
1973
1998
1974 static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1999 static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1975 {
2000 {
1976 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
2001 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
1977 }
2002 }
1978
2003
1979 static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2004 static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1980 {
2005 {
1981 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
2006 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
1982 }
2007 }
1983
2008
1984 static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2009 static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1985 {
2010 {
1986 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
2011 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
1987 }
2012 }
1988
2013
1989
2014
1990 FORCE_INLINE
2015 FORCE_INLINE
1991 void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
2016 void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
1992 const void* src, size_t srcSize,
2017 const void* src, size_t srcSize,
1993 const U32 searchMethod, const U32 depth)
2018 const U32 searchMethod, const U32 depth)
1994 {
2019 {
1995 seqStore_t* seqStorePtr = &(ctx->seqStore);
2020 seqStore_t* seqStorePtr = &(ctx->seqStore);
1996 const BYTE* const istart = (const BYTE*)src;
2021 const BYTE* const istart = (const BYTE*)src;
1997 const BYTE* ip = istart;
2022 const BYTE* ip = istart;
1998 const BYTE* anchor = istart;
2023 const BYTE* anchor = istart;
1999 const BYTE* const iend = istart + srcSize;
2024 const BYTE* const iend = istart + srcSize;
2000 const BYTE* const ilimit = iend - 8;
2025 const BYTE* const ilimit = iend - 8;
2001 const BYTE* const base = ctx->base;
2026 const BYTE* const base = ctx->base;
2002 const U32 dictLimit = ctx->dictLimit;
2027 const U32 dictLimit = ctx->dictLimit;
2003 const U32 lowestIndex = ctx->lowLimit;
2028 const U32 lowestIndex = ctx->lowLimit;
2004 const BYTE* const prefixStart = base + dictLimit;
2029 const BYTE* const prefixStart = base + dictLimit;
2005 const BYTE* const dictBase = ctx->dictBase;
2030 const BYTE* const dictBase = ctx->dictBase;
2006 const BYTE* const dictEnd = dictBase + dictLimit;
2031 const BYTE* const dictEnd = dictBase + dictLimit;
2007 const BYTE* const dictStart = dictBase + ctx->lowLimit;
2032 const BYTE* const dictStart = dictBase + ctx->lowLimit;
2008
2033
2009 const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
2034 const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
2010 const U32 mls = ctx->params.cParams.searchLength;
2035 const U32 mls = ctx->params.cParams.searchLength;
2011
2036
2012 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2037 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2013 size_t* offsetPtr,
2038 size_t* offsetPtr,
2014 U32 maxNbAttempts, U32 matchLengthSearch);
2039 U32 maxNbAttempts, U32 matchLengthSearch);
2015 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
2040 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
2016
2041
2017 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
2042 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
2018
2043
2019 /* init */
2044 /* init */
2020 ctx->nextToUpdate3 = ctx->nextToUpdate;
2045 ctx->nextToUpdate3 = ctx->nextToUpdate;
2021 ip += (ip == prefixStart);
2046 ip += (ip == prefixStart);
2022
2047
2023 /* Match Loop */
2048 /* Match Loop */
2024 while (ip < ilimit) {
2049 while (ip < ilimit) {
2025 size_t matchLength=0;
2050 size_t matchLength=0;
2026 size_t offset=0;
2051 size_t offset=0;
2027 const BYTE* start=ip+1;
2052 const BYTE* start=ip+1;
2028 U32 current = (U32)(ip-base);
2053 U32 current = (U32)(ip-base);
2029
2054
2030 /* check repCode */
2055 /* check repCode */
2031 { const U32 repIndex = (U32)(current+1 - offset_1);
2056 { const U32 repIndex = (U32)(current+1 - offset_1);
2032 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2057 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2033 const BYTE* const repMatch = repBase + repIndex;
2058 const BYTE* const repMatch = repBase + repIndex;
2034 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2059 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2035 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
2060 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
2036 /* repcode detected we should take it */
2061 /* repcode detected we should take it */
2037 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2062 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2038 matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2063 matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2039 if (depth==0) goto _storeSequence;
2064 if (depth==0) goto _storeSequence;
2040 } }
2065 } }
2041
2066
2042 /* first search (depth 0) */
2067 /* first search (depth 0) */
2043 { size_t offsetFound = 99999999;
2068 { size_t offsetFound = 99999999;
2044 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
2069 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
2045 if (ml2 > matchLength)
2070 if (ml2 > matchLength)
2046 matchLength = ml2, start = ip, offset=offsetFound;
2071 matchLength = ml2, start = ip, offset=offsetFound;
2047 }
2072 }
2048
2073
2049 if (matchLength < EQUAL_READ32) {
2074 if (matchLength < EQUAL_READ32) {
2050 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2075 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2051 continue;
2076 continue;
2052 }
2077 }
2053
2078
2054 /* let's try to find a better solution */
2079 /* let's try to find a better solution */
2055 if (depth>=1)
2080 if (depth>=1)
2056 while (ip<ilimit) {
2081 while (ip<ilimit) {
2057 ip ++;
2082 ip ++;
2058 current++;
2083 current++;
2059 /* check repCode */
2084 /* check repCode */
2060 if (offset) {
2085 if (offset) {
2061 const U32 repIndex = (U32)(current - offset_1);
2086 const U32 repIndex = (U32)(current - offset_1);
2062 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2087 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2063 const BYTE* const repMatch = repBase + repIndex;
2088 const BYTE* const repMatch = repBase + repIndex;
2064 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2089 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2065 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2090 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2066 /* repcode detected */
2091 /* repcode detected */
2067 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2092 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2068 size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2093 size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2069 int const gain2 = (int)(repLength * 3);
2094 int const gain2 = (int)(repLength * 3);
2070 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
2095 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
2071 if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
2096 if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
2072 matchLength = repLength, offset = 0, start = ip;
2097 matchLength = repLength, offset = 0, start = ip;
2073 } }
2098 } }
2074
2099
2075 /* search match, depth 1 */
2100 /* search match, depth 1 */
2076 { size_t offset2=99999999;
2101 { size_t offset2=99999999;
2077 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
2102 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
2078 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2103 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2079 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
2104 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
2080 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
2105 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
2081 matchLength = ml2, offset = offset2, start = ip;
2106 matchLength = ml2, offset = offset2, start = ip;
2082 continue; /* search a better one */
2107 continue; /* search a better one */
2083 } }
2108 } }
2084
2109
2085 /* let's find an even better one */
2110 /* let's find an even better one */
2086 if ((depth==2) && (ip<ilimit)) {
2111 if ((depth==2) && (ip<ilimit)) {
2087 ip ++;
2112 ip ++;
2088 current++;
2113 current++;
2089 /* check repCode */
2114 /* check repCode */
2090 if (offset) {
2115 if (offset) {
2091 const U32 repIndex = (U32)(current - offset_1);
2116 const U32 repIndex = (U32)(current - offset_1);
2092 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2117 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2093 const BYTE* const repMatch = repBase + repIndex;
2118 const BYTE* const repMatch = repBase + repIndex;
2094 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2119 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2095 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2120 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2096 /* repcode detected */
2121 /* repcode detected */
2097 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2122 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2098 size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2123 size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2099 int gain2 = (int)(repLength * 4);
2124 int gain2 = (int)(repLength * 4);
2100 int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
2125 int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
2101 if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
2126 if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
2102 matchLength = repLength, offset = 0, start = ip;
2127 matchLength = repLength, offset = 0, start = ip;
2103 } }
2128 } }
2104
2129
2105 /* search match, depth 2 */
2130 /* search match, depth 2 */
2106 { size_t offset2=99999999;
2131 { size_t offset2=99999999;
2107 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
2132 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
2108 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2133 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2109 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
2134 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
2110 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
2135 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
2111 matchLength = ml2, offset = offset2, start = ip;
2136 matchLength = ml2, offset = offset2, start = ip;
2112 continue;
2137 continue;
2113 } } }
2138 } } }
2114 break; /* nothing found : store previous solution */
2139 break; /* nothing found : store previous solution */
2115 }
2140 }
2116
2141
2117 /* catch up */
2142 /* catch up */
2118 if (offset) {
2143 if (offset) {
2119 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
2144 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
2120 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2145 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2121 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
2146 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
2122 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
2147 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
2123 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2148 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2124 }
2149 }
2125
2150
2126 /* store sequence */
2151 /* store sequence */
2127 _storeSequence:
2152 _storeSequence:
2128 { size_t const litLength = start - anchor;
2153 { size_t const litLength = start - anchor;
2129 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
2154 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
2130 anchor = ip = start + matchLength;
2155 anchor = ip = start + matchLength;
2131 }
2156 }
2132
2157
2133 /* check immediate repcode */
2158 /* check immediate repcode */
2134 while (ip <= ilimit) {
2159 while (ip <= ilimit) {
2135 const U32 repIndex = (U32)((ip-base) - offset_2);
2160 const U32 repIndex = (U32)((ip-base) - offset_2);
2136 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2161 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2137 const BYTE* const repMatch = repBase + repIndex;
2162 const BYTE* const repMatch = repBase + repIndex;
2138 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2163 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2139 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2164 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2140 /* repcode detected we should take it */
2165 /* repcode detected we should take it */
2141 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2166 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2142 matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2167 matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2143 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
2168 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
2144 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2169 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2145 ip += matchLength;
2170 ip += matchLength;
2146 anchor = ip;
2171 anchor = ip;
2147 continue; /* faster when present ... (?) */
2172 continue; /* faster when present ... (?) */
2148 }
2173 }
2149 break;
2174 break;
2150 } }
2175 } }
2151
2176
2152 /* Save reps for next block */
2177 /* Save reps for next block */
2153 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
2178 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2154
2179
2155 /* Last Literals */
2180 /* Last Literals */
2156 { size_t const lastLLSize = iend - anchor;
2181 { size_t const lastLLSize = iend - anchor;
2157 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2182 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2158 seqStorePtr->lit += lastLLSize;
2183 seqStorePtr->lit += lastLLSize;
2159 }
2184 }
2160 }
2185 }
2161
2186
2162
2187
2163 void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2188 void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2164 {
2189 {
2165 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
2190 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
2166 }
2191 }
2167
2192
2168 static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2193 static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2169 {
2194 {
2170 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
2195 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
2171 }
2196 }
2172
2197
2173 static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2198 static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2174 {
2199 {
2175 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
2200 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
2176 }
2201 }
2177
2202
2178 static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2203 static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2179 {
2204 {
2180 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
2205 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
2181 }
2206 }
2182
2207
2183
2208
2184 /* The optimal parser */
2209 /* The optimal parser */
2185 #include "zstd_opt.h"
2210 #include "zstd_opt.h"
2186
2211
2187 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2212 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2188 {
2213 {
2189 #ifdef ZSTD_OPT_H_91842398743
2214 #ifdef ZSTD_OPT_H_91842398743
2190 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
2215 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
2191 #else
2216 #else
2192 (void)ctx; (void)src; (void)srcSize;
2217 (void)ctx; (void)src; (void)srcSize;
2193 return;
2218 return;
2194 #endif
2219 #endif
2195 }
2220 }
2196
2221
2197 static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2222 static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2198 {
2223 {
2199 #ifdef ZSTD_OPT_H_91842398743
2224 #ifdef ZSTD_OPT_H_91842398743
2200 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
2225 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
2201 #else
2226 #else
2202 (void)ctx; (void)src; (void)srcSize;
2227 (void)ctx; (void)src; (void)srcSize;
2203 return;
2228 return;
2204 #endif
2229 #endif
2205 }
2230 }
2206
2231
2207 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2232 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2208 {
2233 {
2209 #ifdef ZSTD_OPT_H_91842398743
2234 #ifdef ZSTD_OPT_H_91842398743
2210 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
2235 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
2211 #else
2236 #else
2212 (void)ctx; (void)src; (void)srcSize;
2237 (void)ctx; (void)src; (void)srcSize;
2213 return;
2238 return;
2214 #endif
2239 #endif
2215 }
2240 }
2216
2241
2217 static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2242 static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2218 {
2243 {
2219 #ifdef ZSTD_OPT_H_91842398743
2244 #ifdef ZSTD_OPT_H_91842398743
2220 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
2245 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
2221 #else
2246 #else
2222 (void)ctx; (void)src; (void)srcSize;
2247 (void)ctx; (void)src; (void)srcSize;
2223 return;
2248 return;
2224 #endif
2249 #endif
2225 }
2250 }
2226
2251
2227
2252
2228 typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
2253 typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
2229
2254
2230 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2255 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2231 {
2256 {
2232 static const ZSTD_blockCompressor blockCompressor[2][8] = {
2257 static const ZSTD_blockCompressor blockCompressor[2][8] = {
2233 { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
2258 { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
2234 { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
2259 { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
2235 };
2260 };
2236
2261
2237 return blockCompressor[extDict][(U32)strat];
2262 return blockCompressor[extDict][(U32)strat];
2238 }
2263 }
2239
2264
2240
2265
/*! ZSTD_compressBlock_internal() :
 *  Compress a single block of input that already lies inside the context's window.
 * @return : number of bytes written into `dst`,
 *           0 when the block is judged not worth compressing (caller emits it raw),
 *           or an error code. */
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    /* extDict variant is needed whenever part of the valid window (lowLimit..dictLimit)
       lives in the old, non-contiguous segment */
    ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
    const BYTE* const base = zc->base;
    const BYTE* const istart = (const BYTE*)src;
    const U32 current = (U32)(istart-base);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;   /* don't even attempt compression below a certain srcSize */
    ZSTD_resetSeqStore(&(zc->seqStore));
    if (current > zc->nextToUpdate + 384)
        zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* update tree not updated after finding very long rep matches */
    /* first pass fills zc->seqStore with sequences; second pass entropy-codes them */
    blockCompressor(zc, src, srcSize);
    return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
}
2254
2279
2255
2280
/*! ZSTD_compress_generic() :
*   Compress a chunk of data into one or multiple blocks.
*   All blocks will be terminated, all input will be consumed.
*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
*   Frame is supposed already started (header already produced)
*   @return : compressed size, or an error code
*/
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = 1 << cctx->params.cParams.windowLog;

    /* fold this chunk into the running frame checksum before splitting into blocks */
    if (cctx->params.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        /* lastBlock flag is set only on the final block of the final frame chunk */
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
        size_t cSize;

        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
        if (remaining < blockSize) blockSize = remaining;

        /* preemptive overflow correction :
           when indices approach the 32-bit limit, rebase all window indices and
           shrink every table entry by `correction` so compression can continue */
        if (cctx->lowLimit > (2U<<30)) {
            U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
            U32 const current = (U32)(ip - cctx->base);
            U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
            U32 const correction = current - newCurrent;
            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
            ZSTD_reduceIndex(cctx, correction);
            cctx->base += correction;
            cctx->dictBase += correction;
            cctx->lowLimit -= correction;
            cctx->dictLimit -= correction;
            if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
            else cctx->nextToUpdate -= correction;
        }

        if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
            /* enforce maxDist : slide lowLimit forward so matches never exceed the window */
            U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
            if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
            if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
        }

        cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
        if (ZSTD_isError(cSize)) return cSize;

        if (cSize == 0) {  /* block is not compressible : store it raw */
            U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
            if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
            MEM_writeLE32(op, cBlockHeader24);   /* no pb, 4th byte will be overwritten */
            memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
            cSize = ZSTD_blockHeaderSize+blockSize;
        } else {
            /* 3-byte block header : lastBlock bit, block type, 21-bit size */
            U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
            MEM_writeLE24(op, cBlockHeader24);
            cSize += ZSTD_blockHeaderSize;
        }

        remaining -= blockSize;
        dstCapacity -= cSize;
        ip += blockSize;
        op += cSize;
    }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return op-ostart;
}
2332
2357
2333
2358
/*! ZSTD_writeFrameHeader() :
 *  Serialize the zstd frame header into `dst` :
 *  magic number, frame-header-descriptor byte, optional window descriptor,
 *  optional dictID (1/2/4 bytes) and optional frame content size (1/2/4/8 bytes).
 * @return : number of header bytes written, or an error code.
 *  note : requires dstCapacity >= ZSTD_frameHeaderSize_max even if the actual
 *         header turns out smaller. */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32 const checksumFlag = params.fParams.checksumFlag>0;
    U32 const windowSize = 1U << params.cParams.windowLog;
    /* singleSegment : whole content fits in one window, so the window descriptor byte is omitted */
    U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
    BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32 const fcsCode = params.fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) :   /* 0-3 */
                      0;
    /* descriptor byte layout : dictIDSizeCode (bits 0-1), checksum (bit 2),
       singleSegment (bit 5), fcsCode (bits 6-7) */
    BYTE  const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos;

    if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);

    MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
    op[4] = frameHeaderDecriptionByte; pos=5;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:   /* impossible */
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:   /* impossible */
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;   /* fcs field 1 stores size-256 */
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
2371
2396
2372
2397
/*! ZSTD_compressContinue_internal() :
 *  Core streaming entry point : maintain the context window across calls, then
 *  compress `src` as frame blocks (frame != 0) or as a single raw block (frame == 0).
 *  Writes the frame header first when the frame has just been initialized.
 * @return : number of bytes written into `dst` (header + compressed data), or an error code. */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    const BYTE* const ip = (const BYTE*) src;
    size_t fhSize = 0;

    if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */

    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
        if (ZSTD_isError(fhSize)) return fhSize;
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    /* Check if blocks follow each other */
    if (src != cctx->nextSrc) {
        /* not contiguous : previous segment becomes the external dictionary,
           and base is rebased so the new input keeps monotonically increasing indices */
        ptrdiff_t const delta = cctx->nextSrc - ip;
        cctx->lowLimit = cctx->dictLimit;
        cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
        cctx->dictBase = cctx->base;
        cctx->base -= delta;
        cctx->nextToUpdate = cctx->dictLimit;
        if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit;   /* too small extDict */
    }

    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
        cctx->lowLimit = lowLimitMax;
    }

    cctx->nextSrc = ip + srcSize;

    /* empty input still returns the frame header bytes written above (if any) */
    if (srcSize) {
        size_t const cSize = frame ?
                             ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(cSize)) return cSize;
        return cSize + fhSize;
    } else
        return fhSize;
}
2419
2446
2420
2447
2421 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
2448 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
2422 void* dst, size_t dstCapacity,
2449 void* dst, size_t dstCapacity,
2423 const void* src, size_t srcSize)
2450 const void* src, size_t srcSize)
2424 {
2451 {
2425 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
2452 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
2426 }
2453 }
2427
2454
2428
2455
2429 size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
2456 size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
2430 {
2457 {
2431 return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
2458 return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
2432 }
2459 }
2433
2460
2434 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2461 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2435 {
2462 {
2436 size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
2463 size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
2437 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
2464 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
2438 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
2465 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
2439 }
2466 }
2440
2467
2441
2468
/*! ZSTD_loadDictionaryContent() :
 *  Reference `src` as dictionary content : make it the current prefix of the
 *  window, then pre-fill the strategy's match-finding tables from it.
 * @return : 0, or an error code. */
static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    const BYTE* const ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;

    /* input becomes current prefix */
    zc->lowLimit = zc->dictLimit;
    zc->dictLimit = (U32)(zc->nextSrc - zc->base);
    zc->dictBase = zc->base;
    zc->base += ip - zc->nextSrc;
    zc->nextToUpdate = zc->dictLimit;
    /* forceWindow mode keeps loadedDictEnd at 0 so maxDist enforcement still applies */
    zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);

    zc->nextSrc = iend;
    if (srcSize <= HASH_READ_SIZE) return 0;   /* too small to seed tables */

    /* seed the search structures appropriate for the configured strategy */
    switch(zc->params.cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
        break;

    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btopt2:
        ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
        break;

    default:
        return ERROR(GENERIC);   /* strategy doesn't exist; impossible */
    }

    zc->nextToUpdate = zc->loadedDictEnd;
    return 0;
}
2487
2514
2488
2515
2489 /* Dictionaries that assign zero probability to symbols that show up causes problems
2516 /* Dictionaries that assign zero probability to symbols that show up causes problems
2490 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
2517 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
2491 that we may encounter during compression.
2518 that we may encounter during compression.
2492 NOTE: This behavior is not standard and could be improved in the future. */
2519 NOTE: This behavior is not standard and could be improved in the future. */
2493 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
2520 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
2494 U32 s;
2521 U32 s;
2495 if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
2522 if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
2496 for (s = 0; s <= maxSymbolValue; ++s) {
2523 for (s = 0; s <= maxSymbolValue; ++s) {
2497 if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
2524 if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
2498 }
2525 }
2499 return 0;
2526 return 0;
2500 }
2527 }
2501
2528
2502
2529
/* Dictionary format :
     Magic == ZSTD_DICT_MAGIC (4 bytes)
     HUF_writeCTable(256)
     FSE_writeNCount(off)
     FSE_writeNCount(ml)
     FSE_writeNCount(ll)
     RepOffsets
     Dictionary content
*/
/*! ZSTD_loadDictEntropyStats() :
    Parse the entropy tables section of a structured dictionary and load the
    resulting Huffman/FSE coding tables plus starting repeat-offsets into `cctx`.
    @return : size read from dictionary
    note : magic number supposed already checked */
static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];   /* workspace for FSE table construction */

    /* literals Huffman table */
    { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
      if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
      dictPtr += hufHeaderSize;
    }

    /* offset codes FSE table */
    { unsigned offcodeLog;
      size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
      if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
      if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
      /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
      CHECK_E (FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
      dictPtr += offcodeHeaderSize;
    }

    /* match lengths FSE table */
    { short matchlengthNCount[MaxML+1];
      unsigned matchlengthMaxValue = MaxML, matchlengthLog;
      size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
      if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
      if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
      /* Every match length code must have non-zero probability */
      CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
      CHECK_E (FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
      dictPtr += matchlengthHeaderSize;
    }

    /* literal lengths FSE table */
    { short litlengthNCount[MaxLL+1];
      unsigned litlengthMaxValue = MaxLL, litlengthLog;
      size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
      if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
      if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
      /* Every literal length code must have non-zero probability */
      CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
      CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
      dictPtr += litlengthHeaderSize;
    }

    /* three 4-byte starting repeat-offsets : each must be non-zero and within dictSize */
    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
    dictPtr += 12;

    { U32 offcodeMax = MaxOff;
      if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) {
          U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB;   /* The maximum offset that must be supported */
          /* Calculate minimum offset code required to represent maxOffset */
          offcodeMax = ZSTD_highbit32(maxOffset);
      }
      /* Every possible supported offset <= dictContentSize + 128 KB must be representable */
      CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
    }

    cctx->flagStaticTables = 1;
    return dictPtr - (const BYTE*)dict;
}
2578
2605
/** ZSTD_compress_insertDictionary() :
*   Load a dictionary into `zc`. A dictionary starting with ZSTD_DICT_MAGIC is
*   parsed for entropy tables + content; anything else is treated as raw content.
*   @return : 0, or an error code */
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
{
    /* nothing useful below 9 bytes (magic + dictID would not fit) */
    if ((dict==NULL) || (dictSize<=8)) return 0;

    /* default : dict is pure content */
    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
    zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);

    /* known magic number : dict is parsed for entropy stats and content */
    { size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8);
      size_t const eSize = loadError + 8;   /* entropy section size + header, valid only when no error */
      if (ZSTD_isError(loadError)) return loadError;
      return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
    }
}
2596
2623
2597
2598 /*! ZSTD_compressBegin_internal() :
2624 /*! ZSTD_compressBegin_internal() :
2599 * @return : 0, or an error code */
2625 * @return : 0, or an error code */
2600 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2626 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2601 const void* dict, size_t dictSize,
2627 const void* dict, size_t dictSize,
2602 ZSTD_parameters params, U64 pledgedSrcSize)
2628 ZSTD_parameters params, U64 pledgedSrcSize)
2603 {
2629 {
2604 ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
2630 ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
2605 CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
2631 CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
2606 return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
2632 return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
2607 }
2633 }
2608
2634
2609
2635
2610 /*! ZSTD_compressBegin_advanced() :
2636 /*! ZSTD_compressBegin_advanced() :
2611 * @return : 0, or an error code */
2637 * @return : 0, or an error code */
2612 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
2638 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
2613 const void* dict, size_t dictSize,
2639 const void* dict, size_t dictSize,
2614 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2640 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2615 {
2641 {
2616 /* compression parameters verification and optimization */
2642 /* compression parameters verification and optimization */
2617 CHECK_F(ZSTD_checkCParams(params.cParams));
2643 CHECK_F(ZSTD_checkCParams(params.cParams));
2618 return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
2644 return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
2619 }
2645 }
2620
2646
2621
2647
2622 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
2648 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
2623 {
2649 {
2624 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
2650 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
2625 return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
2651 return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
2626 }
2652 }
2627
2653
2628
2654
2629 size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
2655 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
2630 {
2656 {
2631 return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
2657 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
2632 }
2658 }
2633
2659
2634
2660
2635 /*! ZSTD_writeEpilogue() :
2661 /*! ZSTD_writeEpilogue() :
2636 * Ends a frame.
2662 * Ends a frame.
2637 * @return : nb of bytes written into dst (or an error code) */
2663 * @return : nb of bytes written into dst (or an error code) */
2638 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
2664 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
2639 {
2665 {
2640 BYTE* const ostart = (BYTE*)dst;
2666 BYTE* const ostart = (BYTE*)dst;
2641 BYTE* op = ostart;
2667 BYTE* op = ostart;
2642 size_t fhSize = 0;
2668 size_t fhSize = 0;
2643
2669
2644 if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
2670 if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
2645
2671
2646 /* special case : empty frame */
2672 /* special case : empty frame */
2647 if (cctx->stage == ZSTDcs_init) {
2673 if (cctx->stage == ZSTDcs_init) {
2648 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
2674 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
2649 if (ZSTD_isError(fhSize)) return fhSize;
2675 if (ZSTD_isError(fhSize)) return fhSize;
2650 dstCapacity -= fhSize;
2676 dstCapacity -= fhSize;
2651 op += fhSize;
2677 op += fhSize;
2652 cctx->stage = ZSTDcs_ongoing;
2678 cctx->stage = ZSTDcs_ongoing;
2653 }
2679 }
2654
2680
2655 if (cctx->stage != ZSTDcs_ending) {
2681 if (cctx->stage != ZSTDcs_ending) {
2656 /* write one last empty block, make it the "last" block */
2682 /* write one last empty block, make it the "last" block */
2657 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
2683 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
2658 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
2684 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
2659 MEM_writeLE32(op, cBlockHeader24);
2685 MEM_writeLE32(op, cBlockHeader24);
2660 op += ZSTD_blockHeaderSize;
2686 op += ZSTD_blockHeaderSize;
2661 dstCapacity -= ZSTD_blockHeaderSize;
2687 dstCapacity -= ZSTD_blockHeaderSize;
2662 }
2688 }
2663
2689
2664 if (cctx->params.fParams.checksumFlag) {
2690 if (cctx->params.fParams.checksumFlag) {
2665 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
2691 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
2666 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
2692 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
2667 MEM_writeLE32(op, checksum);
2693 MEM_writeLE32(op, checksum);
2668 op += 4;
2694 op += 4;
2669 }
2695 }
2670
2696
2671 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
2697 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
2672 return op-ostart;
2698 return op-ostart;
2673 }
2699 }
2674
2700
2675
2701
2676 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
2702 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
2677 void* dst, size_t dstCapacity,
2703 void* dst, size_t dstCapacity,
2678 const void* src, size_t srcSize)
2704 const void* src, size_t srcSize)
2679 {
2705 {
2680 size_t endResult;
2706 size_t endResult;
2681 size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
2707 size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
2682 if (ZSTD_isError(cSize)) return cSize;
2708 if (ZSTD_isError(cSize)) return cSize;
2683 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
2709 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
2684 if (ZSTD_isError(endResult)) return endResult;
2710 if (ZSTD_isError(endResult)) return endResult;
2685 return cSize + endResult;
2711 return cSize + endResult;
2686 }
2712 }
2687
2713
2688
2714
2689 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2715 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2690 void* dst, size_t dstCapacity,
2716 void* dst, size_t dstCapacity,
2691 const void* src, size_t srcSize,
2717 const void* src, size_t srcSize,
2692 const void* dict,size_t dictSize,
2718 const void* dict,size_t dictSize,
2693 ZSTD_parameters params)
2719 ZSTD_parameters params)
2694 {
2720 {
2695 CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
2721 CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
2696 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2722 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2697 }
2723 }
2698
2724
2699 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
2725 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
2700 void* dst, size_t dstCapacity,
2726 void* dst, size_t dstCapacity,
2701 const void* src, size_t srcSize,
2727 const void* src, size_t srcSize,
2702 const void* dict,size_t dictSize,
2728 const void* dict,size_t dictSize,
2703 ZSTD_parameters params)
2729 ZSTD_parameters params)
2704 {
2730 {
2705 CHECK_F(ZSTD_checkCParams(params.cParams));
2731 CHECK_F(ZSTD_checkCParams(params.cParams));
2706 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2732 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2707 }
2733 }
2708
2734
2709 size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
2735 size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
2710 {
2736 {
2711 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
2737 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
2712 params.fParams.contentSizeFlag = 1;
2738 params.fParams.contentSizeFlag = 1;
2713 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2739 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2714 }
2740 }
2715
2741
2716 size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2742 size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2717 {
2743 {
2718 return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
2744 return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
2719 }
2745 }
2720
2746
2721 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2747 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2722 {
2748 {
2723 size_t result;
2749 size_t result;
2724 ZSTD_CCtx ctxBody;
2750 ZSTD_CCtx ctxBody;
2725 memset(&ctxBody, 0, sizeof(ctxBody));
2751 memset(&ctxBody, 0, sizeof(ctxBody));
2726 memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
2752 memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
2727 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
2753 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
2728 ZSTD_free(ctxBody.workSpace, defaultCustomMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
2754 ZSTD_free(ctxBody.workSpace, defaultCustomMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
2729 return result;
2755 return result;
2730 }
2756 }
2731
2757
2732
2758
2733 /* ===== Dictionary API ===== */
2759 /* ===== Dictionary API ===== */
2734
2760
2735 struct ZSTD_CDict_s {
2761 struct ZSTD_CDict_s {
2736 void* dictContent;
2762 void* dictBuffer;
2763 const void* dictContent;
2737 size_t dictContentSize;
2764 size_t dictContentSize;
2738 ZSTD_CCtx* refContext;
2765 ZSTD_CCtx* refContext;
2739 }; /* typedef'd tp ZSTD_CDict within "zstd.h" */
2766 }; /* typedef'd tp ZSTD_CDict within "zstd.h" */
2740
2767
2741 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2768 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2742 {
2769 {
2743 if (cdict==NULL) return 0; /* support sizeof on NULL */
2770 if (cdict==NULL) return 0; /* support sizeof on NULL */
2744 return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
2771 return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
2745 }
2772 }
2746
2773
2747 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
2774 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
2775 ZSTD_parameters params, ZSTD_customMem customMem)
2748 {
2776 {
2749 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2777 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2750 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2778 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2751
2779
2752 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2780 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2753 void* const dictContent = ZSTD_malloc(dictSize, customMem);
2754 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2781 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2755
2782
2756 if (!dictContent || !cdict || !cctx) {
2783 if (!cdict || !cctx) {
2757 ZSTD_free(dictContent, customMem);
2758 ZSTD_free(cdict, customMem);
2784 ZSTD_free(cdict, customMem);
2759 ZSTD_free(cctx, customMem);
2785 ZSTD_free(cctx, customMem);
2760 return NULL;
2786 return NULL;
2761 }
2787 }
2762
2788
2763 if (dictSize) {
2789 if ((byReference) || (!dictBuffer) || (!dictSize)) {
2764 memcpy(dictContent, dict, dictSize);
2790 cdict->dictBuffer = NULL;
2791 cdict->dictContent = dictBuffer;
2792 } else {
2793 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
2794 if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
2795 memcpy(internalBuffer, dictBuffer, dictSize);
2796 cdict->dictBuffer = internalBuffer;
2797 cdict->dictContent = internalBuffer;
2765 }
2798 }
2766 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
2799
2800 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2767 if (ZSTD_isError(errorCode)) {
2801 if (ZSTD_isError(errorCode)) {
2768 ZSTD_free(dictContent, customMem);
2802 ZSTD_free(cdict->dictBuffer, customMem);
2803 ZSTD_free(cctx, customMem);
2769 ZSTD_free(cdict, customMem);
2804 ZSTD_free(cdict, customMem);
2770 ZSTD_free(cctx, customMem);
2771 return NULL;
2805 return NULL;
2772 } }
2806 } }
2773
2807
2774 cdict->dictContent = dictContent;
2808 cdict->refContext = cctx;
2775 cdict->dictContentSize = dictSize;
2809 cdict->dictContentSize = dictSize;
2776 cdict->refContext = cctx;
2777 return cdict;
2810 return cdict;
2778 }
2811 }
2779 }
2812 }
2780
2813
2781 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
2814 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
2782 {
2815 {
2783 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2816 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2784 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2817 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2785 params.fParams.contentSizeFlag = 1;
2818 params.fParams.contentSizeFlag = 1;
2786 return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
2819 return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
2820 }
2821
2822 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
2823 {
2824 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2825 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2826 params.fParams.contentSizeFlag = 1;
2827 return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
2787 }
2828 }
2788
2829
2789 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
2830 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
2790 {
2831 {
2791 if (cdict==NULL) return 0; /* support free on NULL */
2832 if (cdict==NULL) return 0; /* support free on NULL */
2792 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2833 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2793 ZSTD_freeCCtx(cdict->refContext);
2834 ZSTD_freeCCtx(cdict->refContext);
2794 ZSTD_free(cdict->dictContent, cMem);
2835 ZSTD_free(cdict->dictBuffer, cMem);
2795 ZSTD_free(cdict, cMem);
2836 ZSTD_free(cdict, cMem);
2796 return 0;
2837 return 0;
2797 }
2838 }
2798 }
2839 }
2799
2840
2800 static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
2841 static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
2801 return ZSTD_getParamsFromCCtx(cdict->refContext);
2842 return ZSTD_getParamsFromCCtx(cdict->refContext);
2802 }
2843 }
2803
2844
2804 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
2845 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2805 {
2846 {
2806 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2847 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2807 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2848 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2808 return 0;
2849 return 0;
2809 }
2850 }
2810
2851
2811 /*! ZSTD_compress_usingCDict() :
2852 /*! ZSTD_compress_usingCDict() :
2812 * Compression using a digested Dictionary.
2853 * Compression using a digested Dictionary.
2813 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
2854 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
2814 * Note that compression level is decided during dictionary creation */
2855 * Note that compression level is decided during dictionary creation */
2815 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
2856 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
2816 void* dst, size_t dstCapacity,
2857 void* dst, size_t dstCapacity,
2817 const void* src, size_t srcSize,
2858 const void* src, size_t srcSize,
2818 const ZSTD_CDict* cdict)
2859 const ZSTD_CDict* cdict)
2819 {
2860 {
2820 CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
2861 CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
2821
2862
2822 if (cdict->refContext->params.fParams.contentSizeFlag==1) {
2863 if (cdict->refContext->params.fParams.contentSizeFlag==1) {
2823 cctx->params.fParams.contentSizeFlag = 1;
2864 cctx->params.fParams.contentSizeFlag = 1;
2824 cctx->frameContentSize = srcSize;
2865 cctx->frameContentSize = srcSize;
2825 }
2866 }
2826
2867
2827 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2868 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2828 }
2869 }
2829
2870
2830
2871
2831
2872
2832 /* ******************************************************************
2873 /* ******************************************************************
2833 * Streaming
2874 * Streaming
2834 ********************************************************************/
2875 ********************************************************************/
2835
2876
2836 typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
2877 typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
2837
2878
2838 struct ZSTD_CStream_s {
2879 struct ZSTD_CStream_s {
2839 ZSTD_CCtx* cctx;
2880 ZSTD_CCtx* cctx;
2840 ZSTD_CDict* cdictLocal;
2881 ZSTD_CDict* cdictLocal;
2841 const ZSTD_CDict* cdict;
2882 const ZSTD_CDict* cdict;
2842 char* inBuff;
2883 char* inBuff;
2843 size_t inBuffSize;
2884 size_t inBuffSize;
2844 size_t inToCompress;
2885 size_t inToCompress;
2845 size_t inBuffPos;
2886 size_t inBuffPos;
2846 size_t inBuffTarget;
2887 size_t inBuffTarget;
2847 size_t blockSize;
2888 size_t blockSize;
2848 char* outBuff;
2889 char* outBuff;
2849 size_t outBuffSize;
2890 size_t outBuffSize;
2850 size_t outBuffContentSize;
2891 size_t outBuffContentSize;
2851 size_t outBuffFlushedSize;
2892 size_t outBuffFlushedSize;
2852 ZSTD_cStreamStage stage;
2893 ZSTD_cStreamStage stage;
2853 U32 checksum;
2894 U32 checksum;
2854 U32 frameEnded;
2895 U32 frameEnded;
2855 U64 pledgedSrcSize;
2896 U64 pledgedSrcSize;
2856 U64 inputProcessed;
2897 U64 inputProcessed;
2857 ZSTD_parameters params;
2898 ZSTD_parameters params;
2858 ZSTD_customMem customMem;
2899 ZSTD_customMem customMem;
2859 }; /* typedef'd to ZSTD_CStream within "zstd.h" */
2900 }; /* typedef'd to ZSTD_CStream within "zstd.h" */
2860
2901
2861 ZSTD_CStream* ZSTD_createCStream(void)
2902 ZSTD_CStream* ZSTD_createCStream(void)
2862 {
2903 {
2863 return ZSTD_createCStream_advanced(defaultCustomMem);
2904 return ZSTD_createCStream_advanced(defaultCustomMem);
2864 }
2905 }
2865
2906
2866 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
2907 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
2867 {
2908 {
2868 ZSTD_CStream* zcs;
2909 ZSTD_CStream* zcs;
2869
2910
2870 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2911 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2871 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2912 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2872
2913
2873 zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
2914 zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
2874 if (zcs==NULL) return NULL;
2915 if (zcs==NULL) return NULL;
2875 memset(zcs, 0, sizeof(ZSTD_CStream));
2916 memset(zcs, 0, sizeof(ZSTD_CStream));
2876 memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
2917 memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
2877 zcs->cctx = ZSTD_createCCtx_advanced(customMem);
2918 zcs->cctx = ZSTD_createCCtx_advanced(customMem);
2878 if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; }
2919 if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; }
2879 return zcs;
2920 return zcs;
2880 }
2921 }
2881
2922
2882 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
2923 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
2883 {
2924 {
2884 if (zcs==NULL) return 0; /* support free on NULL */
2925 if (zcs==NULL) return 0; /* support free on NULL */
2885 { ZSTD_customMem const cMem = zcs->customMem;
2926 { ZSTD_customMem const cMem = zcs->customMem;
2886 ZSTD_freeCCtx(zcs->cctx);
2927 ZSTD_freeCCtx(zcs->cctx);
2887 ZSTD_freeCDict(zcs->cdictLocal);
2928 ZSTD_freeCDict(zcs->cdictLocal);
2888 ZSTD_free(zcs->inBuff, cMem);
2929 ZSTD_free(zcs->inBuff, cMem);
2889 ZSTD_free(zcs->outBuff, cMem);
2930 ZSTD_free(zcs->outBuff, cMem);
2890 ZSTD_free(zcs, cMem);
2931 ZSTD_free(zcs, cMem);
2891 return 0;
2932 return 0;
2892 }
2933 }
2893 }
2934 }
2894
2935
2895
2936
2896 /*====== Initialization ======*/
2937 /*====== Initialization ======*/
2897
2938
2898 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
2939 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
2899 size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
2940 size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
2900
2941
2901 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2942 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2902 {
2943 {
2903 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */
2944 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2904
2945
2905 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2946 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2906 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
2947 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
2907
2948
2908 zcs->inToCompress = 0;
2949 zcs->inToCompress = 0;
2909 zcs->inBuffPos = 0;
2950 zcs->inBuffPos = 0;
2910 zcs->inBuffTarget = zcs->blockSize;
2951 zcs->inBuffTarget = zcs->blockSize;
2911 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
2952 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
2912 zcs->stage = zcss_load;
2953 zcs->stage = zcss_load;
2913 zcs->frameEnded = 0;
2954 zcs->frameEnded = 0;
2914 zcs->pledgedSrcSize = pledgedSrcSize;
2955 zcs->pledgedSrcSize = pledgedSrcSize;
2915 zcs->inputProcessed = 0;
2956 zcs->inputProcessed = 0;
2916 return 0; /* ready to go */
2957 return 0; /* ready to go */
2917 }
2958 }
2918
2959
2919 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2960 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2920 const void* dict, size_t dictSize,
2961 const void* dict, size_t dictSize,
2921 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2962 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2922 {
2963 {
2923 /* allocate buffers */
2964 /* allocate buffers */
2924 { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
2965 { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
2925 if (zcs->inBuffSize < neededInBuffSize) {
2966 if (zcs->inBuffSize < neededInBuffSize) {
2926 zcs->inBuffSize = neededInBuffSize;
2967 zcs->inBuffSize = neededInBuffSize;
2927 ZSTD_free(zcs->inBuff, zcs->customMem);
2968 ZSTD_free(zcs->inBuff, zcs->customMem);
2928 zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
2969 zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
2929 if (zcs->inBuff == NULL) return ERROR(memory_allocation);
2970 if (zcs->inBuff == NULL) return ERROR(memory_allocation);
2930 }
2971 }
2931 zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
2972 zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
2932 }
2973 }
2933 if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
2974 if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
2934 zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
2975 zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
2935 ZSTD_free(zcs->outBuff, zcs->customMem);
2976 ZSTD_free(zcs->outBuff, zcs->customMem);
2936 zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
2977 zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
2937 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2978 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2938 }
2979 }
2939
2980
2940 if (dict) {
2981 if (dict && dictSize >= 8) {
2941 ZSTD_freeCDict(zcs->cdictLocal);
2982 ZSTD_freeCDict(zcs->cdictLocal);
2942 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
2983 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
2943 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2984 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2944 zcs->cdict = zcs->cdictLocal;
2985 zcs->cdict = zcs->cdictLocal;
2945 } else zcs->cdict = NULL;
2986 } else zcs->cdict = NULL;
2946
2987
2947 zcs->checksum = params.fParams.checksumFlag > 0;
2988 zcs->checksum = params.fParams.checksumFlag > 0;
2948 zcs->params = params;
2989 zcs->params = params;
2949
2990
2950 return ZSTD_resetCStream(zcs, pledgedSrcSize);
2991 return ZSTD_resetCStream(zcs, pledgedSrcSize);
2951 }
2992 }
2952
2993
2953 /* note : cdict must outlive compression session */
2994 /* note : cdict must outlive compression session */
2954 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
2995 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
2955 {
2996 {
2956 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2997 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2957 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2998 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2958 zcs->cdict = cdict;
2999 zcs->cdict = cdict;
3000 zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
2959 return initError;
3001 return initError;
2960 }
3002 }
2961
3003
2962 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3004 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
2963 {
3005 {
2964 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
3006 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
2965 return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
3007 return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
2966 }
3008 }
2967
3009
2968 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
3010 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
2969 {
3011 {
2970 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3012 ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3013 if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
2971 return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
3014 return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
2972 }
3015 }
2973
3016
2974 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3017 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
2975 {
3018 {
2976 return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
3019 return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
2977 }
3020 }
2978
3021
2979 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
3022 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
2980 {
3023 {
2981 if (zcs==NULL) return 0; /* support sizeof on NULL */
3024 if (zcs==NULL) return 0; /* support sizeof on NULL */
2982 return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
3025 return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
2983 }
3026 }
2984
3027
2985 /*====== Compression ======*/
3028 /*====== Compression ======*/
2986
3029
2987 typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
3030 typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
2988
3031
2989 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
3032 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2990 {
3033 {
2991 size_t const length = MIN(dstCapacity, srcSize);
3034 size_t const length = MIN(dstCapacity, srcSize);
2992 memcpy(dst, src, length);
3035 memcpy(dst, src, length);
2993 return length;
3036 return length;
2994 }
3037 }
2995
3038
2996 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
3039 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
2997 void* dst, size_t* dstCapacityPtr,
3040 void* dst, size_t* dstCapacityPtr,
2998 const void* src, size_t* srcSizePtr,
3041 const void* src, size_t* srcSizePtr,
2999 ZSTD_flush_e const flush)
3042 ZSTD_flush_e const flush)
3000 {
3043 {
3001 U32 someMoreWork = 1;
3044 U32 someMoreWork = 1;
3002 const char* const istart = (const char*)src;
3045 const char* const istart = (const char*)src;
3003 const char* const iend = istart + *srcSizePtr;
3046 const char* const iend = istart + *srcSizePtr;
3004 const char* ip = istart;
3047 const char* ip = istart;
3005 char* const ostart = (char*)dst;
3048 char* const ostart = (char*)dst;
3006 char* const oend = ostart + *dstCapacityPtr;
3049 char* const oend = ostart + *dstCapacityPtr;
3007 char* op = ostart;
3050 char* op = ostart;
3008
3051
3009 while (someMoreWork) {
3052 while (someMoreWork) {
3010 switch(zcs->stage)
3053 switch(zcs->stage)
3011 {
3054 {
3012 case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
3055 case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
3013
3056
3014 case zcss_load:
3057 case zcss_load:
3015 /* complete inBuffer */
3058 /* complete inBuffer */
3016 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
3059 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
3017 size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
3060 size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
3018 zcs->inBuffPos += loaded;
3061 zcs->inBuffPos += loaded;
3019 ip += loaded;
3062 ip += loaded;
3020 if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
3063 if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
3021 someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */
3064 someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */
3022 } }
3065 } }
3023 /* compress current block (note : this stage cannot be stopped in the middle) */
3066 /* compress current block (note : this stage cannot be stopped in the middle) */
3024 { void* cDst;
3067 { void* cDst;
3025 size_t cSize;
3068 size_t cSize;
3026 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
3069 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
3027 size_t oSize = oend-op;
3070 size_t oSize = oend-op;
3028 if (oSize >= ZSTD_compressBound(iSize))
3071 if (oSize >= ZSTD_compressBound(iSize))
3029 cDst = op; /* compress directly into output buffer (avoid flush stage) */
3072 cDst = op; /* compress directly into output buffer (avoid flush stage) */
3030 else
3073 else
3031 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
3074 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
3032 cSize = (flush == zsf_end) ?
3075 cSize = (flush == zsf_end) ?
3033 ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
3076 ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
3034 ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
3077 ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
3035 if (ZSTD_isError(cSize)) return cSize;
3078 if (ZSTD_isError(cSize)) return cSize;
3036 if (flush == zsf_end) zcs->frameEnded = 1;
3079 if (flush == zsf_end) zcs->frameEnded = 1;
3037 /* prepare next block */
3080 /* prepare next block */
3038 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
3081 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
3039 if (zcs->inBuffTarget > zcs->inBuffSize)
3082 if (zcs->inBuffTarget > zcs->inBuffSize)
3040 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
3083 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
3041 zcs->inToCompress = zcs->inBuffPos;
3084 zcs->inToCompress = zcs->inBuffPos;
3042 if (cDst == op) { op += cSize; break; } /* no need to flush */
3085 if (cDst == op) { op += cSize; break; } /* no need to flush */
3043 zcs->outBuffContentSize = cSize;
3086 zcs->outBuffContentSize = cSize;
3044 zcs->outBuffFlushedSize = 0;
3087 zcs->outBuffFlushedSize = 0;
3045 zcs->stage = zcss_flush; /* pass-through to flush stage */
3088 zcs->stage = zcss_flush; /* pass-through to flush stage */
3046 }
3089 }
3047
3090
3048 case zcss_flush:
3091 case zcss_flush:
3049 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3092 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3050 size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3093 size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3051 op += flushed;
3094 op += flushed;
3052 zcs->outBuffFlushedSize += flushed;
3095 zcs->outBuffFlushedSize += flushed;
3053 if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
3096 if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
3054 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
3097 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
3055 zcs->stage = zcss_load;
3098 zcs->stage = zcss_load;
3056 break;
3099 break;
3057 }
3100 }
3058
3101
3059 case zcss_final:
3102 case zcss_final:
3060 someMoreWork = 0; /* do nothing */
3103 someMoreWork = 0; /* do nothing */
3061 break;
3104 break;
3062
3105
3063 default:
3106 default:
3064 return ERROR(GENERIC); /* impossible */
3107 return ERROR(GENERIC); /* impossible */
3065 }
3108 }
3066 }
3109 }
3067
3110
3068 *srcSizePtr = ip - istart;
3111 *srcSizePtr = ip - istart;
3069 *dstCapacityPtr = op - ostart;
3112 *dstCapacityPtr = op - ostart;
3070 zcs->inputProcessed += *srcSizePtr;
3113 zcs->inputProcessed += *srcSizePtr;
3071 if (zcs->frameEnded) return 0;
3114 if (zcs->frameEnded) return 0;
3072 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3115 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3073 if (hintInSize==0) hintInSize = zcs->blockSize;
3116 if (hintInSize==0) hintInSize = zcs->blockSize;
3074 return hintInSize;
3117 return hintInSize;
3075 }
3118 }
3076 }
3119 }
3077
3120
3078 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
3121 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
3079 {
3122 {
3080 size_t sizeRead = input->size - input->pos;
3123 size_t sizeRead = input->size - input->pos;
3081 size_t sizeWritten = output->size - output->pos;
3124 size_t sizeWritten = output->size - output->pos;
3082 size_t const result = ZSTD_compressStream_generic(zcs,
3125 size_t const result = ZSTD_compressStream_generic(zcs,
3083 (char*)(output->dst) + output->pos, &sizeWritten,
3126 (char*)(output->dst) + output->pos, &sizeWritten,
3084 (const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
3127 (const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
3085 input->pos += sizeRead;
3128 input->pos += sizeRead;
3086 output->pos += sizeWritten;
3129 output->pos += sizeWritten;
3087 return result;
3130 return result;
3088 }
3131 }
3089
3132
3090
3133
3091 /*====== Finalize ======*/
3134 /*====== Finalize ======*/
3092
3135
3093 /*! ZSTD_flushStream() :
3136 /*! ZSTD_flushStream() :
3094 * @return : amount of data remaining to flush */
3137 * @return : amount of data remaining to flush */
3095 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3138 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3096 {
3139 {
3097 size_t srcSize = 0;
3140 size_t srcSize = 0;
3098 size_t sizeWritten = output->size - output->pos;
3141 size_t sizeWritten = output->size - output->pos;
3099 size_t const result = ZSTD_compressStream_generic(zcs,
3142 size_t const result = ZSTD_compressStream_generic(zcs,
3100 (char*)(output->dst) + output->pos, &sizeWritten,
3143 (char*)(output->dst) + output->pos, &sizeWritten,
3101 &srcSize, &srcSize, /* use a valid src address instead of NULL */
3144 &srcSize, &srcSize, /* use a valid src address instead of NULL */
3102 zsf_flush);
3145 zsf_flush);
3103 output->pos += sizeWritten;
3146 output->pos += sizeWritten;
3104 if (ZSTD_isError(result)) return result;
3147 if (ZSTD_isError(result)) return result;
3105 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
3148 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
3106 }
3149 }
3107
3150
3108
3151
3109 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3152 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3110 {
3153 {
3111 BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
3154 BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
3112 BYTE* const oend = (BYTE*)(output->dst) + output->size;
3155 BYTE* const oend = (BYTE*)(output->dst) + output->size;
3113 BYTE* op = ostart;
3156 BYTE* op = ostart;
3114
3157
3115 if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
3158 if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
3116 return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
3159 return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
3117
3160
3118 if (zcs->stage != zcss_final) {
3161 if (zcs->stage != zcss_final) {
3119 /* flush whatever remains */
3162 /* flush whatever remains */
3120 size_t srcSize = 0;
3163 size_t srcSize = 0;
3121 size_t sizeWritten = output->size - output->pos;
3164 size_t sizeWritten = output->size - output->pos;
3122 size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
3165 size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
3123 size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3166 size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3124 op += sizeWritten;
3167 op += sizeWritten;
3125 if (remainingToFlush) {
3168 if (remainingToFlush) {
3126 output->pos += sizeWritten;
3169 output->pos += sizeWritten;
3127 return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
3170 return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
3128 }
3171 }
3129 /* create epilogue */
3172 /* create epilogue */
3130 zcs->stage = zcss_final;
3173 zcs->stage = zcss_final;
3131 zcs->outBuffContentSize = !notEnded ? 0 :
3174 zcs->outBuffContentSize = !notEnded ? 0 :
3132 ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0); /* write epilogue, including final empty block, into outBuff */
3175 ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0); /* write epilogue, including final empty block, into outBuff */
3133 }
3176 }
3134
3177
3135 /* flush epilogue */
3178 /* flush epilogue */
3136 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3179 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3137 size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3180 size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3138 op += flushed;
3181 op += flushed;
3139 zcs->outBuffFlushedSize += flushed;
3182 zcs->outBuffFlushedSize += flushed;
3140 output->pos += op-ostart;
3183 output->pos += op-ostart;
3141 if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */
3184 if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */
3142 return toFlush - flushed;
3185 return toFlush - flushed;
3143 }
3186 }
3144 }
3187 }
3145
3188
3146
3189
3147
3190
3148 /*-===== Pre-defined compression levels =====-*/
3191 /*-===== Pre-defined compression levels =====-*/
3149
3192
3150 #define ZSTD_DEFAULT_CLEVEL 1
3193 #define ZSTD_DEFAULT_CLEVEL 1
3151 #define ZSTD_MAX_CLEVEL 22
3194 #define ZSTD_MAX_CLEVEL 22
3152 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
3195 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
3153
3196
3154 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
3197 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
3155 { /* "default" */
3198 { /* "default" */
3156 /* W, C, H, S, L, TL, strat */
3199 /* W, C, H, S, L, TL, strat */
3157 { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
3200 { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
3158 { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
3201 { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
3159 { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
3202 { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
3160 { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/
3203 { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/
3161 { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/
3204 { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/
3162 { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */
3205 { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */
3163 { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
3206 { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
3164 { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */
3207 { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */
3165 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3208 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3166 { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
3209 { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
3167 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3210 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3168 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3211 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3169 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3212 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3170 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
3213 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
3171 { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
3214 { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
3172 { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
3215 { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
3173 { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
3216 { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
3174 { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
3217 { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
3175 { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */
3218 { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */
3176 { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
3219 { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
3177 { 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */
3220 { 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */
3178 { 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */
3221 { 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */
3179 { 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */
3222 { 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */
3180 },
3223 },
3181 { /* for srcSize <= 256 KB */
3224 { /* for srcSize <= 256 KB */
3182 /* W, C, H, S, L, T, strat */
3225 /* W, C, H, S, L, T, strat */
3183 { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
3226 { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
3184 { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
3227 { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
3185 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
3228 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
3186 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3229 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3187 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
3230 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
3188 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
3231 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
3189 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
3232 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
3190 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
3233 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
3191 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3234 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3192 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3235 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3193 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3236 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3194 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
3237 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
3195 { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
3238 { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
3196 { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
3239 { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
3197 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
3240 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
3198 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
3241 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
3199 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
3242 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
3200 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
3243 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
3201 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
3244 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
3202 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
3245 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
3203 { 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/
3246 { 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/
3204 { 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/
3247 { 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/
3205 { 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/
3248 { 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/
3206 },
3249 },
3207 { /* for srcSize <= 128 KB */
3250 { /* for srcSize <= 128 KB */
3208 /* W, C, H, S, L, T, strat */
3251 /* W, C, H, S, L, T, strat */
3209 { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
3252 { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
3210 { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
3253 { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
3211 { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
3254 { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
3212 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
3255 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
3213 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
3256 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
3214 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
3257 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
3215 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
3258 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
3216 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
3259 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
3217 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3260 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3218 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3261 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3219 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3262 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3220 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3263 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3221 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
3264 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
3222 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
3265 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
3223 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
3266 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
3224 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
3267 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
3225 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
3268 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
3226 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
3269 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
3227 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
3270 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
3228 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
3271 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
3229 { 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/
3272 { 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/
3230 { 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/
3273 { 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/
3231 { 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/
3274 { 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/
3232 },
3275 },
3233 { /* for srcSize <= 16 KB */
3276 { /* for srcSize <= 16 KB */
3234 /* W, C, H, S, L, T, strat */
3277 /* W, C, H, S, L, T, strat */
3235 { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
3278 { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
3236 { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
3279 { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
3237 { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
3280 { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
3238 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
3281 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
3239 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
3282 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
3240 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
3283 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
3241 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
3284 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
3242 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
3285 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
3243 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
3286 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
3244 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
3287 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
3245 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
3288 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
3246 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
3289 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
3247 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3290 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3248 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3291 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3249 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3292 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3250 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3293 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3251 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
3294 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
3252 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
3295 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
3253 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
3296 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
3254 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
3297 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
3255 { 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/
3298 { 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/
3256 { 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/
3299 { 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/
3257 { 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/
3300 { 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/
3258 },
3301 },
3259 };
3302 };
3260
3303
3261 /*! ZSTD_getCParams() :
3304 /*! ZSTD_getCParams() :
3262 * @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
3305 * @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
3263 * Size values are optional, provide 0 if not known or unused */
3306 * Size values are optional, provide 0 if not known or unused */
3264 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
3307 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
3265 {
3308 {
3266 ZSTD_compressionParameters cp;
3309 ZSTD_compressionParameters cp;
3267 size_t const addedSize = srcSize ? 0 : 500;
3310 size_t const addedSize = srcSize ? 0 : 500;
3268 U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
3311 U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
3269 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
3312 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
3270 if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
3313 if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
3271 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
3314 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
3272 cp = ZSTD_defaultCParameters[tableID][compressionLevel];
3315 cp = ZSTD_defaultCParameters[tableID][compressionLevel];
3273 if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
3316 if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
3274 if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
3317 if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
3275 if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
3318 if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
3276 if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
3319 if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
3277 }
3320 }
3278 cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
3321 cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
3279 return cp;
3322 return cp;
3280 }
3323 }
3281
3324
3282 /*! ZSTD_getParams() :
3325 /*! ZSTD_getParams() :
3283 * same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
3326 * same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
3284 * All fields of `ZSTD_frameParameters` are set to default (0) */
3327 * All fields of `ZSTD_frameParameters` are set to default (0) */
3285 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
3328 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
3286 ZSTD_parameters params;
3329 ZSTD_parameters params;
3287 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
3330 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
3288 memset(&params, 0, sizeof(params));
3331 memset(&params, 0, sizeof(params));
3289 params.cParams = cParams;
3332 params.cParams = cParams;
3290 return params;
3333 return params;
3291 }
3334 }
@@ -1,919 +1,919 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
8 */
9
9
10
10
11 /* Note : this file is intended to be included within zstd_compress.c */
11 /* Note : this file is intended to be included within zstd_compress.c */
12
12
13
13
14 #ifndef ZSTD_OPT_H_91842398743
14 #ifndef ZSTD_OPT_H_91842398743
15 #define ZSTD_OPT_H_91842398743
15 #define ZSTD_OPT_H_91842398743
16
16
17
17
18 #define ZSTD_LITFREQ_ADD 2
18 #define ZSTD_LITFREQ_ADD 2
19 #define ZSTD_FREQ_DIV 4
19 #define ZSTD_FREQ_DIV 4
20 #define ZSTD_MAX_PRICE (1<<30)
20 #define ZSTD_MAX_PRICE (1<<30)
21
21
22 /*-*************************************
22 /*-*************************************
23 * Price functions for optimal parser
23 * Price functions for optimal parser
24 ***************************************/
24 ***************************************/
25 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
25 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
26 {
26 {
27 ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
27 ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
28 ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
28 ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
29 ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
29 ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
30 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
30 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
31 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
31 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
32 }
32 }
33
33
34
34
/* Initialize or decay the per-symbol frequency statistics held in the
 * seqStore, then refresh the cached log2 totals via ZSTD_setLog2Prices().
 * Two regimes:
 *  - first call (litLengthSum == 0): seed literal frequencies from a scan of
 *    `src`, give every length/offset code a count of 1, and fall back to the
 *    static price model for very small inputs (<= 1024 bytes);
 *  - subsequent calls: shrink all counters (>> ZSTD_FREQ_DIV[+1]) so older
 *    statistics decay while every symbol keeps a non-zero count. */
MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
{
    unsigned u;

    /* invalidate the literal-price cache used by ZSTD_getLiteralPrice() */
    ssPtr->cachedLiterals = NULL;
    ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
    ssPtr->staticPrices = 0;

    if (ssPtr->litLengthSum == 0) {
        /* first block : build initial statistics */
        if (srcSize <= 1024) ssPtr->staticPrices = 1;   /* too little data for an adaptive model */

        for (u=0; u<=MaxLit; u++)
            ssPtr->litFreq[u] = 0;
        for (u=0; u<srcSize; u++)
            ssPtr->litFreq[src[u]]++;

        ssPtr->litSum = 0;
        ssPtr->litLengthSum = MaxLL+1;
        ssPtr->matchLengthSum = MaxML+1;
        ssPtr->offCodeSum = (MaxOff+1);
        ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);

        /* the +1 keeps every literal representable (frequency never zero) */
        for (u=0; u<=MaxLit; u++) {
            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
            ssPtr->litSum += ssPtr->litFreq[u];
        }
        for (u=0; u<=MaxLL; u++)
            ssPtr->litLengthFreq[u] = 1;
        for (u=0; u<=MaxML; u++)
            ssPtr->matchLengthFreq[u] = 1;
        for (u=0; u<=MaxOff; u++)
            ssPtr->offCodeFreq[u] = 1;
    } else {
        /* subsequent block : decay previous statistics and rebuild the sums */
        ssPtr->matchLengthSum = 0;
        ssPtr->litLengthSum = 0;
        ssPtr->offCodeSum = 0;
        ssPtr->matchSum = 0;
        ssPtr->litSum = 0;

        for (u=0; u<=MaxLit; u++) {
            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
            ssPtr->litSum += ssPtr->litFreq[u];
        }
        for (u=0; u<=MaxLL; u++) {
            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
            ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
        }
        for (u=0; u<=MaxML; u++) {
            ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
            ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
            ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);   /* weighted by match length (u is the code) */
        }
        ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
        for (u=0; u<=MaxOff; u++) {
            ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
            ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
        }
    }

    ZSTD_setLog2Prices(ssPtr);
}
96
96
97
97
/* Estimate the cost (in scaled bits) of emitting `litLength` literals starting
 * at `literals`, plus the cost of the literal-length code itself.
 * A one-entry cache (cachedLiterals/cachedPrice/cachedLitLength) lets callers
 * that repeatedly extend the same literal run pay only for the new bytes. */
FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
{
    U32 price, u;

    if (ssPtr->staticPrices)   /* not enough statistics : fixed price model */
        return ZSTD_highbit32((U32)litLength+1) + (litLength*6);

    if (litLength == 0)        /* only the cost of the litLength==0 code */
        return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);

    /* literals */
    if (ssPtr->cachedLiterals == literals) {
        /* same run as the previous call : price only the additional bytes */
        U32 const additional = litLength - ssPtr->cachedLitLength;
        const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
        price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
        for (u=0; u < additional; u++)
            price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1);
        ssPtr->cachedPrice = price;
        ssPtr->cachedLitLength = litLength;
    } else {
        price = litLength * ssPtr->log2litSum;
        for (u=0; u < litLength; u++)
            price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1);

        if (litLength >= 12) {   /* caching only pays off for longer runs */
            ssPtr->cachedLiterals = literals;
            ssPtr->cachedPrice = price;
            ssPtr->cachedLitLength = litLength;
        }
    }

    /* literal Length : map length to its code, then add the code's cost */
    { const BYTE LL_deltaCode = 19;
      const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
      price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
    }

    return price;
}
137
137
138
138
/* Estimate the total cost of a (literals, offset, matchLength) sequence :
 * offset code + match-length code + literal cost + constant bias `factor`.
 * `matchLength` is MINMATCH-based (callers pass mlen - MINMATCH).
 * `ultra` : when 0, long offsets (offCode >= 20) receive an extra penalty. */
FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
{
    /* offset */
    U32 price;
    BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);

    if (seqStorePtr->staticPrices)   /* fixed price model */
        return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;

    price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
    if (!ultra && offCode >= 20) price += (offCode-19)*2;   /* discourage very long offsets outside ultra mode */

    /* match Length : map length to its code, then add the code's cost */
    { const BYTE ML_deltaCode = 36;
      const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
      price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
    }

    return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
}
159
159
160
160
/* Fold one emitted sequence (litLength literals followed by one match) back
 * into the frequency statistics, then refresh the cached log2 totals.
 * Counterpart of ZSTD_getPrice() : keeps the adaptive price model in sync
 * with what the parser actually selected. */
MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
{
    U32 u;

    /* literals : weighted by ZSTD_LITFREQ_ADD so literals decay slower than codes */
    seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    for (u=0; u < litLength; u++)
        seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;

    /* literal Length */
    { const BYTE LL_deltaCode = 19;
      const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
      seqStorePtr->litLengthFreq[llCode]++;
      seqStorePtr->litLengthSum++;
    }

    /* match offset */
    { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
      seqStorePtr->offCodeSum++;
      seqStorePtr->offCodeFreq[offCode]++;
    }

    /* match Length */
    { const BYTE ML_deltaCode = 36;
      const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
      seqStorePtr->matchLengthFreq[mlCode]++;
      seqStorePtr->matchLengthSum++;
    }

    ZSTD_setLog2Prices(seqStorePtr);
}
192
192
193
193
/* Record a candidate (price, match) at slot `pos` of the optimal-parse table
 * `opt`, first padding any skipped slots up to `pos` with ZSTD_MAX_PRICE so
 * they can never be mistaken for cheaper paths.  Reads and updates the
 * enclosing scope's `opt` table and `last_pos` high-water mark.
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * stays safe inside unbraced if/else bodies (CERT PRE10-C). */
#define SET_PRICE(pos, mlen_, offset_, litlen_, price_)                                 \
    do {                                                                                \
        while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }  \
        opt[pos].mlen = mlen_;                                                          \
        opt[pos].off = offset_;                                                         \
        opt[pos].litlen = litlen_;                                                      \
        opt[pos].price = price_;                                                        \
    } while (0)
202
202
203
203
204
204
205 /* Update hashTable3 up to ip (excluded)
205 /* Update hashTable3 up to ip (excluded)
206 Assumption : always within prefix (ie. not within extDict) */
206 Assumption : always within prefix (ie. not within extDict) */
207 FORCE_INLINE
207 FORCE_INLINE
208 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
208 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
209 {
209 {
210 U32* const hashTable3 = zc->hashTable3;
210 U32* const hashTable3 = zc->hashTable3;
211 U32 const hashLog3 = zc->hashLog3;
211 U32 const hashLog3 = zc->hashLog3;
212 const BYTE* const base = zc->base;
212 const BYTE* const base = zc->base;
213 U32 idx = zc->nextToUpdate3;
213 U32 idx = zc->nextToUpdate3;
214 const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
214 const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
215 const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
215 const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
216
216
217 while(idx < target) {
217 while(idx < target) {
218 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
218 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
219 idx++;
219 idx++;
220 }
220 }
221
221
222 return hashTable3[hash3];
222 return hashTable3[hash3];
223 }
223 }
224
224
225
225
226 /*-*************************************
226 /*-*************************************
227 * Binary Tree search
227 * Binary Tree search
228 ***************************************/
228 ***************************************/
/* Insert `ip` into the binary-tree match finder and collect ALL useful
 * candidate matches into `matches[]` : each recorded entry is strictly longer
 * than the previous one (matches arrive in increasing-length order).
 * Returns the number of entries written to `matches`.
 *  - `mls`        : match-length search parameter; 3 additionally enables the HC3 finder
 *  - `extDict`    : non-zero when candidates may live in the dictionary segment
 *  - `minMatchLen`: only record matches of at least this length
 * Side effect : updates zc->nextToUpdate (at the `update` label). */
static U32 ZSTD_insertBtAndGetAllMatches (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        U32 nbCompares, const U32 mls,
                        U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
{
    const BYTE* const base = zc->base;
    const U32 current = (U32)(ip-base);
    const U32 hashLog = zc->params.cParams.hashLog;
    const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const hashTable = zc->hashTable;
    U32 matchIndex = hashTable[h];
    U32* const bt = zc->chainTable;
    const U32 btLog = zc->params.cParams.chainLog - 1;
    const U32 btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8;
    U32 dummy32; /* to be nullified at the end */
    U32 mnum = 0;

    const U32 minMatch = (mls == 3) ? 3 : 4;
    size_t bestLength = minMatchLen-1;   /* only matches > bestLength get recorded */

    if (minMatch == 3) { /* HC3 match finder : extra chance to find a 3-byte match nearby */
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
        if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
            const BYTE* match;
            size_t currentMl=0;
            if ((!extDict) || matchIndex3 >= dictLimit) {
                match = base + matchIndex3;
                if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
            } else {
                match = dictBase + matchIndex3;
                if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
                    currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
            }

            /* save best solution */
            if (currentMl > bestLength) {
                bestLength = currentMl;
                matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
                matches[mnum].len = (U32)currentMl;
                mnum++;
                if (currentMl > ZSTD_OPT_NUM) goto update;
                if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
            }
        }
    }

    hashTable[h] = current; /* Update Hash Table */

    /* descend the binary tree rooted at hashTable[h], re-threading it around
     * `current` as we go (standard BT match-finder insertion) */
    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength]) {
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
            }
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if (matchLength > ZSTD_OPT_NUM) break;
            if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */
                break; /* drop, to guarantee consistency (miss a little bit of compression) */
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex; /* update smaller idx */
            commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
            matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    /* terminate both dangling subtree links */
    *smallerPtr = *largerPtr = 0;

update:
    /* keep nextToUpdate a little behind the farthest match end, so positions
     * covered by a long match are still inserted on later calls */
    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
    return mnum;
}
338
338
339
339
340 /** Tree updater, providing best match */
340 /** Tree updater, providing best match */
341 static U32 ZSTD_BtGetAllMatches (
341 static U32 ZSTD_BtGetAllMatches (
342 ZSTD_CCtx* zc,
342 ZSTD_CCtx* zc,
343 const BYTE* const ip, const BYTE* const iLimit,
343 const BYTE* const ip, const BYTE* const iLimit,
344 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
344 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
345 {
345 {
346 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
346 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
347 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
347 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
348 return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
348 return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
349 }
349 }
350
350
351
351
352 static U32 ZSTD_BtGetAllMatches_selectMLS (
352 static U32 ZSTD_BtGetAllMatches_selectMLS (
353 ZSTD_CCtx* zc, /* Index table will be updated */
353 ZSTD_CCtx* zc, /* Index table will be updated */
354 const BYTE* ip, const BYTE* const iHighLimit,
354 const BYTE* ip, const BYTE* const iHighLimit,
355 const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
355 const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
356 {
356 {
357 switch(matchLengthSearch)
357 switch(matchLengthSearch)
358 {
358 {
359 case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
359 case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
360 default :
360 default :
361 case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
361 case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
362 case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
362 case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
363 case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
363 case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
364 }
364 }
365 }
365 }
366
366
367 /** Tree updater, providing best match */
367 /** Tree updater, providing best match */
368 static U32 ZSTD_BtGetAllMatches_extDict (
368 static U32 ZSTD_BtGetAllMatches_extDict (
369 ZSTD_CCtx* zc,
369 ZSTD_CCtx* zc,
370 const BYTE* const ip, const BYTE* const iLimit,
370 const BYTE* const ip, const BYTE* const iLimit,
371 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
371 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
372 {
372 {
373 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
373 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
374 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
374 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
375 return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
375 return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
376 }
376 }
377
377
378
378
379 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
379 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
380 ZSTD_CCtx* zc, /* Index table will be updated */
380 ZSTD_CCtx* zc, /* Index table will be updated */
381 const BYTE* ip, const BYTE* const iHighLimit,
381 const BYTE* ip, const BYTE* const iHighLimit,
382 const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
382 const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
383 {
383 {
384 switch(matchLengthSearch)
384 switch(matchLengthSearch)
385 {
385 {
386 case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
386 case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
387 default :
387 default :
388 case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
388 case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
389 case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
389 case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
390 case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
390 case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
391 }
391 }
392 }
392 }
393
393
394
394
395 /*-*******************************
395 /*-*******************************
396 * Optimal parser
396 * Optimal parser
397 *********************************/
397 *********************************/
398 FORCE_INLINE
398 FORCE_INLINE
399 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
399 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
400 const void* src, size_t srcSize, const int ultra)
400 const void* src, size_t srcSize, const int ultra)
401 {
401 {
402 seqStore_t* seqStorePtr = &(ctx->seqStore);
402 seqStore_t* seqStorePtr = &(ctx->seqStore);
403 const BYTE* const istart = (const BYTE*)src;
403 const BYTE* const istart = (const BYTE*)src;
404 const BYTE* ip = istart;
404 const BYTE* ip = istart;
405 const BYTE* anchor = istart;
405 const BYTE* anchor = istart;
406 const BYTE* const iend = istart + srcSize;
406 const BYTE* const iend = istart + srcSize;
407 const BYTE* const ilimit = iend - 8;
407 const BYTE* const ilimit = iend - 8;
408 const BYTE* const base = ctx->base;
408 const BYTE* const base = ctx->base;
409 const BYTE* const prefixStart = base + ctx->dictLimit;
409 const BYTE* const prefixStart = base + ctx->dictLimit;
410
410
411 const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
411 const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
412 const U32 sufficient_len = ctx->params.cParams.targetLength;
412 const U32 sufficient_len = ctx->params.cParams.targetLength;
413 const U32 mls = ctx->params.cParams.searchLength;
413 const U32 mls = ctx->params.cParams.searchLength;
414 const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
414 const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
415
415
416 ZSTD_optimal_t* opt = seqStorePtr->priceTable;
416 ZSTD_optimal_t* opt = seqStorePtr->priceTable;
417 ZSTD_match_t* matches = seqStorePtr->matchTable;
417 ZSTD_match_t* matches = seqStorePtr->matchTable;
418 const BYTE* inr;
418 const BYTE* inr;
419 U32 offset, rep[ZSTD_REP_NUM];
419 U32 offset, rep[ZSTD_REP_NUM];
420
420
421 /* init */
421 /* init */
422 ctx->nextToUpdate3 = ctx->nextToUpdate;
422 ctx->nextToUpdate3 = ctx->nextToUpdate;
423 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
423 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
424 ip += (ip==prefixStart);
424 ip += (ip==prefixStart);
425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
426
426
427 /* Match Loop */
427 /* Match Loop */
428 while (ip < ilimit) {
428 while (ip < ilimit) {
429 U32 cur, match_num, last_pos, litlen, price;
429 U32 cur, match_num, last_pos, litlen, price;
430 U32 u, mlen, best_mlen, best_off, litLength;
430 U32 u, mlen, best_mlen, best_off, litLength;
431 memset(opt, 0, sizeof(ZSTD_optimal_t));
431 memset(opt, 0, sizeof(ZSTD_optimal_t));
432 last_pos = 0;
432 last_pos = 0;
433 litlen = (U32)(ip - anchor);
433 litlen = (U32)(ip - anchor);
434
434
435 /* check repCode */
435 /* check repCode */
436 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
436 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
437 for (i=(ip == anchor); i<last_i; i++) {
437 for (i=(ip == anchor); i<last_i; i++) {
438 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
438 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
439 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
439 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
440 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
440 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
441 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
441 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
442 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
442 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
443 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
443 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
444 goto _storeSequence;
444 goto _storeSequence;
445 }
445 }
446 best_off = i - (ip == anchor);
446 best_off = i - (ip == anchor);
447 do {
447 do {
448 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
448 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
449 if (mlen > last_pos || price < opt[mlen].price)
449 if (mlen > last_pos || price < opt[mlen].price)
450 SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
450 SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
451 mlen--;
451 mlen--;
452 } while (mlen >= minMatch);
452 } while (mlen >= minMatch);
453 } } }
453 } } }
454
454
455 match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
455 match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
456
456
457 if (!last_pos && !match_num) { ip++; continue; }
457 if (!last_pos && !match_num) { ip++; continue; }
458
458
459 if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
459 if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
460 best_mlen = matches[match_num-1].len;
460 best_mlen = matches[match_num-1].len;
461 best_off = matches[match_num-1].off;
461 best_off = matches[match_num-1].off;
462 cur = 0;
462 cur = 0;
463 last_pos = 1;
463 last_pos = 1;
464 goto _storeSequence;
464 goto _storeSequence;
465 }
465 }
466
466
467 /* set prices using matches at position = 0 */
467 /* set prices using matches at position = 0 */
468 best_mlen = (last_pos) ? last_pos : minMatch;
468 best_mlen = (last_pos) ? last_pos : minMatch;
469 for (u = 0; u < match_num; u++) {
469 for (u = 0; u < match_num; u++) {
470 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
470 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
471 best_mlen = matches[u].len;
471 best_mlen = matches[u].len;
472 while (mlen <= best_mlen) {
472 while (mlen <= best_mlen) {
473 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
473 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
474 if (mlen > last_pos || price < opt[mlen].price)
474 if (mlen > last_pos || price < opt[mlen].price)
475 SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
475 SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
476 mlen++;
476 mlen++;
477 } }
477 } }
478
478
479 if (last_pos < minMatch) { ip++; continue; }
479 if (last_pos < minMatch) { ip++; continue; }
480
480
481 /* initialize opt[0] */
481 /* initialize opt[0] */
482 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
482 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
483 opt[0].mlen = 1;
483 opt[0].mlen = 1;
484 opt[0].litlen = litlen;
484 opt[0].litlen = litlen;
485
485
486 /* check further positions */
486 /* check further positions */
487 for (cur = 1; cur <= last_pos; cur++) {
487 for (cur = 1; cur <= last_pos; cur++) {
488 inr = ip + cur;
488 inr = ip + cur;
489
489
490 if (opt[cur-1].mlen == 1) {
490 if (opt[cur-1].mlen == 1) {
491 litlen = opt[cur-1].litlen + 1;
491 litlen = opt[cur-1].litlen + 1;
492 if (cur > litlen) {
492 if (cur > litlen) {
493 price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
493 price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
494 } else
494 } else
495 price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
495 price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
496 } else {
496 } else {
497 litlen = 1;
497 litlen = 1;
498 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
498 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
499 }
499 }
500
500
501 if (cur > last_pos || price <= opt[cur].price)
501 if (cur > last_pos || price <= opt[cur].price)
502 SET_PRICE(cur, 1, 0, litlen, price);
502 SET_PRICE(cur, 1, 0, litlen, price);
503
503
504 if (cur == last_pos) break;
504 if (cur == last_pos) break;
505
505
506 if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
506 if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
507 continue;
507 continue;
508
508
509 mlen = opt[cur].mlen;
509 mlen = opt[cur].mlen;
510 if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
510 if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
511 opt[cur].rep[2] = opt[cur-mlen].rep[1];
511 opt[cur].rep[2] = opt[cur-mlen].rep[1];
512 opt[cur].rep[1] = opt[cur-mlen].rep[0];
512 opt[cur].rep[1] = opt[cur-mlen].rep[0];
513 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
513 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
514 } else {
514 } else {
515 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
515 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
516 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
516 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
517 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
517 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
518 }
518 }
519
519
520 best_mlen = minMatch;
520 best_mlen = minMatch;
521 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
521 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
522 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
522 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
523 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
523 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
524 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
524 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
525 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
525 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
526 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
526 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
527
527
528 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
528 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
529 best_mlen = mlen; best_off = i; last_pos = cur + 1;
529 best_mlen = mlen; best_off = i; last_pos = cur + 1;
530 goto _storeSequence;
530 goto _storeSequence;
531 }
531 }
532
532
533 best_off = i - (opt[cur].mlen != 1);
533 best_off = i - (opt[cur].mlen != 1);
534 if (mlen > best_mlen) best_mlen = mlen;
534 if (mlen > best_mlen) best_mlen = mlen;
535
535
536 do {
536 do {
537 if (opt[cur].mlen == 1) {
537 if (opt[cur].mlen == 1) {
538 litlen = opt[cur].litlen;
538 litlen = opt[cur].litlen;
539 if (cur > litlen) {
539 if (cur > litlen) {
540 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
540 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
541 } else
541 } else
542 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
542 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
543 } else {
543 } else {
544 litlen = 0;
544 litlen = 0;
545 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
545 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
546 }
546 }
547
547
548 if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
548 if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
549 SET_PRICE(cur + mlen, mlen, i, litlen, price);
549 SET_PRICE(cur + mlen, mlen, i, litlen, price);
550 mlen--;
550 mlen--;
551 } while (mlen >= minMatch);
551 } while (mlen >= minMatch);
552 } } }
552 } } }
553
553
554 match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
554 match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
555
555
556 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
556 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
557 best_mlen = matches[match_num-1].len;
557 best_mlen = matches[match_num-1].len;
558 best_off = matches[match_num-1].off;
558 best_off = matches[match_num-1].off;
559 last_pos = cur + 1;
559 last_pos = cur + 1;
560 goto _storeSequence;
560 goto _storeSequence;
561 }
561 }
562
562
563 /* set prices using matches at position = cur */
563 /* set prices using matches at position = cur */
564 for (u = 0; u < match_num; u++) {
564 for (u = 0; u < match_num; u++) {
565 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
565 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
566 best_mlen = matches[u].len;
566 best_mlen = matches[u].len;
567
567
568 while (mlen <= best_mlen) {
568 while (mlen <= best_mlen) {
569 if (opt[cur].mlen == 1) {
569 if (opt[cur].mlen == 1) {
570 litlen = opt[cur].litlen;
570 litlen = opt[cur].litlen;
571 if (cur > litlen)
571 if (cur > litlen)
572 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
572 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
573 else
573 else
574 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
574 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
575 } else {
575 } else {
576 litlen = 0;
576 litlen = 0;
577 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
577 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
578 }
578 }
579
579
580 if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
580 if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
581 SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
581 SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
582
582
583 mlen++;
583 mlen++;
584 } } }
584 } } }
585
585
586 best_mlen = opt[last_pos].mlen;
586 best_mlen = opt[last_pos].mlen;
587 best_off = opt[last_pos].off;
587 best_off = opt[last_pos].off;
588 cur = last_pos - best_mlen;
588 cur = last_pos - best_mlen;
589
589
590 /* store sequence */
590 /* store sequence */
591 _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
591 _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
592 opt[0].mlen = 1;
592 opt[0].mlen = 1;
593
593
594 while (1) {
594 while (1) {
595 mlen = opt[cur].mlen;
595 mlen = opt[cur].mlen;
596 offset = opt[cur].off;
596 offset = opt[cur].off;
597 opt[cur].mlen = best_mlen;
597 opt[cur].mlen = best_mlen;
598 opt[cur].off = best_off;
598 opt[cur].off = best_off;
599 best_mlen = mlen;
599 best_mlen = mlen;
600 best_off = offset;
600 best_off = offset;
601 if (mlen > cur) break;
601 if (mlen > cur) break;
602 cur -= mlen;
602 cur -= mlen;
603 }
603 }
604
604
605 for (u = 0; u <= last_pos;) {
605 for (u = 0; u <= last_pos;) {
606 u += opt[u].mlen;
606 u += opt[u].mlen;
607 }
607 }
608
608
609 for (cur=0; cur < last_pos; ) {
609 for (cur=0; cur < last_pos; ) {
610 mlen = opt[cur].mlen;
610 mlen = opt[cur].mlen;
611 if (mlen == 1) { ip++; cur++; continue; }
611 if (mlen == 1) { ip++; cur++; continue; }
612 offset = opt[cur].off;
612 offset = opt[cur].off;
613 cur += mlen;
613 cur += mlen;
614 litLength = (U32)(ip - anchor);
614 litLength = (U32)(ip - anchor);
615
615
616 if (offset > ZSTD_REP_MOVE_OPT) {
616 if (offset > ZSTD_REP_MOVE_OPT) {
617 rep[2] = rep[1];
617 rep[2] = rep[1];
618 rep[1] = rep[0];
618 rep[1] = rep[0];
619 rep[0] = offset - ZSTD_REP_MOVE_OPT;
619 rep[0] = offset - ZSTD_REP_MOVE_OPT;
620 offset--;
620 offset--;
621 } else {
621 } else {
622 if (offset != 0) {
622 if (offset != 0) {
623 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
623 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
624 if (offset != 1) rep[2] = rep[1];
624 if (offset != 1) rep[2] = rep[1];
625 rep[1] = rep[0];
625 rep[1] = rep[0];
626 rep[0] = best_off;
626 rep[0] = best_off;
627 }
627 }
628 if (litLength==0) offset--;
628 if (litLength==0) offset--;
629 }
629 }
630
630
631 ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
631 ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
632 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
632 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
633 anchor = ip = ip + mlen;
633 anchor = ip = ip + mlen;
634 } } /* for (cur=0; cur < last_pos; ) */
634 } } /* for (cur=0; cur < last_pos; ) */
635
635
636 /* Save reps for next block */
636 /* Save reps for next block */
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
638
638
639 /* Last Literals */
639 /* Last Literals */
640 { size_t const lastLLSize = iend - anchor;
640 { size_t const lastLLSize = iend - anchor;
641 memcpy(seqStorePtr->lit, anchor, lastLLSize);
641 memcpy(seqStorePtr->lit, anchor, lastLLSize);
642 seqStorePtr->lit += lastLLSize;
642 seqStorePtr->lit += lastLLSize;
643 }
643 }
644 }
644 }
645
645
646
646
647 FORCE_INLINE
647 FORCE_INLINE
648 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
648 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
649 const void* src, size_t srcSize, const int ultra)
649 const void* src, size_t srcSize, const int ultra)
650 {
650 {
651 seqStore_t* seqStorePtr = &(ctx->seqStore);
651 seqStore_t* seqStorePtr = &(ctx->seqStore);
652 const BYTE* const istart = (const BYTE*)src;
652 const BYTE* const istart = (const BYTE*)src;
653 const BYTE* ip = istart;
653 const BYTE* ip = istart;
654 const BYTE* anchor = istart;
654 const BYTE* anchor = istart;
655 const BYTE* const iend = istart + srcSize;
655 const BYTE* const iend = istart + srcSize;
656 const BYTE* const ilimit = iend - 8;
656 const BYTE* const ilimit = iend - 8;
657 const BYTE* const base = ctx->base;
657 const BYTE* const base = ctx->base;
658 const U32 lowestIndex = ctx->lowLimit;
658 const U32 lowestIndex = ctx->lowLimit;
659 const U32 dictLimit = ctx->dictLimit;
659 const U32 dictLimit = ctx->dictLimit;
660 const BYTE* const prefixStart = base + dictLimit;
660 const BYTE* const prefixStart = base + dictLimit;
661 const BYTE* const dictBase = ctx->dictBase;
661 const BYTE* const dictBase = ctx->dictBase;
662 const BYTE* const dictEnd = dictBase + dictLimit;
662 const BYTE* const dictEnd = dictBase + dictLimit;
663
663
664 const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
664 const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
665 const U32 sufficient_len = ctx->params.cParams.targetLength;
665 const U32 sufficient_len = ctx->params.cParams.targetLength;
666 const U32 mls = ctx->params.cParams.searchLength;
666 const U32 mls = ctx->params.cParams.searchLength;
667 const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
667 const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
668
668
669 ZSTD_optimal_t* opt = seqStorePtr->priceTable;
669 ZSTD_optimal_t* opt = seqStorePtr->priceTable;
670 ZSTD_match_t* matches = seqStorePtr->matchTable;
670 ZSTD_match_t* matches = seqStorePtr->matchTable;
671 const BYTE* inr;
671 const BYTE* inr;
672
672
673 /* init */
673 /* init */
674 U32 offset, rep[ZSTD_REP_NUM];
674 U32 offset, rep[ZSTD_REP_NUM];
675 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
675 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
676
676
677 ctx->nextToUpdate3 = ctx->nextToUpdate;
677 ctx->nextToUpdate3 = ctx->nextToUpdate;
678 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
678 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
679 ip += (ip==prefixStart);
679 ip += (ip==prefixStart);
680
680
681 /* Match Loop */
681 /* Match Loop */
682 while (ip < ilimit) {
682 while (ip < ilimit) {
683 U32 cur, match_num, last_pos, litlen, price;
683 U32 cur, match_num, last_pos, litlen, price;
684 U32 u, mlen, best_mlen, best_off, litLength;
684 U32 u, mlen, best_mlen, best_off, litLength;
685 U32 current = (U32)(ip-base);
685 U32 current = (U32)(ip-base);
686 memset(opt, 0, sizeof(ZSTD_optimal_t));
686 memset(opt, 0, sizeof(ZSTD_optimal_t));
687 last_pos = 0;
687 last_pos = 0;
688 opt[0].litlen = (U32)(ip - anchor);
688 opt[0].litlen = (U32)(ip - anchor);
689
689
690 /* check repCode */
690 /* check repCode */
691 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
691 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
692 for (i = (ip==anchor); i<last_i; i++) {
692 for (i = (ip==anchor); i<last_i; i++) {
693 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
693 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
694 const U32 repIndex = (U32)(current - repCur);
694 const U32 repIndex = (U32)(current - repCur);
695 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
695 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
696 const BYTE* const repMatch = repBase + repIndex;
696 const BYTE* const repMatch = repBase + repIndex;
697 if ( (repCur > 0 && repCur <= (S32)current)
697 if ( (repCur > 0 && repCur <= (S32)current)
698 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
698 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
699 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
699 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
700 /* repcode detected we should take it */
700 /* repcode detected we should take it */
701 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
701 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
702 mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
702 mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
703
703
704 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
704 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
705 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
705 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
706 goto _storeSequence;
706 goto _storeSequence;
707 }
707 }
708
708
709 best_off = i - (ip==anchor);
709 best_off = i - (ip==anchor);
710 litlen = opt[0].litlen;
710 litlen = opt[0].litlen;
711 do {
711 do {
712 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
712 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
713 if (mlen > last_pos || price < opt[mlen].price)
713 if (mlen > last_pos || price < opt[mlen].price)
714 SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
714 SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
715 mlen--;
715 mlen--;
716 } while (mlen >= minMatch);
716 } while (mlen >= minMatch);
717 } } }
717 } } }
718
718
719 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
719 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
720
720
721 if (!last_pos && !match_num) { ip++; continue; }
721 if (!last_pos && !match_num) { ip++; continue; }
722
722
723 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
723 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
724 opt[0].mlen = 1;
724 opt[0].mlen = 1;
725
725
726 if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
726 if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
727 best_mlen = matches[match_num-1].len;
727 best_mlen = matches[match_num-1].len;
728 best_off = matches[match_num-1].off;
728 best_off = matches[match_num-1].off;
729 cur = 0;
729 cur = 0;
730 last_pos = 1;
730 last_pos = 1;
731 goto _storeSequence;
731 goto _storeSequence;
732 }
732 }
733
733
734 best_mlen = (last_pos) ? last_pos : minMatch;
734 best_mlen = (last_pos) ? last_pos : minMatch;
735
735
736 /* set prices using matches at position = 0 */
736 /* set prices using matches at position = 0 */
737 for (u = 0; u < match_num; u++) {
737 for (u = 0; u < match_num; u++) {
738 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
738 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
739 best_mlen = matches[u].len;
739 best_mlen = matches[u].len;
740 litlen = opt[0].litlen;
740 litlen = opt[0].litlen;
741 while (mlen <= best_mlen) {
741 while (mlen <= best_mlen) {
742 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
742 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
743 if (mlen > last_pos || price < opt[mlen].price)
743 if (mlen > last_pos || price < opt[mlen].price)
744 SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
744 SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
745 mlen++;
745 mlen++;
746 } }
746 } }
747
747
748 if (last_pos < minMatch) {
748 if (last_pos < minMatch) {
749 ip++; continue;
749 ip++; continue;
750 }
750 }
751
751
752 /* check further positions */
752 /* check further positions */
753 for (cur = 1; cur <= last_pos; cur++) {
753 for (cur = 1; cur <= last_pos; cur++) {
754 inr = ip + cur;
754 inr = ip + cur;
755
755
756 if (opt[cur-1].mlen == 1) {
756 if (opt[cur-1].mlen == 1) {
757 litlen = opt[cur-1].litlen + 1;
757 litlen = opt[cur-1].litlen + 1;
758 if (cur > litlen) {
758 if (cur > litlen) {
759 price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
759 price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
760 } else
760 } else
761 price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
761 price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
762 } else {
762 } else {
763 litlen = 1;
763 litlen = 1;
764 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
764 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
765 }
765 }
766
766
767 if (cur > last_pos || price <= opt[cur].price)
767 if (cur > last_pos || price <= opt[cur].price)
768 SET_PRICE(cur, 1, 0, litlen, price);
768 SET_PRICE(cur, 1, 0, litlen, price);
769
769
770 if (cur == last_pos) break;
770 if (cur == last_pos) break;
771
771
772 if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
772 if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
773 continue;
773 continue;
774
774
775 mlen = opt[cur].mlen;
775 mlen = opt[cur].mlen;
776 if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
776 if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
777 opt[cur].rep[2] = opt[cur-mlen].rep[1];
777 opt[cur].rep[2] = opt[cur-mlen].rep[1];
778 opt[cur].rep[1] = opt[cur-mlen].rep[0];
778 opt[cur].rep[1] = opt[cur-mlen].rep[0];
779 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
779 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
780 } else {
780 } else {
781 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
781 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
782 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
782 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
783 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
783 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
784 }
784 }
785
785
786 best_mlen = minMatch;
786 best_mlen = minMatch;
787 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
787 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
788 for (i = (mlen != 1); i<last_i; i++) {
788 for (i = (mlen != 1); i<last_i; i++) {
789 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
789 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
790 const U32 repIndex = (U32)(current+cur - repCur);
790 const U32 repIndex = (U32)(current+cur - repCur);
791 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
791 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
792 const BYTE* const repMatch = repBase + repIndex;
792 const BYTE* const repMatch = repBase + repIndex;
793 if ( (repCur > 0 && repCur <= (S32)(current+cur))
793 if ( (repCur > 0 && repCur <= (S32)(current+cur))
794 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
794 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
795 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
795 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
796 /* repcode detected */
796 /* repcode detected */
797 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
797 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
798 mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
798 mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
799
799
800 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
800 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
801 best_mlen = mlen; best_off = i; last_pos = cur + 1;
801 best_mlen = mlen; best_off = i; last_pos = cur + 1;
802 goto _storeSequence;
802 goto _storeSequence;
803 }
803 }
804
804
805 best_off = i - (opt[cur].mlen != 1);
805 best_off = i - (opt[cur].mlen != 1);
806 if (mlen > best_mlen) best_mlen = mlen;
806 if (mlen > best_mlen) best_mlen = mlen;
807
807
808 do {
808 do {
809 if (opt[cur].mlen == 1) {
809 if (opt[cur].mlen == 1) {
810 litlen = opt[cur].litlen;
810 litlen = opt[cur].litlen;
811 if (cur > litlen) {
811 if (cur > litlen) {
812 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
812 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
813 } else
813 } else
814 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
814 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
815 } else {
815 } else {
816 litlen = 0;
816 litlen = 0;
817 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
817 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
818 }
818 }
819
819
820 if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
820 if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
821 SET_PRICE(cur + mlen, mlen, i, litlen, price);
821 SET_PRICE(cur + mlen, mlen, i, litlen, price);
822 mlen--;
822 mlen--;
823 } while (mlen >= minMatch);
823 } while (mlen >= minMatch);
824 } } }
824 } } }
825
825
826 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
826 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
827
827
828 if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
828 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
829 best_mlen = matches[match_num-1].len;
829 best_mlen = matches[match_num-1].len;
830 best_off = matches[match_num-1].off;
830 best_off = matches[match_num-1].off;
831 last_pos = cur + 1;
831 last_pos = cur + 1;
832 goto _storeSequence;
832 goto _storeSequence;
833 }
833 }
834
834
835 /* set prices using matches at position = cur */
835 /* set prices using matches at position = cur */
836 for (u = 0; u < match_num; u++) {
836 for (u = 0; u < match_num; u++) {
837 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
837 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
838 best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
838 best_mlen = matches[u].len;
839
839
840 while (mlen <= best_mlen) {
840 while (mlen <= best_mlen) {
841 if (opt[cur].mlen == 1) {
841 if (opt[cur].mlen == 1) {
842 litlen = opt[cur].litlen;
842 litlen = opt[cur].litlen;
843 if (cur > litlen)
843 if (cur > litlen)
844 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
844 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
845 else
845 else
846 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
846 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
847 } else {
847 } else {
848 litlen = 0;
848 litlen = 0;
849 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
849 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
850 }
850 }
851
851
852 if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
852 if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
853 SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
853 SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
854
854
855 mlen++;
855 mlen++;
856 } } } /* for (cur = 1; cur <= last_pos; cur++) */
856 } } } /* for (cur = 1; cur <= last_pos; cur++) */
857
857
858 best_mlen = opt[last_pos].mlen;
858 best_mlen = opt[last_pos].mlen;
859 best_off = opt[last_pos].off;
859 best_off = opt[last_pos].off;
860 cur = last_pos - best_mlen;
860 cur = last_pos - best_mlen;
861
861
862 /* store sequence */
862 /* store sequence */
863 _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
863 _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
864 opt[0].mlen = 1;
864 opt[0].mlen = 1;
865
865
866 while (1) {
866 while (1) {
867 mlen = opt[cur].mlen;
867 mlen = opt[cur].mlen;
868 offset = opt[cur].off;
868 offset = opt[cur].off;
869 opt[cur].mlen = best_mlen;
869 opt[cur].mlen = best_mlen;
870 opt[cur].off = best_off;
870 opt[cur].off = best_off;
871 best_mlen = mlen;
871 best_mlen = mlen;
872 best_off = offset;
872 best_off = offset;
873 if (mlen > cur) break;
873 if (mlen > cur) break;
874 cur -= mlen;
874 cur -= mlen;
875 }
875 }
876
876
877 for (u = 0; u <= last_pos; ) {
877 for (u = 0; u <= last_pos; ) {
878 u += opt[u].mlen;
878 u += opt[u].mlen;
879 }
879 }
880
880
881 for (cur=0; cur < last_pos; ) {
881 for (cur=0; cur < last_pos; ) {
882 mlen = opt[cur].mlen;
882 mlen = opt[cur].mlen;
883 if (mlen == 1) { ip++; cur++; continue; }
883 if (mlen == 1) { ip++; cur++; continue; }
884 offset = opt[cur].off;
884 offset = opt[cur].off;
885 cur += mlen;
885 cur += mlen;
886 litLength = (U32)(ip - anchor);
886 litLength = (U32)(ip - anchor);
887
887
888 if (offset > ZSTD_REP_MOVE_OPT) {
888 if (offset > ZSTD_REP_MOVE_OPT) {
889 rep[2] = rep[1];
889 rep[2] = rep[1];
890 rep[1] = rep[0];
890 rep[1] = rep[0];
891 rep[0] = offset - ZSTD_REP_MOVE_OPT;
891 rep[0] = offset - ZSTD_REP_MOVE_OPT;
892 offset--;
892 offset--;
893 } else {
893 } else {
894 if (offset != 0) {
894 if (offset != 0) {
895 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
895 best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
896 if (offset != 1) rep[2] = rep[1];
896 if (offset != 1) rep[2] = rep[1];
897 rep[1] = rep[0];
897 rep[1] = rep[0];
898 rep[0] = best_off;
898 rep[0] = best_off;
899 }
899 }
900
900
901 if (litLength==0) offset--;
901 if (litLength==0) offset--;
902 }
902 }
903
903
904 ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
904 ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
905 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
905 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
906 anchor = ip = ip + mlen;
906 anchor = ip = ip + mlen;
907 } } /* for (cur=0; cur < last_pos; ) */
907 } } /* for (cur=0; cur < last_pos; ) */
908
908
909 /* Save reps for next block */
909 /* Save reps for next block */
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
911
911
912 /* Last Literals */
912 /* Last Literals */
913 { size_t lastLLSize = iend - anchor;
913 { size_t lastLLSize = iend - anchor;
914 memcpy(seqStorePtr->lit, anchor, lastLLSize);
914 memcpy(seqStorePtr->lit, anchor, lastLLSize);
915 seqStorePtr->lit += lastLLSize;
915 seqStorePtr->lit += lastLLSize;
916 }
916 }
917 }
917 }
918
918
919 #endif /* ZSTD_OPT_H_91842398743 */
919 #endif /* ZSTD_OPT_H_91842398743 */
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now