Show More
@@ -0,0 +1,132 b'' | |||||
|
1 | /** | |||
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This software may be modified and distributed under the terms | |||
|
6 | * of the BSD license. See the LICENSE file for details. | |||
|
7 | */ | |||
|
8 | ||||
|
9 | #include "python-zstandard.h" | |||
|
10 | ||||
|
11 | extern PyObject* ZstdError; | |||
|
12 | ||||
|
13 | PyDoc_STRVAR(FrameParameters__doc__, | |||
|
14 | "FrameParameters: information about a zstd frame"); | |||
|
15 | ||||
|
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) { | |||
|
17 | const char* source; | |||
|
18 | Py_ssize_t sourceSize; | |||
|
19 | ZSTD_frameParams params; | |||
|
20 | FrameParametersObject* result = NULL; | |||
|
21 | size_t zresult; | |||
|
22 | ||||
|
23 | #if PY_MAJOR_VERSION >= 3 | |||
|
24 | if (!PyArg_ParseTuple(args, "y#:get_frame_parameters", | |||
|
25 | #else | |||
|
26 | if (!PyArg_ParseTuple(args, "s#:get_frame_parameters", | |||
|
27 | #endif | |||
|
28 | &source, &sourceSize)) { | |||
|
29 | return NULL; | |||
|
30 | } | |||
|
31 | ||||
|
32 | /* Needed for Python 2 to reject unicode */ | |||
|
33 | if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) { | |||
|
34 | PyErr_SetString(PyExc_TypeError, "argument must be bytes"); | |||
|
35 | return NULL; | |||
|
36 | } | |||
|
37 | ||||
|
38 | zresult = ZSTD_getFrameParams(¶ms, (void*)source, sourceSize); | |||
|
39 | ||||
|
40 | if (ZSTD_isError(zresult)) { | |||
|
41 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); | |||
|
42 | return NULL; | |||
|
43 | } | |||
|
44 | ||||
|
45 | if (zresult) { | |||
|
46 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); | |||
|
47 | return NULL; | |||
|
48 | } | |||
|
49 | ||||
|
50 | result = PyObject_New(FrameParametersObject, &FrameParametersType); | |||
|
51 | if (!result) { | |||
|
52 | return NULL; | |||
|
53 | } | |||
|
54 | ||||
|
55 | result->frameContentSize = params.frameContentSize; | |||
|
56 | result->windowSize = params.windowSize; | |||
|
57 | result->dictID = params.dictID; | |||
|
58 | result->checksumFlag = params.checksumFlag ? 1 : 0; | |||
|
59 | ||||
|
60 | return result; | |||
|
61 | } | |||
|
62 | ||||
|
63 | static void FrameParameters_dealloc(PyObject* self) { | |||
|
64 | PyObject_Del(self); | |||
|
65 | } | |||
|
66 | ||||
|
67 | static PyMemberDef FrameParameters_members[] = { | |||
|
68 | { "content_size", T_ULONGLONG, | |||
|
69 | offsetof(FrameParametersObject, frameContentSize), READONLY, | |||
|
70 | "frame content size" }, | |||
|
71 | { "window_size", T_UINT, | |||
|
72 | offsetof(FrameParametersObject, windowSize), READONLY, | |||
|
73 | "window size" }, | |||
|
74 | { "dict_id", T_UINT, | |||
|
75 | offsetof(FrameParametersObject, dictID), READONLY, | |||
|
76 | "dictionary ID" }, | |||
|
77 | { "has_checksum", T_BOOL, | |||
|
78 | offsetof(FrameParametersObject, checksumFlag), READONLY, | |||
|
79 | "checksum flag" }, | |||
|
80 | { NULL } | |||
|
81 | }; | |||
|
82 | ||||
|
83 | PyTypeObject FrameParametersType = { | |||
|
84 | PyVarObject_HEAD_INIT(NULL, 0) | |||
|
85 | "FrameParameters", /* tp_name */ | |||
|
86 | sizeof(FrameParametersObject), /* tp_basicsize */ | |||
|
87 | 0, /* tp_itemsize */ | |||
|
88 | (destructor)FrameParameters_dealloc, /* tp_dealloc */ | |||
|
89 | 0, /* tp_print */ | |||
|
90 | 0, /* tp_getattr */ | |||
|
91 | 0, /* tp_setattr */ | |||
|
92 | 0, /* tp_compare */ | |||
|
93 | 0, /* tp_repr */ | |||
|
94 | 0, /* tp_as_number */ | |||
|
95 | 0, /* tp_as_sequence */ | |||
|
96 | 0, /* tp_as_mapping */ | |||
|
97 | 0, /* tp_hash */ | |||
|
98 | 0, /* tp_call */ | |||
|
99 | 0, /* tp_str */ | |||
|
100 | 0, /* tp_getattro */ | |||
|
101 | 0, /* tp_setattro */ | |||
|
102 | 0, /* tp_as_buffer */ | |||
|
103 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
|
104 | FrameParameters__doc__, /* tp_doc */ | |||
|
105 | 0, /* tp_traverse */ | |||
|
106 | 0, /* tp_clear */ | |||
|
107 | 0, /* tp_richcompare */ | |||
|
108 | 0, /* tp_weaklistoffset */ | |||
|
109 | 0, /* tp_iter */ | |||
|
110 | 0, /* tp_iternext */ | |||
|
111 | 0, /* tp_methods */ | |||
|
112 | FrameParameters_members, /* tp_members */ | |||
|
113 | 0, /* tp_getset */ | |||
|
114 | 0, /* tp_base */ | |||
|
115 | 0, /* tp_dict */ | |||
|
116 | 0, /* tp_descr_get */ | |||
|
117 | 0, /* tp_descr_set */ | |||
|
118 | 0, /* tp_dictoffset */ | |||
|
119 | 0, /* tp_init */ | |||
|
120 | 0, /* tp_alloc */ | |||
|
121 | 0, /* tp_new */ | |||
|
122 | }; | |||
|
123 | ||||
|
124 | void frameparams_module_init(PyObject* mod) { | |||
|
125 | Py_TYPE(&FrameParametersType) = &PyType_Type; | |||
|
126 | if (PyType_Ready(&FrameParametersType) < 0) { | |||
|
127 | return; | |||
|
128 | } | |||
|
129 | ||||
|
130 | Py_IncRef((PyObject*)&FrameParametersType); | |||
|
131 | PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType); | |||
|
132 | } |
@@ -0,0 +1,194 b'' | |||||
|
1 | /** | |||
|
2 | * Copyright (c) 2016-present, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under the BSD-style license found in the | |||
|
6 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
7 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
8 | */ | |||
|
9 | ||||
|
10 | ||||
|
11 | /* ====== Dependencies ======= */ | |||
|
12 | #include <stddef.h> /* size_t */ | |||
|
13 | #include <stdlib.h> /* malloc, calloc, free */ | |||
|
14 | #include "pool.h" | |||
|
15 | ||||
|
16 | /* ====== Compiler specifics ====== */ | |||
|
17 | #if defined(_MSC_VER) | |||
|
18 | # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ | |||
|
19 | #endif | |||
|
20 | ||||
|
21 | ||||
|
22 | #ifdef ZSTD_MULTITHREAD | |||
|
23 | ||||
|
24 | #include "threading.h" /* pthread adaptation */ | |||
|
25 | ||||
|
26 | /* A job is a function and an opaque argument */ | |||
|
27 | typedef struct POOL_job_s { | |||
|
28 | POOL_function function; | |||
|
29 | void *opaque; | |||
|
30 | } POOL_job; | |||
|
31 | ||||
|
32 | struct POOL_ctx_s { | |||
|
33 | /* Keep track of the threads */ | |||
|
34 | pthread_t *threads; | |||
|
35 | size_t numThreads; | |||
|
36 | ||||
|
37 | /* The queue is a circular buffer */ | |||
|
38 | POOL_job *queue; | |||
|
39 | size_t queueHead; | |||
|
40 | size_t queueTail; | |||
|
41 | size_t queueSize; | |||
|
42 | /* The mutex protects the queue */ | |||
|
43 | pthread_mutex_t queueMutex; | |||
|
44 | /* Condition variable for pushers to wait on when the queue is full */ | |||
|
45 | pthread_cond_t queuePushCond; | |||
|
46 | /* Condition variables for poppers to wait on when the queue is empty */ | |||
|
47 | pthread_cond_t queuePopCond; | |||
|
48 | /* Indicates if the queue is shutting down */ | |||
|
49 | int shutdown; | |||
|
50 | }; | |||
|
51 | ||||
|
52 | /* POOL_thread() : | |||
|
53 | Work thread for the thread pool. | |||
|
54 | Waits for jobs and executes them. | |||
|
55 | @returns : NULL on failure else non-null. | |||
|
56 | */ | |||
|
57 | static void* POOL_thread(void* opaque) { | |||
|
58 | POOL_ctx* const ctx = (POOL_ctx*)opaque; | |||
|
59 | if (!ctx) { return NULL; } | |||
|
60 | for (;;) { | |||
|
61 | /* Lock the mutex and wait for a non-empty queue or until shutdown */ | |||
|
62 | pthread_mutex_lock(&ctx->queueMutex); | |||
|
63 | while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) { | |||
|
64 | pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); | |||
|
65 | } | |||
|
66 | /* empty => shutting down: so stop */ | |||
|
67 | if (ctx->queueHead == ctx->queueTail) { | |||
|
68 | pthread_mutex_unlock(&ctx->queueMutex); | |||
|
69 | return opaque; | |||
|
70 | } | |||
|
71 | /* Pop a job off the queue */ | |||
|
72 | { POOL_job const job = ctx->queue[ctx->queueHead]; | |||
|
73 | ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; | |||
|
74 | /* Unlock the mutex, signal a pusher, and run the job */ | |||
|
75 | pthread_mutex_unlock(&ctx->queueMutex); | |||
|
76 | pthread_cond_signal(&ctx->queuePushCond); | |||
|
77 | job.function(job.opaque); | |||
|
78 | } | |||
|
79 | } | |||
|
80 | /* Unreachable */ | |||
|
81 | } | |||
|
82 | ||||
|
83 | POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { | |||
|
84 | POOL_ctx *ctx; | |||
|
85 | /* Check the parameters */ | |||
|
86 | if (!numThreads || !queueSize) { return NULL; } | |||
|
87 | /* Allocate the context and zero initialize */ | |||
|
88 | ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx)); | |||
|
89 | if (!ctx) { return NULL; } | |||
|
90 | /* Initialize the job queue. | |||
|
91 | * It needs one extra space since one space is wasted to differentiate empty | |||
|
92 | * and full queues. | |||
|
93 | */ | |||
|
94 | ctx->queueSize = queueSize + 1; | |||
|
95 | ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job)); | |||
|
96 | ctx->queueHead = 0; | |||
|
97 | ctx->queueTail = 0; | |||
|
98 | pthread_mutex_init(&ctx->queueMutex, NULL); | |||
|
99 | pthread_cond_init(&ctx->queuePushCond, NULL); | |||
|
100 | pthread_cond_init(&ctx->queuePopCond, NULL); | |||
|
101 | ctx->shutdown = 0; | |||
|
102 | /* Allocate space for the thread handles */ | |||
|
103 | ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t)); | |||
|
104 | ctx->numThreads = 0; | |||
|
105 | /* Check for errors */ | |||
|
106 | if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } | |||
|
107 | /* Initialize the threads */ | |||
|
108 | { size_t i; | |||
|
109 | for (i = 0; i < numThreads; ++i) { | |||
|
110 | if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { | |||
|
111 | ctx->numThreads = i; | |||
|
112 | POOL_free(ctx); | |||
|
113 | return NULL; | |||
|
114 | } } | |||
|
115 | ctx->numThreads = numThreads; | |||
|
116 | } | |||
|
117 | return ctx; | |||
|
118 | } | |||
|
119 | ||||
|
120 | /*! POOL_join() : | |||
|
121 | Shutdown the queue, wake any sleeping threads, and join all of the threads. | |||
|
122 | */ | |||
|
123 | static void POOL_join(POOL_ctx *ctx) { | |||
|
124 | /* Shut down the queue */ | |||
|
125 | pthread_mutex_lock(&ctx->queueMutex); | |||
|
126 | ctx->shutdown = 1; | |||
|
127 | pthread_mutex_unlock(&ctx->queueMutex); | |||
|
128 | /* Wake up sleeping threads */ | |||
|
129 | pthread_cond_broadcast(&ctx->queuePushCond); | |||
|
130 | pthread_cond_broadcast(&ctx->queuePopCond); | |||
|
131 | /* Join all of the threads */ | |||
|
132 | { size_t i; | |||
|
133 | for (i = 0; i < ctx->numThreads; ++i) { | |||
|
134 | pthread_join(ctx->threads[i], NULL); | |||
|
135 | } } | |||
|
136 | } | |||
|
137 | ||||
|
138 | void POOL_free(POOL_ctx *ctx) { | |||
|
139 | if (!ctx) { return; } | |||
|
140 | POOL_join(ctx); | |||
|
141 | pthread_mutex_destroy(&ctx->queueMutex); | |||
|
142 | pthread_cond_destroy(&ctx->queuePushCond); | |||
|
143 | pthread_cond_destroy(&ctx->queuePopCond); | |||
|
144 | if (ctx->queue) free(ctx->queue); | |||
|
145 | if (ctx->threads) free(ctx->threads); | |||
|
146 | free(ctx); | |||
|
147 | } | |||
|
148 | ||||
|
149 | void POOL_add(void *ctxVoid, POOL_function function, void *opaque) { | |||
|
150 | POOL_ctx *ctx = (POOL_ctx *)ctxVoid; | |||
|
151 | if (!ctx) { return; } | |||
|
152 | ||||
|
153 | pthread_mutex_lock(&ctx->queueMutex); | |||
|
154 | { POOL_job const job = {function, opaque}; | |||
|
155 | /* Wait until there is space in the queue for the new job */ | |||
|
156 | size_t newTail = (ctx->queueTail + 1) % ctx->queueSize; | |||
|
157 | while (ctx->queueHead == newTail && !ctx->shutdown) { | |||
|
158 | pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); | |||
|
159 | newTail = (ctx->queueTail + 1) % ctx->queueSize; | |||
|
160 | } | |||
|
161 | /* The queue is still going => there is space */ | |||
|
162 | if (!ctx->shutdown) { | |||
|
163 | ctx->queue[ctx->queueTail] = job; | |||
|
164 | ctx->queueTail = newTail; | |||
|
165 | } | |||
|
166 | } | |||
|
167 | pthread_mutex_unlock(&ctx->queueMutex); | |||
|
168 | pthread_cond_signal(&ctx->queuePopCond); | |||
|
169 | } | |||
|
170 | ||||
|
171 | #else /* ZSTD_MULTITHREAD not defined */ | |||
|
172 | /* No multi-threading support */ | |||
|
173 | ||||
|
/* The single-threaded pool carries no state. A dummy member keeps the
 * struct non-empty, since malloc() of an empty object might return NULL. */
struct POOL_ctx_s {
    int data;   /* placeholder, never read */
};
|
178 | ||||
|
179 | POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { | |||
|
180 | (void)numThreads; | |||
|
181 | (void)queueSize; | |||
|
182 | return (POOL_ctx *)malloc(sizeof(POOL_ctx)); | |||
|
183 | } | |||
|
184 | ||||
|
185 | void POOL_free(POOL_ctx *ctx) { | |||
|
186 | if (ctx) free(ctx); | |||
|
187 | } | |||
|
188 | ||||
|
189 | void POOL_add(void *ctx, POOL_function function, void *opaque) { | |||
|
190 | (void)ctx; | |||
|
191 | function(opaque); | |||
|
192 | } | |||
|
193 | ||||
|
194 | #endif /* ZSTD_MULTITHREAD */ |
@@ -0,0 +1,56 b'' | |||||
|
1 | /** | |||
|
2 | * Copyright (c) 2016-present, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under the BSD-style license found in the | |||
|
6 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
7 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
8 | */ | |||
|
9 | #ifndef POOL_H | |||
|
10 | #define POOL_H | |||
|
11 | ||||
|
12 | #if defined (__cplusplus) | |||
|
13 | extern "C" { | |||
|
14 | #endif | |||
|
15 | ||||
|
16 | ||||
|
17 | #include <stddef.h> /* size_t */ | |||
|
18 | ||||
|
19 | typedef struct POOL_ctx_s POOL_ctx; | |||
|
20 | ||||
|
21 | /*! POOL_create() : | |||
|
22 | Create a thread pool with at most `numThreads` threads. | |||
|
23 | `numThreads` must be at least 1. | |||
|
24 | The maximum number of queued jobs before blocking is `queueSize`. | |||
|
25 | `queueSize` must be at least 1. | |||
|
26 | @return : The POOL_ctx pointer on success else NULL. | |||
|
27 | */ | |||
|
28 | POOL_ctx *POOL_create(size_t numThreads, size_t queueSize); | |||
|
29 | ||||
|
30 | /*! POOL_free() : | |||
|
31 | Free a thread pool returned by POOL_create(). | |||
|
32 | */ | |||
|
33 | void POOL_free(POOL_ctx *ctx); | |||
|
34 | ||||
|
35 | /*! POOL_function : | |||
|
36 | The function type that can be added to a thread pool. | |||
|
37 | */ | |||
|
38 | typedef void (*POOL_function)(void *); | |||
|
39 | /*! POOL_add_function : | |||
|
40 | The function type for a generic thread pool add function. | |||
|
41 | */ | |||
|
42 | typedef void (*POOL_add_function)(void *, POOL_function, void *); | |||
|
43 | ||||
|
44 | /*! POOL_add() : | |||
|
45 | Add the job `function(opaque)` to the thread pool. | |||
|
46 | Possibly blocks until there is room in the queue. | |||
|
47 | Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed. | |||
|
48 | */ | |||
|
49 | void POOL_add(void *ctx, POOL_function function, void *opaque); | |||
|
50 | ||||
|
51 | ||||
|
52 | #if defined (__cplusplus) | |||
|
53 | } | |||
|
54 | #endif | |||
|
55 | ||||
|
56 | #endif |
@@ -0,0 +1,79 b'' | |||||
|
1 | ||||
|
2 | /** | |||
|
3 | * Copyright (c) 2016 Tino Reichardt | |||
|
4 | * All rights reserved. | |||
|
5 | * | |||
|
6 | * This source code is licensed under the BSD-style license found in the | |||
|
7 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
8 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
9 | * | |||
|
10 | * You can contact the author at: | |||
|
11 | * - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
|
12 | */ | |||
|
13 | ||||
|
14 | /** | |||
|
15 | * This file holds wrappers for systems that do not support pthreads | |||
|
16 | */ | |||
|
17 | ||||
|
18 | /* ====== Compiler specifics ====== */ | |||
|
19 | #if defined(_MSC_VER) | |||
|
20 | # pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */ | |||
|
21 | #endif | |||
|
22 | ||||
|
23 | ||||
|
24 | #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) | |||
|
25 | ||||
|
26 | /** | |||
|
27 | * Windows minimalist Pthread Wrapper, based on : | |||
|
28 | * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html | |||
|
29 | */ | |||
|
30 | ||||
|
31 | ||||
|
32 | /* === Dependencies === */ | |||
|
33 | #include <process.h> | |||
|
34 | #include <errno.h> | |||
|
35 | #include "threading.h" | |||
|
36 | ||||
|
37 | ||||
|
38 | /* === Implementation === */ | |||
|
39 | ||||
|
40 | static unsigned __stdcall worker(void *arg) | |||
|
41 | { | |||
|
42 | pthread_t* const thread = (pthread_t*) arg; | |||
|
43 | thread->arg = thread->start_routine(thread->arg); | |||
|
44 | return 0; | |||
|
45 | } | |||
|
46 | ||||
|
47 | int pthread_create(pthread_t* thread, const void* unused, | |||
|
48 | void* (*start_routine) (void*), void* arg) | |||
|
49 | { | |||
|
50 | (void)unused; | |||
|
51 | thread->arg = arg; | |||
|
52 | thread->start_routine = start_routine; | |||
|
53 | thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); | |||
|
54 | ||||
|
55 | if (!thread->handle) | |||
|
56 | return errno; | |||
|
57 | else | |||
|
58 | return 0; | |||
|
59 | } | |||
|
60 | ||||
|
61 | int _pthread_join(pthread_t * thread, void **value_ptr) | |||
|
62 | { | |||
|
63 | DWORD result; | |||
|
64 | ||||
|
65 | if (!thread->handle) return 0; | |||
|
66 | ||||
|
67 | result = WaitForSingleObject(thread->handle, INFINITE); | |||
|
68 | switch (result) { | |||
|
69 | case WAIT_OBJECT_0: | |||
|
70 | if (value_ptr) *value_ptr = thread->arg; | |||
|
71 | return 0; | |||
|
72 | case WAIT_ABANDONED: | |||
|
73 | return EINVAL; | |||
|
74 | default: | |||
|
75 | return GetLastError(); | |||
|
76 | } | |||
|
77 | } | |||
|
78 | ||||
|
79 | #endif /* ZSTD_MULTITHREAD */ |
@@ -0,0 +1,104 b'' | |||||
|
1 | ||||
|
2 | /** | |||
|
3 | * Copyright (c) 2016 Tino Reichardt | |||
|
4 | * All rights reserved. | |||
|
5 | * | |||
|
6 | * This source code is licensed under the BSD-style license found in the | |||
|
7 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
8 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
9 | * | |||
|
10 | * You can contact the author at: | |||
|
11 | * - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
|
12 | */ | |||
|
13 | ||||
|
14 | #ifndef THREADING_H_938743 | |||
|
15 | #define THREADING_H_938743 | |||
|
16 | ||||
|
17 | #if defined (__cplusplus) | |||
|
18 | extern "C" { | |||
|
19 | #endif | |||
|
20 | ||||
|
21 | #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) | |||
|
22 | ||||
|
23 | /** | |||
|
24 | * Windows minimalist Pthread Wrapper, based on : | |||
|
25 | * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html | |||
|
26 | */ | |||
|
27 | #ifdef WINVER | |||
|
28 | # undef WINVER | |||
|
29 | #endif | |||
|
30 | #define WINVER 0x0600 | |||
|
31 | ||||
|
32 | #ifdef _WIN32_WINNT | |||
|
33 | # undef _WIN32_WINNT | |||
|
34 | #endif | |||
|
35 | #define _WIN32_WINNT 0x0600 | |||
|
36 | ||||
|
37 | #ifndef WIN32_LEAN_AND_MEAN | |||
|
38 | # define WIN32_LEAN_AND_MEAN | |||
|
39 | #endif | |||
|
40 | ||||
|
41 | #include <windows.h> | |||
|
42 | ||||
|
43 | /* mutex */ | |||
|
44 | #define pthread_mutex_t CRITICAL_SECTION | |||
|
45 | #define pthread_mutex_init(a,b) InitializeCriticalSection((a)) | |||
|
46 | #define pthread_mutex_destroy(a) DeleteCriticalSection((a)) | |||
|
47 | #define pthread_mutex_lock(a) EnterCriticalSection((a)) | |||
|
48 | #define pthread_mutex_unlock(a) LeaveCriticalSection((a)) | |||
|
49 | ||||
|
50 | /* condition variable */ | |||
|
51 | #define pthread_cond_t CONDITION_VARIABLE | |||
|
52 | #define pthread_cond_init(a, b) InitializeConditionVariable((a)) | |||
|
53 | #define pthread_cond_destroy(a) /* No delete */ | |||
|
54 | #define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) | |||
|
55 | #define pthread_cond_signal(a) WakeConditionVariable((a)) | |||
|
56 | #define pthread_cond_broadcast(a) WakeAllConditionVariable((a)) | |||
|
57 | ||||
|
58 | /* pthread_create() and pthread_join() */ | |||
|
59 | typedef struct { | |||
|
60 | HANDLE handle; | |||
|
61 | void* (*start_routine)(void*); | |||
|
62 | void* arg; | |||
|
63 | } pthread_t; | |||
|
64 | ||||
|
65 | int pthread_create(pthread_t* thread, const void* unused, | |||
|
66 | void* (*start_routine) (void*), void* arg); | |||
|
67 | ||||
|
68 | #define pthread_join(a, b) _pthread_join(&(a), (b)) | |||
|
69 | int _pthread_join(pthread_t* thread, void** value_ptr); | |||
|
70 | ||||
|
71 | /** | |||
|
72 | * add here more wrappers as required | |||
|
73 | */ | |||
|
74 | ||||
|
75 | ||||
|
76 | #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ | |||
|
77 | /* === POSIX Systems === */ | |||
|
78 | # include <pthread.h> | |||
|
79 | ||||
|
80 | #else /* ZSTD_MULTITHREAD not defined */ | |||
|
81 | /* No multithreading support */ | |||
|
82 | ||||
|
83 | #define pthread_mutex_t int /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */ | |||
|
84 | #define pthread_mutex_init(a,b) | |||
|
85 | #define pthread_mutex_destroy(a) | |||
|
86 | #define pthread_mutex_lock(a) | |||
|
87 | #define pthread_mutex_unlock(a) | |||
|
88 | ||||
|
89 | #define pthread_cond_t int | |||
|
90 | #define pthread_cond_init(a,b) | |||
|
91 | #define pthread_cond_destroy(a) | |||
|
92 | #define pthread_cond_wait(a,b) | |||
|
93 | #define pthread_cond_signal(a) | |||
|
94 | #define pthread_cond_broadcast(a) | |||
|
95 | ||||
|
96 | /* do not use pthread_t */ | |||
|
97 | ||||
|
98 | #endif /* ZSTD_MULTITHREAD */ | |||
|
99 | ||||
|
100 | #if defined (__cplusplus) | |||
|
101 | } | |||
|
102 | #endif | |||
|
103 | ||||
|
104 | #endif /* THREADING_H_938743 */ |
This diff has been collapsed as it changes many lines, (740 lines changed) Show them Hide them | |||||
@@ -0,0 +1,740 b'' | |||||
|
1 | /** | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under the BSD-style license found in the | |||
|
6 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
7 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
8 | */ | |||
|
9 | ||||
|
10 | ||||
|
11 | /* ====== Tuning parameters ====== */ | |||
|
12 | #define ZSTDMT_NBTHREADS_MAX 128 | |||
|
13 | ||||
|
14 | ||||
|
15 | /* ====== Compiler specifics ====== */ | |||
|
16 | #if defined(_MSC_VER) | |||
|
17 | # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ | |||
|
18 | #endif | |||
|
19 | ||||
|
20 | ||||
|
21 | /* ====== Dependencies ====== */ | |||
|
22 | #include <stdlib.h> /* malloc */ | |||
|
23 | #include <string.h> /* memcpy */ | |||
|
24 | #include "pool.h" /* threadpool */ | |||
|
25 | #include "threading.h" /* mutex */ | |||
|
26 | #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ | |||
|
27 | #include "zstdmt_compress.h" | |||
|
28 | #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ | |||
|
29 | #include "xxhash.h" | |||
|
30 | ||||
|
31 | ||||
|
32 | /* ====== Debug ====== */ | |||
|
33 | #if 0 | |||
|
34 | ||||
|
35 | # include <stdio.h> | |||
|
36 | # include <unistd.h> | |||
|
37 | # include <sys/times.h> | |||
|
38 | static unsigned g_debugLevel = 3; | |||
|
39 | # define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); } | |||
|
40 | # define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } | |||
|
41 | ||||
|
42 | # define DEBUG_PRINTHEX(l,p,n) { \ | |||
|
43 | unsigned debug_u; \ | |||
|
44 | for (debug_u=0; debug_u<(n); debug_u++) \ | |||
|
45 | DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ | |||
|
46 | DEBUGLOGRAW(l, " \n"); \ | |||
|
47 | } | |||
|
48 | ||||
|
49 | static unsigned long long GetCurrentClockTimeMicroseconds() | |||
|
50 | { | |||
|
51 | static clock_t _ticksPerSecond = 0; | |||
|
52 | if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); | |||
|
53 | ||||
|
54 | struct tms junk; clock_t newTicks = (clock_t) times(&junk); | |||
|
55 | return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); | |||
|
56 | } | |||
|
57 | ||||
|
58 | #define MUTEX_WAIT_TIME_DLEVEL 5 | |||
|
59 | #define PTHREAD_MUTEX_LOCK(mutex) \ | |||
|
60 | if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ | |||
|
61 | unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \ | |||
|
62 | pthread_mutex_lock(mutex); \ | |||
|
63 | unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \ | |||
|
64 | unsigned long long elapsedTime = (afterTime-beforeTime); \ | |||
|
65 | if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ | |||
|
66 | DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ | |||
|
67 | elapsedTime, #mutex); \ | |||
|
68 | } \ | |||
|
69 | } else pthread_mutex_lock(mutex); | |||
|
70 | ||||
|
71 | #else | |||
|
72 | ||||
|
73 | # define DEBUGLOG(l, ...) {} /* disabled */ | |||
|
74 | # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m) | |||
|
75 | # define DEBUG_PRINTHEX(l,p,n) {} | |||
|
76 | ||||
|
77 | #endif | |||
|
78 | ||||
|
79 | ||||
|
80 | /* ===== Buffer Pool ===== */ | |||
|
81 | ||||
|
/* A heap region : its base address and its size in bytes. */
typedef struct buffer_s {
    void* start;   /* base address, NULL when there is no buffer */
    size_t size;   /* number of bytes available at `start` */
} buffer_t;

/* The "no buffer" value. */
static const buffer_t g_nullBuffer = { NULL, 0 };
|
88 | ||||
|
89 | typedef struct ZSTDMT_bufferPool_s { | |||
|
90 | unsigned totalBuffers; | |||
|
91 | unsigned nbBuffers; | |||
|
92 | buffer_t bTable[1]; /* variable size */ | |||
|
93 | } ZSTDMT_bufferPool; | |||
|
94 | ||||
|
95 | static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads) | |||
|
96 | { | |||
|
97 | unsigned const maxNbBuffers = 2*nbThreads + 2; | |||
|
98 | ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t)); | |||
|
99 | if (bufPool==NULL) return NULL; | |||
|
100 | bufPool->totalBuffers = maxNbBuffers; | |||
|
101 | bufPool->nbBuffers = 0; | |||
|
102 | return bufPool; | |||
|
103 | } | |||
|
104 | ||||
|
105 | static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) | |||
|
106 | { | |||
|
107 | unsigned u; | |||
|
108 | if (!bufPool) return; /* compatibility with free on NULL */ | |||
|
109 | for (u=0; u<bufPool->totalBuffers; u++) | |||
|
110 | free(bufPool->bTable[u].start); | |||
|
111 | free(bufPool); | |||
|
112 | } | |||
|
113 | ||||
|
114 | /* assumption : invocation from main thread only ! */ | |||
|
115 | static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) | |||
|
116 | { | |||
|
117 | if (pool->nbBuffers) { /* try to use an existing buffer */ | |||
|
118 | buffer_t const buf = pool->bTable[--(pool->nbBuffers)]; | |||
|
119 | size_t const availBufferSize = buf.size; | |||
|
120 | if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ | |||
|
121 | return buf; | |||
|
122 | free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */ | |||
|
123 | } | |||
|
124 | /* create new buffer */ | |||
|
125 | { buffer_t buffer; | |||
|
126 | void* const start = malloc(bSize); | |||
|
127 | if (start==NULL) bSize = 0; | |||
|
128 | buffer.start = start; /* note : start can be NULL if malloc fails ! */ | |||
|
129 | buffer.size = bSize; | |||
|
130 | return buffer; | |||
|
131 | } | |||
|
132 | } | |||
|
133 | ||||
|
134 | /* store buffer for later re-use, up to pool capacity */ | |||
|
135 | static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) | |||
|
136 | { | |||
|
137 | if (buf.start == NULL) return; /* release on NULL */ | |||
|
138 | if (pool->nbBuffers < pool->totalBuffers) { | |||
|
139 | pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ | |||
|
140 | return; | |||
|
141 | } | |||
|
142 | /* Reached bufferPool capacity (should not happen) */ | |||
|
143 | free(buf.start); | |||
|
144 | } | |||
|
145 | ||||
|
146 | ||||
|
147 | /* ===== CCtx Pool ===== */ | |||
|
148 | ||||
|
149 | typedef struct { | |||
|
150 | unsigned totalCCtx; | |||
|
151 | unsigned availCCtx; | |||
|
152 | ZSTD_CCtx* cctx[1]; /* variable size */ | |||
|
153 | } ZSTDMT_CCtxPool; | |||
|
154 | ||||
|
155 | /* assumption : CCtxPool invocation only from main thread */ | |||
|
156 | ||||
|
157 | /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ | |||
|
158 | static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) | |||
|
159 | { | |||
|
160 | unsigned u; | |||
|
161 | for (u=0; u<pool->totalCCtx; u++) | |||
|
162 | ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */ | |||
|
163 | free(pool); | |||
|
164 | } | |||
|
165 | ||||
|
166 | /* ZSTDMT_createCCtxPool() : | |||
|
167 | * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */ | |||
|
168 | static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) | |||
|
169 | { | |||
|
170 | ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*)); | |||
|
171 | if (!cctxPool) return NULL; | |||
|
172 | cctxPool->totalCCtx = nbThreads; | |||
|
173 | cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ | |||
|
174 | cctxPool->cctx[0] = ZSTD_createCCtx(); | |||
|
175 | if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } | |||
|
176 | DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads); | |||
|
177 | return cctxPool; | |||
|
178 | } | |||
|
179 | ||||
|
180 | static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) | |||
|
181 | { | |||
|
182 | if (pool->availCCtx) { | |||
|
183 | pool->availCCtx--; | |||
|
184 | return pool->cctx[pool->availCCtx]; | |||
|
185 | } | |||
|
186 | return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */ | |||
|
187 | } | |||
|
188 | ||||
|
189 | static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) | |||
|
190 | { | |||
|
191 | if (cctx==NULL) return; /* compatibility with release on NULL */ | |||
|
192 | if (pool->availCCtx < pool->totalCCtx) | |||
|
193 | pool->cctx[pool->availCCtx++] = cctx; | |||
|
194 | else | |||
|
195 | /* pool overflow : should not happen, since totalCCtx==nbThreads */ | |||
|
196 | ZSTD_freeCCtx(cctx); | |||
|
197 | } | |||
|
198 | ||||
|
199 | ||||
|
200 | /* ===== Thread worker ===== */ | |||
|
201 | ||||
|
202 | typedef struct { | |||
|
203 | buffer_t buffer; | |||
|
204 | size_t filled; | |||
|
205 | } inBuff_t; | |||
|
206 | ||||
|
207 | typedef struct { | |||
|
208 | ZSTD_CCtx* cctx; | |||
|
209 | buffer_t src; | |||
|
210 | const void* srcStart; | |||
|
211 | size_t srcSize; | |||
|
212 | size_t dictSize; | |||
|
213 | buffer_t dstBuff; | |||
|
214 | size_t cSize; | |||
|
215 | size_t dstFlushed; | |||
|
216 | unsigned firstChunk; | |||
|
217 | unsigned lastChunk; | |||
|
218 | unsigned jobCompleted; | |||
|
219 | unsigned jobScanned; | |||
|
220 | pthread_mutex_t* jobCompleted_mutex; | |||
|
221 | pthread_cond_t* jobCompleted_cond; | |||
|
222 | ZSTD_parameters params; | |||
|
223 | ZSTD_CDict* cdict; | |||
|
224 | unsigned long long fullFrameSize; | |||
|
225 | } ZSTDMT_jobDescription; | |||
|
226 | ||||
|
227 | /* ZSTDMT_compressChunk() : POOL_function type */ | |||
|
228 | void ZSTDMT_compressChunk(void* jobDescription) | |||
|
229 | { | |||
|
230 | ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; | |||
|
231 | const void* const src = (const char*)job->srcStart + job->dictSize; | |||
|
232 | buffer_t const dstBuff = job->dstBuff; | |||
|
233 | DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); | |||
|
234 | if (job->cdict) { | |||
|
235 | size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize); | |||
|
236 | if (job->cdict) DEBUGLOG(3, "using CDict "); | |||
|
237 | if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } | |||
|
238 | } else { | |||
|
239 | size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); | |||
|
240 | if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } | |||
|
241 | ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1); | |||
|
242 | } | |||
|
243 | if (!job->firstChunk) { /* flush frame header */ | |||
|
244 | size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); | |||
|
245 | if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } | |||
|
246 | ZSTD_invalidateRepCodes(job->cctx); | |||
|
247 | } | |||
|
248 | ||||
|
249 | DEBUGLOG(4, "Compressing : "); | |||
|
250 | DEBUG_PRINTHEX(4, job->srcStart, 12); | |||
|
251 | job->cSize = (job->lastChunk) ? /* last chunk signal */ | |||
|
252 | ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : | |||
|
253 | ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize); | |||
|
254 | DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); | |||
|
255 | ||||
|
256 | _endJob: | |||
|
257 | PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); | |||
|
258 | job->jobCompleted = 1; | |||
|
259 | job->jobScanned = 0; | |||
|
260 | pthread_cond_signal(job->jobCompleted_cond); | |||
|
261 | pthread_mutex_unlock(job->jobCompleted_mutex); | |||
|
262 | } | |||
|
263 | ||||
|
264 | ||||
|
265 | /* ------------------------------------------ */ | |||
|
266 | /* ===== Multi-threaded compression ===== */ | |||
|
267 | /* ------------------------------------------ */ | |||
|
268 | ||||
|
269 | struct ZSTDMT_CCtx_s { /* multi-threaded compression context */ | |||
|
270 | POOL_ctx* factory; /* worker thread pool, created by POOL_create() */ | |||
|
271 | ZSTDMT_bufferPool* buffPool; /* source of input and output buffers */ | |||
|
272 | ZSTDMT_CCtxPool* cctxPool; /* source of per-job ZSTD_CCtx */ | |||
|
273 | pthread_mutex_t jobCompleted_mutex; /* held while reading or writing jobs[].jobCompleted */ | |||
|
274 | pthread_cond_t jobCompleted_cond; /* signalled by workers when a job completes */ | |||
|
275 | size_t targetSectionSize; /* nb of input bytes compressed by each streaming job */ | |||
|
276 | size_t marginSize; /* targetSectionSize >> 2 */ | |||
|
277 | size_t inBuffSize; /* targetDictSize + targetSectionSize + marginSize */ | |||
|
278 | size_t dictSize; /* size of the prefix currently stored at the start of inBuff */ | |||
|
279 | size_t targetDictSize; /* upper bound for dictSize; derived from windowLog and overlapRLog */ | |||
|
280 | inBuff_t inBuff; /* input accumulated for the next job */ | |||
|
281 | ZSTD_parameters params; | |||
|
282 | XXH64_state_t xxhState; /* running frame checksum, used when params.fParams.checksumFlag is set */ | |||
|
283 | unsigned nbThreads; | |||
|
284 | unsigned jobIDMask; /* nbJobs-1; job IDs are masked with it to index jobs[] */ | |||
|
285 | unsigned doneJobID; /* ID of the next job to flush */ | |||
|
286 | unsigned nextJobID; /* ID that will be given to the next created job */ | |||
|
287 | unsigned frameEnded; /* set once the job ending the frame has been created */ | |||
|
288 | unsigned allJobsCompleted; /* when 0, job resources may still be held and must be released */ | |||
|
289 | unsigned overlapRLog; /* targetDictSize = 1 << (windowLog - overlapRLog); >= 9 means no overlap */ | |||
|
290 | unsigned long long frameContentSize; /* pledgedSrcSize received at init */ | |||
|
291 | size_t sectionSize; /* set through ZSTDMT_p_sectionSize; 0 selects a size derived from windowLog */ | |||
|
292 | ZSTD_CDict* cdict; /* digested dictionary, only handed to the first job of a frame */ | |||
|
293 | ZSTD_CStream* cstream; /* only created when nbThreads==1 */ | |||
|
294 | ZSTDMT_jobDescription jobs[1]; /* variable size (must lie at the end) */ | |||
|
295 | }; | |||
|
296 | ||||
|
297 | ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) | |||
|
298 | { | |||
|
299 | ZSTDMT_CCtx* cctx; | |||
|
300 | U32 const minNbJobs = nbThreads + 2; | |||
|
301 | U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; | |||
|
302 | U32 const nbJobs = 1 << nbJobsLog2; | |||
|
303 | DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", | |||
|
304 | nbThreads, minNbJobs, nbJobsLog2, nbJobs); | |||
|
305 | if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; | |||
|
306 | cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription)); | |||
|
307 | if (!cctx) return NULL; | |||
|
308 | cctx->nbThreads = nbThreads; | |||
|
309 | cctx->jobIDMask = nbJobs - 1; | |||
|
310 | cctx->allJobsCompleted = 1; | |||
|
311 | cctx->sectionSize = 0; | |||
|
312 | cctx->overlapRLog = 3; | |||
|
313 | cctx->factory = POOL_create(nbThreads, 1); | |||
|
314 | cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); | |||
|
315 | cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); | |||
|
316 | if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */ | |||
|
317 | ZSTDMT_freeCCtx(cctx); | |||
|
318 | return NULL; | |||
|
319 | } | |||
|
320 | if (nbThreads==1) { | |||
|
321 | cctx->cstream = ZSTD_createCStream(); | |||
|
322 | if (!cctx->cstream) { | |||
|
323 | ZSTDMT_freeCCtx(cctx); return NULL; | |||
|
324 | } } | |||
|
325 | pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */ | |||
|
326 | pthread_cond_init(&cctx->jobCompleted_cond, NULL); | |||
|
327 | DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads); | |||
|
328 | return cctx; | |||
|
329 | } | |||
|
330 | ||||
|
331 | /* ZSTDMT_releaseAllJobResources() : | |||
|
332 | * Ensure all workers are killed first. */ | |||
|
333 | static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) | |||
|
334 | { | |||
|
335 | unsigned jobID; | |||
|
336 | for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { | |||
|
337 | ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff); | |||
|
338 | mtctx->jobs[jobID].dstBuff = g_nullBuffer; | |||
|
339 | ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src); | |||
|
340 | mtctx->jobs[jobID].src = g_nullBuffer; | |||
|
341 | ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx); | |||
|
342 | mtctx->jobs[jobID].cctx = NULL; | |||
|
343 | } | |||
|
344 | memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); | |||
|
345 | ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); | |||
|
346 | mtctx->inBuff.buffer = g_nullBuffer; | |||
|
347 | mtctx->allJobsCompleted = 1; | |||
|
348 | } | |||
|
349 | ||||
|
350 | size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) | |||
|
351 | { | |||
|
352 | if (mtctx==NULL) return 0; /* compatible with free on NULL */ | |||
|
353 | POOL_free(mtctx->factory); | |||
|
354 | if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */ | |||
|
355 | ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */ | |||
|
356 | ZSTDMT_freeCCtxPool(mtctx->cctxPool); | |||
|
357 | ZSTD_freeCDict(mtctx->cdict); | |||
|
358 | ZSTD_freeCStream(mtctx->cstream); | |||
|
359 | pthread_mutex_destroy(&mtctx->jobCompleted_mutex); | |||
|
360 | pthread_cond_destroy(&mtctx->jobCompleted_cond); | |||
|
361 | free(mtctx); | |||
|
362 | return 0; | |||
|
363 | } | |||
|
364 | ||||
|
365 | size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value) | |||
|
366 | { | |||
|
367 | switch(parameter) | |||
|
368 | { | |||
|
369 | case ZSTDMT_p_sectionSize : | |||
|
370 | mtctx->sectionSize = value; | |||
|
371 | return 0; | |||
|
372 | case ZSTDMT_p_overlapSectionLog : | |||
|
373 | DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value); | |||
|
374 | mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value; | |||
|
375 | return 0; | |||
|
376 | default : | |||
|
377 | return ERROR(compressionParameter_unsupported); | |||
|
378 | } | |||
|
379 | } | |||
|
380 | ||||
|
381 | ||||
|
382 | /* ------------------------------------------ */ | |||
|
383 | /* ===== Multi-threaded compression ===== */ | |||
|
384 | /* ------------------------------------------ */ | |||
|
385 | ||||
|
386 | size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, | |||
|
387 | void* dst, size_t dstCapacity, | |||
|
388 | const void* src, size_t srcSize, | |||
|
389 | int compressionLevel) | |||
|
390 | { | |||
|
391 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); | |||
|
392 | size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2); | |||
|
393 | unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */; | |||
|
394 | unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads); | |||
|
395 | size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks; | |||
|
396 | size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */ | |||
|
397 | size_t remainingSrcSize = srcSize; | |||
|
398 | const char* const srcStart = (const char*)src; | |||
|
399 | size_t frameStartPos = 0; | |||
|
400 | ||||
|
401 | DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize); | |||
|
402 | DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize); | |||
|
403 | params.fParams.contentSizeFlag = 1; | |||
|
404 | ||||
|
405 | if (nbChunks==1) { /* fallback to single-thread mode */ | |||
|
406 | ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; | |||
|
407 | return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); | |||
|
408 | } | |||
|
409 | ||||
|
410 | { unsigned u; | |||
|
411 | for (u=0; u<nbChunks; u++) { | |||
|
412 | size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize); | |||
|
413 | size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity; | |||
|
414 | buffer_t const dstAsBuffer = { dst, dstCapacity }; | |||
|
415 | buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer; | |||
|
416 | ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); | |||
|
417 | ||||
|
418 | if ((cctx==NULL) || (dstBuffer.start==NULL)) { | |||
|
419 | mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */ | |||
|
420 | mtctx->jobs[u].jobCompleted = 1; | |||
|
421 | nbChunks = u+1; | |||
|
422 | break; /* let's wait for previous jobs to complete, but don't start new ones */ | |||
|
423 | } | |||
|
424 | ||||
|
425 | mtctx->jobs[u].srcStart = srcStart + frameStartPos; | |||
|
426 | mtctx->jobs[u].srcSize = chunkSize; | |||
|
427 | mtctx->jobs[u].fullFrameSize = srcSize; | |||
|
428 | mtctx->jobs[u].params = params; | |||
|
429 | mtctx->jobs[u].dstBuff = dstBuffer; | |||
|
430 | mtctx->jobs[u].cctx = cctx; | |||
|
431 | mtctx->jobs[u].firstChunk = (u==0); | |||
|
432 | mtctx->jobs[u].lastChunk = (u==nbChunks-1); | |||
|
433 | mtctx->jobs[u].jobCompleted = 0; | |||
|
434 | mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; | |||
|
435 | mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; | |||
|
436 | ||||
|
437 | DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize); | |||
|
438 | DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12); | |||
|
439 | POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]); | |||
|
440 | ||||
|
441 | frameStartPos += chunkSize; | |||
|
442 | remainingSrcSize -= chunkSize; | |||
|
443 | } } | |||
|
444 | /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */ | |||
|
445 | ||||
|
446 | { unsigned chunkID; | |||
|
447 | size_t error = 0, dstPos = 0; | |||
|
448 | for (chunkID=0; chunkID<nbChunks; chunkID++) { | |||
|
449 | DEBUGLOG(3, "waiting for chunk %u ", chunkID); | |||
|
450 | PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex); | |||
|
451 | while (mtctx->jobs[chunkID].jobCompleted==0) { | |||
|
452 | DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID); | |||
|
453 | pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); | |||
|
454 | } | |||
|
455 | pthread_mutex_unlock(&mtctx->jobCompleted_mutex); | |||
|
456 | DEBUGLOG(3, "ready to write chunk %u ", chunkID); | |||
|
457 | ||||
|
458 | ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); | |||
|
459 | mtctx->jobs[chunkID].cctx = NULL; | |||
|
460 | mtctx->jobs[chunkID].srcStart = NULL; | |||
|
461 | { size_t const cSize = mtctx->jobs[chunkID].cSize; | |||
|
462 | if (ZSTD_isError(cSize)) error = cSize; | |||
|
463 | if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); | |||
|
464 | if (chunkID) { /* note : chunk 0 is already written directly into dst */ | |||
|
465 | if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); | |||
|
466 | ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); | |||
|
467 | mtctx->jobs[chunkID].dstBuff = g_nullBuffer; | |||
|
468 | } | |||
|
469 | dstPos += cSize ; | |||
|
470 | } | |||
|
471 | } | |||
|
472 | if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos); | |||
|
473 | return error ? error : dstPos; | |||
|
474 | } | |||
|
475 | ||||
|
476 | } | |||
|
477 | ||||
|
478 | ||||
|
479 | /* ====================================== */ | |||
|
480 | /* ======= Streaming API ======= */ | |||
|
481 | /* ====================================== */ | |||
|
482 | ||||
|
483 | static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) { | |||
|
484 | while (zcs->doneJobID < zcs->nextJobID) { | |||
|
485 | unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; | |||
|
486 | PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); | |||
|
487 | while (zcs->jobs[jobID].jobCompleted==0) { | |||
|
488 | DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ | |||
|
489 | pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); | |||
|
490 | } | |||
|
491 | pthread_mutex_unlock(&zcs->jobCompleted_mutex); | |||
|
492 | zcs->doneJobID++; | |||
|
493 | } | |||
|
494 | } | |||
|
495 | ||||
|
496 | ||||
|
497 | static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, | |||
|
498 | const void* dict, size_t dictSize, unsigned updateDict, | |||
|
499 | ZSTD_parameters params, unsigned long long pledgedSrcSize) | |||
|
500 | { | |||
|
501 | ZSTD_customMem const cmem = { NULL, NULL, NULL }; | |||
|
502 | DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog); | |||
|
503 | if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize); | |||
|
504 | if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ | |||
|
505 | ZSTDMT_waitForAllJobsCompleted(zcs); | |||
|
506 | ZSTDMT_releaseAllJobResources(zcs); | |||
|
507 | zcs->allJobsCompleted = 1; | |||
|
508 | } | |||
|
509 | zcs->params = params; | |||
|
510 | if (updateDict) { | |||
|
511 | ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL; | |||
|
512 | if (dict && dictSize) { | |||
|
513 | zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem); | |||
|
514 | if (zcs->cdict == NULL) return ERROR(memory_allocation); | |||
|
515 | } } | |||
|
516 | zcs->frameContentSize = pledgedSrcSize; | |||
|
517 | zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog); | |||
|
518 | DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog); | |||
|
519 | DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10)); | |||
|
520 | zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2); | |||
|
521 | zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); | |||
|
522 | zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize); | |||
|
523 | DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10)); | |||
|
524 | zcs->marginSize = zcs->targetSectionSize >> 2; | |||
|
525 | zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize; | |||
|
526 | zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); | |||
|
527 | if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); | |||
|
528 | zcs->inBuff.filled = 0; | |||
|
529 | zcs->dictSize = 0; | |||
|
530 | zcs->doneJobID = 0; | |||
|
531 | zcs->nextJobID = 0; | |||
|
532 | zcs->frameEnded = 0; | |||
|
533 | zcs->allJobsCompleted = 0; | |||
|
534 | if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0); | |||
|
535 | return 0; | |||
|
536 | } | |||
|
537 | ||||
|
538 | size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, | |||
|
539 | const void* dict, size_t dictSize, | |||
|
540 | ZSTD_parameters params, unsigned long long pledgedSrcSize) | |||
|
541 | { | |||
|
542 | return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize); | |||
|
543 | } | |||
|
544 | ||||
|
545 | /* ZSTDMT_resetCStream() : | |||
|
546 | * pledgedSrcSize is optional and can be zero == unknown */ | |||
|
547 | size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize) | |||
|
548 | { | |||
|
549 | if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize); | |||
|
550 | return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize); | |||
|
551 | } | |||
|
552 | ||||
|
553 | size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { | |||
|
554 | ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0); | |||
|
555 | return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0); | |||
|
556 | } | |||
|
557 | ||||
|
558 | ||||
|
559 | static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame) | |||
|
560 | { | |||
|
561 | size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); | |||
|
562 | buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); | |||
|
563 | ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); | |||
|
564 | unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; | |||
|
565 | ||||
|
566 | if ((cctx==NULL) || (dstBuffer.start==NULL)) { | |||
|
567 | zcs->jobs[jobID].jobCompleted = 1; | |||
|
568 | zcs->nextJobID++; | |||
|
569 | ZSTDMT_waitForAllJobsCompleted(zcs); | |||
|
570 | ZSTDMT_releaseAllJobResources(zcs); | |||
|
571 | return ERROR(memory_allocation); | |||
|
572 | } | |||
|
573 | ||||
|
574 | DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize); | |||
|
575 | zcs->jobs[jobID].src = zcs->inBuff.buffer; | |||
|
576 | zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; | |||
|
577 | zcs->jobs[jobID].srcSize = srcSize; | |||
|
578 | zcs->jobs[jobID].dictSize = zcs->dictSize; /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */ | |||
|
579 | zcs->jobs[jobID].params = zcs->params; | |||
|
580 | if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ | |||
|
581 | zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL; | |||
|
582 | zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; | |||
|
583 | zcs->jobs[jobID].dstBuff = dstBuffer; | |||
|
584 | zcs->jobs[jobID].cctx = cctx; | |||
|
585 | zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); | |||
|
586 | zcs->jobs[jobID].lastChunk = endFrame; | |||
|
587 | zcs->jobs[jobID].jobCompleted = 0; | |||
|
588 | zcs->jobs[jobID].dstFlushed = 0; | |||
|
589 | zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; | |||
|
590 | zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; | |||
|
591 | ||||
|
592 | /* get a new buffer for next input */ | |||
|
593 | if (!endFrame) { | |||
|
594 | size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize); | |||
|
595 | zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); | |||
|
596 | if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ | |||
|
597 | zcs->jobs[jobID].jobCompleted = 1; | |||
|
598 | zcs->nextJobID++; | |||
|
599 | ZSTDMT_waitForAllJobsCompleted(zcs); | |||
|
600 | ZSTDMT_releaseAllJobResources(zcs); | |||
|
601 | return ERROR(memory_allocation); | |||
|
602 | } | |||
|
603 | DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled); | |||
|
604 | zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize; | |||
|
605 | DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize)); | |||
|
606 | memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled); | |||
|
607 | DEBUGLOG(5, "new inBuff pre-filled"); | |||
|
608 | zcs->dictSize = newDictSize; | |||
|
609 | } else { | |||
|
610 | zcs->inBuff.buffer = g_nullBuffer; | |||
|
611 | zcs->inBuff.filled = 0; | |||
|
612 | zcs->dictSize = 0; | |||
|
613 | zcs->frameEnded = 1; | |||
|
614 | if (zcs->nextJobID == 0) | |||
|
615 | zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */ | |||
|
616 | } | |||
|
617 | ||||
|
618 | DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); | |||
|
619 | POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ | |||
|
620 | zcs->nextJobID++; | |||
|
621 | return 0; | |||
|
622 | } | |||
|
623 | ||||
|
624 | ||||
|
625 | /* ZSTDMT_flushNextJob() : | |||
|
626 | * output : will be updated with amount of data flushed . | |||
|
627 | * blockToFlush : if >0, the function will block and wait if there is no data available to flush . | |||
|
628 | * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */ | |||
|
629 | static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) | |||
|
630 | { | |||
|
631 | unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; | |||
|
632 | if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ | |||
|
633 | PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); | |||
|
634 | while (zcs->jobs[wJobID].jobCompleted==0) { | |||
|
635 | DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); | |||
|
636 | if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */ | |||
|
637 | pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */ | |||
|
638 | } | |||
|
639 | pthread_mutex_unlock(&zcs->jobCompleted_mutex); | |||
|
640 | /* compression job completed : output can be flushed */ | |||
|
641 | { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; | |||
|
642 | if (!job.jobScanned) { | |||
|
643 | if (ZSTD_isError(job.cSize)) { | |||
|
644 | DEBUGLOG(5, "compression error detected "); | |||
|
645 | ZSTDMT_waitForAllJobsCompleted(zcs); | |||
|
646 | ZSTDMT_releaseAllJobResources(zcs); | |||
|
647 | return job.cSize; | |||
|
648 | } | |||
|
649 | ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); | |||
|
650 | zcs->jobs[wJobID].cctx = NULL; | |||
|
651 | DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag); | |||
|
652 | if (zcs->params.fParams.checksumFlag) { | |||
|
653 | XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize); | |||
|
654 | if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */ | |||
|
655 | U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); | |||
|
656 | DEBUGLOG(4, "writing checksum : %08X \n", checksum); | |||
|
657 | MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum); | |||
|
658 | job.cSize += 4; | |||
|
659 | zcs->jobs[wJobID].cSize += 4; | |||
|
660 | } } | |||
|
661 | ZSTDMT_releaseBuffer(zcs->buffPool, job.src); | |||
|
662 | zcs->jobs[wJobID].srcStart = NULL; | |||
|
663 | zcs->jobs[wJobID].src = g_nullBuffer; | |||
|
664 | zcs->jobs[wJobID].jobScanned = 1; | |||
|
665 | } | |||
|
666 | { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); | |||
|
667 | DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); | |||
|
668 | memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); | |||
|
669 | output->pos += toWrite; | |||
|
670 | job.dstFlushed += toWrite; | |||
|
671 | } | |||
|
672 | if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */ | |||
|
673 | ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); | |||
|
674 | zcs->jobs[wJobID].dstBuff = g_nullBuffer; | |||
|
675 | zcs->jobs[wJobID].jobCompleted = 0; | |||
|
676 | zcs->doneJobID++; | |||
|
677 | } else { | |||
|
678 | zcs->jobs[wJobID].dstFlushed = job.dstFlushed; | |||
|
679 | } | |||
|
680 | /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ | |||
|
681 | if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); | |||
|
682 | if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */ | |||
|
683 | zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */ | |||
|
684 | return 0; /* everything flushed */ | |||
|
685 | } } | |||
|
686 | ||||
|
687 | ||||
|
688 | size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) | |||
|
689 | { | |||
|
690 | size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize; | |||
|
691 | if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */ | |||
|
692 | if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input); | |||
|
693 | ||||
|
694 | /* fill input buffer */ | |||
|
695 | { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); | |||
|
696 | memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad); | |||
|
697 | input->pos += toLoad; | |||
|
698 | zcs->inBuff.filled += toLoad; | |||
|
699 | } | |||
|
700 | ||||
|
701 | if ( (zcs->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */ | |||
|
702 | && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */ | |||
|
703 | CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) ); | |||
|
704 | } | |||
|
705 | ||||
|
706 | /* check for data to flush */ | |||
|
707 | CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to create new job due to saturation */ | |||
|
708 | ||||
|
709 | /* recommended next input size : fill current input buffer */ | |||
|
710 | return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ | |||
|
711 | } | |||
|
712 | ||||
|
713 | ||||
|
714 | static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) | |||
|
715 | { | |||
|
716 | size_t const srcSize = zcs->inBuff.filled - zcs->dictSize; | |||
|
717 | ||||
|
718 | if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize); | |||
|
719 | if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) | |||
|
720 | && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { | |||
|
721 | CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); | |||
|
722 | } | |||
|
723 | ||||
|
724 | /* check if there is any data available to flush */ | |||
|
725 | DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID); | |||
|
726 | return ZSTDMT_flushNextJob(zcs, output, 1); | |||
|
727 | } | |||
|
728 | ||||
|
729 | ||||
|
730 | size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) | |||
|
731 | { | |||
|
732 | if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output); | |||
|
733 | return ZSTDMT_flushStream_internal(zcs, output, 0); | |||
|
734 | } | |||
|
735 | ||||
|
736 | size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) | |||
|
737 | { | |||
|
738 | if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output); | |||
|
739 | return ZSTDMT_flushStream_internal(zcs, output, 1); | |||
|
740 | } |
@@ -0,0 +1,78 b'' | |||||
|
1 | /** | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under the BSD-style license found in the | |||
|
6 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
7 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
8 | */ | |||
|
9 | ||||
|
10 | #ifndef ZSTDMT_COMPRESS_H | |||
|
11 | #define ZSTDMT_COMPRESS_H | |||
|
12 | ||||
|
13 | #if defined (__cplusplus) | |||
|
14 | extern "C" { | |||
|
15 | #endif | |||
|
16 | ||||
|
17 | ||||
|
18 | /* Note : All prototypes defined in this file shall be considered experimental. | |||
|
19 | * There is no guarantee of API continuity (yet) on any of these prototypes */ | |||
|
20 | ||||
|
21 | /* === Dependencies === */ | |||
|
22 | #include <stddef.h> /* size_t */ | |||
|
23 | #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ | |||
|
24 | #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ | |||
|
25 | ||||
|
26 | ||||
|
27 | /* === Simple one-pass functions === */ | |||
|
28 | ||||
|
29 | typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; | |||
|
30 | ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads); | |||
|
31 | ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx); | |||
|
32 | ||||
|
33 | ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, | |||
|
34 | void* dst, size_t dstCapacity, | |||
|
35 | const void* src, size_t srcSize, | |||
|
36 | int compressionLevel); | |||
|
37 | ||||
|
38 | ||||
|
39 | /* === Streaming functions === */ | |||
|
40 | ||||
|
41 | ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); | |||
|
42 | ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ | |||
|
43 | ||||
|
44 | ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |||
|
45 | ||||
|
46 | ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |||
|
47 | ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |||
|
48 | ||||
|
49 | ||||
|
50 | /* === Advanced functions and parameters === */ | |||
|
51 | ||||
|
52 | #ifndef ZSTDMT_SECTION_SIZE_MIN | |||
|
53 | # define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ | |||
|
54 | #endif | |||
|
55 | ||||
|
56 | ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */ | |||
|
57 | ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ | |||
|
58 | ||||
|
59 | /* ZSDTMT_parameter : | |||
|
60 | * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ | |||
|
61 | typedef enum { | |||
|
62 | ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ | |||
|
63 | ZSTDMT_p_overlapSectionLog /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */ | |||
|
64 | } ZSDTMT_parameter; | |||
|
65 | ||||
|
66 | /* ZSTDMT_setMTCtxParameter() : | |||
|
67 | * allow setting individual parameters, one at a time, among a list of enums defined in ZSDTMT_parameter. | |||
|
68 | * The function must be called typically after ZSTDMT_createCCtx(). | |||
|
69 | * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. | |||
|
70 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ | |||
|
71 | ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value); | |||
|
72 | ||||
|
73 | ||||
|
74 | #if defined (__cplusplus) | |||
|
75 | } | |||
|
76 | #endif | |||
|
77 | ||||
|
78 | #endif /* ZSTDMT_COMPRESS_H */ |
This diff has been collapsed as it changes many lines, (1021 lines changed) Show them Hide them | |||||
@@ -0,0 +1,1021 b'' | |||||
|
1 | /** | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under the BSD-style license found in the | |||
|
6 | * LICENSE file in the root directory of this source tree. An additional grant | |||
|
7 | * of patent rights can be found in the PATENTS file in the same directory. | |||
|
8 | */ | |||
|
9 | ||||
|
10 | /*-************************************* | |||
|
11 | * Dependencies | |||
|
12 | ***************************************/ | |||
|
13 | #include <stdio.h> /* fprintf */ | |||
|
14 | #include <stdlib.h> /* malloc, free, qsort */ | |||
|
15 | #include <string.h> /* memset */ | |||
|
16 | #include <time.h> /* clock */ | |||
|
17 | ||||
|
18 | #include "mem.h" /* read */ | |||
|
19 | #include "pool.h" | |||
|
20 | #include "threading.h" | |||
|
21 | #include "zstd_internal.h" /* includes zstd.h */ | |||
|
22 | #ifndef ZDICT_STATIC_LINKING_ONLY | |||
|
23 | #define ZDICT_STATIC_LINKING_ONLY | |||
|
24 | #endif | |||
|
25 | #include "zdict.h" | |||
|
26 | ||||
|
27 | /*-************************************* | |||
|
28 | * Constants | |||
|
29 | ***************************************/ | |||
|
/* Exclusive upper bound on the total size of all samples:
 * (U32)-1 when size_t is 8 bytes wide, 1 GB otherwise. */
#define COVER_MAX_SAMPLES_SIZE                                                 \
  (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
31 | ||||
|
32 | /*-************************************* | |||
|
33 | * Console display | |||
|
34 | ***************************************/ | |||
|
/* Verbosity of the console output.
 * 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
static int g_displayLevel = 2;

/* Minimum delay between two progress updates, and time of the last update. */
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;

/* All macros below expand to a single `do { ... } while (0)` statement so
 * that they behave like a function call: they require a trailing semicolon
 * and are safe inside an unbraced if/else. */

/* Print a printf-style message on stderr and flush it immediately. */
#define DISPLAY(...)                                                           \
  do {                                                                         \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
  } while (0)

/* Print the message only when `displayLevel` is at least `l`. */
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
  do {                                                                         \
    if ((displayLevel) >= (l)) {                                               \
      DISPLAY(__VA_ARGS__);                                                    \
    }                                                                          \
  } while (0)
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

/* Rate-limited variant for progress output: prints when `displayLevel` is at
 * least `l` and either more than `refreshRate` clock ticks have elapsed since
 * the previous update or `displayLevel` is 4 or more. */
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
  do {                                                                         \
    if ((displayLevel) >= (l)) {                                               \
      if ((clock() - g_time > refreshRate) || ((displayLevel) >= 4)) {         \
        g_time = clock();                                                      \
        DISPLAY(__VA_ARGS__);                                                  \
        if ((displayLevel) >= 4) {                                             \
          fflush(stdout);                                                      \
        }                                                                      \
      }                                                                        \
    }                                                                          \
  } while (0)
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
59 | ||||
|
60 | /*-************************************* | |||
|
61 | * Hash table | |||
|
62 | *************************************** | |||
|
63 | * A small specialized hash map for storing activeDmers. | |||
|
64 | * The map does not resize, so if it becomes full it will loop forever. | |||
|
65 | * Thus, the map must be large enough to store every value. | |||
|
66 | * The map implements linear probing and keeps its load less than 0.5. | |||
|
67 | */ | |||
|
68 | ||||
|
69 | #define MAP_EMPTY_VALUE ((U32)-1) | |||
|
70 | typedef struct COVER_map_pair_t_s { | |||
|
71 | U32 key; | |||
|
72 | U32 value; | |||
|
73 | } COVER_map_pair_t; | |||
|
74 | ||||
|
75 | typedef struct COVER_map_s { | |||
|
76 | COVER_map_pair_t *data; | |||
|
77 | U32 sizeLog; | |||
|
78 | U32 size; | |||
|
79 | U32 sizeMask; | |||
|
80 | } COVER_map_t; | |||
|
81 | ||||
|
82 | /** | |||
|
83 | * Clear the map. | |||
|
84 | */ | |||
|
85 | static void COVER_map_clear(COVER_map_t *map) { | |||
|
86 | memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); | |||
|
87 | } | |||
|
88 | ||||
|
89 | /** | |||
|
90 | * Initializes a map of the given size. | |||
|
91 | * Returns 1 on success and 0 on failure. | |||
|
92 | * The map must be destroyed with COVER_map_destroy(). | |||
|
93 | * The map is only guaranteed to be large enough to hold size elements. | |||
|
94 | */ | |||
|
95 | static int COVER_map_init(COVER_map_t *map, U32 size) { | |||
|
96 | map->sizeLog = ZSTD_highbit32(size) + 2; | |||
|
97 | map->size = (U32)1 << map->sizeLog; | |||
|
98 | map->sizeMask = map->size - 1; | |||
|
99 | map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); | |||
|
100 | if (!map->data) { | |||
|
101 | map->sizeLog = 0; | |||
|
102 | map->size = 0; | |||
|
103 | return 0; | |||
|
104 | } | |||
|
105 | COVER_map_clear(map); | |||
|
106 | return 1; | |||
|
107 | } | |||
|
108 | ||||
|
109 | /** | |||
|
110 | * Internal hash function | |||
|
111 | */ | |||
|
112 | static const U32 prime4bytes = 2654435761U; | |||
|
113 | static U32 COVER_map_hash(COVER_map_t *map, U32 key) { | |||
|
114 | return (key * prime4bytes) >> (32 - map->sizeLog); | |||
|
115 | } | |||
|
116 | ||||
|
117 | /** | |||
|
118 | * Helper function that returns the index that a key should be placed into. | |||
|
119 | */ | |||
|
120 | static U32 COVER_map_index(COVER_map_t *map, U32 key) { | |||
|
121 | const U32 hash = COVER_map_hash(map, key); | |||
|
122 | U32 i; | |||
|
123 | for (i = hash;; i = (i + 1) & map->sizeMask) { | |||
|
124 | COVER_map_pair_t *pos = &map->data[i]; | |||
|
125 | if (pos->value == MAP_EMPTY_VALUE) { | |||
|
126 | return i; | |||
|
127 | } | |||
|
128 | if (pos->key == key) { | |||
|
129 | return i; | |||
|
130 | } | |||
|
131 | } | |||
|
132 | } | |||
|
133 | ||||
|
134 | /** | |||
|
135 | * Returns the pointer to the value for key. | |||
|
136 | * If key is not in the map, it is inserted and the value is set to 0. | |||
|
137 | * The map must not be full. | |||
|
138 | */ | |||
|
139 | static U32 *COVER_map_at(COVER_map_t *map, U32 key) { | |||
|
140 | COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; | |||
|
141 | if (pos->value == MAP_EMPTY_VALUE) { | |||
|
142 | pos->key = key; | |||
|
143 | pos->value = 0; | |||
|
144 | } | |||
|
145 | return &pos->value; | |||
|
146 | } | |||
|
147 | ||||
|
148 | /** | |||
|
149 | * Deletes key from the map if present. | |||
|
150 | */ | |||
|
151 | static void COVER_map_remove(COVER_map_t *map, U32 key) { | |||
|
152 | U32 i = COVER_map_index(map, key); | |||
|
153 | COVER_map_pair_t *del = &map->data[i]; | |||
|
154 | U32 shift = 1; | |||
|
155 | if (del->value == MAP_EMPTY_VALUE) { | |||
|
156 | return; | |||
|
157 | } | |||
|
158 | for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { | |||
|
159 | COVER_map_pair_t *const pos = &map->data[i]; | |||
|
160 | /* If the position is empty we are done */ | |||
|
161 | if (pos->value == MAP_EMPTY_VALUE) { | |||
|
162 | del->value = MAP_EMPTY_VALUE; | |||
|
163 | return; | |||
|
164 | } | |||
|
165 | /* If pos can be moved to del do so */ | |||
|
166 | if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { | |||
|
167 | del->key = pos->key; | |||
|
168 | del->value = pos->value; | |||
|
169 | del = pos; | |||
|
170 | shift = 1; | |||
|
171 | } else { | |||
|
172 | ++shift; | |||
|
173 | } | |||
|
174 | } | |||
|
175 | } | |||
|
176 | ||||
|
177 | /** | |||
|
178 | * Destroyes a map that is inited with COVER_map_init(). | |||
|
179 | */ | |||
|
180 | static void COVER_map_destroy(COVER_map_t *map) { | |||
|
181 | if (map->data) { | |||
|
182 | free(map->data); | |||
|
183 | } | |||
|
184 | map->data = NULL; | |||
|
185 | map->size = 0; | |||
|
186 | } | |||
|
187 | ||||
|
188 | /*-************************************* | |||
|
189 | * Context | |||
|
190 | ***************************************/ | |||
|
191 | ||||
|
192 | typedef struct { | |||
|
193 | const BYTE *samples; | |||
|
194 | size_t *offsets; | |||
|
195 | const size_t *samplesSizes; | |||
|
196 | size_t nbSamples; | |||
|
197 | U32 *suffix; | |||
|
198 | size_t suffixSize; | |||
|
199 | U32 *freqs; | |||
|
200 | U32 *dmerAt; | |||
|
201 | unsigned d; | |||
|
202 | } COVER_ctx_t; | |||
|
203 | ||||
|
204 | /* We need a global context for qsort... */ | |||
|
205 | static COVER_ctx_t *g_ctx = NULL; | |||
|
206 | ||||
|
207 | /*-************************************* | |||
|
208 | * Helper functions | |||
|
209 | ***************************************/ | |||
|
210 | ||||
|
/**
 * Adds up the first `nbSamples` entries of `samplesSizes`.
 * Returns 0 when `nbSamples` is 0.
 */
static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  size_t total = 0;
  unsigned idx = 0;
  while (idx < nbSamples) {
    total += samplesSizes[idx];
    ++idx;
  }
  return total;
}
|
222 | ||||
|
223 | /** | |||
|
224 | * Returns -1 if the dmer at lp is less than the dmer at rp. | |||
|
225 | * Return 0 if the dmers at lp and rp are equal. | |||
|
226 | * Returns 1 if the dmer at lp is greater than the dmer at rp. | |||
|
227 | */ | |||
|
228 | static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { | |||
|
229 | const U32 lhs = *(const U32 *)lp; | |||
|
230 | const U32 rhs = *(const U32 *)rp; | |||
|
231 | return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); | |||
|
232 | } | |||
|
233 | ||||
|
234 | /** | |||
|
235 | * Same as COVER_cmp() except ties are broken by pointer value | |||
|
236 | * NOTE: g_ctx must be set to call this function. A global is required because | |||
|
237 | * qsort doesn't take an opaque pointer. | |||
|
238 | */ | |||
|
239 | static int COVER_strict_cmp(const void *lp, const void *rp) { | |||
|
240 | int result = COVER_cmp(g_ctx, lp, rp); | |||
|
241 | if (result == 0) { | |||
|
242 | result = lp < rp ? -1 : 1; | |||
|
243 | } | |||
|
244 | return result; | |||
|
245 | } | |||
|
246 | ||||
|
/**
 * Binary search over the sorted range [first, last).
 * Returns a pointer to the first element that is not less than `value`,
 * or `last` when every element is less than `value`.
 */
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
                                       size_t value) {
  size_t remaining = last - first;
  while (remaining != 0) {
    const size_t half = remaining / 2;
    const size_t *const mid = first + half;
    if (*mid < value) {
      /* Answer lies strictly to the right of mid. */
      first = mid + 1;
      remaining -= half + 1;
    } else {
      /* mid itself may be the answer; keep the left half. */
      remaining = half;
    }
  }
  return first;
}
|
267 | ||||
|
268 | /** | |||
|
269 | * Generic groupBy function. | |||
|
270 | * Groups an array sorted by cmp into groups with equivalent values. | |||
|
271 | * Calls grp for each group. | |||
|
272 | */ | |||
|
273 | static void | |||
|
274 | COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, | |||
|
275 | int (*cmp)(COVER_ctx_t *, const void *, const void *), | |||
|
276 | void (*grp)(COVER_ctx_t *, const void *, const void *)) { | |||
|
277 | const BYTE *ptr = (const BYTE *)data; | |||
|
278 | size_t num = 0; | |||
|
279 | while (num < count) { | |||
|
280 | const BYTE *grpEnd = ptr + size; | |||
|
281 | ++num; | |||
|
282 | while (num < count && cmp(ctx, ptr, grpEnd) == 0) { | |||
|
283 | grpEnd += size; | |||
|
284 | ++num; | |||
|
285 | } | |||
|
286 | grp(ctx, ptr, grpEnd); | |||
|
287 | ptr = grpEnd; | |||
|
288 | } | |||
|
289 | } | |||
|
290 | ||||
|
291 | /*-************************************* | |||
|
292 | * Cover functions | |||
|
293 | ***************************************/ | |||
|
294 | ||||
|
295 | /** | |||
|
296 | * Called on each group of positions with the same dmer. | |||
|
297 | * Counts the frequency of each dmer and saves it in the suffix array. | |||
|
298 | * Fills `ctx->dmerAt`. | |||
|
299 | */ | |||
|
300 | static void COVER_group(COVER_ctx_t *ctx, const void *group, | |||
|
301 | const void *groupEnd) { | |||
|
302 | /* The group consists of all the positions with the same first d bytes. */ | |||
|
303 | const U32 *grpPtr = (const U32 *)group; | |||
|
304 | const U32 *grpEnd = (const U32 *)groupEnd; | |||
|
305 | /* The dmerId is how we will reference this dmer. | |||
|
306 | * This allows us to map the whole dmer space to a much smaller space, the | |||
|
307 | * size of the suffix array. | |||
|
308 | */ | |||
|
309 | const U32 dmerId = (U32)(grpPtr - ctx->suffix); | |||
|
310 | /* Count the number of samples this dmer shows up in */ | |||
|
311 | U32 freq = 0; | |||
|
312 | /* Details */ | |||
|
313 | const size_t *curOffsetPtr = ctx->offsets; | |||
|
314 | const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; | |||
|
315 | /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a | |||
|
316 | * different sample than the last. | |||
|
317 | */ | |||
|
318 | size_t curSampleEnd = ctx->offsets[0]; | |||
|
319 | for (; grpPtr != grpEnd; ++grpPtr) { | |||
|
320 | /* Save the dmerId for this position so we can get back to it. */ | |||
|
321 | ctx->dmerAt[*grpPtr] = dmerId; | |||
|
322 | /* Dictionaries only help for the first reference to the dmer. | |||
|
323 | * After that zstd can reference the match from the previous reference. | |||
|
324 | * So only count each dmer once for each sample it is in. | |||
|
325 | */ | |||
|
326 | if (*grpPtr < curSampleEnd) { | |||
|
327 | continue; | |||
|
328 | } | |||
|
329 | freq += 1; | |||
|
330 | /* Binary search to find the end of the sample *grpPtr is in. | |||
|
331 | * In the common case that grpPtr + 1 == grpEnd we can skip the binary | |||
|
332 | * search because the loop is over. | |||
|
333 | */ | |||
|
334 | if (grpPtr + 1 != grpEnd) { | |||
|
335 | const size_t *sampleEndPtr = | |||
|
336 | COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); | |||
|
337 | curSampleEnd = *sampleEndPtr; | |||
|
338 | curOffsetPtr = sampleEndPtr + 1; | |||
|
339 | } | |||
|
340 | } | |||
|
341 | /* At this point we are never going to look at this segment of the suffix | |||
|
342 | * array again. We take advantage of this fact to save memory. | |||
|
343 | * We store the frequency of the dmer in the first position of the group, | |||
|
344 | * which is dmerId. | |||
|
345 | */ | |||
|
346 | ctx->suffix[dmerId] = freq; | |||
|
347 | } | |||
|
348 | ||||
|
349 | /** | |||
|
350 | * A segment is a range in the source as well as the score of the segment. | |||
|
351 | */ | |||
|
352 | typedef struct { | |||
|
353 | U32 begin; | |||
|
354 | U32 end; | |||
|
355 | double score; | |||
|
356 | } COVER_segment_t; | |||
|
357 | ||||
|
358 | /** | |||
|
359 | * Selects the best segment in an epoch. | |||
|
360 | * Segments of are scored according to the function: | |||
|
361 | * | |||
|
362 | * Let F(d) be the frequency of dmer d. | |||
|
363 | * Let S_i be the dmer at position i of segment S which has length k. | |||
|
364 | * | |||
|
365 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) | |||
|
366 | * | |||
|
367 | * Once the dmer d is in the dictionay we set F(d) = 0. | |||
|
368 | */ | |||
|
369 | static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, | |||
|
370 | COVER_map_t *activeDmers, U32 begin, | |||
|
371 | U32 end, COVER_params_t parameters) { | |||
|
372 | /* Constants */ | |||
|
373 | const U32 k = parameters.k; | |||
|
374 | const U32 d = parameters.d; | |||
|
375 | const U32 dmersInK = k - d + 1; | |||
|
376 | /* Try each segment (activeSegment) and save the best (bestSegment) */ | |||
|
377 | COVER_segment_t bestSegment = {0, 0, 0}; | |||
|
378 | COVER_segment_t activeSegment; | |||
|
379 | /* Reset the activeDmers in the segment */ | |||
|
380 | COVER_map_clear(activeDmers); | |||
|
381 | /* The activeSegment starts at the beginning of the epoch. */ | |||
|
382 | activeSegment.begin = begin; | |||
|
383 | activeSegment.end = begin; | |||
|
384 | activeSegment.score = 0; | |||
|
385 | /* Slide the activeSegment through the whole epoch. | |||
|
386 | * Save the best segment in bestSegment. | |||
|
387 | */ | |||
|
388 | while (activeSegment.end < end) { | |||
|
389 | /* The dmerId for the dmer at the next position */ | |||
|
390 | U32 newDmer = ctx->dmerAt[activeSegment.end]; | |||
|
391 | /* The entry in activeDmers for this dmerId */ | |||
|
392 | U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); | |||
|
393 | /* If the dmer isn't already present in the segment add its score. */ | |||
|
394 | if (*newDmerOcc == 0) { | |||
|
395 | /* The paper suggest using the L-0.5 norm, but experiments show that it | |||
|
396 | * doesn't help. | |||
|
397 | */ | |||
|
398 | activeSegment.score += freqs[newDmer]; | |||
|
399 | } | |||
|
400 | /* Add the dmer to the segment */ | |||
|
401 | activeSegment.end += 1; | |||
|
402 | *newDmerOcc += 1; | |||
|
403 | ||||
|
404 | /* If the window is now too large, drop the first position */ | |||
|
405 | if (activeSegment.end - activeSegment.begin == dmersInK + 1) { | |||
|
406 | U32 delDmer = ctx->dmerAt[activeSegment.begin]; | |||
|
407 | U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); | |||
|
408 | activeSegment.begin += 1; | |||
|
409 | *delDmerOcc -= 1; | |||
|
410 | /* If this is the last occurence of the dmer, subtract its score */ | |||
|
411 | if (*delDmerOcc == 0) { | |||
|
412 | COVER_map_remove(activeDmers, delDmer); | |||
|
413 | activeSegment.score -= freqs[delDmer]; | |||
|
414 | } | |||
|
415 | } | |||
|
416 | ||||
|
417 | /* If this segment is the best so far save it */ | |||
|
418 | if (activeSegment.score > bestSegment.score) { | |||
|
419 | bestSegment = activeSegment; | |||
|
420 | } | |||
|
421 | } | |||
|
422 | { | |||
|
423 | /* Trim off the zero frequency head and tail from the segment. */ | |||
|
424 | U32 newBegin = bestSegment.end; | |||
|
425 | U32 newEnd = bestSegment.begin; | |||
|
426 | U32 pos; | |||
|
427 | for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { | |||
|
428 | U32 freq = freqs[ctx->dmerAt[pos]]; | |||
|
429 | if (freq != 0) { | |||
|
430 | newBegin = MIN(newBegin, pos); | |||
|
431 | newEnd = pos + 1; | |||
|
432 | } | |||
|
433 | } | |||
|
434 | bestSegment.begin = newBegin; | |||
|
435 | bestSegment.end = newEnd; | |||
|
436 | } | |||
|
437 | { | |||
|
438 | /* Zero out the frequency of each dmer covered by the chosen segment. */ | |||
|
439 | U32 pos; | |||
|
440 | for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { | |||
|
441 | freqs[ctx->dmerAt[pos]] = 0; | |||
|
442 | } | |||
|
443 | } | |||
|
444 | return bestSegment; | |||
|
445 | } | |||
|
446 | ||||
|
447 | /** | |||
|
448 | * Check the validity of the parameters. | |||
|
449 | * Returns non-zero if the parameters are valid and 0 otherwise. | |||
|
450 | */ | |||
|
451 | static int COVER_checkParameters(COVER_params_t parameters) { | |||
|
452 | /* k and d are required parameters */ | |||
|
453 | if (parameters.d == 0 || parameters.k == 0) { | |||
|
454 | return 0; | |||
|
455 | } | |||
|
456 | /* d <= k */ | |||
|
457 | if (parameters.d > parameters.k) { | |||
|
458 | return 0; | |||
|
459 | } | |||
|
460 | return 1; | |||
|
461 | } | |||
|
462 | ||||
|
463 | /** | |||
|
464 | * Clean up a context initialized with `COVER_ctx_init()`. | |||
|
465 | */ | |||
|
466 | static void COVER_ctx_destroy(COVER_ctx_t *ctx) { | |||
|
467 | if (!ctx) { | |||
|
468 | return; | |||
|
469 | } | |||
|
470 | if (ctx->suffix) { | |||
|
471 | free(ctx->suffix); | |||
|
472 | ctx->suffix = NULL; | |||
|
473 | } | |||
|
474 | if (ctx->freqs) { | |||
|
475 | free(ctx->freqs); | |||
|
476 | ctx->freqs = NULL; | |||
|
477 | } | |||
|
478 | if (ctx->dmerAt) { | |||
|
479 | free(ctx->dmerAt); | |||
|
480 | ctx->dmerAt = NULL; | |||
|
481 | } | |||
|
482 | if (ctx->offsets) { | |||
|
483 | free(ctx->offsets); | |||
|
484 | ctx->offsets = NULL; | |||
|
485 | } | |||
|
486 | } | |||
|
487 | ||||
|
488 | /** | |||
|
489 | * Prepare a context for dictionary building. | |||
|
490 | * The context is only dependent on the parameter `d` and can used multiple | |||
|
491 | * times. | |||
|
492 | * Returns 1 on success or zero on error. | |||
|
493 | * The context must be destroyed with `COVER_ctx_destroy()`. | |||
|
494 | */ | |||
|
495 | static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, | |||
|
496 | const size_t *samplesSizes, unsigned nbSamples, | |||
|
497 | unsigned d) { | |||
|
498 | const BYTE *const samples = (const BYTE *)samplesBuffer; | |||
|
499 | const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); | |||
|
500 | /* Checks */ | |||
|
501 | if (totalSamplesSize < d || | |||
|
502 | totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { | |||
|
503 | DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n", | |||
|
504 | (COVER_MAX_SAMPLES_SIZE >> 20)); | |||
|
505 | return 0; | |||
|
506 | } | |||
|
507 | /* Zero the context */ | |||
|
508 | memset(ctx, 0, sizeof(*ctx)); | |||
|
509 | DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples, | |||
|
510 | (U32)totalSamplesSize); | |||
|
511 | ctx->samples = samples; | |||
|
512 | ctx->samplesSizes = samplesSizes; | |||
|
513 | ctx->nbSamples = nbSamples; | |||
|
514 | /* Partial suffix array */ | |||
|
515 | ctx->suffixSize = totalSamplesSize - d + 1; | |||
|
516 | ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | |||
|
517 | /* Maps index to the dmerID */ | |||
|
518 | ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | |||
|
519 | /* The offsets of each file */ | |||
|
520 | ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); | |||
|
521 | if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { | |||
|
522 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); | |||
|
523 | COVER_ctx_destroy(ctx); | |||
|
524 | return 0; | |||
|
525 | } | |||
|
526 | ctx->freqs = NULL; | |||
|
527 | ctx->d = d; | |||
|
528 | ||||
|
529 | /* Fill offsets from the samlesSizes */ | |||
|
530 | { | |||
|
531 | U32 i; | |||
|
532 | ctx->offsets[0] = 0; | |||
|
533 | for (i = 1; i <= nbSamples; ++i) { | |||
|
534 | ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; | |||
|
535 | } | |||
|
536 | } | |||
|
537 | DISPLAYLEVEL(2, "Constructing partial suffix array\n"); | |||
|
538 | { | |||
|
539 | /* suffix is a partial suffix array. | |||
|
540 | * It only sorts suffixes by their first parameters.d bytes. | |||
|
541 | * The sort is stable, so each dmer group is sorted by position in input. | |||
|
542 | */ | |||
|
543 | U32 i; | |||
|
544 | for (i = 0; i < ctx->suffixSize; ++i) { | |||
|
545 | ctx->suffix[i] = i; | |||
|
546 | } | |||
|
547 | /* qsort doesn't take an opaque pointer, so pass as a global */ | |||
|
548 | g_ctx = ctx; | |||
|
549 | qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp); | |||
|
550 | } | |||
|
551 | DISPLAYLEVEL(2, "Computing frequencies\n"); | |||
|
552 | /* For each dmer group (group of positions with the same first d bytes): | |||
|
553 | * 1. For each position we set dmerAt[position] = dmerID. The dmerID is | |||
|
554 | * (groupBeginPtr - suffix). This allows us to go from position to | |||
|
555 | * dmerID so we can look up values in freq. | |||
|
556 | * 2. We calculate how many samples the dmer occurs in and save it in | |||
|
557 | * freqs[dmerId]. | |||
|
558 | */ | |||
|
559 | COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp, | |||
|
560 | &COVER_group); | |||
|
561 | ctx->freqs = ctx->suffix; | |||
|
562 | ctx->suffix = NULL; | |||
|
563 | return 1; | |||
|
564 | } | |||
|
565 | ||||
|
566 | /** | |||
|
567 | * Given the prepared context build the dictionary. | |||
|
568 | */ | |||
|
569 | static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, | |||
|
570 | COVER_map_t *activeDmers, void *dictBuffer, | |||
|
571 | size_t dictBufferCapacity, | |||
|
572 | COVER_params_t parameters) { | |||
|
573 | BYTE *const dict = (BYTE *)dictBuffer; | |||
|
574 | size_t tail = dictBufferCapacity; | |||
|
575 | /* Divide the data up into epochs of equal size. | |||
|
576 | * We will select at least one segment from each epoch. | |||
|
577 | */ | |||
|
578 | const U32 epochs = (U32)(dictBufferCapacity / parameters.k); | |||
|
579 | const U32 epochSize = (U32)(ctx->suffixSize / epochs); | |||
|
580 | size_t epoch; | |||
|
581 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs, | |||
|
582 | epochSize); | |||
|
583 | /* Loop through the epochs until there are no more segments or the dictionary | |||
|
584 | * is full. | |||
|
585 | */ | |||
|
586 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { | |||
|
587 | const U32 epochBegin = (U32)(epoch * epochSize); | |||
|
588 | const U32 epochEnd = epochBegin + epochSize; | |||
|
589 | size_t segmentSize; | |||
|
590 | /* Select a segment */ | |||
|
591 | COVER_segment_t segment = COVER_selectSegment( | |||
|
592 | ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); | |||
|
593 | /* Trim the segment if necessary and if it is empty then we are done */ | |||
|
594 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); | |||
|
595 | if (segmentSize == 0) { | |||
|
596 | break; | |||
|
597 | } | |||
|
598 | /* We fill the dictionary from the back to allow the best segments to be | |||
|
599 | * referenced with the smallest offsets. | |||
|
600 | */ | |||
|
601 | tail -= segmentSize; | |||
|
602 | memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); | |||
|
603 | DISPLAYUPDATE( | |||
|
604 | 2, "\r%u%% ", | |||
|
605 | (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); | |||
|
606 | } | |||
|
607 | DISPLAYLEVEL(2, "\r%79s\r", ""); | |||
|
608 | return tail; | |||
|
609 | } | |||
|
610 | ||||
|
611 | /** | |||
|
612 | * Translate from COVER_params_t to ZDICT_params_t required for finalizing the | |||
|
613 | * dictionary. | |||
|
614 | */ | |||
|
615 | static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) { | |||
|
616 | ZDICT_params_t zdictParams; | |||
|
617 | memset(&zdictParams, 0, sizeof(zdictParams)); | |||
|
618 | zdictParams.notificationLevel = 1; | |||
|
619 | zdictParams.dictID = parameters.dictID; | |||
|
620 | zdictParams.compressionLevel = parameters.compressionLevel; | |||
|
621 | return zdictParams; | |||
|
622 | } | |||
|
623 | ||||
|
624 | /** | |||
|
625 | * Constructs a dictionary using a heuristic based on the following paper: | |||
|
626 | * | |||
|
627 | * Liao, Petri, Moffat, Wirth | |||
|
628 | * Effective Construction of Relative Lempel-Ziv Dictionaries | |||
|
629 | * Published in WWW 2016. | |||
|
630 | */ | |||
|
631 | ZDICTLIB_API size_t COVER_trainFromBuffer( | |||
|
632 | void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | |||
|
633 | const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) { | |||
|
634 | BYTE *const dict = (BYTE *)dictBuffer; | |||
|
635 | COVER_ctx_t ctx; | |||
|
636 | COVER_map_t activeDmers; | |||
|
637 | /* Checks */ | |||
|
638 | if (!COVER_checkParameters(parameters)) { | |||
|
639 | DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | |||
|
640 | return ERROR(GENERIC); | |||
|
641 | } | |||
|
642 | if (nbSamples == 0) { | |||
|
643 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | |||
|
644 | return ERROR(GENERIC); | |||
|
645 | } | |||
|
646 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | |||
|
647 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | |||
|
648 | ZDICT_DICTSIZE_MIN); | |||
|
649 | return ERROR(dstSize_tooSmall); | |||
|
650 | } | |||
|
651 | /* Initialize global data */ | |||
|
652 | g_displayLevel = parameters.notificationLevel; | |||
|
653 | /* Initialize context and activeDmers */ | |||
|
654 | if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |||
|
655 | parameters.d)) { | |||
|
656 | return ERROR(GENERIC); | |||
|
657 | } | |||
|
658 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | |||
|
659 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | |||
|
660 | COVER_ctx_destroy(&ctx); | |||
|
661 | return ERROR(GENERIC); | |||
|
662 | } | |||
|
663 | ||||
|
664 | DISPLAYLEVEL(2, "Building dictionary\n"); | |||
|
665 | { | |||
|
666 | const size_t tail = | |||
|
667 | COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, | |||
|
668 | dictBufferCapacity, parameters); | |||
|
669 | ZDICT_params_t zdictParams = COVER_translateParams(parameters); | |||
|
670 | const size_t dictionarySize = ZDICT_finalizeDictionary( | |||
|
671 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | |||
|
672 | samplesBuffer, samplesSizes, nbSamples, zdictParams); | |||
|
673 | if (!ZSTD_isError(dictionarySize)) { | |||
|
674 | DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", | |||
|
675 | (U32)dictionarySize); | |||
|
676 | } | |||
|
677 | COVER_ctx_destroy(&ctx); | |||
|
678 | COVER_map_destroy(&activeDmers); | |||
|
679 | return dictionarySize; | |||
|
680 | } | |||
|
681 | } | |||
|
682 | ||||
|
683 | /** | |||
|
684 | * COVER_best_t is used for two purposes: | |||
|
685 | * 1. Synchronizing threads. | |||
|
686 | * 2. Saving the best parameters and dictionary. | |||
|
687 | * | |||
|
688 | * All of the methods except COVER_best_init() are thread safe if zstd is | |||
|
689 | * compiled with multithreaded support. | |||
|
690 | */ | |||
|
691 | typedef struct COVER_best_s { | |||
|
692 | pthread_mutex_t mutex; | |||
|
693 | pthread_cond_t cond; | |||
|
694 | size_t liveJobs; | |||
|
695 | void *dict; | |||
|
696 | size_t dictSize; | |||
|
697 | COVER_params_t parameters; | |||
|
698 | size_t compressedSize; | |||
|
699 | } COVER_best_t; | |||
|
700 | ||||
|
701 | /** | |||
|
702 | * Initialize the `COVER_best_t`. | |||
|
703 | */ | |||
|
704 | static void COVER_best_init(COVER_best_t *best) { | |||
|
705 | if (!best) { | |||
|
706 | return; | |||
|
707 | } | |||
|
708 | pthread_mutex_init(&best->mutex, NULL); | |||
|
709 | pthread_cond_init(&best->cond, NULL); | |||
|
710 | best->liveJobs = 0; | |||
|
711 | best->dict = NULL; | |||
|
712 | best->dictSize = 0; | |||
|
713 | best->compressedSize = (size_t)-1; | |||
|
714 | memset(&best->parameters, 0, sizeof(best->parameters)); | |||
|
715 | } | |||
|
716 | ||||
|
717 | /** | |||
|
718 | * Wait until liveJobs == 0. | |||
|
719 | */ | |||
|
720 | static void COVER_best_wait(COVER_best_t *best) { | |||
|
721 | if (!best) { | |||
|
722 | return; | |||
|
723 | } | |||
|
724 | pthread_mutex_lock(&best->mutex); | |||
|
725 | while (best->liveJobs != 0) { | |||
|
726 | pthread_cond_wait(&best->cond, &best->mutex); | |||
|
727 | } | |||
|
728 | pthread_mutex_unlock(&best->mutex); | |||
|
729 | } | |||
|
730 | ||||
|
731 | /** | |||
|
732 | * Call COVER_best_wait() and then destroy the COVER_best_t. | |||
|
733 | */ | |||
|
734 | static void COVER_best_destroy(COVER_best_t *best) { | |||
|
735 | if (!best) { | |||
|
736 | return; | |||
|
737 | } | |||
|
738 | COVER_best_wait(best); | |||
|
739 | if (best->dict) { | |||
|
740 | free(best->dict); | |||
|
741 | } | |||
|
742 | pthread_mutex_destroy(&best->mutex); | |||
|
743 | pthread_cond_destroy(&best->cond); | |||
|
744 | } | |||
|
745 | ||||
|
746 | /** | |||
|
747 | * Called when a thread is about to be launched. | |||
|
748 | * Increments liveJobs. | |||
|
749 | */ | |||
|
750 | static void COVER_best_start(COVER_best_t *best) { | |||
|
751 | if (!best) { | |||
|
752 | return; | |||
|
753 | } | |||
|
754 | pthread_mutex_lock(&best->mutex); | |||
|
755 | ++best->liveJobs; | |||
|
756 | pthread_mutex_unlock(&best->mutex); | |||
|
757 | } | |||
|
758 | ||||
|
759 | /** | |||
|
760 | * Called when a thread finishes executing, both on error or success. | |||
|
761 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | |||
|
762 | * If this dictionary is the best so far save it and its parameters. | |||
|
763 | */ | |||
|
764 | static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, | |||
|
765 | COVER_params_t parameters, void *dict, | |||
|
766 | size_t dictSize) { | |||
|
767 | if (!best) { | |||
|
768 | return; | |||
|
769 | } | |||
|
770 | { | |||
|
771 | size_t liveJobs; | |||
|
772 | pthread_mutex_lock(&best->mutex); | |||
|
773 | --best->liveJobs; | |||
|
774 | liveJobs = best->liveJobs; | |||
|
775 | /* If the new dictionary is better */ | |||
|
776 | if (compressedSize < best->compressedSize) { | |||
|
777 | /* Allocate space if necessary */ | |||
|
778 | if (!best->dict || best->dictSize < dictSize) { | |||
|
779 | if (best->dict) { | |||
|
780 | free(best->dict); | |||
|
781 | } | |||
|
782 | best->dict = malloc(dictSize); | |||
|
783 | if (!best->dict) { | |||
|
784 | best->compressedSize = ERROR(GENERIC); | |||
|
785 | best->dictSize = 0; | |||
|
786 | return; | |||
|
787 | } | |||
|
788 | } | |||
|
789 | /* Save the dictionary, parameters, and size */ | |||
|
790 | memcpy(best->dict, dict, dictSize); | |||
|
791 | best->dictSize = dictSize; | |||
|
792 | best->parameters = parameters; | |||
|
793 | best->compressedSize = compressedSize; | |||
|
794 | } | |||
|
795 | pthread_mutex_unlock(&best->mutex); | |||
|
796 | if (liveJobs == 0) { | |||
|
797 | pthread_cond_broadcast(&best->cond); | |||
|
798 | } | |||
|
799 | } | |||
|
800 | } | |||
|
801 | ||||
|
/**
 * Parameters for COVER_tryParameters().
 * One instance is heap-allocated per parameter set and handed to
 * COVER_tryParameters(), which frees it.
 */
typedef struct COVER_tryParameters_data_s {
  const COVER_ctx_t *ctx;    /* Training context; only read by the job */
  COVER_best_t *best;        /* Tracker the job reports its result to */
  size_t dictBufferCapacity; /* Size of the dictionary buffer the job allocates */
  COVER_params_t parameters; /* The parameter set to evaluate */
} COVER_tryParameters_data_t;
|
811 | ||||
|
812 | /** | |||
|
813 | * Tries a set of parameters and upates the COVER_best_t with the results. | |||
|
814 | * This function is thread safe if zstd is compiled with multithreaded support. | |||
|
815 | * It takes its parameters as an *OWNING* opaque pointer to support threading. | |||
|
816 | */ | |||
|
817 | static void COVER_tryParameters(void *opaque) { | |||
|
818 | /* Save parameters as local variables */ | |||
|
819 | COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; | |||
|
820 | const COVER_ctx_t *const ctx = data->ctx; | |||
|
821 | const COVER_params_t parameters = data->parameters; | |||
|
822 | size_t dictBufferCapacity = data->dictBufferCapacity; | |||
|
823 | size_t totalCompressedSize = ERROR(GENERIC); | |||
|
824 | /* Allocate space for hash table, dict, and freqs */ | |||
|
825 | COVER_map_t activeDmers; | |||
|
826 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); | |||
|
827 | U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | |||
|
828 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | |||
|
829 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | |||
|
830 | goto _cleanup; | |||
|
831 | } | |||
|
832 | if (!dict || !freqs) { | |||
|
833 | DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); | |||
|
834 | goto _cleanup; | |||
|
835 | } | |||
|
836 | /* Copy the frequencies because we need to modify them */ | |||
|
837 | memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); | |||
|
838 | /* Build the dictionary */ | |||
|
839 | { | |||
|
840 | const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, | |||
|
841 | dictBufferCapacity, parameters); | |||
|
842 | const ZDICT_params_t zdictParams = COVER_translateParams(parameters); | |||
|
843 | dictBufferCapacity = ZDICT_finalizeDictionary( | |||
|
844 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | |||
|
845 | ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams); | |||
|
846 | if (ZDICT_isError(dictBufferCapacity)) { | |||
|
847 | DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); | |||
|
848 | goto _cleanup; | |||
|
849 | } | |||
|
850 | } | |||
|
851 | /* Check total compressed size */ | |||
|
852 | { | |||
|
853 | /* Pointers */ | |||
|
854 | ZSTD_CCtx *cctx; | |||
|
855 | ZSTD_CDict *cdict; | |||
|
856 | void *dst; | |||
|
857 | /* Local variables */ | |||
|
858 | size_t dstCapacity; | |||
|
859 | size_t i; | |||
|
860 | /* Allocate dst with enough space to compress the maximum sized sample */ | |||
|
861 | { | |||
|
862 | size_t maxSampleSize = 0; | |||
|
863 | for (i = 0; i < ctx->nbSamples; ++i) { | |||
|
864 | maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize); | |||
|
865 | } | |||
|
866 | dstCapacity = ZSTD_compressBound(maxSampleSize); | |||
|
867 | dst = malloc(dstCapacity); | |||
|
868 | } | |||
|
869 | /* Create the cctx and cdict */ | |||
|
870 | cctx = ZSTD_createCCtx(); | |||
|
871 | cdict = | |||
|
872 | ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel); | |||
|
873 | if (!dst || !cctx || !cdict) { | |||
|
874 | goto _compressCleanup; | |||
|
875 | } | |||
|
876 | /* Compress each sample and sum their sizes (or error) */ | |||
|
877 | totalCompressedSize = 0; | |||
|
878 | for (i = 0; i < ctx->nbSamples; ++i) { | |||
|
879 | const size_t size = ZSTD_compress_usingCDict( | |||
|
880 | cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i], | |||
|
881 | ctx->samplesSizes[i], cdict); | |||
|
882 | if (ZSTD_isError(size)) { | |||
|
883 | totalCompressedSize = ERROR(GENERIC); | |||
|
884 | goto _compressCleanup; | |||
|
885 | } | |||
|
886 | totalCompressedSize += size; | |||
|
887 | } | |||
|
888 | _compressCleanup: | |||
|
889 | ZSTD_freeCCtx(cctx); | |||
|
890 | ZSTD_freeCDict(cdict); | |||
|
891 | if (dst) { | |||
|
892 | free(dst); | |||
|
893 | } | |||
|
894 | } | |||
|
895 | ||||
|
896 | _cleanup: | |||
|
897 | COVER_best_finish(data->best, totalCompressedSize, parameters, dict, | |||
|
898 | dictBufferCapacity); | |||
|
899 | free(data); | |||
|
900 | COVER_map_destroy(&activeDmers); | |||
|
901 | if (dict) { | |||
|
902 | free(dict); | |||
|
903 | } | |||
|
904 | if (freqs) { | |||
|
905 | free(freqs); | |||
|
906 | } | |||
|
907 | } | |||
|
908 | ||||
|
909 | ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer, | |||
|
910 | size_t dictBufferCapacity, | |||
|
911 | const void *samplesBuffer, | |||
|
912 | const size_t *samplesSizes, | |||
|
913 | unsigned nbSamples, | |||
|
914 | COVER_params_t *parameters) { | |||
|
915 | /* constants */ | |||
|
916 | const unsigned nbThreads = parameters->nbThreads; | |||
|
917 | const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; | |||
|
918 | const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d; | |||
|
919 | const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k; | |||
|
920 | const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k; | |||
|
921 | const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps; | |||
|
922 | const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); | |||
|
923 | const unsigned kIterations = | |||
|
924 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); | |||
|
925 | /* Local variables */ | |||
|
926 | const int displayLevel = parameters->notificationLevel; | |||
|
927 | unsigned iteration = 1; | |||
|
928 | unsigned d; | |||
|
929 | unsigned k; | |||
|
930 | COVER_best_t best; | |||
|
931 | POOL_ctx *pool = NULL; | |||
|
932 | /* Checks */ | |||
|
933 | if (kMinK < kMaxD || kMaxK < kMinK) { | |||
|
934 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | |||
|
935 | return ERROR(GENERIC); | |||
|
936 | } | |||
|
937 | if (nbSamples == 0) { | |||
|
938 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | |||
|
939 | return ERROR(GENERIC); | |||
|
940 | } | |||
|
941 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | |||
|
942 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | |||
|
943 | ZDICT_DICTSIZE_MIN); | |||
|
944 | return ERROR(dstSize_tooSmall); | |||
|
945 | } | |||
|
946 | if (nbThreads > 1) { | |||
|
947 | pool = POOL_create(nbThreads, 1); | |||
|
948 | if (!pool) { | |||
|
949 | return ERROR(memory_allocation); | |||
|
950 | } | |||
|
951 | } | |||
|
952 | /* Initialization */ | |||
|
953 | COVER_best_init(&best); | |||
|
954 | /* Turn down global display level to clean up display at level 2 and below */ | |||
|
955 | g_displayLevel = parameters->notificationLevel - 1; | |||
|
956 | /* Loop through d first because each new value needs a new context */ | |||
|
957 | LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", | |||
|
958 | kIterations); | |||
|
959 | for (d = kMinD; d <= kMaxD; d += 2) { | |||
|
960 | /* Initialize the context for this value of d */ | |||
|
961 | COVER_ctx_t ctx; | |||
|
962 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); | |||
|
963 | if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) { | |||
|
964 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); | |||
|
965 | COVER_best_destroy(&best); | |||
|
966 | return ERROR(GENERIC); | |||
|
967 | } | |||
|
968 | /* Loop through k reusing the same context */ | |||
|
969 | for (k = kMinK; k <= kMaxK; k += kStepSize) { | |||
|
970 | /* Prepare the arguments */ | |||
|
971 | COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( | |||
|
972 | sizeof(COVER_tryParameters_data_t)); | |||
|
973 | LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); | |||
|
974 | if (!data) { | |||
|
975 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); | |||
|
976 | COVER_best_destroy(&best); | |||
|
977 | COVER_ctx_destroy(&ctx); | |||
|
978 | return ERROR(GENERIC); | |||
|
979 | } | |||
|
980 | data->ctx = &ctx; | |||
|
981 | data->best = &best; | |||
|
982 | data->dictBufferCapacity = dictBufferCapacity; | |||
|
983 | data->parameters = *parameters; | |||
|
984 | data->parameters.k = k; | |||
|
985 | data->parameters.d = d; | |||
|
986 | data->parameters.steps = kSteps; | |||
|
987 | /* Check the parameters */ | |||
|
988 | if (!COVER_checkParameters(data->parameters)) { | |||
|
989 | DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | |||
|
990 | continue; | |||
|
991 | } | |||
|
992 | /* Call the function and pass ownership of data to it */ | |||
|
993 | COVER_best_start(&best); | |||
|
994 | if (pool) { | |||
|
995 | POOL_add(pool, &COVER_tryParameters, data); | |||
|
996 | } else { | |||
|
997 | COVER_tryParameters(data); | |||
|
998 | } | |||
|
999 | /* Print status */ | |||
|
1000 | LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", | |||
|
1001 | (U32)((iteration * 100) / kIterations)); | |||
|
1002 | ++iteration; | |||
|
1003 | } | |||
|
1004 | COVER_best_wait(&best); | |||
|
1005 | COVER_ctx_destroy(&ctx); | |||
|
1006 | } | |||
|
1007 | LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); | |||
|
1008 | /* Fill the output buffer and parameters with output of the best parameters */ | |||
|
1009 | { | |||
|
1010 | const size_t dictSize = best.dictSize; | |||
|
1011 | if (ZSTD_isError(best.compressedSize)) { | |||
|
1012 | COVER_best_destroy(&best); | |||
|
1013 | return best.compressedSize; | |||
|
1014 | } | |||
|
1015 | *parameters = best.parameters; | |||
|
1016 | memcpy(dictBuffer, best.dict, dictSize); | |||
|
1017 | COVER_best_destroy(&best); | |||
|
1018 | POOL_free(pool); | |||
|
1019 | return dictSize; | |||
|
1020 | } | |||
|
1021 | } |
@@ -1,6 +1,33 b'' | |||||
1 | Version History |
|
1 | Version History | |
2 | =============== |
|
2 | =============== | |
3 |
|
3 | |||
|
4 | 0.7.0 (released 2017-02-07) | |||
|
5 | --------------------------- | |||
|
6 | ||||
|
7 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. | |||
|
8 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient | |||
|
9 | decompression of *content-only dictionary chains*. | |||
|
10 | * CFFI module fully implemented; all tests run against both C extension and | |||
|
11 | CFFI implementation. | |||
|
12 | * Vendored version of zstd updated to 1.1.3. | |||
|
13 | * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference() | |||
|
14 | to avoid extra memory allocation of dict data. | |||
|
15 | * Add function names to error messages (by using ":name" in PyArg_Parse* | |||
|
16 | functions). | |||
|
17 | * Reuse decompression context across operations. Previously, we created a | |||
|
18 | new ZSTD_DCtx for each decompress(). This was measured to slow down | |||
|
19 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor | |||
|
20 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make | |||
|
21 | things faster in the process. | |||
|
22 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number | |||
|
23 | of bytes written. | |||
|
24 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes | |||
|
25 | written to the underlying output object. | |||
|
26 | * CompressionParameters instances now expose their values as attributes. | |||
|
27 | * CompressionParameters instances no longer are subscriptable nor behave | |||
|
28 | as tuples (backwards incompatible). Use attributes to obtain values. | |||
|
29 | * DictParameters instances now expose their values as attributes. | |||
|
30 | ||||
4 | 0.6.0 (released 2017-01-14) |
|
31 | 0.6.0 (released 2017-01-14) | |
5 | --------------------------- |
|
32 | --------------------------- | |
6 |
|
33 |
@@ -4,10 +4,11 b' python-zstandard' | |||||
4 |
|
4 | |||
5 | This project provides Python bindings for interfacing with the |
|
5 | This project provides Python bindings for interfacing with the | |
6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension |
|
6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension | |
7 |
and CFFI interface |
|
7 | and CFFI interface are provided. | |
8 |
|
8 | |||
9 |
The primary goal of the |
|
9 | The primary goal of the project is to provide a rich interface to the | |
10 | the underlying C API. This means exposing most of the features and flexibility |
|
10 | underlying C API through a Pythonic interface while not sacrificing | |
|
11 | performance. This means exposing most of the features and flexibility | |||
11 | of the C API while not sacrificing usability or safety that Python provides. |
|
12 | of the C API while not sacrificing usability or safety that Python provides. | |
12 |
|
13 | |||
13 | The canonical home for this project is |
|
14 | The canonical home for this project is | |
@@ -23,6 +24,9 b' with the current API and that functional' | |||||
23 | may be some backwards incompatible changes before 1.0. Though the author |
|
24 | may be some backwards incompatible changes before 1.0. Though the author | |
24 | does not intend to make any major changes to the Python API. |
|
25 | does not intend to make any major changes to the Python API. | |
25 |
|
26 | |||
|
27 | This project is vendored and distributed with Mercurial 4.1, where it is | |||
|
28 | used in a production capacity. | |||
|
29 | ||||
26 | There is continuous integration for Python versions 2.6, 2.7, and 3.3+ |
|
30 | There is continuous integration for Python versions 2.6, 2.7, and 3.3+ | |
27 | on Linux x86_64 and Windows x86 and x86_64. The author is reasonably
|
31 | on Linux x86_64 and Windows x86 and x86_64. The author is reasonably | |
28 | confident the extension is stable and works as advertised on these |
|
32 | confident the extension is stable and works as advertised on these | |
@@ -48,14 +52,15 b' low level compression and decompression ' | |||||
48 | support compression without the framing headers. But the author doesn't |
|
52 | support compression without the framing headers. But the author doesn't | |
49 | believe it a high priority at this time. |
|
53 | believe it a high priority at this time. | |
50 |
|
54 | |||
51 | The CFFI bindings are half-baked and need to be finished. |
|
55 | The CFFI bindings are feature complete and all tests run against both | |
|
56 | the C extension and CFFI bindings to ensure behavior parity. | |||
52 |
|
57 | |||
53 | Requirements |
|
58 | Requirements | |
54 | ============ |
|
59 | ============ | |
55 |
|
60 | |||
56 |
This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, and |
|
61 | This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and | |
57 |
on common platforms (Linux, Windows, and OS X). Only x86_64 is |
|
62 | 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is | |
58 | well-tested as an architecture. |
|
63 | currently well-tested as an architecture. | |
59 |
|
64 | |||
60 | Installing |
|
65 | Installing | |
61 | ========== |
|
66 | ========== | |
@@ -106,15 +111,11 b' compressing at several hundred MB/s and ' | |||||
106 | Comparison to Other Python Bindings |
|
111 | Comparison to Other Python Bindings | |
107 | =================================== |
|
112 | =================================== | |
108 |
|
113 | |||
109 |
https://pypi.python.org/pypi/zstd is an alternat |
|
114 | https://pypi.python.org/pypi/zstd is an alternate Python binding to | |
110 | Zstandard. At the time this was written, the latest release of that |
|
115 | Zstandard. At the time this was written, the latest release of that | |
111 | package (1.0.0.2) had the following significant differences from this package: |
|
116 | package (1.1.2) only exposed the simple APIs for compression and decompression. | |
112 |
|
117 | This package exposes much more of the zstd API, including streaming and | ||
113 | * It only exposes the simple API for compression and decompression operations. |
|
118 | dictionary compression. This package also has CFFI support. | |
114 | This extension exposes the streaming API, dictionary training, and more. |
|
|||
115 | * It adds a custom framing header to compressed data and there is no way to |
|
|||
116 | disable it. This means that data produced with that module cannot be used by |
|
|||
117 | other Zstandard implementations. |
|
|||
118 |
|
119 | |||
119 | Bundling of Zstandard Source Code |
|
120 | Bundling of Zstandard Source Code | |
120 | ================================= |
|
121 | ================================= | |
@@ -260,6 +261,10 b' A ``flush()`` method can be called to ev' | |||||
260 | compressor's internal state into the output object. This may result in 0 or |
|
261 | compressor's internal state into the output object. This may result in 0 or | |
261 | more ``write()`` calls to the output object. |
|
262 | more ``write()`` calls to the output object. | |
262 |
|
263 | |||
|
264 | Both ``write()`` and ``flush()`` return the number of bytes written to the | |||
|
265 | object's ``write()``. In many cases, small inputs do not accumulate enough | |||
|
266 | data to cause a write and ``write()`` will return ``0``. | |||
|
267 | ||||
263 | If the size of the data being fed to this streaming compressor is known, |
|
268 | If the size of the data being fed to this streaming compressor is known, | |
264 | you can declare it before compression begins:: |
|
269 | you can declare it before compression begins:: | |
265 |
|
270 | |||
@@ -476,6 +481,10 b' This behaves similarly to ``zstd.ZstdCom' | |||||
476 | the decompressor by calling ``write(data)`` and decompressed output is written |
|
481 | the decompressor by calling ``write(data)`` and decompressed output is written | |
477 | to the output object by calling its ``write(data)`` method. |
|
482 | to the output object by calling its ``write(data)`` method. | |
478 |
|
483 | |||
|
484 | Calls to ``write()`` will return the number of bytes written to the output | |||
|
485 | object. Not all inputs will result in bytes being written, so return values | |||
|
486 | of ``0`` are possible. | |||
|
487 | ||||
479 | The size of chunks being ``write()`` to the destination can be specified:: |
|
488 | The size of chunks being ``write()`` to the destination can be specified:: | |
480 |
|
489 | |||
481 | dctx = zstd.ZstdDecompressor() |
|
490 | dctx = zstd.ZstdDecompressor() | |
@@ -576,6 +585,53 b' Here is how this API should be used::' | |||||
576 | data = dobj.decompress(compressed_chunk_0) |
|
585 | data = dobj.decompress(compressed_chunk_0) | |
577 | data = dobj.decompress(compressed_chunk_1) |
|
586 | data = dobj.decompress(compressed_chunk_1) | |
578 |
|
587 | |||
|
588 | Content-Only Dictionary Chain Decompression | |||
|
589 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |||
|
590 | ||||
|
591 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of | |||
|
592 | zstd frames produced using chained *content-only* dictionary compression. Such | |||
|
593 | a list of frames is produced by compressing discrete inputs where each | |||
|
594 | non-initial input is compressed with a *content-only* dictionary consisting | |||
|
595 | of the content of the previous input. | |||
|
596 | ||||
|
597 | For example, say you have the following inputs:: | |||
|
598 | ||||
|
599 | inputs = [b'input 1', b'input 2', b'input 3'] | |||
|
600 | ||||
|
601 | The zstd frame chain consists of: | |||
|
602 | ||||
|
603 | 1. ``b'input 1'`` compressed in standalone/discrete mode | |||
|
604 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary | |||
|
605 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary | |||
|
606 | ||||
|
607 | Each zstd frame **must** have the content size written. | |||
|
608 | ||||
|
609 | The following Python code can be used to produce a *content-only dictionary | |||
|
610 | chain*:: | |||
|
611 | ||||
|
612 | def make_chain(inputs): | |||
|
613 | frames = [] | |||
|
614 | ||||
|
615 | # First frame is compressed in standalone/discrete mode. | |||
|
616 | zctx = zstd.ZstdCompressor(write_content_size=True) | |||
|
617 | frames.append(zctx.compress(inputs[0])) | |||
|
618 | ||||
|
619 | # Subsequent frames use the previous fulltext as a content-only dictionary | |||
|
620 | for i, raw in enumerate(inputs[1:]): | |||
|
621 | dict_data = zstd.ZstdCompressionDict(inputs[i]) | |||
|
622 | zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data) | |||
|
623 | frames.append(zctx.compress(raw)) | |||
|
624 | ||||
|
625 | return frames | |||
|
626 | ||||
|
627 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last | |||
|
628 | element in the input chain. | |||
|
629 | ||||
|
630 | It is possible to implement *content-only dictionary chain* decompression | |||
|
631 | on top of other Python APIs. However, this function will likely be significantly | |||
|
632 | faster, especially for long input chains, as it avoids the overhead of | |||
|
633 | instantiating and passing around intermediate objects between C and Python. | |||
|
634 | ||||
579 | Choosing an API |
|
635 | Choosing an API | |
580 | --------------- |
|
636 | --------------- | |
581 |
|
637 | |||
@@ -634,6 +690,13 b' Instances can be constructed from bytes:' | |||||
634 |
|
690 | |||
635 | dict_data = zstd.ZstdCompressionDict(data) |
|
691 | dict_data = zstd.ZstdCompressionDict(data) | |
636 |
|
692 | |||
|
693 | It is possible to construct a dictionary from *any* data. Unless the | |||
|
694 | data begins with a magic header, the dictionary will be treated as | |||
|
695 | *content-only*. *Content-only* dictionaries allow compression operations | |||
|
696 | that follow to reference raw data within the content. For one use of | |||
|
697 | *content-only* dictionaries, see | |||
|
698 | ``ZstdDecompressor.decompress_content_dict_chain()``. | |||
|
699 | ||||
637 | More interestingly, instances can be created by *training* on sample data:: |
|
700 | More interestingly, instances can be created by *training* on sample data:: | |
638 |
|
701 | |||
639 | dict_data = zstd.train_dictionary(size, samples) |
|
702 | dict_data = zstd.train_dictionary(size, samples) | |
@@ -700,19 +763,57 b' You can then configure a compressor to u' | |||||
700 |
|
763 | |||
701 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
764 | cctx = zstd.ZstdCompressor(compression_params=params) | |
702 |
|
765 | |||
703 |
The members of |
|
766 | The members/attributes of ``CompressionParameters`` instances are as follows:: | |
704 |
|
767 | |||
705 |
* |
|
768 | * window_log | |
706 |
* |
|
769 | * chain_log | |
707 |
* |
|
770 | * hash_log | |
708 |
* |
|
771 | * search_log | |
709 |
* |
|
772 | * search_length | |
710 |
* |
|
773 | * target_length | |
711 | * 6 - Strategy (one of the ``zstd.STRATEGY_`` constants) |
|
774 | * strategy | |
|
775 | ||||
|
776 | This is the order the arguments are passed to the constructor if not using | |||
|
777 | named arguments. | |||
712 |
|
778 | |||
713 | You'll need to read the Zstandard documentation for what these parameters |
|
779 | You'll need to read the Zstandard documentation for what these parameters | |
714 | do. |
|
780 | do. | |
715 |
|
781 | |||
|
782 | Frame Inspection | |||
|
783 | ---------------- | |||
|
784 | ||||
|
785 | Data emitted from zstd compression is encapsulated in a *frame*. This frame | |||
|
786 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing | |||
|
787 | the frame in more detail. For more info, see | |||
|
788 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. | |||
|
789 | ||||
|
790 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes | |||
|
791 | instance and return a ``FrameParameters`` object describing the frame. | |||
|
792 | ||||
|
793 | Depending on which fields are present in the frame and their values, the | |||
|
794 | length of the frame parameters varies. If insufficient bytes are passed | |||
|
795 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure | |||
|
796 | frame parameters can be parsed, pass in at least 18 bytes. | |||
|
797 | ||||
|
798 | ``FrameParameters`` instances have the following attributes: | |||
|
799 | ||||
|
800 | content_size | |||
|
801 | Integer size of original, uncompressed content. This will be ``0`` if the | |||
|
802 | original content size isn't written to the frame (controlled with the | |||
|
803 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input | |||
|
804 | content size was ``0``. | |||
|
805 | ||||
|
806 | window_size | |||
|
807 | Integer size of maximum back-reference distance in compressed data. | |||
|
808 | ||||
|
809 | dict_id | |||
|
810 | Integer of dictionary ID used for compression. ``0`` if no dictionary | |||
|
811 | ID was used or if the dictionary ID was ``0``. | |||
|
812 | ||||
|
813 | has_checksum | |||
|
814 | Bool indicating whether a 4 byte content checksum is stored at the end | |||
|
815 | of the frame. | |||
|
816 | ||||
716 | Misc Functionality |
|
817 | Misc Functionality | |
717 | ------------------ |
|
818 | ------------------ | |
718 |
|
819 | |||
@@ -776,19 +877,32 b' TARGETLENGTH_MIN' | |||||
776 | TARGETLENGTH_MAX |
|
877 | TARGETLENGTH_MAX | |
777 | Maximum value for compression parameter |
|
878 | Maximum value for compression parameter | |
778 | STRATEGY_FAST |
|
879 | STRATEGY_FAST | |
779 |
Compression strateg |
|
880 | Compression strategy | |
780 | STRATEGY_DFAST |
|
881 | STRATEGY_DFAST | |
781 |
Compression strateg |
|
882 | Compression strategy | |
782 | STRATEGY_GREEDY |
|
883 | STRATEGY_GREEDY | |
783 |
Compression strateg |
|
884 | Compression strategy | |
784 | STRATEGY_LAZY |
|
885 | STRATEGY_LAZY | |
785 |
Compression strateg |
|
886 | Compression strategy | |
786 | STRATEGY_LAZY2 |
|
887 | STRATEGY_LAZY2 | |
787 |
Compression strateg |
|
888 | Compression strategy | |
788 | STRATEGY_BTLAZY2 |
|
889 | STRATEGY_BTLAZY2 | |
789 |
Compression strateg |
|
890 | Compression strategy | |
790 | STRATEGY_BTOPT |
|
891 | STRATEGY_BTOPT | |
791 |
Compression strateg |
|
892 | Compression strategy | |
|
893 | ||||
|
894 | Performance Considerations | |||
|
895 | -------------------------- | |||
|
896 | ||||
|
897 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a | |||
|
898 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` | |||
|
899 | or ``ZstdDecompressor`` instance for multiple operations is faster than | |||
|
900 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each | |||
|
901 | operation. The differences are magnified as the size of data decreases. For | |||
|
902 | example, the difference between *context* reuse and non-reuse for 100,000 | |||
|
903 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) | |||
|
904 | whereas 10 1,000,000 byte inputs will be more similar in speed (because the | |||
|
905 | time spent doing compression dwarfs time spent creating new *contexts*). | |||
792 |
|
906 | |||
793 | Note on Zstandard's *Experimental* API |
|
907 | Note on Zstandard's *Experimental* API | |
794 | ====================================== |
|
908 | ====================================== |
@@ -28,7 +28,8 b' ZstdCompressionDict* train_dictionary(Py' | |||||
28 | void* dict; |
|
28 | void* dict; | |
29 | ZstdCompressionDict* result; |
|
29 | ZstdCompressionDict* result; | |
30 |
|
30 | |||
31 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", |
|
31 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary", | |
|
32 | kwlist, | |||
32 | &capacity, |
|
33 | &capacity, | |
33 | &PyList_Type, &samples, |
|
34 | &PyList_Type, &samples, | |
34 | (PyObject*)&DictParametersType, ¶meters)) { |
|
35 | (PyObject*)&DictParametersType, ¶meters)) { | |
@@ -57,7 +58,6 b' ZstdCompressionDict* train_dictionary(Py' | |||||
57 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
58 | sampleItem = PyList_GetItem(samples, sampleIndex); | |
58 | if (!PyBytes_Check(sampleItem)) { |
|
59 | if (!PyBytes_Check(sampleItem)) { | |
59 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); |
|
60 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); | |
60 | /* TODO probably need to perform DECREF here */ |
|
|||
61 | return NULL; |
|
61 | return NULL; | |
62 | } |
|
62 | } | |
63 | samplesSize += PyBytes_GET_SIZE(sampleItem); |
|
63 | samplesSize += PyBytes_GET_SIZE(sampleItem); | |
@@ -133,10 +133,11 b' static int ZstdCompressionDict_init(Zstd' | |||||
133 | self->dictSize = 0; |
|
133 | self->dictSize = 0; | |
134 |
|
134 | |||
135 | #if PY_MAJOR_VERSION >= 3 |
|
135 | #if PY_MAJOR_VERSION >= 3 | |
136 |
if (!PyArg_ParseTuple(args, "y#", |
|
136 | if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict", | |
137 | #else |
|
137 | #else | |
138 |
if (!PyArg_ParseTuple(args, "s#", |
|
138 | if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict", | |
139 | #endif |
|
139 | #endif | |
|
140 | &source, &sourceSize)) { | |||
140 | return -1; |
|
141 | return -1; | |
141 | } |
|
142 | } | |
142 |
|
143 |
@@ -25,7 +25,8 b' CompressionParametersObject* get_compres' | |||||
25 | ZSTD_compressionParameters params; |
|
25 | ZSTD_compressionParameters params; | |
26 | CompressionParametersObject* result; |
|
26 | CompressionParametersObject* result; | |
27 |
|
27 | |||
28 |
if (!PyArg_ParseTuple(args, "i|Kn", |
|
28 | if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters", | |
|
29 | &compressionLevel, &sourceSize, &dictSize)) { | |||
29 | return NULL; |
|
30 | return NULL; | |
30 | } |
|
31 | } | |
31 |
|
32 | |||
@@ -47,12 +48,85 b' CompressionParametersObject* get_compres' | |||||
47 | return result; |
|
48 | return result; | |
48 | } |
|
49 | } | |
49 |
|
50 | |||
|
51 | static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) { | |||
|
52 | static char* kwlist[] = { | |||
|
53 | "window_log", | |||
|
54 | "chain_log", | |||
|
55 | "hash_log", | |||
|
56 | "search_log", | |||
|
57 | "search_length", | |||
|
58 | "target_length", | |||
|
59 | "strategy", | |||
|
60 | NULL | |||
|
61 | }; | |||
|
62 | ||||
|
63 | unsigned windowLog; | |||
|
64 | unsigned chainLog; | |||
|
65 | unsigned hashLog; | |||
|
66 | unsigned searchLog; | |||
|
67 | unsigned searchLength; | |||
|
68 | unsigned targetLength; | |||
|
69 | unsigned strategy; | |||
|
70 | ||||
|
71 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters", | |||
|
72 | kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength, | |||
|
73 | &targetLength, &strategy)) { | |||
|
74 | return -1; | |||
|
75 | } | |||
|
76 | ||||
|
77 | if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) { | |||
|
78 | PyErr_SetString(PyExc_ValueError, "invalid window log value"); | |||
|
79 | return -1; | |||
|
80 | } | |||
|
81 | ||||
|
82 | if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) { | |||
|
83 | PyErr_SetString(PyExc_ValueError, "invalid chain log value"); | |||
|
84 | return -1; | |||
|
85 | } | |||
|
86 | ||||
|
87 | if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) { | |||
|
88 | PyErr_SetString(PyExc_ValueError, "invalid hash log value"); | |||
|
89 | return -1; | |||
|
90 | } | |||
|
91 | ||||
|
92 | if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) { | |||
|
93 | PyErr_SetString(PyExc_ValueError, "invalid search log value"); | |||
|
94 | return -1; | |||
|
95 | } | |||
|
96 | ||||
|
97 | if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) { | |||
|
98 | PyErr_SetString(PyExc_ValueError, "invalid search length value"); | |||
|
99 | return -1; | |||
|
100 | } | |||
|
101 | ||||
|
102 | if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) { | |||
|
103 | PyErr_SetString(PyExc_ValueError, "invalid target length value"); | |||
|
104 | return -1; | |||
|
105 | } | |||
|
106 | ||||
|
107 | if (strategy < ZSTD_fast || strategy > ZSTD_btopt) { | |||
|
108 | PyErr_SetString(PyExc_ValueError, "invalid strategy value"); | |||
|
109 | return -1; | |||
|
110 | } | |||
|
111 | ||||
|
112 | self->windowLog = windowLog; | |||
|
113 | self->chainLog = chainLog; | |||
|
114 | self->hashLog = hashLog; | |||
|
115 | self->searchLog = searchLog; | |||
|
116 | self->searchLength = searchLength; | |||
|
117 | self->targetLength = targetLength; | |||
|
118 | self->strategy = strategy; | |||
|
119 | ||||
|
120 | return 0; | |||
|
121 | } | |||
|
122 | ||||
50 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) { |
|
123 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) { | |
51 | CompressionParametersObject* params; |
|
124 | CompressionParametersObject* params; | |
52 | ZSTD_compressionParameters zparams; |
|
125 | ZSTD_compressionParameters zparams; | |
53 | PyObject* result; |
|
126 | PyObject* result; | |
54 |
|
127 | |||
55 |
if (!PyArg_ParseTuple(args, "O!", |
|
128 | if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size", | |
|
129 | &CompressionParametersType, &params)) { | |||
56 | return NULL; |
|
130 | return NULL; | |
57 | } |
|
131 | } | |
58 |
|
132 | |||
@@ -64,113 +138,33 b' PyObject* estimate_compression_context_s' | |||||
64 | PyDoc_STRVAR(CompressionParameters__doc__, |
|
138 | PyDoc_STRVAR(CompressionParameters__doc__, | |
65 | "CompressionParameters: low-level control over zstd compression"); |
|
139 | "CompressionParameters: low-level control over zstd compression"); | |
66 |
|
140 | |||
67 | static PyObject* CompressionParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) { |
|
|||
68 | CompressionParametersObject* self; |
|
|||
69 | unsigned windowLog; |
|
|||
70 | unsigned chainLog; |
|
|||
71 | unsigned hashLog; |
|
|||
72 | unsigned searchLog; |
|
|||
73 | unsigned searchLength; |
|
|||
74 | unsigned targetLength; |
|
|||
75 | unsigned strategy; |
|
|||
76 |
|
||||
77 | if (!PyArg_ParseTuple(args, "IIIIIII", &windowLog, &chainLog, &hashLog, &searchLog, |
|
|||
78 | &searchLength, &targetLength, &strategy)) { |
|
|||
79 | return NULL; |
|
|||
80 | } |
|
|||
81 |
|
||||
82 | if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) { |
|
|||
83 | PyErr_SetString(PyExc_ValueError, "invalid window log value"); |
|
|||
84 | return NULL; |
|
|||
85 | } |
|
|||
86 |
|
||||
87 | if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) { |
|
|||
88 | PyErr_SetString(PyExc_ValueError, "invalid chain log value"); |
|
|||
89 | return NULL; |
|
|||
90 | } |
|
|||
91 |
|
||||
92 | if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) { |
|
|||
93 | PyErr_SetString(PyExc_ValueError, "invalid hash log value"); |
|
|||
94 | return NULL; |
|
|||
95 | } |
|
|||
96 |
|
||||
97 | if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) { |
|
|||
98 | PyErr_SetString(PyExc_ValueError, "invalid search log value"); |
|
|||
99 | return NULL; |
|
|||
100 | } |
|
|||
101 |
|
||||
102 | if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) { |
|
|||
103 | PyErr_SetString(PyExc_ValueError, "invalid search length value"); |
|
|||
104 | return NULL; |
|
|||
105 | } |
|
|||
106 |
|
||||
107 | if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) { |
|
|||
108 | PyErr_SetString(PyExc_ValueError, "invalid target length value"); |
|
|||
109 | return NULL; |
|
|||
110 | } |
|
|||
111 |
|
||||
112 | if (strategy < ZSTD_fast || strategy > ZSTD_btopt) { |
|
|||
113 | PyErr_SetString(PyExc_ValueError, "invalid strategy value"); |
|
|||
114 | return NULL; |
|
|||
115 | } |
|
|||
116 |
|
||||
117 | self = (CompressionParametersObject*)subtype->tp_alloc(subtype, 1); |
|
|||
118 | if (!self) { |
|
|||
119 | return NULL; |
|
|||
120 | } |
|
|||
121 |
|
||||
122 | self->windowLog = windowLog; |
|
|||
123 | self->chainLog = chainLog; |
|
|||
124 | self->hashLog = hashLog; |
|
|||
125 | self->searchLog = searchLog; |
|
|||
126 | self->searchLength = searchLength; |
|
|||
127 | self->targetLength = targetLength; |
|
|||
128 | self->strategy = strategy; |
|
|||
129 |
|
||||
130 | return (PyObject*)self; |
|
|||
131 | } |
|
|||
132 |
|
||||
133 | static void CompressionParameters_dealloc(PyObject* self) { |
|
141 | static void CompressionParameters_dealloc(PyObject* self) { | |
134 | PyObject_Del(self); |
|
142 | PyObject_Del(self); | |
135 | } |
|
143 | } | |
136 |
|
144 | |||
137 |
static Py |
|
145 | static PyMemberDef CompressionParameters_members[] = { | |
138 | return 7; |
|
146 | { "window_log", T_UINT, | |
139 | } |
|
147 | offsetof(CompressionParametersObject, windowLog), READONLY, | |
140 |
|
148 | "window log" }, | ||
141 | static PyObject* CompressionParameters_item(PyObject* o, Py_ssize_t i) { |
|
149 | { "chain_log", T_UINT, | |
142 | CompressionParametersObject* self = (CompressionParametersObject*)o; |
|
150 | offsetof(CompressionParametersObject, chainLog), READONLY, | |
143 |
|
151 | "chain log" }, | ||
144 | switch (i) { |
|
152 | { "hash_log", T_UINT, | |
145 | case 0: |
|
153 | offsetof(CompressionParametersObject, hashLog), READONLY, | |
146 | return PyLong_FromLong(self->windowLog); |
|
154 | "hash log" }, | |
147 | case 1: |
|
155 | { "search_log", T_UINT, | |
148 | return PyLong_FromLong(self->chainLog); |
|
156 | offsetof(CompressionParametersObject, searchLog), READONLY, | |
149 | case 2: |
|
157 | "search log" }, | |
150 | return PyLong_FromLong(self->hashLog); |
|
158 | { "search_length", T_UINT, | |
151 | case 3: |
|
159 | offsetof(CompressionParametersObject, searchLength), READONLY, | |
152 | return PyLong_FromLong(self->searchLog); |
|
160 | "search length" }, | |
153 | case 4: |
|
161 | { "target_length", T_UINT, | |
154 | return PyLong_FromLong(self->searchLength); |
|
162 | offsetof(CompressionParametersObject, targetLength), READONLY, | |
155 | case 5: |
|
163 | "target length" }, | |
156 | return PyLong_FromLong(self->targetLength); |
|
164 | { "strategy", T_INT, | |
157 | case 6: |
|
165 | offsetof(CompressionParametersObject, strategy), READONLY, | |
158 | return PyLong_FromLong(self->strategy); |
|
166 | "strategy" }, | |
159 | default: |
|
167 | { NULL } | |
160 | PyErr_SetString(PyExc_IndexError, "index out of range"); |
|
|||
161 | return NULL; |
|
|||
162 | } |
|
|||
163 | } |
|
|||
164 |
|
||||
165 | static PySequenceMethods CompressionParameters_sq = { |
|
|||
166 | CompressionParameters_length, /* sq_length */ |
|
|||
167 | 0, /* sq_concat */ |
|
|||
168 | 0, /* sq_repeat */ |
|
|||
169 | CompressionParameters_item, /* sq_item */ |
|
|||
170 | 0, /* sq_ass_item */ |
|
|||
171 | 0, /* sq_contains */ |
|
|||
172 | 0, /* sq_inplace_concat */ |
|
|||
173 | 0 /* sq_inplace_repeat */ |
|
|||
174 | }; |
|
168 | }; | |
175 |
|
169 | |||
176 | PyTypeObject CompressionParametersType = { |
|
170 | PyTypeObject CompressionParametersType = { | |
@@ -185,7 +179,7 b' PyTypeObject CompressionParametersType =' | |||||
185 | 0, /* tp_compare */ |
|
179 | 0, /* tp_compare */ | |
186 | 0, /* tp_repr */ |
|
180 | 0, /* tp_repr */ | |
187 | 0, /* tp_as_number */ |
|
181 | 0, /* tp_as_number */ | |
188 | &CompressionParameters_sq, /* tp_as_sequence */ |
|
182 | 0, /* tp_as_sequence */ | |
189 | 0, /* tp_as_mapping */ |
|
183 | 0, /* tp_as_mapping */ | |
190 | 0, /* tp_hash */ |
|
184 | 0, /* tp_hash */ | |
191 | 0, /* tp_call */ |
|
185 | 0, /* tp_call */ | |
@@ -193,7 +187,7 b' PyTypeObject CompressionParametersType =' | |||||
193 | 0, /* tp_getattro */ |
|
187 | 0, /* tp_getattro */ | |
194 | 0, /* tp_setattro */ |
|
188 | 0, /* tp_setattro */ | |
195 | 0, /* tp_as_buffer */ |
|
189 | 0, /* tp_as_buffer */ | |
196 |
Py_TPFLAGS_DEFAULT, |
|
190 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
197 | CompressionParameters__doc__, /* tp_doc */ |
|
191 | CompressionParameters__doc__, /* tp_doc */ | |
198 | 0, /* tp_traverse */ |
|
192 | 0, /* tp_traverse */ | |
199 | 0, /* tp_clear */ |
|
193 | 0, /* tp_clear */ | |
@@ -202,16 +196,16 b' PyTypeObject CompressionParametersType =' | |||||
202 | 0, /* tp_iter */ |
|
196 | 0, /* tp_iter */ | |
203 | 0, /* tp_iternext */ |
|
197 | 0, /* tp_iternext */ | |
204 | 0, /* tp_methods */ |
|
198 | 0, /* tp_methods */ | |
205 | 0, /* tp_members */ |
|
199 | CompressionParameters_members, /* tp_members */ | |
206 | 0, /* tp_getset */ |
|
200 | 0, /* tp_getset */ | |
207 | 0, /* tp_base */ |
|
201 | 0, /* tp_base */ | |
208 | 0, /* tp_dict */ |
|
202 | 0, /* tp_dict */ | |
209 | 0, /* tp_descr_get */ |
|
203 | 0, /* tp_descr_get */ | |
210 | 0, /* tp_descr_set */ |
|
204 | 0, /* tp_descr_set */ | |
211 | 0, /* tp_dictoffset */ |
|
205 | 0, /* tp_dictoffset */ | |
212 | 0, /* tp_init */ |
|
206 | (initproc)CompressionParameters_init, /* tp_init */ | |
213 | 0, /* tp_alloc */ |
|
207 | 0, /* tp_alloc */ | |
214 | CompressionParameters_new, /* tp_new */ |
|
208 | PyType_GenericNew, /* tp_new */ | |
215 | }; |
|
209 | }; | |
216 |
|
210 | |||
217 | void compressionparams_module_init(PyObject* mod) { |
|
211 | void compressionparams_module_init(PyObject* mod) { |
@@ -52,7 +52,7 b' static PyObject* ZstdCompressionWriter_e' | |||||
52 | ZSTD_outBuffer output; |
|
52 | ZSTD_outBuffer output; | |
53 | PyObject* res; |
|
53 | PyObject* res; | |
54 |
|
54 | |||
55 | if (!PyArg_ParseTuple(args, "OOO", &exc_type, &exc_value, &exc_tb)) { |
|
55 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { | |
56 | return NULL; |
|
56 | return NULL; | |
57 | } |
|
57 | } | |
58 |
|
58 | |||
@@ -119,11 +119,12 b' static PyObject* ZstdCompressionWriter_w' | |||||
119 | ZSTD_inBuffer input; |
|
119 | ZSTD_inBuffer input; | |
120 | ZSTD_outBuffer output; |
|
120 | ZSTD_outBuffer output; | |
121 | PyObject* res; |
|
121 | PyObject* res; | |
|
122 | Py_ssize_t totalWrite = 0; | |||
122 |
|
123 | |||
123 | #if PY_MAJOR_VERSION >= 3 |
|
124 | #if PY_MAJOR_VERSION >= 3 | |
124 | if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { |
|
125 | if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) { | |
125 | #else |
|
126 | #else | |
126 | if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { |
|
127 | if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) { | |
127 | #endif |
|
128 | #endif | |
128 | return NULL; |
|
129 | return NULL; | |
129 | } |
|
130 | } | |
@@ -164,20 +165,21 b' static PyObject* ZstdCompressionWriter_w' | |||||
164 | #endif |
|
165 | #endif | |
165 | output.dst, output.pos); |
|
166 | output.dst, output.pos); | |
166 | Py_XDECREF(res); |
|
167 | Py_XDECREF(res); | |
|
168 | totalWrite += output.pos; | |||
167 | } |
|
169 | } | |
168 | output.pos = 0; |
|
170 | output.pos = 0; | |
169 | } |
|
171 | } | |
170 |
|
172 | |||
171 | PyMem_Free(output.dst); |
|
173 | PyMem_Free(output.dst); | |
172 |
|
174 | |||
173 | /* TODO return bytes written */ |
|
175 | return PyLong_FromSsize_t(totalWrite); | |
174 | Py_RETURN_NONE; |
|
|||
175 | } |
|
176 | } | |
176 |
|
177 | |||
177 | static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) { |
|
178 | static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) { | |
178 | size_t zresult; |
|
179 | size_t zresult; | |
179 | ZSTD_outBuffer output; |
|
180 | ZSTD_outBuffer output; | |
180 | PyObject* res; |
|
181 | PyObject* res; | |
|
182 | Py_ssize_t totalWrite = 0; | |||
181 |
|
183 | |||
182 | if (!self->entered) { |
|
184 | if (!self->entered) { | |
183 | PyErr_SetString(ZstdError, "flush must be called from an active context manager"); |
|
185 | PyErr_SetString(ZstdError, "flush must be called from an active context manager"); | |
@@ -215,14 +217,14 b' static PyObject* ZstdCompressionWriter_f' | |||||
215 | #endif |
|
217 | #endif | |
216 | output.dst, output.pos); |
|
218 | output.dst, output.pos); | |
217 | Py_XDECREF(res); |
|
219 | Py_XDECREF(res); | |
|
220 | totalWrite += output.pos; | |||
218 | } |
|
221 | } | |
219 | output.pos = 0; |
|
222 | output.pos = 0; | |
220 | } |
|
223 | } | |
221 |
|
224 | |||
222 | PyMem_Free(output.dst); |
|
225 | PyMem_Free(output.dst); | |
223 |
|
226 | |||
224 | /* TODO return bytes written */ |
|
227 | return PyLong_FromSsize_t(totalWrite); | |
225 | Py_RETURN_NONE; |
|
|||
226 | } |
|
228 | } | |
227 |
|
229 | |||
228 | static PyMethodDef ZstdCompressionWriter_methods[] = { |
|
230 | static PyMethodDef ZstdCompressionWriter_methods[] = { |
@@ -42,9 +42,9 b' static PyObject* ZstdCompressionObj_comp' | |||||
42 | } |
|
42 | } | |
43 |
|
43 | |||
44 | #if PY_MAJOR_VERSION >= 3 |
|
44 | #if PY_MAJOR_VERSION >= 3 | |
45 | if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { |
|
45 | if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) { | |
46 | #else |
|
46 | #else | |
47 | if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { |
|
47 | if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) { | |
48 | #endif |
|
48 | #endif | |
49 | return NULL; |
|
49 | return NULL; | |
50 | } |
|
50 | } | |
@@ -98,7 +98,7 b' static PyObject* ZstdCompressionObj_flus' | |||||
98 | PyObject* result = NULL; |
|
98 | PyObject* result = NULL; | |
99 | Py_ssize_t resultSize = 0; |
|
99 | Py_ssize_t resultSize = 0; | |
100 |
|
100 | |||
101 | if (!PyArg_ParseTuple(args, "|i", &flushMode)) { |
|
101 | if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) { | |
102 | return NULL; |
|
102 | return NULL; | |
103 | } |
|
103 | } | |
104 |
|
104 |
@@ -16,7 +16,7 b' int populate_cdict(ZstdCompressor* compr' | |||||
16 | Py_BEGIN_ALLOW_THREADS |
|
16 | Py_BEGIN_ALLOW_THREADS | |
17 | memset(&zmem, 0, sizeof(zmem)); |
|
17 | memset(&zmem, 0, sizeof(zmem)); | |
18 | compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, |
|
18 | compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, | |
19 | compressor->dict->dictSize, *zparams, zmem); |
|
19 | compressor->dict->dictSize, 1, *zparams, zmem); | |
20 | Py_END_ALLOW_THREADS |
|
20 | Py_END_ALLOW_THREADS | |
21 |
|
21 | |||
22 | if (!compressor->cdict) { |
|
22 | if (!compressor->cdict) { | |
@@ -128,8 +128,8 b' static int ZstdCompressor_init(ZstdCompr' | |||||
128 | self->cparams = NULL; |
|
128 | self->cparams = NULL; | |
129 | self->cdict = NULL; |
|
129 | self->cdict = NULL; | |
130 |
|
130 | |||
131 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", |
|
131 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor", | |
132 | &level, &ZstdCompressionDictType, &dict, |
|
132 | kwlist, &level, &ZstdCompressionDictType, &dict, | |
133 | &CompressionParametersType, ¶ms, |
|
133 | &CompressionParametersType, ¶ms, | |
134 | &writeChecksum, &writeContentSize, &writeDictID)) { |
|
134 | &writeChecksum, &writeContentSize, &writeDictID)) { | |
135 | return -1; |
|
135 | return -1; | |
@@ -243,8 +243,8 b' static PyObject* ZstdCompressor_copy_str' | |||||
243 | PyObject* totalReadPy; |
|
243 | PyObject* totalReadPy; | |
244 | PyObject* totalWritePy; |
|
244 | PyObject* totalWritePy; | |
245 |
|
245 | |||
246 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk", kwlist, |
|
246 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist, | |
247 | &inSize, &outSize)) { |
|
247 | &source, &dest, &sourceSize, &inSize, &outSize)) { | |
248 | return NULL; |
|
248 | return NULL; | |
249 | } |
|
249 | } | |
250 |
|
250 | |||
@@ -402,9 +402,9 b' static PyObject* ZstdCompressor_compress' | |||||
402 | ZSTD_parameters zparams; |
|
402 | ZSTD_parameters zparams; | |
403 |
|
403 | |||
404 | #if PY_MAJOR_VERSION >= 3 |
|
404 | #if PY_MAJOR_VERSION >= 3 | |
405 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O", |
|
405 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress", | |
406 | #else |
|
406 | #else | |
407 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O", |
|
407 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress", | |
408 | #endif |
|
408 | #endif | |
409 | kwlist, &source, &sourceSize, &allowEmpty)) { |
|
409 | kwlist, &source, &sourceSize, &allowEmpty)) { | |
410 | return NULL; |
|
410 | return NULL; | |
@@ -512,7 +512,7 b' static ZstdCompressionObj* ZstdCompresso' | |||||
512 | return NULL; |
|
512 | return NULL; | |
513 | } |
|
513 | } | |
514 |
|
514 | |||
515 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &inSize)) { |
|
515 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) { | |
516 | return NULL; |
|
516 | return NULL; | |
517 | } |
|
517 | } | |
518 |
|
518 | |||
@@ -574,8 +574,8 b' static ZstdCompressorIterator* ZstdCompr' | |||||
574 | size_t outSize = ZSTD_CStreamOutSize(); |
|
574 | size_t outSize = ZSTD_CStreamOutSize(); | |
575 | ZstdCompressorIterator* result; |
|
575 | ZstdCompressorIterator* result; | |
576 |
|
576 | |||
577 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk", kwlist, |
|
577 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist, | |
578 | &inSize, &outSize)) { |
|
578 | &reader, &sourceSize, &inSize, &outSize)) { | |
579 | return NULL; |
|
579 | return NULL; | |
580 | } |
|
580 | } | |
581 |
|
581 | |||
@@ -693,8 +693,8 b' static ZstdCompressionWriter* ZstdCompre' | |||||
693 | Py_ssize_t sourceSize = 0; |
|
693 | Py_ssize_t sourceSize = 0; | |
694 | size_t outSize = ZSTD_CStreamOutSize(); |
|
694 | size_t outSize = ZSTD_CStreamOutSize(); | |
695 |
|
695 | |||
696 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk", kwlist, |
|
696 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist, | |
697 | &outSize)) { |
|
697 | &writer, &sourceSize, &outSize)) { | |
698 | return NULL; |
|
698 | return NULL; | |
699 | } |
|
699 | } | |
700 |
|
700 |
@@ -71,11 +71,12 b' static PyObject* ZstdDecompressionWriter' | |||||
71 | ZSTD_inBuffer input; |
|
71 | ZSTD_inBuffer input; | |
72 | ZSTD_outBuffer output; |
|
72 | ZSTD_outBuffer output; | |
73 | PyObject* res; |
|
73 | PyObject* res; | |
|
74 | Py_ssize_t totalWrite = 0; | |||
74 |
|
75 | |||
75 | #if PY_MAJOR_VERSION >= 3 |
|
76 | #if PY_MAJOR_VERSION >= 3 | |
76 | if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { |
|
77 | if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) { | |
77 | #else |
|
78 | #else | |
78 | if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { |
|
79 | if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) { | |
79 | #endif |
|
80 | #endif | |
80 | return NULL; |
|
81 | return NULL; | |
81 | } |
|
82 | } | |
@@ -116,15 +117,15 b' static PyObject* ZstdDecompressionWriter' | |||||
116 | #endif |
|
117 | #endif | |
117 | output.dst, output.pos); |
|
118 | output.dst, output.pos); | |
118 | Py_XDECREF(res); |
|
119 | Py_XDECREF(res); | |
|
120 | totalWrite += output.pos; | |||
119 | output.pos = 0; |
|
121 | output.pos = 0; | |
120 | } |
|
122 | } | |
121 | } |
|
123 | } | |
122 |
|
124 | |||
123 | PyMem_Free(output.dst); |
|
125 | PyMem_Free(output.dst); | |
124 |
|
126 | |||
125 | /* TODO return bytes written */ |
|
127 | return PyLong_FromSsize_t(totalWrite); | |
126 | Py_RETURN_NONE; |
|
128 | } | |
127 | } |
|
|||
128 |
|
129 | |||
129 | static PyMethodDef ZstdDecompressionWriter_methods[] = { |
|
130 | static PyMethodDef ZstdDecompressionWriter_methods[] = { | |
130 | { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS, |
|
131 | { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS, |
@@ -41,9 +41,9 b' static PyObject* DecompressionObj_decomp' | |||||
41 | } |
|
41 | } | |
42 |
|
42 | |||
43 | #if PY_MAJOR_VERSION >= 3 |
|
43 | #if PY_MAJOR_VERSION >= 3 | |
44 | if (!PyArg_ParseTuple(args, "y#", |
|
44 | if (!PyArg_ParseTuple(args, "y#:decompress", | |
45 | #else |
|
45 | #else | |
46 | if (!PyArg_ParseTuple(args, "s#", |
|
46 | if (!PyArg_ParseTuple(args, "s#:decompress", | |
47 | #endif |
|
47 | #endif | |
48 | &source, &sourceSize)) { |
|
48 | &source, &sourceSize)) { | |
49 | return NULL; |
|
49 | return NULL; |
@@ -59,23 +59,19 b' static int Decompressor_init(ZstdDecompr' | |||||
59 |
|
59 | |||
60 | ZstdCompressionDict* dict = NULL; |
|
60 | ZstdCompressionDict* dict = NULL; | |
61 |
|
61 | |||
62 |
self-> |
|
62 | self->dctx = NULL; | |
63 | self->dict = NULL; |
|
63 | self->dict = NULL; | |
64 | self->ddict = NULL; |
|
64 | self->ddict = NULL; | |
65 |
|
65 | |||
66 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!", kwlist, |
|
66 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist, | |
67 | &ZstdCompressionDictType, &dict)) { |
|
67 | &ZstdCompressionDictType, &dict)) { | |
68 | return -1; |
|
68 | return -1; | |
69 | } |
|
69 | } | |
70 |
|
70 | |||
71 | /* Instead of creating a ZSTD_DCtx for every decompression operation, |
|
|||
72 | we create an instance at object creation time and recycle it via |
|
|||
73 | ZSTD_copyDCTx() on each use. This means each use is a malloc+memcpy |
|
|||
74 | instead of a malloc+init. */ |
|
|||
75 | /* TODO lazily initialize the reference ZSTD_DCtx on first use since |
|
71 | /* TODO lazily initialize the reference ZSTD_DCtx on first use since | |
76 | not instances of ZstdDecompressor will use a ZSTD_DCtx. */ |
|
72 | not instances of ZstdDecompressor will use a ZSTD_DCtx. */ | |
77 |
self-> |
|
73 | self->dctx = ZSTD_createDCtx(); | |
78 |
if (!self-> |
|
74 | if (!self->dctx) { | |
79 | PyErr_NoMemory(); |
|
75 | PyErr_NoMemory(); | |
80 | goto except; |
|
76 | goto except; | |
81 | } |
|
77 | } | |
@@ -88,17 +84,17 b' static int Decompressor_init(ZstdDecompr' | |||||
88 | return 0; |
|
84 | return 0; | |
89 |
|
85 | |||
90 | except: |
|
86 | except: | |
91 |
if (self-> |
|
87 | if (self->dctx) { | |
92 |
ZSTD_freeDCtx(self-> |
|
88 | ZSTD_freeDCtx(self->dctx); | |
93 |
self-> |
|
89 | self->dctx = NULL; | |
94 | } |
|
90 | } | |
95 |
|
91 | |||
96 | return -1; |
|
92 | return -1; | |
97 | } |
|
93 | } | |
98 |
|
94 | |||
99 | static void Decompressor_dealloc(ZstdDecompressor* self) { |
|
95 | static void Decompressor_dealloc(ZstdDecompressor* self) { | |
100 |
if (self-> |
|
96 | if (self->dctx) { | |
101 |
ZSTD_freeDCtx(self-> |
|
97 | ZSTD_freeDCtx(self->dctx); | |
102 | } |
|
98 | } | |
103 |
|
99 | |||
104 | Py_XDECREF(self->dict); |
|
100 | Py_XDECREF(self->dict); | |
@@ -150,8 +146,8 b' static PyObject* Decompressor_copy_strea' | |||||
150 | PyObject* totalReadPy; |
|
146 | PyObject* totalReadPy; | |
151 | PyObject* totalWritePy; |
|
147 | PyObject* totalWritePy; | |
152 |
|
148 | |||
153 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk", kwlist, |
|
149 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist, | |
154 | &dest, &inSize, &outSize)) { |
|
150 | &source, &dest, &inSize, &outSize)) { | |
155 | return NULL; |
|
151 | return NULL; | |
156 | } |
|
152 | } | |
157 |
|
153 | |||
@@ -243,7 +239,7 b' static PyObject* Decompressor_copy_strea' | |||||
243 | Py_DecRef(totalReadPy); |
|
239 | Py_DecRef(totalReadPy); | |
244 | Py_DecRef(totalWritePy); |
|
240 | Py_DecRef(totalWritePy); | |
245 |
|
241 | |||
246 |
|
|
242 | finally: | |
247 | if (output.dst) { |
|
243 | if (output.dst) { | |
248 | PyMem_Free(output.dst); |
|
244 | PyMem_Free(output.dst); | |
249 | } |
|
245 | } | |
@@ -291,28 +287,19 b' PyObject* Decompressor_decompress(ZstdDe' | |||||
291 | unsigned long long decompressedSize; |
|
287 | unsigned long long decompressedSize; | |
292 | size_t destCapacity; |
|
288 | size_t destCapacity; | |
293 | PyObject* result = NULL; |
|
289 | PyObject* result = NULL; | |
294 | ZSTD_DCtx* dctx = NULL; |
|
|||
295 | void* dictData = NULL; |
|
290 | void* dictData = NULL; | |
296 | size_t dictSize = 0; |
|
291 | size_t dictSize = 0; | |
297 | size_t zresult; |
|
292 | size_t zresult; | |
298 |
|
293 | |||
299 | #if PY_MAJOR_VERSION >= 3 |
|
294 | #if PY_MAJOR_VERSION >= 3 | |
300 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", |
|
295 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress", | |
301 | #else |
|
296 | #else | |
302 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", |
|
297 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress", | |
303 | #endif |
|
298 | #endif | |
304 | &source, &sourceSize, &maxOutputSize)) { |
|
299 | kwlist, &source, &sourceSize, &maxOutputSize)) { | |
305 | return NULL; |
|
300 | return NULL; | |
306 | } |
|
301 | } | |
307 |
|
302 | |||
308 | dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx)); |
|
|||
309 | if (!dctx) { |
|
|||
310 | PyErr_NoMemory(); |
|
|||
311 | return NULL; |
|
|||
312 | } |
|
|||
313 |
|
||||
314 | ZSTD_copyDCtx(dctx, self->refdctx); |
|
|||
315 |
|
||||
316 | if (self->dict) { |
|
303 | if (self->dict) { | |
317 | dictData = self->dict->dictData; |
|
304 | dictData = self->dict->dictData; | |
318 | dictSize = self->dict->dictSize; |
|
305 | dictSize = self->dict->dictSize; | |
@@ -320,12 +307,12 b' PyObject* Decompressor_decompress(ZstdDe' | |||||
320 |
|
307 | |||
321 | if (dictData && !self->ddict) { |
|
308 | if (dictData && !self->ddict) { | |
322 | Py_BEGIN_ALLOW_THREADS |
|
309 | Py_BEGIN_ALLOW_THREADS | |
323 | self->ddict = ZSTD_createDDict(dictData, dictSize); |
|
310 | self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); | |
324 | Py_END_ALLOW_THREADS |
|
311 | Py_END_ALLOW_THREADS | |
325 |
|
312 | |||
326 | if (!self->ddict) { |
|
313 | if (!self->ddict) { | |
327 | PyErr_SetString(ZstdError, "could not create decompression dict"); |
|
314 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |
328 | goto except; |
|
315 | return NULL; | |
329 | } |
|
316 | } | |
330 | } |
|
317 | } | |
331 |
|
318 | |||
@@ -335,7 +322,7 b' PyObject* Decompressor_decompress(ZstdDe' | |||||
335 | if (0 == maxOutputSize) { |
|
322 | if (0 == maxOutputSize) { | |
336 | PyErr_SetString(ZstdError, "input data invalid or missing content size " |
|
323 | PyErr_SetString(ZstdError, "input data invalid or missing content size " | |
337 | "in frame header"); |
|
324 | "in frame header"); | |
338 | goto except; |
|
325 | return NULL; | |
339 | } |
|
326 | } | |
340 | else { |
|
327 | else { | |
341 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); |
|
328 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); | |
@@ -348,45 +335,39 b' PyObject* Decompressor_decompress(ZstdDe' | |||||
348 | } |
|
335 | } | |
349 |
|
336 | |||
350 | if (!result) { |
|
337 | if (!result) { | |
351 | goto except; |
|
338 | return NULL; | |
352 | } |
|
339 | } | |
353 |
|
340 | |||
354 | Py_BEGIN_ALLOW_THREADS |
|
341 | Py_BEGIN_ALLOW_THREADS | |
355 | if (self->ddict) { |
|
342 | if (self->ddict) { | |
356 |
zresult = ZSTD_decompress_usingDDict(dctx, |
|
343 | zresult = ZSTD_decompress_usingDDict(self->dctx, | |
|
344 | PyBytes_AsString(result), destCapacity, | |||
357 | source, sourceSize, self->ddict); |
|
345 | source, sourceSize, self->ddict); | |
358 | } |
|
346 | } | |
359 | else { |
|
347 | else { | |
360 | zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize); |
|
348 | zresult = ZSTD_decompressDCtx(self->dctx, | |
|
349 | PyBytes_AsString(result), destCapacity, source, sourceSize); | |||
361 | } |
|
350 | } | |
362 | Py_END_ALLOW_THREADS |
|
351 | Py_END_ALLOW_THREADS | |
363 |
|
352 | |||
364 | if (ZSTD_isError(zresult)) { |
|
353 | if (ZSTD_isError(zresult)) { | |
365 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
|
354 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); | |
366 | goto except; |
|
355 | Py_DecRef(result); | |
|
356 | return NULL; | |||
367 | } |
|
357 | } | |
368 | else if (decompressedSize && zresult != decompressedSize) { |
|
358 | else if (decompressedSize && zresult != decompressedSize) { | |
369 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
|
359 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", | |
370 | zresult, decompressedSize); |
|
360 | zresult, decompressedSize); | |
371 | goto except; |
|
361 | Py_DecRef(result); | |
|
362 | return NULL; | |||
372 | } |
|
363 | } | |
373 | else if (zresult < destCapacity) { |
|
364 | else if (zresult < destCapacity) { | |
374 | if (_PyBytes_Resize(&result, zresult)) { |
|
365 | if (_PyBytes_Resize(&result, zresult)) { | |
375 | goto except; |
|
366 | Py_DecRef(result); | |
|
367 | return NULL; | |||
376 | } |
|
368 | } | |
377 | } |
|
369 | } | |
378 |
|
370 | |||
379 | goto finally; |
|
|||
380 |
|
||||
381 | except: |
|
|||
382 | Py_DecRef(result); |
|
|||
383 | result = NULL; |
|
|||
384 |
|
||||
385 | finally: |
|
|||
386 | if (dctx) { |
|
|||
387 | PyMem_FREE(dctx); |
|
|||
388 | } |
|
|||
389 |
|
||||
390 | return result; |
|
371 | return result; | |
391 | } |
|
372 | } | |
392 |
|
373 | |||
@@ -455,8 +436,8 b' static ZstdDecompressorIterator* Decompr' | |||||
455 | ZstdDecompressorIterator* result; |
|
436 | ZstdDecompressorIterator* result; | |
456 | size_t skipBytes = 0; |
|
437 | size_t skipBytes = 0; | |
457 |
|
438 | |||
458 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk", kwlist, |
|
439 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist, | |
459 | &inSize, &outSize, &skipBytes)) { |
|
440 | &reader, &inSize, &outSize, &skipBytes)) { | |
460 | return NULL; |
|
441 | return NULL; | |
461 | } |
|
442 | } | |
462 |
|
443 | |||
@@ -534,19 +515,14 b' static ZstdDecompressorIterator* Decompr' | |||||
534 | goto finally; |
|
515 | goto finally; | |
535 |
|
516 | |||
536 | except: |
|
517 | except: | |
537 |
|
|
518 | Py_CLEAR(result->reader); | |
538 | Py_DECREF(result->reader); |
|
|||
539 | result->reader = NULL; |
|
|||
540 | } |
|
|||
541 |
|
519 | |||
542 | if (result->buffer) { |
|
520 | if (result->buffer) { | |
543 | PyBuffer_Release(result->buffer); |
|
521 | PyBuffer_Release(result->buffer); | |
544 |
Py_ |
|
522 | Py_CLEAR(result->buffer); | |
545 | result->buffer = NULL; |
|
|||
546 | } |
|
523 | } | |
547 |
|
524 | |||
548 |
Py_ |
|
525 | Py_CLEAR(result); | |
549 | result = NULL; |
|
|||
550 |
|
526 | |||
551 | finally: |
|
527 | finally: | |
552 |
|
528 | |||
@@ -577,7 +553,8 b' static ZstdDecompressionWriter* Decompre' | |||||
577 | size_t outSize = ZSTD_DStreamOutSize(); |
|
553 | size_t outSize = ZSTD_DStreamOutSize(); | |
578 | ZstdDecompressionWriter* result; |
|
554 | ZstdDecompressionWriter* result; | |
579 |
|
555 | |||
580 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k", kwlist, |
|
556 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist, | |
|
557 | &writer, &outSize)) { | |||
581 | return NULL; |
|
558 | return NULL; | |
582 | } |
|
559 | } | |
583 |
|
560 | |||
@@ -605,6 +582,200 b' static ZstdDecompressionWriter* Decompre' | |||||
605 | return result; |
|
582 | return result; | |
606 | } |
|
583 | } | |
607 |
|
584 | |||
|
585 | PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__, | |||
|
586 | "Decompress a series of chunks using the content dictionary chaining technique\n" | |||
|
587 | ); | |||
|
588 | ||||
|
589 | static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) { | |||
|
590 | static char* kwlist[] = { | |||
|
591 | "frames", | |||
|
592 | NULL | |||
|
593 | }; | |||
|
594 | ||||
|
595 | PyObject* chunks; | |||
|
596 | Py_ssize_t chunksLen; | |||
|
597 | Py_ssize_t chunkIndex; | |||
|
598 | char parity = 0; | |||
|
599 | PyObject* chunk; | |||
|
600 | char* chunkData; | |||
|
601 | Py_ssize_t chunkSize; | |||
|
602 | ZSTD_DCtx* dctx = NULL; | |||
|
603 | size_t zresult; | |||
|
604 | ZSTD_frameParams frameParams; | |||
|
605 | void* buffer1 = NULL; | |||
|
606 | size_t buffer1Size = 0; | |||
|
607 | size_t buffer1ContentSize = 0; | |||
|
608 | void* buffer2 = NULL; | |||
|
609 | size_t buffer2Size = 0; | |||
|
610 | size_t buffer2ContentSize = 0; | |||
|
611 | void* destBuffer = NULL; | |||
|
612 | PyObject* result = NULL; | |||
|
613 | ||||
|
614 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", | |||
|
615 | kwlist, &PyList_Type, &chunks)) { | |||
|
616 | return NULL; | |||
|
617 | } | |||
|
618 | ||||
|
619 | chunksLen = PyList_Size(chunks); | |||
|
620 | if (!chunksLen) { | |||
|
621 | PyErr_SetString(PyExc_ValueError, "empty input chain"); | |||
|
622 | return NULL; | |||
|
623 | } | |||
|
624 | ||||
|
625 | /* The first chunk should not be using a dictionary. We handle it specially. */ | |||
|
626 | chunk = PyList_GetItem(chunks, 0); | |||
|
627 | if (!PyBytes_Check(chunk)) { | |||
|
628 | PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes"); | |||
|
629 | return NULL; | |||
|
630 | } | |||
|
631 | ||||
|
632 | /* We require that all chunks be zstd frames and that they have content size set. */ | |||
|
633 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |||
|
634 | zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); | |||
|
635 | if (ZSTD_isError(zresult)) { | |||
|
636 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); | |||
|
637 | return NULL; | |||
|
638 | } | |||
|
639 | else if (zresult) { | |||
|
640 | PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame"); | |||
|
641 | return NULL; | |||
|
642 | } | |||
|
643 | ||||
|
644 | if (0 == frameParams.frameContentSize) { | |||
|
645 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); | |||
|
646 | return NULL; | |||
|
647 | } | |||
|
648 | ||||
|
649 | dctx = ZSTD_createDCtx(); | |||
|
650 | if (!dctx) { | |||
|
651 | PyErr_NoMemory(); | |||
|
652 | goto finally; | |||
|
653 | } | |||
|
654 | ||||
|
655 | buffer1Size = frameParams.frameContentSize; | |||
|
656 | buffer1 = PyMem_Malloc(buffer1Size); | |||
|
657 | if (!buffer1) { | |||
|
658 | goto finally; | |||
|
659 | } | |||
|
660 | ||||
|
661 | Py_BEGIN_ALLOW_THREADS | |||
|
662 | zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize); | |||
|
663 | Py_END_ALLOW_THREADS | |||
|
664 | if (ZSTD_isError(zresult)) { | |||
|
665 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); | |||
|
666 | goto finally; | |||
|
667 | } | |||
|
668 | ||||
|
669 | buffer1ContentSize = zresult; | |||
|
670 | ||||
|
671 | /* Special case of a simple chain. */ | |||
|
672 | if (1 == chunksLen) { | |||
|
673 | result = PyBytes_FromStringAndSize(buffer1, buffer1Size); | |||
|
674 | goto finally; | |||
|
675 | } | |||
|
676 | ||||
|
677 | /* This should ideally look at next chunk. But this is slightly simpler. */ | |||
|
678 | buffer2Size = frameParams.frameContentSize; | |||
|
679 | buffer2 = PyMem_Malloc(buffer2Size); | |||
|
680 | if (!buffer2) { | |||
|
681 | goto finally; | |||
|
682 | } | |||
|
683 | ||||
|
684 | /* For each subsequent chunk, use the previous fulltext as a content dictionary. | |||
|
685 | Our strategy is to have 2 buffers. One holds the previous fulltext (to be | |||
|
686 | used as a content dictionary) and the other holds the new fulltext. The | |||
|
687 | buffers grow when needed but never decrease in size. This limits the | |||
|
688 | memory allocator overhead. | |||
|
689 | */ | |||
|
690 | for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) { | |||
|
691 | chunk = PyList_GetItem(chunks, chunkIndex); | |||
|
692 | if (!PyBytes_Check(chunk)) { | |||
|
693 | PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex); | |||
|
694 | goto finally; | |||
|
695 | } | |||
|
696 | ||||
|
697 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |||
|
698 | zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); | |||
|
699 | if (ZSTD_isError(zresult)) { | |||
|
700 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); | |||
|
701 | goto finally; | |||
|
702 | } | |||
|
703 | else if (zresult) { | |||
|
704 | PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex); | |||
|
705 | goto finally; | |||
|
706 | } | |||
|
707 | ||||
|
708 | if (0 == frameParams.frameContentSize) { | |||
|
709 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); | |||
|
710 | goto finally; | |||
|
711 | } | |||
|
712 | ||||
|
713 | parity = chunkIndex % 2; | |||
|
714 | ||||
|
715 | /* This could definitely be abstracted to reduce code duplication. */ | |||
|
716 | if (parity) { | |||
|
717 | /* Resize destination buffer to hold larger content. */ | |||
|
718 | if (buffer2Size < frameParams.frameContentSize) { | |||
|
719 | buffer2Size = frameParams.frameContentSize; | |||
|
720 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); | |||
|
721 | if (!destBuffer) { | |||
|
722 | goto finally; | |||
|
723 | } | |||
|
724 | buffer2 = destBuffer; | |||
|
725 | } | |||
|
726 | ||||
|
727 | Py_BEGIN_ALLOW_THREADS | |||
|
728 | zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, | |||
|
729 | chunkData, chunkSize, buffer1, buffer1ContentSize); | |||
|
730 | Py_END_ALLOW_THREADS | |||
|
731 | if (ZSTD_isError(zresult)) { | |||
|
732 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |||
|
733 | chunkIndex, ZSTD_getErrorName(zresult)); | |||
|
734 | goto finally; | |||
|
735 | } | |||
|
736 | buffer2ContentSize = zresult; | |||
|
737 | } | |||
|
738 | else { | |||
|
739 | if (buffer1Size < frameParams.frameContentSize) { | |||
|
740 | buffer1Size = frameParams.frameContentSize; | |||
|
741 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); | |||
|
742 | if (!destBuffer) { | |||
|
743 | goto finally; | |||
|
744 | } | |||
|
745 | buffer1 = destBuffer; | |||
|
746 | } | |||
|
747 | ||||
|
748 | Py_BEGIN_ALLOW_THREADS | |||
|
749 | zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, | |||
|
750 | chunkData, chunkSize, buffer2, buffer2ContentSize); | |||
|
751 | Py_END_ALLOW_THREADS | |||
|
752 | if (ZSTD_isError(zresult)) { | |||
|
753 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |||
|
754 | chunkIndex, ZSTD_getErrorName(zresult)); | |||
|
755 | goto finally; | |||
|
756 | } | |||
|
757 | buffer1ContentSize = zresult; | |||
|
758 | } | |||
|
759 | } | |||
|
760 | ||||
|
761 | result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1, | |||
|
762 | parity ? buffer2ContentSize : buffer1ContentSize); | |||
|
763 | ||||
|
764 | finally: | |||
|
765 | if (buffer2) { | |||
|
766 | PyMem_Free(buffer2); | |||
|
767 | } | |||
|
768 | if (buffer1) { | |||
|
769 | PyMem_Free(buffer1); | |||
|
770 | } | |||
|
771 | ||||
|
772 | if (dctx) { | |||
|
773 | ZSTD_freeDCtx(dctx); | |||
|
774 | } | |||
|
775 | ||||
|
776 | return result; | |||
|
777 | } | |||
|
778 | ||||
608 | static PyMethodDef Decompressor_methods[] = { |
|
779 | static PyMethodDef Decompressor_methods[] = { | |
609 | { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS, |
|
780 | { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS, | |
610 | Decompressor_copy_stream__doc__ }, |
|
781 | Decompressor_copy_stream__doc__ }, | |
@@ -616,6 +787,8 b' static PyMethodDef Decompressor_methods[' | |||||
616 | Decompressor_read_from__doc__ }, |
|
787 | Decompressor_read_from__doc__ }, | |
617 | { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, |
|
788 | { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, | |
618 | Decompressor_write_to__doc__ }, |
|
789 | Decompressor_write_to__doc__ }, | |
|
790 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, | |||
|
791 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, | |||
619 | { NULL, NULL } |
|
792 | { NULL, NULL } | |
620 | }; |
|
793 | }; | |
621 |
|
794 |
@@ -18,8 +18,8 b' static PyObject* DictParameters_new(PyTy' | |||||
18 | unsigned notificationLevel; |
|
18 | unsigned notificationLevel; | |
19 | unsigned dictID; |
|
19 | unsigned dictID; | |
20 |
|
20 | |||
21 | if (!PyArg_ParseTuple(args, "IiII", &selectivityLevel, &compressionLevel, |
|
21 | if (!PyArg_ParseTuple(args, "IiII:DictParameters", | |
22 | ¬ificationLevel, &dictID)) { |
|
22 | &selectivityLevel, &compressionLevel, ¬ificationLevel, &dictID)) { | |
23 | return NULL; |
|
23 | return NULL; | |
24 | } |
|
24 | } | |
25 |
|
25 | |||
@@ -40,6 +40,22 b' static void DictParameters_dealloc(PyObj' | |||||
40 | PyObject_Del(self); |
|
40 | PyObject_Del(self); | |
41 | } |
|
41 | } | |
42 |
|
42 | |||
|
43 | static PyMemberDef DictParameters_members[] = { | |||
|
44 | { "selectivity_level", T_UINT, | |||
|
45 | offsetof(DictParametersObject, selectivityLevel), READONLY, | |||
|
46 | "selectivity level" }, | |||
|
47 | { "compression_level", T_INT, | |||
|
48 | offsetof(DictParametersObject, compressionLevel), READONLY, | |||
|
49 | "compression level" }, | |||
|
50 | { "notification_level", T_UINT, | |||
|
51 | offsetof(DictParametersObject, notificationLevel), READONLY, | |||
|
52 | "notification level" }, | |||
|
53 | { "dict_id", T_UINT, | |||
|
54 | offsetof(DictParametersObject, dictID), READONLY, | |||
|
55 | "dictionary ID" }, | |||
|
56 | { NULL } | |||
|
57 | }; | |||
|
58 | ||||
43 | static Py_ssize_t DictParameters_length(PyObject* self) { |
|
59 | static Py_ssize_t DictParameters_length(PyObject* self) { | |
44 | return 4; |
|
60 | return 4; | |
45 | } |
|
61 | } | |
@@ -102,7 +118,7 b' PyTypeObject DictParametersType = {' | |||||
102 | 0, /* tp_iter */ |
|
118 | 0, /* tp_iter */ | |
103 | 0, /* tp_iternext */ |
|
119 | 0, /* tp_iternext */ | |
104 | 0, /* tp_methods */ |
|
120 | 0, /* tp_methods */ | |
105 | 0, /* tp_members */ |
|
121 | DictParameters_members, /* tp_members */ | |
106 | 0, /* tp_getset */ |
|
122 | 0, /* tp_getset */ | |
107 | 0, /* tp_base */ |
|
123 | 0, /* tp_base */ | |
108 | 0, /* tp_dict */ |
|
124 | 0, /* tp_dict */ |
@@ -8,6 +8,7 b'' | |||||
8 |
|
8 | |||
9 | #define PY_SSIZE_T_CLEAN |
|
9 | #define PY_SSIZE_T_CLEAN | |
10 | #include <Python.h> |
|
10 | #include <Python.h> | |
|
11 | #include "structmember.h" | |||
11 |
|
12 | |||
12 | #define ZSTD_STATIC_LINKING_ONLY |
|
13 | #define ZSTD_STATIC_LINKING_ONLY | |
13 | #define ZDICT_STATIC_LINKING_ONLY |
|
14 | #define ZDICT_STATIC_LINKING_ONLY | |
@@ -15,7 +16,7 b'' | |||||
15 | #include "zstd.h" |
|
16 | #include "zstd.h" | |
16 | #include "zdict.h" |
|
17 | #include "zdict.h" | |
17 |
|
18 | |||
18 |
#define PYTHON_ZSTANDARD_VERSION "0. |
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.7.0" | |
19 |
|
20 | |||
20 | typedef enum { |
|
21 | typedef enum { | |
21 | compressorobj_flush_finish, |
|
22 | compressorobj_flush_finish, | |
@@ -37,6 +38,16 b' extern PyTypeObject CompressionParameter' | |||||
37 |
|
38 | |||
38 | typedef struct { |
|
39 | typedef struct { | |
39 | PyObject_HEAD |
|
40 | PyObject_HEAD | |
|
41 | unsigned long long frameContentSize; | |||
|
42 | unsigned windowSize; | |||
|
43 | unsigned dictID; | |||
|
44 | char checksumFlag; | |||
|
45 | } FrameParametersObject; | |||
|
46 | ||||
|
47 | extern PyTypeObject FrameParametersType; | |||
|
48 | ||||
|
49 | typedef struct { | |||
|
50 | PyObject_HEAD | |||
40 | unsigned selectivityLevel; |
|
51 | unsigned selectivityLevel; | |
41 | int compressionLevel; |
|
52 | int compressionLevel; | |
42 | unsigned notificationLevel; |
|
53 | unsigned notificationLevel; | |
@@ -115,7 +126,7 b' extern PyTypeObject ZstdCompressorIterat' | |||||
115 | typedef struct { |
|
126 | typedef struct { | |
116 | PyObject_HEAD |
|
127 | PyObject_HEAD | |
117 |
|
128 | |||
118 |
ZSTD_DCtx* |
|
129 | ZSTD_DCtx* dctx; | |
119 |
|
130 | |||
120 | ZstdCompressionDict* dict; |
|
131 | ZstdCompressionDict* dict; | |
121 | ZSTD_DDict* ddict; |
|
132 | ZSTD_DDict* ddict; | |
@@ -172,6 +183,7 b' typedef struct {' | |||||
172 |
|
183 | |||
173 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams); |
|
184 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams); | |
174 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args); |
|
185 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args); | |
|
186 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args); | |||
175 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args); |
|
187 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args); | |
176 | ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize); |
|
188 | ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize); | |
177 | ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor); |
|
189 | ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor); |
@@ -9,6 +9,7 b' from __future__ import absolute_import' | |||||
9 | import cffi |
|
9 | import cffi | |
10 | import distutils.ccompiler |
|
10 | import distutils.ccompiler | |
11 | import os |
|
11 | import os | |
|
12 | import re | |||
12 | import subprocess |
|
13 | import subprocess | |
13 | import tempfile |
|
14 | import tempfile | |
14 |
|
15 | |||
@@ -19,6 +20,8 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
19 | 'common/entropy_common.c', |
|
20 | 'common/entropy_common.c', | |
20 | 'common/error_private.c', |
|
21 | 'common/error_private.c', | |
21 | 'common/fse_decompress.c', |
|
22 | 'common/fse_decompress.c', | |
|
23 | 'common/pool.c', | |||
|
24 | 'common/threading.c', | |||
22 | 'common/xxhash.c', |
|
25 | 'common/xxhash.c', | |
23 | 'common/zstd_common.c', |
|
26 | 'common/zstd_common.c', | |
24 | 'compress/fse_compress.c', |
|
27 | 'compress/fse_compress.c', | |
@@ -26,10 +29,17 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
26 | 'compress/zstd_compress.c', |
|
29 | 'compress/zstd_compress.c', | |
27 | 'decompress/huf_decompress.c', |
|
30 | 'decompress/huf_decompress.c', | |
28 | 'decompress/zstd_decompress.c', |
|
31 | 'decompress/zstd_decompress.c', | |
|
32 | 'dictBuilder/cover.c', | |||
29 | 'dictBuilder/divsufsort.c', |
|
33 | 'dictBuilder/divsufsort.c', | |
30 | 'dictBuilder/zdict.c', |
|
34 | 'dictBuilder/zdict.c', | |
31 | )] |
|
35 | )] | |
32 |
|
36 | |||
|
37 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( | |||
|
38 | ('zstd.h',), | |||
|
39 | ('common', 'pool.h'), | |||
|
40 | ('dictBuilder', 'zdict.h'), | |||
|
41 | )] | |||
|
42 | ||||
33 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( |
|
43 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( | |
34 | 'zstd', |
|
44 | 'zstd', | |
35 | 'zstd/common', |
|
45 | 'zstd/common', | |
@@ -53,56 +63,92 b" if compiler.compiler_type == 'unix':" | |||||
53 | args.extend([ |
|
63 | args.extend([ | |
54 | '-E', |
|
64 | '-E', | |
55 | '-DZSTD_STATIC_LINKING_ONLY', |
|
65 | '-DZSTD_STATIC_LINKING_ONLY', | |
|
66 | '-DZDICT_STATIC_LINKING_ONLY', | |||
56 | ]) |
|
67 | ]) | |
57 | elif compiler.compiler_type == 'msvc': |
|
68 | elif compiler.compiler_type == 'msvc': | |
58 | args = [compiler.cc] |
|
69 | args = [compiler.cc] | |
59 | args.extend([ |
|
70 | args.extend([ | |
60 | '/EP', |
|
71 | '/EP', | |
61 | '/DZSTD_STATIC_LINKING_ONLY', |
|
72 | '/DZSTD_STATIC_LINKING_ONLY', | |
|
73 | '/DZDICT_STATIC_LINKING_ONLY', | |||
62 | ]) |
|
74 | ]) | |
63 | else: |
|
75 | else: | |
64 | raise Exception('unsupported compiler type: %s' % compiler.compiler_type) |
|
76 | raise Exception('unsupported compiler type: %s' % compiler.compiler_type) | |
65 |
|
77 | |||
66 | # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate. |
|
78 | def preprocess(path): | |
67 | # This can lead to duplicate declarations. So we strip this include from the |
|
79 | # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate. | |
68 | # preprocessor invocation. |
|
80 | # This can lead to duplicate declarations. So we strip this include from the | |
|
81 | # preprocessor invocation. | |||
|
82 | with open(path, 'rb') as fh: | |||
|
83 | lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')] | |||
69 |
|
84 | |||
70 | with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh: |
|
85 | fd, input_file = tempfile.mkstemp(suffix='.h') | |
71 | lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')] |
|
86 | os.write(fd, b''.join(lines)) | |
72 |
|
87 | os.close(fd) | ||
73 | fd, input_file = tempfile.mkstemp(suffix='.h') |
|
|||
74 | os.write(fd, b''.join(lines)) |
|
|||
75 | os.close(fd) |
|
|||
76 |
|
88 | |||
77 | args.append(input_file) |
|
89 | try: | |
|
90 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE) | |||
|
91 | output = process.communicate()[0] | |||
|
92 | ret = process.poll() | |||
|
93 | if ret: | |||
|
94 | raise Exception('preprocessor exited with error') | |||
78 |
|
95 | |||
79 | try: |
|
96 | return output | |
80 | process = subprocess.Popen(args, stdout=subprocess.PIPE) |
|
97 | finally: | |
81 | output = process.communicate()[0] |
|
98 | os.unlink(input_file) | |
82 | ret = process.poll() |
|
|||
83 | if ret: |
|
|||
84 | raise Exception('preprocessor exited with error') |
|
|||
85 | finally: |
|
|||
86 | os.unlink(input_file) |
|
|||
87 |
|
99 | |||
88 | def normalize_output(): |
|
100 | ||
|
101 | def normalize_output(output): | |||
89 | lines = [] |
|
102 | lines = [] | |
90 | for line in output.splitlines(): |
|
103 | for line in output.splitlines(): | |
91 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. |
|
104 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. | |
92 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): |
|
105 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): | |
93 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] |
|
106 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] | |
94 |
|
107 | |||
|
108 | if line.startswith(b'__attribute__((deprecated('): | |||
|
109 | continue | |||
|
110 | elif b'__declspec(deprecated(' in line: | |||
|
111 | continue | |||
|
112 | ||||
95 | lines.append(line) |
|
113 | lines.append(line) | |
96 |
|
114 | |||
97 | return b'\n'.join(lines) |
|
115 | return b'\n'.join(lines) | |
98 |
|
116 | |||
|
117 | ||||
99 | ffi = cffi.FFI() |
|
118 | ffi = cffi.FFI() | |
100 | ffi.set_source('_zstd_cffi', ''' |
|
119 | ffi.set_source('_zstd_cffi', ''' | |
|
120 | #include "mem.h" | |||
101 | #define ZSTD_STATIC_LINKING_ONLY |
|
121 | #define ZSTD_STATIC_LINKING_ONLY | |
102 | #include "zstd.h" |
|
122 | #include "zstd.h" | |
|
123 | #define ZDICT_STATIC_LINKING_ONLY | |||
|
124 | #include "pool.h" | |||
|
125 | #include "zdict.h" | |||
103 | ''', sources=SOURCES, include_dirs=INCLUDE_DIRS) |
|
126 | ''', sources=SOURCES, include_dirs=INCLUDE_DIRS) | |
104 |
|
127 | |||
105 | ffi.cdef(normalize_output().decode('latin1')) |
|
128 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') | |
|
129 | ||||
|
130 | sources = [] | |||
|
131 | ||||
|
132 | for header in HEADERS: | |||
|
133 | preprocessed = preprocess(header) | |||
|
134 | sources.append(normalize_output(preprocessed)) | |||
|
135 | ||||
|
136 | # Do another pass over source and find constants that were preprocessed | |||
|
137 | # away. | |||
|
138 | with open(header, 'rb') as fh: | |||
|
139 | for line in fh: | |||
|
140 | line = line.strip() | |||
|
141 | m = DEFINE.match(line) | |||
|
142 | if not m: | |||
|
143 | continue | |||
|
144 | ||||
|
145 | # The parser doesn't like some constants with complex values. | |||
|
146 | if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'): | |||
|
147 | continue | |||
|
148 | ||||
|
149 | sources.append(m.group(0) + b' ...') | |||
|
150 | ||||
|
151 | ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources)) | |||
106 |
|
152 | |||
107 | if __name__ == '__main__': |
|
153 | if __name__ == '__main__': | |
108 | ffi.compile() |
|
154 | ffi.compile() |
@@ -62,6 +62,7 b' setup(' | |||||
62 | 'Programming Language :: Python :: 3.3', |
|
62 | 'Programming Language :: Python :: 3.3', | |
63 | 'Programming Language :: Python :: 3.4', |
|
63 | 'Programming Language :: Python :: 3.4', | |
64 | 'Programming Language :: Python :: 3.5', |
|
64 | 'Programming Language :: Python :: 3.5', | |
|
65 | 'Programming Language :: Python :: 3.6', | |||
65 | ], |
|
66 | ], | |
66 | keywords='zstandard zstd compression', |
|
67 | keywords='zstandard zstd compression', | |
67 | ext_modules=extensions, |
|
68 | ext_modules=extensions, |
@@ -12,6 +12,8 b" zstd_sources = ['zstd/%s' % p for p in (" | |||||
12 | 'common/entropy_common.c', |
|
12 | 'common/entropy_common.c', | |
13 | 'common/error_private.c', |
|
13 | 'common/error_private.c', | |
14 | 'common/fse_decompress.c', |
|
14 | 'common/fse_decompress.c', | |
|
15 | 'common/pool.c', | |||
|
16 | 'common/threading.c', | |||
15 | 'common/xxhash.c', |
|
17 | 'common/xxhash.c', | |
16 | 'common/zstd_common.c', |
|
18 | 'common/zstd_common.c', | |
17 | 'compress/fse_compress.c', |
|
19 | 'compress/fse_compress.c', | |
@@ -19,11 +21,13 b" zstd_sources = ['zstd/%s' % p for p in (" | |||||
19 | 'compress/zstd_compress.c', |
|
21 | 'compress/zstd_compress.c', | |
20 | 'decompress/huf_decompress.c', |
|
22 | 'decompress/huf_decompress.c', | |
21 | 'decompress/zstd_decompress.c', |
|
23 | 'decompress/zstd_decompress.c', | |
|
24 | 'dictBuilder/cover.c', | |||
22 | 'dictBuilder/divsufsort.c', |
|
25 | 'dictBuilder/divsufsort.c', | |
23 | 'dictBuilder/zdict.c', |
|
26 | 'dictBuilder/zdict.c', | |
24 | )] |
|
27 | )] | |
25 |
|
28 | |||
26 | zstd_sources_legacy = ['zstd/%s' % p for p in ( |
|
29 | zstd_sources_legacy = ['zstd/%s' % p for p in ( | |
|
30 | 'deprecated/zbuff_common.c', | |||
27 | 'deprecated/zbuff_compress.c', |
|
31 | 'deprecated/zbuff_compress.c', | |
28 | 'deprecated/zbuff_decompress.c', |
|
32 | 'deprecated/zbuff_decompress.c', | |
29 | 'legacy/zstd_v01.c', |
|
33 | 'legacy/zstd_v01.c', | |
@@ -63,6 +67,7 b' ext_sources = [' | |||||
63 | 'c-ext/decompressoriterator.c', |
|
67 | 'c-ext/decompressoriterator.c', | |
64 | 'c-ext/decompressionwriter.c', |
|
68 | 'c-ext/decompressionwriter.c', | |
65 | 'c-ext/dictparams.c', |
|
69 | 'c-ext/dictparams.c', | |
|
70 | 'c-ext/frameparams.c', | |||
66 | ] |
|
71 | ] | |
67 |
|
72 | |||
68 | zstd_depends = [ |
|
73 | zstd_depends = [ |
@@ -1,4 +1,50 b'' | |||||
|
1 | import inspect | |||
1 | import io |
|
2 | import io | |
|
3 | import types | |||
|
4 | ||||
|
5 | ||||
|
6 | def make_cffi(cls): | |||
|
7 | """Decorator to add CFFI versions of each test method.""" | |||
|
8 | ||||
|
9 | try: | |||
|
10 | import zstd_cffi | |||
|
11 | except ImportError: | |||
|
12 | return cls | |||
|
13 | ||||
|
14 | # If CFFI version is available, dynamically construct test methods | |||
|
15 | # that use it. | |||
|
16 | ||||
|
17 | for attr in dir(cls): | |||
|
18 | fn = getattr(cls, attr) | |||
|
19 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): | |||
|
20 | continue | |||
|
21 | ||||
|
22 | if not fn.__name__.startswith('test_'): | |||
|
23 | continue | |||
|
24 | ||||
|
25 | name = '%s_cffi' % fn.__name__ | |||
|
26 | ||||
|
27 | # Replace the "zstd" symbol with the CFFI module instance. Then copy | |||
|
28 | # the function object and install it in a new attribute. | |||
|
29 | if isinstance(fn, types.FunctionType): | |||
|
30 | globs = dict(fn.__globals__) | |||
|
31 | globs['zstd'] = zstd_cffi | |||
|
32 | new_fn = types.FunctionType(fn.__code__, globs, name, | |||
|
33 | fn.__defaults__, fn.__closure__) | |||
|
34 | new_method = new_fn | |||
|
35 | else: | |||
|
36 | globs = dict(fn.__func__.func_globals) | |||
|
37 | globs['zstd'] = zstd_cffi | |||
|
38 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, | |||
|
39 | fn.__func__.func_defaults, | |||
|
40 | fn.__func__.func_closure) | |||
|
41 | new_method = types.UnboundMethodType(new_fn, fn.im_self, | |||
|
42 | fn.im_class) | |||
|
43 | ||||
|
44 | setattr(cls, name, new_method) | |||
|
45 | ||||
|
46 | return cls | |||
|
47 | ||||
2 |
|
48 | |||
3 | class OpCountingBytesIO(io.BytesIO): |
|
49 | class OpCountingBytesIO(io.BytesIO): | |
4 | def __init__(self, *args, **kwargs): |
|
50 | def __init__(self, *args, **kwargs): |
@@ -10,7 +10,10 b' except ImportError:' | |||||
10 |
|
10 | |||
11 | import zstd |
|
11 | import zstd | |
12 |
|
12 | |||
13 |
from .common import |
|
13 | from .common import ( | |
|
14 | make_cffi, | |||
|
15 | OpCountingBytesIO, | |||
|
16 | ) | |||
14 |
|
17 | |||
15 |
|
18 | |||
16 | if sys.version_info[0] >= 3: |
|
19 | if sys.version_info[0] >= 3: | |
@@ -19,6 +22,7 b' else:' | |||||
19 | next = lambda it: it.next() |
|
22 | next = lambda it: it.next() | |
20 |
|
23 | |||
21 |
|
24 | |||
|
25 | @make_cffi | |||
22 | class TestCompressor(unittest.TestCase): |
|
26 | class TestCompressor(unittest.TestCase): | |
23 | def test_level_bounds(self): |
|
27 | def test_level_bounds(self): | |
24 | with self.assertRaises(ValueError): |
|
28 | with self.assertRaises(ValueError): | |
@@ -28,18 +32,17 b' class TestCompressor(unittest.TestCase):' | |||||
28 | zstd.ZstdCompressor(level=23) |
|
32 | zstd.ZstdCompressor(level=23) | |
29 |
|
33 | |||
30 |
|
34 | |||
|
35 | @make_cffi | |||
31 | class TestCompressor_compress(unittest.TestCase): |
|
36 | class TestCompressor_compress(unittest.TestCase): | |
32 | def test_compress_empty(self): |
|
37 | def test_compress_empty(self): | |
33 | cctx = zstd.ZstdCompressor(level=1) |
|
38 | cctx = zstd.ZstdCompressor(level=1) | |
34 | cctx.compress(b'') |
|
39 | result = cctx.compress(b'') | |
35 |
|
40 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | ||
36 | cctx = zstd.ZstdCompressor(level=22) |
|
41 | params = zstd.get_frame_parameters(result) | |
37 | cctx.compress(b'') |
|
42 | self.assertEqual(params.content_size, 0) | |
38 |
|
43 | self.assertEqual(params.window_size, 524288) | ||
39 | def test_compress_empty(self): |
|
44 | self.assertEqual(params.dict_id, 0) | |
40 | cctx = zstd.ZstdCompressor(level=1) |
|
45 | self.assertFalse(params.has_checksum, 0) | |
41 | self.assertEqual(cctx.compress(b''), |
|
|||
42 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
|||
43 |
|
46 | |||
44 | # TODO should be temporary until https://github.com/facebook/zstd/issues/506 |
|
47 | # TODO should be temporary until https://github.com/facebook/zstd/issues/506 | |
45 | # is fixed. |
|
48 | # is fixed. | |
@@ -59,6 +62,13 b' class TestCompressor_compress(unittest.T' | |||||
59 | self.assertEqual(len(result), 999) |
|
62 | self.assertEqual(len(result), 999) | |
60 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
63 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
61 |
|
64 | |||
|
65 | # This matches the test for read_from() below. | |||
|
66 | cctx = zstd.ZstdCompressor(level=1) | |||
|
67 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') | |||
|
68 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' | |||
|
69 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' | |||
|
70 | b'\x02\x09\x00\x00\x6f') | |||
|
71 | ||||
62 | def test_write_checksum(self): |
|
72 | def test_write_checksum(self): | |
63 | cctx = zstd.ZstdCompressor(level=1) |
|
73 | cctx = zstd.ZstdCompressor(level=1) | |
64 | no_checksum = cctx.compress(b'foobar') |
|
74 | no_checksum = cctx.compress(b'foobar') | |
@@ -67,6 +77,12 b' class TestCompressor_compress(unittest.T' | |||||
67 |
|
77 | |||
68 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
78 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) | |
69 |
|
79 | |||
|
80 | no_params = zstd.get_frame_parameters(no_checksum) | |||
|
81 | with_params = zstd.get_frame_parameters(with_checksum) | |||
|
82 | ||||
|
83 | self.assertFalse(no_params.has_checksum) | |||
|
84 | self.assertTrue(with_params.has_checksum) | |||
|
85 | ||||
70 | def test_write_content_size(self): |
|
86 | def test_write_content_size(self): | |
71 | cctx = zstd.ZstdCompressor(level=1) |
|
87 | cctx = zstd.ZstdCompressor(level=1) | |
72 | no_size = cctx.compress(b'foobar' * 256) |
|
88 | no_size = cctx.compress(b'foobar' * 256) | |
@@ -75,6 +91,11 b' class TestCompressor_compress(unittest.T' | |||||
75 |
|
91 | |||
76 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
92 | self.assertEqual(len(with_size), len(no_size) + 1) | |
77 |
|
93 | |||
|
94 | no_params = zstd.get_frame_parameters(no_size) | |||
|
95 | with_params = zstd.get_frame_parameters(with_size) | |||
|
96 | self.assertEqual(no_params.content_size, 0) | |||
|
97 | self.assertEqual(with_params.content_size, 1536) | |||
|
98 | ||||
78 | def test_no_dict_id(self): |
|
99 | def test_no_dict_id(self): | |
79 | samples = [] |
|
100 | samples = [] | |
80 | for i in range(128): |
|
101 | for i in range(128): | |
@@ -92,6 +113,11 b' class TestCompressor_compress(unittest.T' | |||||
92 |
|
113 | |||
93 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
|
114 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) | |
94 |
|
115 | |||
|
116 | no_params = zstd.get_frame_parameters(no_dict_id) | |||
|
117 | with_params = zstd.get_frame_parameters(with_dict_id) | |||
|
118 | self.assertEqual(no_params.dict_id, 0) | |||
|
119 | self.assertEqual(with_params.dict_id, 1584102229) | |||
|
120 | ||||
95 | def test_compress_dict_multiple(self): |
|
121 | def test_compress_dict_multiple(self): | |
96 | samples = [] |
|
122 | samples = [] | |
97 | for i in range(128): |
|
123 | for i in range(128): | |
@@ -107,6 +133,7 b' class TestCompressor_compress(unittest.T' | |||||
107 | cctx.compress(b'foo bar foobar foo bar foobar') |
|
133 | cctx.compress(b'foo bar foobar foo bar foobar') | |
108 |
|
134 | |||
109 |
|
135 | |||
|
136 | @make_cffi | |||
110 | class TestCompressor_compressobj(unittest.TestCase): |
|
137 | class TestCompressor_compressobj(unittest.TestCase): | |
111 | def test_compressobj_empty(self): |
|
138 | def test_compressobj_empty(self): | |
112 | cctx = zstd.ZstdCompressor(level=1) |
|
139 | cctx = zstd.ZstdCompressor(level=1) | |
@@ -127,6 +154,12 b' class TestCompressor_compressobj(unittes' | |||||
127 | self.assertEqual(len(result), 999) |
|
154 | self.assertEqual(len(result), 999) | |
128 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
155 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
129 |
|
156 | |||
|
157 | params = zstd.get_frame_parameters(result) | |||
|
158 | self.assertEqual(params.content_size, 0) | |||
|
159 | self.assertEqual(params.window_size, 1048576) | |||
|
160 | self.assertEqual(params.dict_id, 0) | |||
|
161 | self.assertFalse(params.has_checksum) | |||
|
162 | ||||
130 | def test_write_checksum(self): |
|
163 | def test_write_checksum(self): | |
131 | cctx = zstd.ZstdCompressor(level=1) |
|
164 | cctx = zstd.ZstdCompressor(level=1) | |
132 | cobj = cctx.compressobj() |
|
165 | cobj = cctx.compressobj() | |
@@ -135,6 +168,15 b' class TestCompressor_compressobj(unittes' | |||||
135 | cobj = cctx.compressobj() |
|
168 | cobj = cctx.compressobj() | |
136 | with_checksum = cobj.compress(b'foobar') + cobj.flush() |
|
169 | with_checksum = cobj.compress(b'foobar') + cobj.flush() | |
137 |
|
170 | |||
|
171 | no_params = zstd.get_frame_parameters(no_checksum) | |||
|
172 | with_params = zstd.get_frame_parameters(with_checksum) | |||
|
173 | self.assertEqual(no_params.content_size, 0) | |||
|
174 | self.assertEqual(with_params.content_size, 0) | |||
|
175 | self.assertEqual(no_params.dict_id, 0) | |||
|
176 | self.assertEqual(with_params.dict_id, 0) | |||
|
177 | self.assertFalse(no_params.has_checksum) | |||
|
178 | self.assertTrue(with_params.has_checksum) | |||
|
179 | ||||
138 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
180 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) | |
139 |
|
181 | |||
140 | def test_write_content_size(self): |
|
182 | def test_write_content_size(self): | |
@@ -145,6 +187,15 b' class TestCompressor_compressobj(unittes' | |||||
145 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
187 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |
146 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
188 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |
147 |
|
189 | |||
|
190 | no_params = zstd.get_frame_parameters(no_size) | |||
|
191 | with_params = zstd.get_frame_parameters(with_size) | |||
|
192 | self.assertEqual(no_params.content_size, 0) | |||
|
193 | self.assertEqual(with_params.content_size, 1536) | |||
|
194 | self.assertEqual(no_params.dict_id, 0) | |||
|
195 | self.assertEqual(with_params.dict_id, 0) | |||
|
196 | self.assertFalse(no_params.has_checksum) | |||
|
197 | self.assertFalse(with_params.has_checksum) | |||
|
198 | ||||
148 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
199 | self.assertEqual(len(with_size), len(no_size) + 1) | |
149 |
|
200 | |||
150 | def test_compress_after_finished(self): |
|
201 | def test_compress_after_finished(self): | |
@@ -187,6 +238,7 b' class TestCompressor_compressobj(unittes' | |||||
187 | self.assertEqual(header, b'\x01\x00\x00') |
|
238 | self.assertEqual(header, b'\x01\x00\x00') | |
188 |
|
239 | |||
189 |
|
240 | |||
|
241 | @make_cffi | |||
190 | class TestCompressor_copy_stream(unittest.TestCase): |
|
242 | class TestCompressor_copy_stream(unittest.TestCase): | |
191 | def test_no_read(self): |
|
243 | def test_no_read(self): | |
192 | source = object() |
|
244 | source = object() | |
@@ -229,6 +281,12 b' class TestCompressor_copy_stream(unittes' | |||||
229 | self.assertEqual(r, 255 * 16384) |
|
281 | self.assertEqual(r, 255 * 16384) | |
230 | self.assertEqual(w, 999) |
|
282 | self.assertEqual(w, 999) | |
231 |
|
283 | |||
|
284 | params = zstd.get_frame_parameters(dest.getvalue()) | |||
|
285 | self.assertEqual(params.content_size, 0) | |||
|
286 | self.assertEqual(params.window_size, 1048576) | |||
|
287 | self.assertEqual(params.dict_id, 0) | |||
|
288 | self.assertFalse(params.has_checksum) | |||
|
289 | ||||
232 | def test_write_checksum(self): |
|
290 | def test_write_checksum(self): | |
233 | source = io.BytesIO(b'foobar') |
|
291 | source = io.BytesIO(b'foobar') | |
234 | no_checksum = io.BytesIO() |
|
292 | no_checksum = io.BytesIO() | |
@@ -244,6 +302,15 b' class TestCompressor_copy_stream(unittes' | |||||
244 | self.assertEqual(len(with_checksum.getvalue()), |
|
302 | self.assertEqual(len(with_checksum.getvalue()), | |
245 | len(no_checksum.getvalue()) + 4) |
|
303 | len(no_checksum.getvalue()) + 4) | |
246 |
|
304 | |||
|
305 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |||
|
306 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |||
|
307 | self.assertEqual(no_params.content_size, 0) | |||
|
308 | self.assertEqual(with_params.content_size, 0) | |||
|
309 | self.assertEqual(no_params.dict_id, 0) | |||
|
310 | self.assertEqual(with_params.dict_id, 0) | |||
|
311 | self.assertFalse(no_params.has_checksum) | |||
|
312 | self.assertTrue(with_params.has_checksum) | |||
|
313 | ||||
247 | def test_write_content_size(self): |
|
314 | def test_write_content_size(self): | |
248 | source = io.BytesIO(b'foobar' * 256) |
|
315 | source = io.BytesIO(b'foobar' * 256) | |
249 | no_size = io.BytesIO() |
|
316 | no_size = io.BytesIO() | |
@@ -268,6 +335,15 b' class TestCompressor_copy_stream(unittes' | |||||
268 | self.assertEqual(len(with_size.getvalue()), |
|
335 | self.assertEqual(len(with_size.getvalue()), | |
269 | len(no_size.getvalue()) + 1) |
|
336 | len(no_size.getvalue()) + 1) | |
270 |
|
337 | |||
|
338 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |||
|
339 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |||
|
340 | self.assertEqual(no_params.content_size, 0) | |||
|
341 | self.assertEqual(with_params.content_size, 1536) | |||
|
342 | self.assertEqual(no_params.dict_id, 0) | |||
|
343 | self.assertEqual(with_params.dict_id, 0) | |||
|
344 | self.assertFalse(no_params.has_checksum) | |||
|
345 | self.assertFalse(with_params.has_checksum) | |||
|
346 | ||||
271 | def test_read_write_size(self): |
|
347 | def test_read_write_size(self): | |
272 | source = OpCountingBytesIO(b'foobarfoobar') |
|
348 | source = OpCountingBytesIO(b'foobarfoobar') | |
273 | dest = OpCountingBytesIO() |
|
349 | dest = OpCountingBytesIO() | |
@@ -288,18 +364,25 b' def compress(data, level):' | |||||
288 | return buffer.getvalue() |
|
364 | return buffer.getvalue() | |
289 |
|
365 | |||
290 |
|
366 | |||
|
367 | @make_cffi | |||
291 | class TestCompressor_write_to(unittest.TestCase): |
|
368 | class TestCompressor_write_to(unittest.TestCase): | |
292 | def test_empty(self): |
|
369 | def test_empty(self): | |
293 |
|
|
370 | result = compress(b'', 1) | |
294 |
|
|
371 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
|
372 | ||||
|
373 | params = zstd.get_frame_parameters(result) | |||
|
374 | self.assertEqual(params.content_size, 0) | |||
|
375 | self.assertEqual(params.window_size, 524288) | |||
|
376 | self.assertEqual(params.dict_id, 0) | |||
|
377 | self.assertFalse(params.has_checksum) | |||
295 |
|
378 | |||
296 | def test_multiple_compress(self): |
|
379 | def test_multiple_compress(self): | |
297 | buffer = io.BytesIO() |
|
380 | buffer = io.BytesIO() | |
298 | cctx = zstd.ZstdCompressor(level=5) |
|
381 | cctx = zstd.ZstdCompressor(level=5) | |
299 | with cctx.write_to(buffer) as compressor: |
|
382 | with cctx.write_to(buffer) as compressor: | |
300 | compressor.write(b'foo') |
|
383 | self.assertEqual(compressor.write(b'foo'), 0) | |
301 | compressor.write(b'bar') |
|
384 | self.assertEqual(compressor.write(b'bar'), 0) | |
302 | compressor.write(b'x' * 8192) |
|
385 | self.assertEqual(compressor.write(b'x' * 8192), 0) | |
303 |
|
386 | |||
304 | result = buffer.getvalue() |
|
387 | result = buffer.getvalue() | |
305 | self.assertEqual(result, |
|
388 | self.assertEqual(result, | |
@@ -318,11 +401,23 b' class TestCompressor_write_to(unittest.T' | |||||
318 | buffer = io.BytesIO() |
|
401 | buffer = io.BytesIO() | |
319 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
402 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) | |
320 | with cctx.write_to(buffer) as compressor: |
|
403 | with cctx.write_to(buffer) as compressor: | |
321 | compressor.write(b'foo') |
|
404 | self.assertEqual(compressor.write(b'foo'), 0) | |
322 | compressor.write(b'bar') |
|
405 | self.assertEqual(compressor.write(b'bar'), 0) | |
323 | compressor.write(b'foo' * 16384) |
|
406 | self.assertEqual(compressor.write(b'foo' * 16384), 634) | |
324 |
|
407 | |||
325 | compressed = buffer.getvalue() |
|
408 | compressed = buffer.getvalue() | |
|
409 | ||||
|
410 | params = zstd.get_frame_parameters(compressed) | |||
|
411 | self.assertEqual(params.content_size, 0) | |||
|
412 | self.assertEqual(params.window_size, 1024) | |||
|
413 | self.assertEqual(params.dict_id, d.dict_id()) | |||
|
414 | self.assertFalse(params.has_checksum) | |||
|
415 | ||||
|
416 | self.assertEqual(compressed[0:32], | |||
|
417 | b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00' | |||
|
418 | b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54' | |||
|
419 | b'\x00\x00\x18\x6f\x6f\x66\x01\x00') | |||
|
420 | ||||
326 | h = hashlib.sha1(compressed).hexdigest() |
|
421 | h = hashlib.sha1(compressed).hexdigest() | |
327 | self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') |
|
422 | self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') | |
328 |
|
423 | |||
@@ -332,11 +427,18 b' class TestCompressor_write_to(unittest.T' | |||||
332 | buffer = io.BytesIO() |
|
427 | buffer = io.BytesIO() | |
333 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
428 | cctx = zstd.ZstdCompressor(compression_params=params) | |
334 | with cctx.write_to(buffer) as compressor: |
|
429 | with cctx.write_to(buffer) as compressor: | |
335 | compressor.write(b'foo') |
|
430 | self.assertEqual(compressor.write(b'foo'), 0) | |
336 | compressor.write(b'bar') |
|
431 | self.assertEqual(compressor.write(b'bar'), 0) | |
337 | compressor.write(b'foobar' * 16384) |
|
432 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) | |
338 |
|
433 | |||
339 | compressed = buffer.getvalue() |
|
434 | compressed = buffer.getvalue() | |
|
435 | ||||
|
436 | params = zstd.get_frame_parameters(compressed) | |||
|
437 | self.assertEqual(params.content_size, 0) | |||
|
438 | self.assertEqual(params.window_size, 1048576) | |||
|
439 | self.assertEqual(params.dict_id, 0) | |||
|
440 | self.assertFalse(params.has_checksum) | |||
|
441 | ||||
340 | h = hashlib.sha1(compressed).hexdigest() |
|
442 | h = hashlib.sha1(compressed).hexdigest() | |
341 | self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') |
|
443 | self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') | |
342 |
|
444 | |||
@@ -344,12 +446,21 b' class TestCompressor_write_to(unittest.T' | |||||
344 | no_checksum = io.BytesIO() |
|
446 | no_checksum = io.BytesIO() | |
345 | cctx = zstd.ZstdCompressor(level=1) |
|
447 | cctx = zstd.ZstdCompressor(level=1) | |
346 | with cctx.write_to(no_checksum) as compressor: |
|
448 | with cctx.write_to(no_checksum) as compressor: | |
347 | compressor.write(b'foobar') |
|
449 | self.assertEqual(compressor.write(b'foobar'), 0) | |
348 |
|
450 | |||
349 | with_checksum = io.BytesIO() |
|
451 | with_checksum = io.BytesIO() | |
350 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
452 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
351 | with cctx.write_to(with_checksum) as compressor: |
|
453 | with cctx.write_to(with_checksum) as compressor: | |
352 | compressor.write(b'foobar') |
|
454 | self.assertEqual(compressor.write(b'foobar'), 0) | |
|
455 | ||||
|
456 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |||
|
457 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |||
|
458 | self.assertEqual(no_params.content_size, 0) | |||
|
459 | self.assertEqual(with_params.content_size, 0) | |||
|
460 | self.assertEqual(no_params.dict_id, 0) | |||
|
461 | self.assertEqual(with_params.dict_id, 0) | |||
|
462 | self.assertFalse(no_params.has_checksum) | |||
|
463 | self.assertTrue(with_params.has_checksum) | |||
353 |
|
464 | |||
354 | self.assertEqual(len(with_checksum.getvalue()), |
|
465 | self.assertEqual(len(with_checksum.getvalue()), | |
355 | len(no_checksum.getvalue()) + 4) |
|
466 | len(no_checksum.getvalue()) + 4) | |
@@ -358,12 +469,12 b' class TestCompressor_write_to(unittest.T' | |||||
358 | no_size = io.BytesIO() |
|
469 | no_size = io.BytesIO() | |
359 | cctx = zstd.ZstdCompressor(level=1) |
|
470 | cctx = zstd.ZstdCompressor(level=1) | |
360 | with cctx.write_to(no_size) as compressor: |
|
471 | with cctx.write_to(no_size) as compressor: | |
361 | compressor.write(b'foobar' * 256) |
|
472 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
362 |
|
473 | |||
363 | with_size = io.BytesIO() |
|
474 | with_size = io.BytesIO() | |
364 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
475 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) | |
365 | with cctx.write_to(with_size) as compressor: |
|
476 | with cctx.write_to(with_size) as compressor: | |
366 | compressor.write(b'foobar' * 256) |
|
477 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
367 |
|
478 | |||
368 | # Source size is not known in streaming mode, so header not |
|
479 | # Source size is not known in streaming mode, so header not | |
369 | # written. |
|
480 | # written. | |
@@ -373,7 +484,16 b' class TestCompressor_write_to(unittest.T' | |||||
373 | # Declaring size will write the header. |
|
484 | # Declaring size will write the header. | |
374 | with_size = io.BytesIO() |
|
485 | with_size = io.BytesIO() | |
375 | with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor: |
|
486 | with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor: | |
376 | compressor.write(b'foobar' * 256) |
|
487 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
|
488 | ||||
|
489 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |||
|
490 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |||
|
491 | self.assertEqual(no_params.content_size, 0) | |||
|
492 | self.assertEqual(with_params.content_size, 1536) | |||
|
493 | self.assertEqual(no_params.dict_id, 0) | |||
|
494 | self.assertEqual(with_params.dict_id, 0) | |||
|
495 | self.assertFalse(no_params.has_checksum) | |||
|
496 | self.assertFalse(with_params.has_checksum) | |||
377 |
|
497 | |||
378 | self.assertEqual(len(with_size.getvalue()), |
|
498 | self.assertEqual(len(with_size.getvalue()), | |
379 | len(no_size.getvalue()) + 1) |
|
499 | len(no_size.getvalue()) + 1) | |
@@ -390,12 +510,21 b' class TestCompressor_write_to(unittest.T' | |||||
390 | with_dict_id = io.BytesIO() |
|
510 | with_dict_id = io.BytesIO() | |
391 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
511 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
392 | with cctx.write_to(with_dict_id) as compressor: |
|
512 | with cctx.write_to(with_dict_id) as compressor: | |
393 | compressor.write(b'foobarfoobar') |
|
513 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
394 |
|
514 | |||
395 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
515 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
396 | no_dict_id = io.BytesIO() |
|
516 | no_dict_id = io.BytesIO() | |
397 | with cctx.write_to(no_dict_id) as compressor: |
|
517 | with cctx.write_to(no_dict_id) as compressor: | |
398 | compressor.write(b'foobarfoobar') |
|
518 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
|
519 | ||||
|
520 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) | |||
|
521 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) | |||
|
522 | self.assertEqual(no_params.content_size, 0) | |||
|
523 | self.assertEqual(with_params.content_size, 0) | |||
|
524 | self.assertEqual(no_params.dict_id, 0) | |||
|
525 | self.assertEqual(with_params.dict_id, d.dict_id()) | |||
|
526 | self.assertFalse(no_params.has_checksum) | |||
|
527 | self.assertFalse(with_params.has_checksum) | |||
399 |
|
528 | |||
400 | self.assertEqual(len(with_dict_id.getvalue()), |
|
529 | self.assertEqual(len(with_dict_id.getvalue()), | |
401 | len(no_dict_id.getvalue()) + 4) |
|
530 | len(no_dict_id.getvalue()) + 4) | |
@@ -412,9 +541,9 b' class TestCompressor_write_to(unittest.T' | |||||
412 | cctx = zstd.ZstdCompressor(level=3) |
|
541 | cctx = zstd.ZstdCompressor(level=3) | |
413 | dest = OpCountingBytesIO() |
|
542 | dest = OpCountingBytesIO() | |
414 | with cctx.write_to(dest, write_size=1) as compressor: |
|
543 | with cctx.write_to(dest, write_size=1) as compressor: | |
415 | compressor.write(b'foo') |
|
544 | self.assertEqual(compressor.write(b'foo'), 0) | |
416 | compressor.write(b'bar') |
|
545 | self.assertEqual(compressor.write(b'bar'), 0) | |
417 | compressor.write(b'foobar') |
|
546 | self.assertEqual(compressor.write(b'foobar'), 0) | |
418 |
|
547 | |||
419 | self.assertEqual(len(dest.getvalue()), dest._write_count) |
|
548 | self.assertEqual(len(dest.getvalue()), dest._write_count) | |
420 |
|
549 | |||
@@ -422,15 +551,15 b' class TestCompressor_write_to(unittest.T' | |||||
422 | cctx = zstd.ZstdCompressor(level=3) |
|
551 | cctx = zstd.ZstdCompressor(level=3) | |
423 | dest = OpCountingBytesIO() |
|
552 | dest = OpCountingBytesIO() | |
424 | with cctx.write_to(dest) as compressor: |
|
553 | with cctx.write_to(dest) as compressor: | |
425 | compressor.write(b'foo') |
|
554 | self.assertEqual(compressor.write(b'foo'), 0) | |
426 | self.assertEqual(dest._write_count, 0) |
|
555 | self.assertEqual(dest._write_count, 0) | |
427 | compressor.flush() |
|
556 | self.assertEqual(compressor.flush(), 12) | |
428 | self.assertEqual(dest._write_count, 1) |
|
557 | self.assertEqual(dest._write_count, 1) | |
429 | compressor.write(b'bar') |
|
558 | self.assertEqual(compressor.write(b'bar'), 0) | |
430 | self.assertEqual(dest._write_count, 1) |
|
559 | self.assertEqual(dest._write_count, 1) | |
431 | compressor.flush() |
|
560 | self.assertEqual(compressor.flush(), 6) | |
432 | self.assertEqual(dest._write_count, 2) |
|
561 | self.assertEqual(dest._write_count, 2) | |
433 | compressor.write(b'baz') |
|
562 | self.assertEqual(compressor.write(b'baz'), 0) | |
434 |
|
563 | |||
435 | self.assertEqual(dest._write_count, 3) |
|
564 | self.assertEqual(dest._write_count, 3) | |
436 |
|
565 | |||
@@ -438,10 +567,10 b' class TestCompressor_write_to(unittest.T' | |||||
438 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
567 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) | |
439 | dest = OpCountingBytesIO() |
|
568 | dest = OpCountingBytesIO() | |
440 | with cctx.write_to(dest) as compressor: |
|
569 | with cctx.write_to(dest) as compressor: | |
441 | compressor.write(b'foobar' * 8192) |
|
570 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) | |
442 | count = dest._write_count |
|
571 | count = dest._write_count | |
443 | offset = dest.tell() |
|
572 | offset = dest.tell() | |
444 | compressor.flush() |
|
573 | self.assertEqual(compressor.flush(), 23) | |
445 | self.assertGreater(dest._write_count, count) |
|
574 | self.assertGreater(dest._write_count, count) | |
446 | self.assertGreater(dest.tell(), offset) |
|
575 | self.assertGreater(dest.tell(), offset) | |
447 | offset = dest.tell() |
|
576 | offset = dest.tell() | |
@@ -456,18 +585,22 b' class TestCompressor_write_to(unittest.T' | |||||
456 | self.assertEqual(header, b'\x01\x00\x00') |
|
585 | self.assertEqual(header, b'\x01\x00\x00') | |
457 |
|
586 | |||
458 |
|
587 | |||
|
588 | @make_cffi | |||
459 | class TestCompressor_read_from(unittest.TestCase): |
|
589 | class TestCompressor_read_from(unittest.TestCase): | |
460 | def test_type_validation(self): |
|
590 | def test_type_validation(self): | |
461 | cctx = zstd.ZstdCompressor() |
|
591 | cctx = zstd.ZstdCompressor() | |
462 |
|
592 | |||
463 | # Object with read() works. |
|
593 | # Object with read() works. | |
464 | cctx.read_from(io.BytesIO()) |
|
594 | for chunk in cctx.read_from(io.BytesIO()): | |
|
595 | pass | |||
465 |
|
596 | |||
466 | # Buffer protocol works. |
|
597 | # Buffer protocol works. | |
467 | cctx.read_from(b'foobar') |
|
598 | for chunk in cctx.read_from(b'foobar'): | |
|
599 | pass | |||
468 |
|
600 | |||
469 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
601 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
470 | cctx.read_from(True) |
|
602 | for chunk in cctx.read_from(True): | |
|
603 | pass | |||
471 |
|
604 | |||
472 | def test_read_empty(self): |
|
605 | def test_read_empty(self): | |
473 | cctx = zstd.ZstdCompressor(level=1) |
|
606 | cctx = zstd.ZstdCompressor(level=1) | |
@@ -521,6 +654,12 b' class TestCompressor_read_from(unittest.' | |||||
521 | # We should get the same output as the one-shot compression mechanism. |
|
654 | # We should get the same output as the one-shot compression mechanism. | |
522 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
655 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
523 |
|
656 | |||
|
657 | params = zstd.get_frame_parameters(b''.join(chunks)) | |||
|
658 | self.assertEqual(params.content_size, 0) | |||
|
659 | self.assertEqual(params.window_size, 262144) | |||
|
660 | self.assertEqual(params.dict_id, 0) | |||
|
661 | self.assertFalse(params.has_checksum) | |||
|
662 | ||||
524 | # Now check the buffer protocol. |
|
663 | # Now check the buffer protocol. | |
525 | it = cctx.read_from(source.getvalue()) |
|
664 | it = cctx.read_from(source.getvalue()) | |
526 | chunks = list(it) |
|
665 | chunks = list(it) |
@@ -13,6 +13,12 b' except ImportError:' | |||||
13 |
|
13 | |||
14 | import zstd |
|
14 | import zstd | |
15 |
|
15 | |||
|
16 | from . common import ( | |||
|
17 | make_cffi, | |||
|
18 | ) | |||
|
19 | ||||
|
20 | ||||
|
21 | @make_cffi | |||
16 | class TestCompressionParameters(unittest.TestCase): |
|
22 | class TestCompressionParameters(unittest.TestCase): | |
17 | def test_init_bad_arg_type(self): |
|
23 | def test_init_bad_arg_type(self): | |
18 | with self.assertRaises(TypeError): |
|
24 | with self.assertRaises(TypeError): | |
@@ -42,7 +48,81 b' class TestCompressionParameters(unittest' | |||||
42 | p = zstd.get_compression_parameters(1) |
|
48 | p = zstd.get_compression_parameters(1) | |
43 | self.assertIsInstance(p, zstd.CompressionParameters) |
|
49 | self.assertIsInstance(p, zstd.CompressionParameters) | |
44 |
|
50 | |||
45 |
self.assertEqual(p |
|
51 | self.assertEqual(p.window_log, 19) | |
|
52 | ||||
|
53 | def test_members(self): | |||
|
54 | p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1) | |||
|
55 | self.assertEqual(p.window_log, 10) | |||
|
56 | self.assertEqual(p.chain_log, 6) | |||
|
57 | self.assertEqual(p.hash_log, 7) | |||
|
58 | self.assertEqual(p.search_log, 4) | |||
|
59 | self.assertEqual(p.search_length, 5) | |||
|
60 | self.assertEqual(p.target_length, 8) | |||
|
61 | self.assertEqual(p.strategy, 1) | |||
|
62 | ||||
|
63 | ||||
|
64 | @make_cffi | |||
|
65 | class TestFrameParameters(unittest.TestCase): | |||
|
66 | def test_invalid_type(self): | |||
|
67 | with self.assertRaises(TypeError): | |||
|
68 | zstd.get_frame_parameters(None) | |||
|
69 | ||||
|
70 | with self.assertRaises(TypeError): | |||
|
71 | zstd.get_frame_parameters(u'foobarbaz') | |||
|
72 | ||||
|
73 | def test_invalid_input_sizes(self): | |||
|
74 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): | |||
|
75 | zstd.get_frame_parameters(b'') | |||
|
76 | ||||
|
77 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): | |||
|
78 | zstd.get_frame_parameters(zstd.FRAME_HEADER) | |||
|
79 | ||||
|
80 | def test_invalid_frame(self): | |||
|
81 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |||
|
82 | zstd.get_frame_parameters(b'foobarbaz') | |||
|
83 | ||||
|
84 | def test_attributes(self): | |||
|
85 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') | |||
|
86 | self.assertEqual(params.content_size, 0) | |||
|
87 | self.assertEqual(params.window_size, 1024) | |||
|
88 | self.assertEqual(params.dict_id, 0) | |||
|
89 | self.assertFalse(params.has_checksum) | |||
|
90 | ||||
|
91 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. | |||
|
92 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') | |||
|
93 | self.assertEqual(params.content_size, 0) | |||
|
94 | self.assertEqual(params.window_size, 1024) | |||
|
95 | self.assertEqual(params.dict_id, 255) | |||
|
96 | self.assertFalse(params.has_checksum) | |||
|
97 | ||||
|
98 | # Lowest 3rd bit indicates if checksum is present. | |||
|
99 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') | |||
|
100 | self.assertEqual(params.content_size, 0) | |||
|
101 | self.assertEqual(params.window_size, 1024) | |||
|
102 | self.assertEqual(params.dict_id, 0) | |||
|
103 | self.assertTrue(params.has_checksum) | |||
|
104 | ||||
|
105 | # Upper 2 bits indicate content size. | |||
|
106 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00') | |||
|
107 | self.assertEqual(params.content_size, 511) | |||
|
108 | self.assertEqual(params.window_size, 1024) | |||
|
109 | self.assertEqual(params.dict_id, 0) | |||
|
110 | self.assertFalse(params.has_checksum) | |||
|
111 | ||||
|
112 | # Window descriptor is 2nd byte after frame header. | |||
|
113 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') | |||
|
114 | self.assertEqual(params.content_size, 0) | |||
|
115 | self.assertEqual(params.window_size, 262144) | |||
|
116 | self.assertEqual(params.dict_id, 0) | |||
|
117 | self.assertFalse(params.has_checksum) | |||
|
118 | ||||
|
119 | # Set multiple things. | |||
|
120 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00') | |||
|
121 | self.assertEqual(params.content_size, 272) | |||
|
122 | self.assertEqual(params.window_size, 262144) | |||
|
123 | self.assertEqual(params.dict_id, 15) | |||
|
124 | self.assertTrue(params.has_checksum) | |||
|
125 | ||||
46 |
|
126 | |||
47 | if hypothesis: |
|
127 | if hypothesis: | |
48 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, |
|
128 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, | |
@@ -65,6 +145,8 b' if hypothesis:' | |||||
65 | zstd.STRATEGY_BTLAZY2, |
|
145 | zstd.STRATEGY_BTLAZY2, | |
66 | zstd.STRATEGY_BTOPT)) |
|
146 | zstd.STRATEGY_BTOPT)) | |
67 |
|
147 | |||
|
148 | ||||
|
149 | @make_cffi | |||
68 | class TestCompressionParametersHypothesis(unittest.TestCase): |
|
150 | class TestCompressionParametersHypothesis(unittest.TestCase): | |
69 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
151 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
70 | s_searchlength, s_targetlength, s_strategy) |
|
152 | s_searchlength, s_targetlength, s_strategy) | |
@@ -73,9 +155,6 b' if hypothesis:' | |||||
73 | p = zstd.CompressionParameters(windowlog, chainlog, hashlog, |
|
155 | p = zstd.CompressionParameters(windowlog, chainlog, hashlog, | |
74 | searchlog, searchlength, |
|
156 | searchlog, searchlength, | |
75 | targetlength, strategy) |
|
157 | targetlength, strategy) | |
76 | self.assertEqual(tuple(p), |
|
|||
77 | (windowlog, chainlog, hashlog, searchlog, |
|
|||
78 | searchlength, targetlength, strategy)) |
|
|||
79 |
|
158 | |||
80 | # Verify we can instantiate a compressor with the supplied values. |
|
159 | # Verify we can instantiate a compressor with the supplied values. | |
81 | # ZSTD_checkCParams moves the goal posts on us from what's advertised |
|
160 | # ZSTD_checkCParams moves the goal posts on us from what's advertised |
@@ -10,7 +10,10 b' except ImportError:' | |||||
10 |
|
10 | |||
11 | import zstd |
|
11 | import zstd | |
12 |
|
12 | |||
13 |
from .common import |
|
13 | from .common import ( | |
|
14 | make_cffi, | |||
|
15 | OpCountingBytesIO, | |||
|
16 | ) | |||
14 |
|
17 | |||
15 |
|
18 | |||
16 | if sys.version_info[0] >= 3: |
|
19 | if sys.version_info[0] >= 3: | |
@@ -19,6 +22,7 b' else:' | |||||
19 | next = lambda it: it.next() |
|
22 | next = lambda it: it.next() | |
20 |
|
23 | |||
21 |
|
24 | |||
|
25 | @make_cffi | |||
22 | class TestDecompressor_decompress(unittest.TestCase): |
|
26 | class TestDecompressor_decompress(unittest.TestCase): | |
23 | def test_empty_input(self): |
|
27 | def test_empty_input(self): | |
24 | dctx = zstd.ZstdDecompressor() |
|
28 | dctx = zstd.ZstdDecompressor() | |
@@ -119,6 +123,7 b' class TestDecompressor_decompress(unitte' | |||||
119 | self.assertEqual(decompressed, sources[i]) |
|
123 | self.assertEqual(decompressed, sources[i]) | |
120 |
|
124 | |||
121 |
|
125 | |||
|
126 | @make_cffi | |||
122 | class TestDecompressor_copy_stream(unittest.TestCase): |
|
127 | class TestDecompressor_copy_stream(unittest.TestCase): | |
123 | def test_no_read(self): |
|
128 | def test_no_read(self): | |
124 | source = object() |
|
129 | source = object() | |
@@ -180,6 +185,7 b' class TestDecompressor_copy_stream(unitt' | |||||
180 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
185 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
181 |
|
186 | |||
182 |
|
187 | |||
|
188 | @make_cffi | |||
183 | class TestDecompressor_decompressobj(unittest.TestCase): |
|
189 | class TestDecompressor_decompressobj(unittest.TestCase): | |
184 | def test_simple(self): |
|
190 | def test_simple(self): | |
185 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
191 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
@@ -207,6 +213,7 b' def decompress_via_writer(data):' | |||||
207 | return buffer.getvalue() |
|
213 | return buffer.getvalue() | |
208 |
|
214 | |||
209 |
|
215 | |||
|
216 | @make_cffi | |||
210 | class TestDecompressor_write_to(unittest.TestCase): |
|
217 | class TestDecompressor_write_to(unittest.TestCase): | |
211 | def test_empty_roundtrip(self): |
|
218 | def test_empty_roundtrip(self): | |
212 | cctx = zstd.ZstdCompressor() |
|
219 | cctx = zstd.ZstdCompressor() | |
@@ -256,14 +263,14 b' class TestDecompressor_write_to(unittest' | |||||
256 | buffer = io.BytesIO() |
|
263 | buffer = io.BytesIO() | |
257 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
264 | cctx = zstd.ZstdCompressor(dict_data=d) | |
258 | with cctx.write_to(buffer) as compressor: |
|
265 | with cctx.write_to(buffer) as compressor: | |
259 | compressor.write(orig) |
|
266 | self.assertEqual(compressor.write(orig), 1544) | |
260 |
|
267 | |||
261 | compressed = buffer.getvalue() |
|
268 | compressed = buffer.getvalue() | |
262 | buffer = io.BytesIO() |
|
269 | buffer = io.BytesIO() | |
263 |
|
270 | |||
264 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
271 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
265 | with dctx.write_to(buffer) as decompressor: |
|
272 | with dctx.write_to(buffer) as decompressor: | |
266 | decompressor.write(compressed) |
|
273 | self.assertEqual(decompressor.write(compressed), len(orig)) | |
267 |
|
274 | |||
268 | self.assertEqual(buffer.getvalue(), orig) |
|
275 | self.assertEqual(buffer.getvalue(), orig) | |
269 |
|
276 | |||
@@ -291,6 +298,7 b' class TestDecompressor_write_to(unittest' | |||||
291 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
298 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
292 |
|
299 | |||
293 |
|
300 | |||
|
301 | @make_cffi | |||
294 | class TestDecompressor_read_from(unittest.TestCase): |
|
302 | class TestDecompressor_read_from(unittest.TestCase): | |
295 | def test_type_validation(self): |
|
303 | def test_type_validation(self): | |
296 | dctx = zstd.ZstdDecompressor() |
|
304 | dctx = zstd.ZstdDecompressor() | |
@@ -302,7 +310,7 b' class TestDecompressor_read_from(unittes' | |||||
302 | dctx.read_from(b'foobar') |
|
310 | dctx.read_from(b'foobar') | |
303 |
|
311 | |||
304 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
312 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
305 | dctx.read_from(True) |
|
313 | b''.join(dctx.read_from(True)) | |
306 |
|
314 | |||
307 | def test_empty_input(self): |
|
315 | def test_empty_input(self): | |
308 | dctx = zstd.ZstdDecompressor() |
|
316 | dctx = zstd.ZstdDecompressor() | |
@@ -351,7 +359,7 b' class TestDecompressor_read_from(unittes' | |||||
351 | dctx = zstd.ZstdDecompressor() |
|
359 | dctx = zstd.ZstdDecompressor() | |
352 |
|
360 | |||
353 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): |
|
361 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): | |
354 | dctx.read_from(b'', skip_bytes=1, read_size=1) |
|
362 | b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1)) | |
355 |
|
363 | |||
356 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): |
|
364 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): | |
357 | b''.join(dctx.read_from(b'foobar', skip_bytes=10)) |
|
365 | b''.join(dctx.read_from(b'foobar', skip_bytes=10)) | |
@@ -476,3 +484,94 b' class TestDecompressor_read_from(unittes' | |||||
476 | self.assertEqual(len(chunk), 1) |
|
484 | self.assertEqual(len(chunk), 1) | |
477 |
|
485 | |||
478 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
486 | self.assertEqual(source._read_count, len(source.getvalue())) | |
|
487 | ||||
|
488 | ||||
|
489 | @make_cffi | |||
|
490 | class TestDecompressor_content_dict_chain(unittest.TestCase): | |||
|
491 | def test_bad_inputs_simple(self): | |||
|
492 | dctx = zstd.ZstdDecompressor() | |||
|
493 | ||||
|
494 | with self.assertRaises(TypeError): | |||
|
495 | dctx.decompress_content_dict_chain(b'foo') | |||
|
496 | ||||
|
497 | with self.assertRaises(TypeError): | |||
|
498 | dctx.decompress_content_dict_chain((b'foo', b'bar')) | |||
|
499 | ||||
|
500 | with self.assertRaisesRegexp(ValueError, 'empty input chain'): | |||
|
501 | dctx.decompress_content_dict_chain([]) | |||
|
502 | ||||
|
503 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): | |||
|
504 | dctx.decompress_content_dict_chain([u'foo']) | |||
|
505 | ||||
|
506 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): | |||
|
507 | dctx.decompress_content_dict_chain([True]) | |||
|
508 | ||||
|
509 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): | |||
|
510 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) | |||
|
511 | ||||
|
512 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): | |||
|
513 | dctx.decompress_content_dict_chain([b'foo' * 8]) | |||
|
514 | ||||
|
515 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) | |||
|
516 | ||||
|
517 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): | |||
|
518 | dctx.decompress_content_dict_chain([no_size]) | |||
|
519 | ||||
|
520 | # Corrupt first frame. | |||
|
521 | frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) | |||
|
522 | frame = frame[0:12] + frame[15:] | |||
|
523 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'): | |||
|
524 | dctx.decompress_content_dict_chain([frame]) | |||
|
525 | ||||
|
526 | def test_bad_subsequent_input(self): | |||
|
527 | initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) | |||
|
528 | ||||
|
529 | dctx = zstd.ZstdDecompressor() | |||
|
530 | ||||
|
531 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): | |||
|
532 | dctx.decompress_content_dict_chain([initial, u'foo']) | |||
|
533 | ||||
|
534 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): | |||
|
535 | dctx.decompress_content_dict_chain([initial, None]) | |||
|
536 | ||||
|
537 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): | |||
|
538 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) | |||
|
539 | ||||
|
540 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): | |||
|
541 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) | |||
|
542 | ||||
|
543 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) | |||
|
544 | ||||
|
545 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): | |||
|
546 | dctx.decompress_content_dict_chain([initial, no_size]) | |||
|
547 | ||||
|
548 | # Corrupt second frame. | |||
|
549 | cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) | |||
|
550 | frame = cctx.compress(b'bar' * 64) | |||
|
551 | frame = frame[0:12] + frame[15:] | |||
|
552 | ||||
|
553 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'): | |||
|
554 | dctx.decompress_content_dict_chain([initial, frame]) | |||
|
555 | ||||
|
556 | def test_simple(self): | |||
|
557 | original = [ | |||
|
558 | b'foo' * 64, | |||
|
559 | b'foobar' * 64, | |||
|
560 | b'baz' * 64, | |||
|
561 | b'foobaz' * 64, | |||
|
562 | b'foobarbaz' * 64, | |||
|
563 | ] | |||
|
564 | ||||
|
565 | chunks = [] | |||
|
566 | chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0])) | |||
|
567 | for i, chunk in enumerate(original[1:]): | |||
|
568 | d = zstd.ZstdCompressionDict(original[i]) | |||
|
569 | cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True) | |||
|
570 | chunks.append(cctx.compress(chunk)) | |||
|
571 | ||||
|
572 | for i in range(1, len(original)): | |||
|
573 | chain = chunks[0:i] | |||
|
574 | expected = original[i - 1] | |||
|
575 | dctx = zstd.ZstdDecompressor() | |||
|
576 | decompressed = dctx.decompress_content_dict_chain(chain) | |||
|
577 | self.assertEqual(decompressed, expected) |
@@ -5,7 +5,12 b' except ImportError:' | |||||
5 |
|
5 | |||
6 | import zstd |
|
6 | import zstd | |
7 |
|
7 | |||
|
8 | from . common import ( | |||
|
9 | make_cffi, | |||
|
10 | ) | |||
8 |
|
11 | |||
|
12 | ||||
|
13 | @make_cffi | |||
9 | class TestSizes(unittest.TestCase): |
|
14 | class TestSizes(unittest.TestCase): | |
10 | def test_decompression_size(self): |
|
15 | def test_decompression_size(self): | |
11 | size = zstd.estimate_decompression_context_size() |
|
16 | size = zstd.estimate_decompression_context_size() |
@@ -7,9 +7,15 b' except ImportError:' | |||||
7 |
|
7 | |||
8 | import zstd |
|
8 | import zstd | |
9 |
|
9 | |||
|
10 | from . common import ( | |||
|
11 | make_cffi, | |||
|
12 | ) | |||
|
13 | ||||
|
14 | ||||
|
15 | @make_cffi | |||
10 | class TestModuleAttributes(unittest.TestCase): |
|
16 | class TestModuleAttributes(unittest.TestCase): | |
11 | def test_version(self): |
|
17 | def test_version(self): | |
12 |
self.assertEqual(zstd.ZSTD_VERSION, (1, 1, |
|
18 | self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3)) | |
13 |
|
19 | |||
14 | def test_constants(self): |
|
20 | def test_constants(self): | |
15 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
21 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) | |
@@ -45,4 +51,4 b' class TestModuleAttributes(unittest.Test' | |||||
45 | ) |
|
51 | ) | |
46 |
|
52 | |||
47 | for a in attrs: |
|
53 | for a in attrs: | |
48 | self.assertTrue(hasattr(zstd, a)) |
|
54 | self.assertTrue(hasattr(zstd, a), a) |
@@ -13,10 +13,14 b' except ImportError:' | |||||
13 |
|
13 | |||
14 | import zstd |
|
14 | import zstd | |
15 |
|
15 | |||
|
16 | from .common import ( | |||
|
17 | make_cffi, | |||
|
18 | ) | |||
16 |
|
19 | |||
17 | compression_levels = strategies.integers(min_value=1, max_value=22) |
|
20 | compression_levels = strategies.integers(min_value=1, max_value=22) | |
18 |
|
21 | |||
19 |
|
22 | |||
|
23 | @make_cffi | |||
20 | class TestRoundTrip(unittest.TestCase): |
|
24 | class TestRoundTrip(unittest.TestCase): | |
21 | @hypothesis.given(strategies.binary(), compression_levels) |
|
25 | @hypothesis.given(strategies.binary(), compression_levels) | |
22 | def test_compress_write_to(self, data, level): |
|
26 | def test_compress_write_to(self, data, level): |
@@ -7,6 +7,9 b' except ImportError:' | |||||
7 |
|
7 | |||
8 | import zstd |
|
8 | import zstd | |
9 |
|
9 | |||
|
10 | from . common import ( | |||
|
11 | make_cffi, | |||
|
12 | ) | |||
10 |
|
13 | |||
11 | if sys.version_info[0] >= 3: |
|
14 | if sys.version_info[0] >= 3: | |
12 | int_type = int |
|
15 | int_type = int | |
@@ -14,6 +17,7 b' else:' | |||||
14 | int_type = long |
|
17 | int_type = long | |
15 |
|
18 | |||
16 |
|
19 | |||
|
20 | @make_cffi | |||
17 | class TestTrainDictionary(unittest.TestCase): |
|
21 | class TestTrainDictionary(unittest.TestCase): | |
18 | def test_no_args(self): |
|
22 | def test_no_args(self): | |
19 | with self.assertRaises(TypeError): |
|
23 | with self.assertRaises(TypeError): |
@@ -34,6 +34,11 b' PyDoc_STRVAR(get_compression_parameters_' | |||||
34 | "Obtains a ``CompressionParameters`` instance from a compression level and\n" |
|
34 | "Obtains a ``CompressionParameters`` instance from a compression level and\n" | |
35 | "optional input size and dictionary size"); |
|
35 | "optional input size and dictionary size"); | |
36 |
|
36 | |||
|
37 | PyDoc_STRVAR(get_frame_parameters__doc__, | |||
|
38 | "get_frame_parameters(data)\n" | |||
|
39 | "\n" | |||
|
40 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); | |||
|
41 | ||||
37 | PyDoc_STRVAR(train_dictionary__doc__, |
|
42 | PyDoc_STRVAR(train_dictionary__doc__, | |
38 | "train_dictionary(dict_size, samples)\n" |
|
43 | "train_dictionary(dict_size, samples)\n" | |
39 | "\n" |
|
44 | "\n" | |
@@ -53,6 +58,8 b' static PyMethodDef zstd_methods[] = {' | |||||
53 | METH_NOARGS, estimate_decompression_context_size__doc__ }, |
|
58 | METH_NOARGS, estimate_decompression_context_size__doc__ }, | |
54 | { "get_compression_parameters", (PyCFunction)get_compression_parameters, |
|
59 | { "get_compression_parameters", (PyCFunction)get_compression_parameters, | |
55 | METH_VARARGS, get_compression_parameters__doc__ }, |
|
60 | METH_VARARGS, get_compression_parameters__doc__ }, | |
|
61 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, | |||
|
62 | METH_VARARGS, get_frame_parameters__doc__ }, | |||
56 | { "train_dictionary", (PyCFunction)train_dictionary, |
|
63 | { "train_dictionary", (PyCFunction)train_dictionary, | |
57 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, |
|
64 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, | |
58 | { NULL, NULL } |
|
65 | { NULL, NULL } | |
@@ -70,6 +77,7 b' void decompressor_module_init(PyObject* ' | |||||
70 | void decompressobj_module_init(PyObject* mod); |
|
77 | void decompressobj_module_init(PyObject* mod); | |
71 | void decompressionwriter_module_init(PyObject* mod); |
|
78 | void decompressionwriter_module_init(PyObject* mod); | |
72 | void decompressoriterator_module_init(PyObject* mod); |
|
79 | void decompressoriterator_module_init(PyObject* mod); | |
|
80 | void frameparams_module_init(PyObject* mod); | |||
73 |
|
81 | |||
74 | void zstd_module_init(PyObject* m) { |
|
82 | void zstd_module_init(PyObject* m) { | |
75 | /* python-zstandard relies on unstable zstd C API features. This means |
|
83 | /* python-zstandard relies on unstable zstd C API features. This means | |
@@ -87,7 +95,7 b' void zstd_module_init(PyObject* m) {' | |||||
87 | We detect this mismatch here and refuse to load the module if this |
|
95 | We detect this mismatch here and refuse to load the module if this | |
88 | scenario is detected. |
|
96 | scenario is detected. | |
89 | */ |
|
97 | */ | |
90 |
if (ZSTD_VERSION_NUMBER != 1010 |
|
98 | if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) { | |
91 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
99 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); | |
92 | return; |
|
100 | return; | |
93 | } |
|
101 | } | |
@@ -104,6 +112,7 b' void zstd_module_init(PyObject* m) {' | |||||
104 | decompressobj_module_init(m); |
|
112 | decompressobj_module_init(m); | |
105 | decompressionwriter_module_init(m); |
|
113 | decompressionwriter_module_init(m); | |
106 | decompressoriterator_module_init(m); |
|
114 | decompressoriterator_module_init(m); | |
|
115 | frameparams_module_init(m); | |||
107 | } |
|
116 | } | |
108 |
|
117 | |||
109 | #if PY_MAJOR_VERSION >= 3 |
|
118 | #if PY_MAJOR_VERSION >= 3 |
@@ -39,7 +39,7 b' extern "C" {' | |||||
39 | #endif |
|
39 | #endif | |
40 |
|
40 | |||
41 | /* code only tested on 32 and 64 bits systems */ |
|
41 | /* code only tested on 32 and 64 bits systems */ | |
42 |
#define MEM_STATIC_ASSERT(c) { enum { |
|
42 | #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } | |
43 | MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } |
|
43 | MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } | |
44 |
|
44 | |||
45 |
|
45 |
@@ -43,10 +43,6 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t ' | |||||
43 | * provides error code string from enum */ |
|
43 | * provides error code string from enum */ | |
44 | const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); } |
|
44 | const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); } | |
45 |
|
45 | |||
46 | /* --- ZBUFF Error Management (deprecated) --- */ |
|
|||
47 | unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } |
|
|||
48 | const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } |
|
|||
49 |
|
||||
50 |
|
46 | |||
51 | /*=************************************************************** |
|
47 | /*=************************************************************** | |
52 | * Custom allocator |
|
48 | * Custom allocator |
@@ -18,6 +18,20 b' extern "C" {' | |||||
18 | #include <stddef.h> /* size_t */ |
|
18 | #include <stddef.h> /* size_t */ | |
19 |
|
19 | |||
20 |
|
20 | |||
|
21 | /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ | |||
|
22 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |||
|
23 | # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) | |||
|
24 | #else | |||
|
25 | # define ZSTDERRORLIB_VISIBILITY | |||
|
26 | #endif | |||
|
27 | #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
|
28 | # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY | |||
|
29 | #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |||
|
30 | # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ | |||
|
31 | #else | |||
|
32 | # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY | |||
|
33 | #endif | |||
|
34 | ||||
21 | /*-**************************************** |
|
35 | /*-**************************************** | |
22 | * error codes list |
|
36 | * error codes list | |
23 | ******************************************/ |
|
37 | ******************************************/ | |
@@ -49,8 +63,8 b' typedef enum {' | |||||
49 | /*! ZSTD_getErrorCode() : |
|
63 | /*! ZSTD_getErrorCode() : | |
50 | convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, |
|
64 | convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, | |
51 | which can be used to compare directly with enum list published into "error_public.h" */ |
|
65 | which can be used to compare directly with enum list published into "error_public.h" */ | |
52 | ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); |
|
66 | ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); | |
53 | const char* ZSTD_getErrorString(ZSTD_ErrorCode code); |
|
67 | ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); | |
54 |
|
68 | |||
55 |
|
69 | |||
56 | #if defined (__cplusplus) |
|
70 | #if defined (__cplusplus) |
@@ -267,4 +267,13 b' MEM_STATIC U32 ZSTD_highbit32(U32 val)' | |||||
267 | } |
|
267 | } | |
268 |
|
268 | |||
269 |
|
269 | |||
|
270 | /* hidden functions */ | |||
|
271 | ||||
|
272 | /* ZSTD_invalidateRepCodes() : | |||
|
273 | * ensures next compression will not use repcodes from previous block. | |||
|
274 | * Note : only works with regular variant; | |||
|
275 | * do not use with extDict variant ! */ | |||
|
276 | void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); | |||
|
277 | ||||
|
278 | ||||
270 | #endif /* ZSTD_CCOMMON_H_MODULE */ |
|
279 | #endif /* ZSTD_CCOMMON_H_MODULE */ |
@@ -51,8 +51,7 b' static void ZSTD_resetSeqStore(seqStore_' | |||||
51 | /*-************************************* |
|
51 | /*-************************************* | |
52 | * Context memory management |
|
52 | * Context memory management | |
53 | ***************************************/ |
|
53 | ***************************************/ | |
54 | struct ZSTD_CCtx_s |
|
54 | struct ZSTD_CCtx_s { | |
55 | { |
|
|||
56 | const BYTE* nextSrc; /* next block here to continue on current prefix */ |
|
55 | const BYTE* nextSrc; /* next block here to continue on current prefix */ | |
57 | const BYTE* base; /* All regular indexes relative to this position */ |
|
56 | const BYTE* base; /* All regular indexes relative to this position */ | |
58 | const BYTE* dictBase; /* extDict indexes relative to this position */ |
|
57 | const BYTE* dictBase; /* extDict indexes relative to this position */ | |
@@ -61,10 +60,11 b' struct ZSTD_CCtx_s' | |||||
61 | U32 nextToUpdate; /* index from which to continue dictionary update */ |
|
60 | U32 nextToUpdate; /* index from which to continue dictionary update */ | |
62 | U32 nextToUpdate3; /* index from which to continue dictionary update */ |
|
61 | U32 nextToUpdate3; /* index from which to continue dictionary update */ | |
63 | U32 hashLog3; /* dispatch table : larger == faster, more memory */ |
|
62 | U32 hashLog3; /* dispatch table : larger == faster, more memory */ | |
64 | U32 loadedDictEnd; |
|
63 | U32 loadedDictEnd; /* index of end of dictionary */ | |
|
64 | U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */ | |||
65 | ZSTD_compressionStage_e stage; |
|
65 | ZSTD_compressionStage_e stage; | |
66 | U32 rep[ZSTD_REP_NUM]; |
|
66 | U32 rep[ZSTD_REP_NUM]; | |
67 |
U32 |
|
67 | U32 repToConfirm[ZSTD_REP_NUM]; | |
68 | U32 dictID; |
|
68 | U32 dictID; | |
69 | ZSTD_parameters params; |
|
69 | ZSTD_parameters params; | |
70 | void* workSpace; |
|
70 | void* workSpace; | |
@@ -101,7 +101,7 b' ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD' | |||||
101 | cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); |
|
101 | cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); | |
102 | if (!cctx) return NULL; |
|
102 | if (!cctx) return NULL; | |
103 | memset(cctx, 0, sizeof(ZSTD_CCtx)); |
|
103 | memset(cctx, 0, sizeof(ZSTD_CCtx)); | |
104 |
|
|
104 | cctx->customMem = customMem; | |
105 | return cctx; |
|
105 | return cctx; | |
106 | } |
|
106 | } | |
107 |
|
107 | |||
@@ -119,6 +119,15 b' size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx*' | |||||
119 | return sizeof(*cctx) + cctx->workSpaceSize; |
|
119 | return sizeof(*cctx) + cctx->workSpaceSize; | |
120 | } |
|
120 | } | |
121 |
|
121 | |||
|
122 | size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value) | |||
|
123 | { | |||
|
124 | switch(param) | |||
|
125 | { | |||
|
126 | case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0; | |||
|
127 | default: return ERROR(parameter_unknown); | |||
|
128 | } | |||
|
129 | } | |||
|
130 | ||||
122 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ |
|
131 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ | |
123 | { |
|
132 | { | |
124 | return &(ctx->seqStore); |
|
133 | return &(ctx->seqStore); | |
@@ -318,6 +327,14 b' static size_t ZSTD_resetCCtx_advanced (Z' | |||||
318 | } |
|
327 | } | |
319 | } |
|
328 | } | |
320 |
|
329 | |||
|
330 | /* ZSTD_invalidateRepCodes() : | |||
|
331 | * ensures next compression will not use repcodes from previous block. | |||
|
332 | * Note : only works with regular variant; | |||
|
333 | * do not use with extDict variant ! */ | |||
|
334 | void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { | |||
|
335 | int i; | |||
|
336 | for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0; | |||
|
337 | } | |||
321 |
|
338 | |||
322 | /*! ZSTD_copyCCtx() : |
|
339 | /*! ZSTD_copyCCtx() : | |
323 | * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. |
|
340 | * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. | |
@@ -735,12 +752,19 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||||
735 | if ((size_t)(op-ostart) >= maxCSize) return 0; } |
|
752 | if ((size_t)(op-ostart) >= maxCSize) return 0; } | |
736 |
|
753 | |||
737 | /* confirm repcodes */ |
|
754 | /* confirm repcodes */ | |
738 |
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc-> |
|
755 | { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; } | |
739 |
|
756 | |||
740 | return op - ostart; |
|
757 | return op - ostart; | |
741 | } |
|
758 | } | |
742 |
|
759 | |||
743 |
|
760 | |||
|
761 | #if 0 /* for debug */ | |||
|
762 | # define STORESEQ_DEBUG | |||
|
763 | #include <stdio.h> /* fprintf */ | |||
|
764 | U32 g_startDebug = 0; | |||
|
765 | const BYTE* g_start = NULL; | |||
|
766 | #endif | |||
|
767 | ||||
744 | /*! ZSTD_storeSeq() : |
|
768 | /*! ZSTD_storeSeq() : | |
745 | Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. |
|
769 | Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. | |
746 | `offsetCode` : distance to match, or 0 == repCode. |
|
770 | `offsetCode` : distance to match, or 0 == repCode. | |
@@ -748,13 +772,14 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||||
748 | */ |
|
772 | */ | |
749 | MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) |
|
773 | MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) | |
750 | { |
|
774 | { | |
751 | #if 0 /* for debug */ |
|
775 | #ifdef STORESEQ_DEBUG | |
752 | static const BYTE* g_start = NULL; |
|
776 | if (g_startDebug) { | |
753 | const U32 pos = (U32)((const BYTE*)literals - g_start); |
|
777 | const U32 pos = (U32)((const BYTE*)literals - g_start); | |
754 | if (g_start==NULL) g_start = (const BYTE*)literals; |
|
778 | if (g_start==NULL) g_start = (const BYTE*)literals; | |
755 |
|
|
779 | if ((pos > 1895000) && (pos < 1895300)) | |
756 | printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", |
|
780 | fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", | |
757 | pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); |
|
781 | pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); | |
|
782 | } | |||
758 | #endif |
|
783 | #endif | |
759 | /* copy Literals */ |
|
784 | /* copy Literals */ | |
760 | ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); |
|
785 | ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); | |
@@ -1004,8 +1029,8 b' void ZSTD_compressBlock_fast_generic(ZST' | |||||
1004 | } } } |
|
1029 | } } } | |
1005 |
|
1030 | |||
1006 | /* save reps for next block */ |
|
1031 | /* save reps for next block */ | |
1007 |
cctx-> |
|
1032 | cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; | |
1008 |
cctx-> |
|
1033 | cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; | |
1009 |
|
1034 | |||
1010 | /* Last Literals */ |
|
1035 | /* Last Literals */ | |
1011 | { size_t const lastLLSize = iend - anchor; |
|
1036 | { size_t const lastLLSize = iend - anchor; | |
@@ -1119,7 +1144,7 b' static void ZSTD_compressBlock_fast_extD' | |||||
1119 | } } } |
|
1144 | } } } | |
1120 |
|
1145 | |||
1121 | /* save reps for next block */ |
|
1146 | /* save reps for next block */ | |
1122 |
ctx-> |
|
1147 | ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; | |
1123 |
|
1148 | |||
1124 | /* Last Literals */ |
|
1149 | /* Last Literals */ | |
1125 | { size_t const lastLLSize = iend - anchor; |
|
1150 | { size_t const lastLLSize = iend - anchor; | |
@@ -1273,8 +1298,8 b' void ZSTD_compressBlock_doubleFast_gener' | |||||
1273 | } } } |
|
1298 | } } } | |
1274 |
|
1299 | |||
1275 | /* save reps for next block */ |
|
1300 | /* save reps for next block */ | |
1276 |
cctx-> |
|
1301 | cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; | |
1277 |
cctx-> |
|
1302 | cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; | |
1278 |
|
1303 | |||
1279 | /* Last Literals */ |
|
1304 | /* Last Literals */ | |
1280 | { size_t const lastLLSize = iend - anchor; |
|
1305 | { size_t const lastLLSize = iend - anchor; | |
@@ -1423,7 +1448,7 b' static void ZSTD_compressBlock_doubleFas' | |||||
1423 | } } } |
|
1448 | } } } | |
1424 |
|
1449 | |||
1425 | /* save reps for next block */ |
|
1450 | /* save reps for next block */ | |
1426 |
ctx-> |
|
1451 | ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; | |
1427 |
|
1452 | |||
1428 | /* Last Literals */ |
|
1453 | /* Last Literals */ | |
1429 | { size_t const lastLLSize = iend - anchor; |
|
1454 | { size_t const lastLLSize = iend - anchor; | |
@@ -1955,8 +1980,8 b' void ZSTD_compressBlock_lazy_generic(ZST' | |||||
1955 | } } |
|
1980 | } } | |
1956 |
|
1981 | |||
1957 | /* Save reps for next block */ |
|
1982 | /* Save reps for next block */ | |
1958 |
ctx-> |
|
1983 | ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; | |
1959 |
ctx-> |
|
1984 | ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; | |
1960 |
|
1985 | |||
1961 | /* Last Literals */ |
|
1986 | /* Last Literals */ | |
1962 | { size_t const lastLLSize = iend - anchor; |
|
1987 | { size_t const lastLLSize = iend - anchor; | |
@@ -2150,7 +2175,7 b' void ZSTD_compressBlock_lazy_extDict_gen' | |||||
2150 | } } |
|
2175 | } } | |
2151 |
|
2176 | |||
2152 | /* Save reps for next block */ |
|
2177 | /* Save reps for next block */ | |
2153 |
ctx-> |
|
2178 | ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; | |
2154 |
|
2179 | |||
2155 | /* Last Literals */ |
|
2180 | /* Last Literals */ | |
2156 | { size_t const lastLLSize = iend - anchor; |
|
2181 | { size_t const lastLLSize = iend - anchor; | |
@@ -2409,12 +2434,14 b' static size_t ZSTD_compressContinue_inte' | |||||
2409 |
|
2434 | |||
2410 | cctx->nextSrc = ip + srcSize; |
|
2435 | cctx->nextSrc = ip + srcSize; | |
2411 |
|
2436 | |||
2412 | { size_t const cSize = frame ? |
|
2437 | if (srcSize) { | |
|
2438 | size_t const cSize = frame ? | |||
2413 | ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : |
|
2439 | ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : | |
2414 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); |
|
2440 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); | |
2415 | if (ZSTD_isError(cSize)) return cSize; |
|
2441 | if (ZSTD_isError(cSize)) return cSize; | |
2416 | return cSize + fhSize; |
|
2442 | return cSize + fhSize; | |
2417 | } |
|
2443 | } else | |
|
2444 | return fhSize; | |||
2418 | } |
|
2445 | } | |
2419 |
|
2446 | |||
2420 |
|
2447 | |||
@@ -2450,7 +2477,7 b' static size_t ZSTD_loadDictionaryContent' | |||||
2450 | zc->dictBase = zc->base; |
|
2477 | zc->dictBase = zc->base; | |
2451 | zc->base += ip - zc->nextSrc; |
|
2478 | zc->base += ip - zc->nextSrc; | |
2452 | zc->nextToUpdate = zc->dictLimit; |
|
2479 | zc->nextToUpdate = zc->dictLimit; | |
2453 | zc->loadedDictEnd = (U32)(iend - zc->base); |
|
2480 | zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base); | |
2454 |
|
2481 | |||
2455 | zc->nextSrc = iend; |
|
2482 | zc->nextSrc = iend; | |
2456 | if (srcSize <= HASH_READ_SIZE) return 0; |
|
2483 | if (srcSize <= HASH_READ_SIZE) return 0; | |
@@ -2557,9 +2584,9 b' static size_t ZSTD_loadDictEntropyStats(' | |||||
2557 | } |
|
2584 | } | |
2558 |
|
2585 | |||
2559 | if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); |
|
2586 | if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); | |
2560 | cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); |
|
2587 | cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); | |
2561 | cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); |
|
2588 | cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); | |
2562 | cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); |
|
2589 | cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); | |
2563 | dictPtr += 12; |
|
2590 | dictPtr += 12; | |
2564 |
|
2591 | |||
2565 | { U32 offcodeMax = MaxOff; |
|
2592 | { U32 offcodeMax = MaxOff; | |
@@ -2594,7 +2621,6 b' static size_t ZSTD_compress_insertDictio' | |||||
2594 | } |
|
2621 | } | |
2595 | } |
|
2622 | } | |
2596 |
|
2623 | |||
2597 |
|
||||
2598 | /*! ZSTD_compressBegin_internal() : |
|
2624 | /*! ZSTD_compressBegin_internal() : | |
2599 | * @return : 0, or an error code */ |
|
2625 | * @return : 0, or an error code */ | |
2600 | static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, |
|
2626 | static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, | |
@@ -2626,9 +2652,9 b' size_t ZSTD_compressBegin_usingDict(ZSTD' | |||||
2626 | } |
|
2652 | } | |
2627 |
|
2653 | |||
2628 |
|
2654 | |||
2629 |
size_t ZSTD_compressBegin(ZSTD_CCtx* |
|
2655 | size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) | |
2630 | { |
|
2656 | { | |
2631 |
return ZSTD_compressBegin_usingDict( |
|
2657 | return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); | |
2632 | } |
|
2658 | } | |
2633 |
|
2659 | |||
2634 |
|
2660 | |||
@@ -2733,7 +2759,8 b' size_t ZSTD_compress(void* dst, size_t d' | |||||
2733 | /* ===== Dictionary API ===== */ |
|
2759 | /* ===== Dictionary API ===== */ | |
2734 |
|
2760 | |||
2735 | struct ZSTD_CDict_s { |
|
2761 | struct ZSTD_CDict_s { | |
2736 |
void* dict |
|
2762 | void* dictBuffer; | |
|
2763 | const void* dictContent; | |||
2737 | size_t dictContentSize; |
|
2764 | size_t dictContentSize; | |
2738 | ZSTD_CCtx* refContext; |
|
2765 | ZSTD_CCtx* refContext; | |
2739 | }; /* typedef'd tp ZSTD_CDict within "zstd.h" */ |
|
2766 | }; /* typedef'd tp ZSTD_CDict within "zstd.h" */ | |
@@ -2741,39 +2768,45 b' struct ZSTD_CDict_s {' | |||||
2741 | size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) |
|
2768 | size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) | |
2742 | { |
|
2769 | { | |
2743 | if (cdict==NULL) return 0; /* support sizeof on NULL */ |
|
2770 | if (cdict==NULL) return 0; /* support sizeof on NULL */ | |
2744 | return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize; |
|
2771 | return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); | |
2745 | } |
|
2772 | } | |
2746 |
|
2773 | |||
2747 |
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, |
|
2774 | ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference, | |
|
2775 | ZSTD_parameters params, ZSTD_customMem customMem) | |||
2748 | { |
|
2776 | { | |
2749 | if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; |
|
2777 | if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; | |
2750 | if (!customMem.customAlloc || !customMem.customFree) return NULL; |
|
2778 | if (!customMem.customAlloc || !customMem.customFree) return NULL; | |
2751 |
|
2779 | |||
2752 | { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem); |
|
2780 | { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem); | |
2753 | void* const dictContent = ZSTD_malloc(dictSize, customMem); |
|
|||
2754 | ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem); |
|
2781 | ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem); | |
2755 |
|
2782 | |||
2756 |
if ( |
|
2783 | if (!cdict || !cctx) { | |
2757 | ZSTD_free(dictContent, customMem); |
|
|||
2758 | ZSTD_free(cdict, customMem); |
|
2784 | ZSTD_free(cdict, customMem); | |
2759 | ZSTD_free(cctx, customMem); |
|
2785 | ZSTD_free(cctx, customMem); | |
2760 | return NULL; |
|
2786 | return NULL; | |
2761 | } |
|
2787 | } | |
2762 |
|
2788 | |||
2763 | if (dictSize) { |
|
2789 | if ((byReference) || (!dictBuffer) || (!dictSize)) { | |
2764 | memcpy(dictContent, dict, dictSize); |
|
2790 | cdict->dictBuffer = NULL; | |
|
2791 | cdict->dictContent = dictBuffer; | |||
|
2792 | } else { | |||
|
2793 | void* const internalBuffer = ZSTD_malloc(dictSize, customMem); | |||
|
2794 | if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; } | |||
|
2795 | memcpy(internalBuffer, dictBuffer, dictSize); | |||
|
2796 | cdict->dictBuffer = internalBuffer; | |||
|
2797 | cdict->dictContent = internalBuffer; | |||
2765 | } |
|
2798 | } | |
2766 | { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0); |
|
2799 | ||
|
2800 | { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0); | |||
2767 | if (ZSTD_isError(errorCode)) { |
|
2801 | if (ZSTD_isError(errorCode)) { | |
2768 |
ZSTD_free( |
|
2802 | ZSTD_free(cdict->dictBuffer, customMem); | |
|
2803 | ZSTD_free(cctx, customMem); | |||
2769 | ZSTD_free(cdict, customMem); |
|
2804 | ZSTD_free(cdict, customMem); | |
2770 | ZSTD_free(cctx, customMem); |
|
|||
2771 | return NULL; |
|
2805 | return NULL; | |
2772 | } } |
|
2806 | } } | |
2773 |
|
2807 | |||
2774 |
cdict-> |
|
2808 | cdict->refContext = cctx; | |
2775 | cdict->dictContentSize = dictSize; |
|
2809 | cdict->dictContentSize = dictSize; | |
2776 | cdict->refContext = cctx; |
|
|||
2777 | return cdict; |
|
2810 | return cdict; | |
2778 | } |
|
2811 | } | |
2779 | } |
|
2812 | } | |
@@ -2783,7 +2816,15 b' ZSTD_CDict* ZSTD_createCDict(const void*' | |||||
2783 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
|
2816 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; | |
2784 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize); |
|
2817 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize); | |
2785 | params.fParams.contentSizeFlag = 1; |
|
2818 | params.fParams.contentSizeFlag = 1; | |
2786 | return ZSTD_createCDict_advanced(dict, dictSize, params, allocator); |
|
2819 | return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator); | |
|
2820 | } | |||
|
2821 | ||||
|
2822 | ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) | |||
|
2823 | { | |||
|
2824 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; | |||
|
2825 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize); | |||
|
2826 | params.fParams.contentSizeFlag = 1; | |||
|
2827 | return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator); | |||
2787 | } |
|
2828 | } | |
2788 |
|
2829 | |||
2789 | size_t ZSTD_freeCDict(ZSTD_CDict* cdict) |
|
2830 | size_t ZSTD_freeCDict(ZSTD_CDict* cdict) | |
@@ -2791,7 +2832,7 b' size_t ZSTD_freeCDict(ZSTD_CDict* cdict)' | |||||
2791 | if (cdict==NULL) return 0; /* support free on NULL */ |
|
2832 | if (cdict==NULL) return 0; /* support free on NULL */ | |
2792 | { ZSTD_customMem const cMem = cdict->refContext->customMem; |
|
2833 | { ZSTD_customMem const cMem = cdict->refContext->customMem; | |
2793 | ZSTD_freeCCtx(cdict->refContext); |
|
2834 | ZSTD_freeCCtx(cdict->refContext); | |
2794 |
ZSTD_free(cdict->dict |
|
2835 | ZSTD_free(cdict->dictBuffer, cMem); | |
2795 | ZSTD_free(cdict, cMem); |
|
2836 | ZSTD_free(cdict, cMem); | |
2796 | return 0; |
|
2837 | return 0; | |
2797 | } |
|
2838 | } | |
@@ -2801,7 +2842,7 b' static ZSTD_parameters ZSTD_getParamsFro' | |||||
2801 | return ZSTD_getParamsFromCCtx(cdict->refContext); |
|
2842 | return ZSTD_getParamsFromCCtx(cdict->refContext); | |
2802 | } |
|
2843 | } | |
2803 |
|
2844 | |||
2804 |
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, |
|
2845 | size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize) | |
2805 | { |
|
2846 | { | |
2806 | if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) |
|
2847 | if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) | |
2807 | else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize)); |
|
2848 | else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize)); | |
@@ -2900,7 +2941,7 b' size_t ZSTD_CStreamOutSize(void) { retur' | |||||
2900 |
|
2941 | |||
2901 | size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) |
|
2942 | size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) | |
2902 | { |
|
2943 | { | |
2903 | if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */ |
|
2944 | if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */ | |
2904 |
|
2945 | |||
2905 | if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)) |
|
2946 | if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)) | |
2906 | else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); |
|
2947 | else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); | |
@@ -2937,9 +2978,9 b' size_t ZSTD_initCStream_advanced(ZSTD_CS' | |||||
2937 | if (zcs->outBuff == NULL) return ERROR(memory_allocation); |
|
2978 | if (zcs->outBuff == NULL) return ERROR(memory_allocation); | |
2938 | } |
|
2979 | } | |
2939 |
|
2980 | |||
2940 | if (dict) { |
|
2981 | if (dict && dictSize >= 8) { | |
2941 | ZSTD_freeCDict(zcs->cdictLocal); |
|
2982 | ZSTD_freeCDict(zcs->cdictLocal); | |
2942 | zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem); |
|
2983 | zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem); | |
2943 | if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); |
|
2984 | if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); | |
2944 | zcs->cdict = zcs->cdictLocal; |
|
2985 | zcs->cdict = zcs->cdictLocal; | |
2945 | } else zcs->cdict = NULL; |
|
2986 | } else zcs->cdict = NULL; | |
@@ -2956,6 +2997,7 b' size_t ZSTD_initCStream_usingCDict(ZSTD_' | |||||
2956 | ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict); |
|
2997 | ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict); | |
2957 | size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0); |
|
2998 | size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0); | |
2958 | zcs->cdict = cdict; |
|
2999 | zcs->cdict = cdict; | |
|
3000 | zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID; | |||
2959 | return initError; |
|
3001 | return initError; | |
2960 | } |
|
3002 | } | |
2961 |
|
3003 | |||
@@ -2967,7 +3009,8 b' size_t ZSTD_initCStream_usingDict(ZSTD_C' | |||||
2967 |
|
3009 | |||
2968 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize) |
|
3010 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize) | |
2969 | { |
|
3011 | { | |
2970 |
ZSTD_parameters |
|
3012 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); | |
|
3013 | if (pledgedSrcSize) params.fParams.contentSizeFlag = 1; | |||
2971 | return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize); |
|
3014 | return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize); | |
2972 | } |
|
3015 | } | |
2973 |
|
3016 |
@@ -38,7 +38,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto' | |||||
38 |
|
38 | |||
39 | ssPtr->cachedLiterals = NULL; |
|
39 | ssPtr->cachedLiterals = NULL; | |
40 | ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; |
|
40 | ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; | |
41 |
ssPtr->staticPrices = 0; |
|
41 | ssPtr->staticPrices = 0; | |
42 |
|
42 | |||
43 | if (ssPtr->litLengthSum == 0) { |
|
43 | if (ssPtr->litLengthSum == 0) { | |
44 | if (srcSize <= 1024) ssPtr->staticPrices = 1; |
|
44 | if (srcSize <= 1024) ssPtr->staticPrices = 1; | |
@@ -56,7 +56,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto' | |||||
56 |
|
56 | |||
57 | for (u=0; u<=MaxLit; u++) { |
|
57 | for (u=0; u<=MaxLit; u++) { | |
58 | ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); |
|
58 | ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); | |
59 |
ssPtr->litSum += ssPtr->litFreq[u]; |
|
59 | ssPtr->litSum += ssPtr->litFreq[u]; | |
60 | } |
|
60 | } | |
61 | for (u=0; u<=MaxLL; u++) |
|
61 | for (u=0; u<=MaxLL; u++) | |
62 | ssPtr->litLengthFreq[u] = 1; |
|
62 | ssPtr->litLengthFreq[u] = 1; | |
@@ -634,7 +634,7 b' void ZSTD_compressBlock_opt_generic(ZSTD' | |||||
634 | } } /* for (cur=0; cur < last_pos; ) */ |
|
634 | } } /* for (cur=0; cur < last_pos; ) */ | |
635 |
|
635 | |||
636 | /* Save reps for next block */ |
|
636 | /* Save reps for next block */ | |
637 |
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx-> |
|
637 | { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } | |
638 |
|
638 | |||
639 | /* Last Literals */ |
|
639 | /* Last Literals */ | |
640 | { size_t const lastLLSize = iend - anchor; |
|
640 | { size_t const lastLLSize = iend - anchor; | |
@@ -825,7 +825,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||||
825 |
|
825 | |||
826 | match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); |
|
826 | match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); | |
827 |
|
827 | |||
828 | if (match_num > 0 && matches[match_num-1].len > sufficient_len) { |
|
828 | if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { | |
829 | best_mlen = matches[match_num-1].len; |
|
829 | best_mlen = matches[match_num-1].len; | |
830 | best_off = matches[match_num-1].off; |
|
830 | best_off = matches[match_num-1].off; | |
831 | last_pos = cur + 1; |
|
831 | last_pos = cur + 1; | |
@@ -835,7 +835,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||||
835 | /* set prices using matches at position = cur */ |
|
835 | /* set prices using matches at position = cur */ | |
836 | for (u = 0; u < match_num; u++) { |
|
836 | for (u = 0; u < match_num; u++) { | |
837 | mlen = (u>0) ? matches[u-1].len+1 : best_mlen; |
|
837 | mlen = (u>0) ? matches[u-1].len+1 : best_mlen; | |
838 | best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; |
|
838 | best_mlen = matches[u].len; | |
839 |
|
839 | |||
840 | while (mlen <= best_mlen) { |
|
840 | while (mlen <= best_mlen) { | |
841 | if (opt[cur].mlen == 1) { |
|
841 | if (opt[cur].mlen == 1) { | |
@@ -907,7 +907,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||||
907 | } } /* for (cur=0; cur < last_pos; ) */ |
|
907 | } } /* for (cur=0; cur < last_pos; ) */ | |
908 |
|
908 | |||
909 | /* Save reps for next block */ |
|
909 | /* Save reps for next block */ | |
910 |
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx-> |
|
910 | { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } | |
911 |
|
911 | |||
912 | /* Last Literals */ |
|
912 | /* Last Literals */ | |
913 | { size_t lastLLSize = iend - anchor; |
|
913 | { size_t lastLLSize = iend - anchor; |
@@ -1444,7 +1444,7 b' size_t ZSTD_decompress_usingDict(ZSTD_DC' | |||||
1444 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) |
|
1444 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) | |
1445 | if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize); |
|
1445 | if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize); | |
1446 | #endif |
|
1446 | #endif | |
1447 | ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); |
|
1447 | CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); | |
1448 | ZSTD_checkContinuity(dctx, dst); |
|
1448 | ZSTD_checkContinuity(dctx, dst); | |
1449 | return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); |
|
1449 | return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); | |
1450 | } |
|
1450 | } | |
@@ -1671,9 +1671,9 b' static size_t ZSTD_loadEntropy(ZSTD_DCtx' | |||||
1671 | } |
|
1671 | } | |
1672 |
|
1672 | |||
1673 | if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); |
|
1673 | if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); | |
1674 | dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); |
|
1674 | dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); | |
1675 | dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); |
|
1675 | dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); | |
1676 | dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); |
|
1676 | dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); | |
1677 | dictPtr += 12; |
|
1677 | dictPtr += 12; | |
1678 |
|
1678 | |||
1679 | dctx->litEntropy = dctx->fseEntropy = 1; |
|
1679 | dctx->litEntropy = dctx->fseEntropy = 1; | |
@@ -1713,39 +1713,44 b' size_t ZSTD_decompressBegin_usingDict(ZS' | |||||
1713 | /* ====== ZSTD_DDict ====== */ |
|
1713 | /* ====== ZSTD_DDict ====== */ | |
1714 |
|
1714 | |||
1715 | struct ZSTD_DDict_s { |
|
1715 | struct ZSTD_DDict_s { | |
1716 | void* dict; |
|
1716 | void* dictBuffer; | |
|
1717 | const void* dictContent; | |||
1717 | size_t dictSize; |
|
1718 | size_t dictSize; | |
1718 | ZSTD_DCtx* refContext; |
|
1719 | ZSTD_DCtx* refContext; | |
1719 | }; /* typedef'd to ZSTD_DDict within "zstd.h" */ |
|
1720 | }; /* typedef'd to ZSTD_DDict within "zstd.h" */ | |
1720 |
|
1721 | |||
1721 | ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem) |
|
1722 | ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem) | |
1722 | { |
|
1723 | { | |
1723 | if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; |
|
1724 | if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; | |
1724 | if (!customMem.customAlloc || !customMem.customFree) return NULL; |
|
1725 | if (!customMem.customAlloc || !customMem.customFree) return NULL; | |
1725 |
|
1726 | |||
1726 | { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); |
|
1727 | { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); | |
1727 | void* const dictContent = ZSTD_malloc(dictSize, customMem); |
|
|||
1728 | ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem); |
|
1728 | ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem); | |
1729 |
|
1729 | |||
1730 |
if ( |
|
1730 | if (!ddict || !dctx) { | |
1731 | ZSTD_free(dictContent, customMem); |
|
|||
1732 | ZSTD_free(ddict, customMem); |
|
1731 | ZSTD_free(ddict, customMem); | |
1733 | ZSTD_free(dctx, customMem); |
|
1732 | ZSTD_free(dctx, customMem); | |
1734 | return NULL; |
|
1733 | return NULL; | |
1735 | } |
|
1734 | } | |
1736 |
|
1735 | |||
1737 | if (dictSize) { |
|
1736 | if ((byReference) || (!dict) || (!dictSize)) { | |
1738 | memcpy(dictContent, dict, dictSize); |
|
1737 | ddict->dictBuffer = NULL; | |
|
1738 | ddict->dictContent = dict; | |||
|
1739 | } else { | |||
|
1740 | void* const internalBuffer = ZSTD_malloc(dictSize, customMem); | |||
|
1741 | if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; } | |||
|
1742 | memcpy(internalBuffer, dict, dictSize); | |||
|
1743 | ddict->dictBuffer = internalBuffer; | |||
|
1744 | ddict->dictContent = internalBuffer; | |||
1739 | } |
|
1745 | } | |
1740 | { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize); |
|
1746 | { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize); | |
1741 | if (ZSTD_isError(errorCode)) { |
|
1747 | if (ZSTD_isError(errorCode)) { | |
1742 |
ZSTD_free(d |
|
1748 | ZSTD_free(ddict->dictBuffer, customMem); | |
1743 | ZSTD_free(ddict, customMem); |
|
1749 | ZSTD_free(ddict, customMem); | |
1744 | ZSTD_free(dctx, customMem); |
|
1750 | ZSTD_free(dctx, customMem); | |
1745 | return NULL; |
|
1751 | return NULL; | |
1746 | } } |
|
1752 | } } | |
1747 |
|
1753 | |||
1748 | ddict->dict = dictContent; |
|
|||
1749 | ddict->dictSize = dictSize; |
|
1754 | ddict->dictSize = dictSize; | |
1750 | ddict->refContext = dctx; |
|
1755 | ddict->refContext = dctx; | |
1751 | return ddict; |
|
1756 | return ddict; | |
@@ -1758,15 +1763,27 b' ZSTD_DDict* ZSTD_createDDict_advanced(co' | |||||
1758 | ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) |
|
1763 | ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) | |
1759 | { |
|
1764 | { | |
1760 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
|
1765 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; | |
1761 | return ZSTD_createDDict_advanced(dict, dictSize, allocator); |
|
1766 | return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator); | |
1762 | } |
|
1767 | } | |
1763 |
|
1768 | |||
|
1769 | ||||
|
1770 | /*! ZSTD_createDDict_byReference() : | |||
|
1771 | * Create a digested dictionary, ready to start decompression operation without startup delay. | |||
|
1772 | * Dictionary content is simply referenced, and therefore stays in dictBuffer. | |||
|
1773 | * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */ | |||
|
1774 | ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) | |||
|
1775 | { | |||
|
1776 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; | |||
|
1777 | return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator); | |||
|
1778 | } | |||
|
1779 | ||||
|
1780 | ||||
1764 | size_t ZSTD_freeDDict(ZSTD_DDict* ddict) |
|
1781 | size_t ZSTD_freeDDict(ZSTD_DDict* ddict) | |
1765 | { |
|
1782 | { | |
1766 | if (ddict==NULL) return 0; /* support free on NULL */ |
|
1783 | if (ddict==NULL) return 0; /* support free on NULL */ | |
1767 | { ZSTD_customMem const cMem = ddict->refContext->customMem; |
|
1784 | { ZSTD_customMem const cMem = ddict->refContext->customMem; | |
1768 | ZSTD_freeDCtx(ddict->refContext); |
|
1785 | ZSTD_freeDCtx(ddict->refContext); | |
1769 | ZSTD_free(ddict->dict, cMem); |
|
1786 | ZSTD_free(ddict->dictBuffer, cMem); | |
1770 | ZSTD_free(ddict, cMem); |
|
1787 | ZSTD_free(ddict, cMem); | |
1771 | return 0; |
|
1788 | return 0; | |
1772 | } |
|
1789 | } | |
@@ -1775,7 +1792,7 b' size_t ZSTD_freeDDict(ZSTD_DDict* ddict)' | |||||
1775 | size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) |
|
1792 | size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) | |
1776 | { |
|
1793 | { | |
1777 | if (ddict==NULL) return 0; /* support sizeof on NULL */ |
|
1794 | if (ddict==NULL) return 0; /* support sizeof on NULL */ | |
1778 | return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize; |
|
1795 | return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ; | |
1779 | } |
|
1796 | } | |
1780 |
|
1797 | |||
1781 | /*! ZSTD_getDictID_fromDict() : |
|
1798 | /*! ZSTD_getDictID_fromDict() : | |
@@ -1796,7 +1813,7 b' unsigned ZSTD_getDictID_fromDict(const v' | |||||
1796 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) |
|
1813 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) | |
1797 | { |
|
1814 | { | |
1798 | if (ddict==NULL) return 0; |
|
1815 | if (ddict==NULL) return 0; | |
1799 | return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize); |
|
1816 | return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); | |
1800 | } |
|
1817 | } | |
1801 |
|
1818 | |||
1802 | /*! ZSTD_getDictID_fromFrame() : |
|
1819 | /*! ZSTD_getDictID_fromFrame() : | |
@@ -1827,7 +1844,7 b' size_t ZSTD_decompress_usingDDict(ZSTD_D' | |||||
1827 | const ZSTD_DDict* ddict) |
|
1844 | const ZSTD_DDict* ddict) | |
1828 | { |
|
1845 | { | |
1829 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) |
|
1846 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) | |
1830 | if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize); |
|
1847 | if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dictContent, ddict->dictSize); | |
1831 | #endif |
|
1848 | #endif | |
1832 | ZSTD_refDCtx(dctx, ddict->refContext); |
|
1849 | ZSTD_refDCtx(dctx, ddict->refContext); | |
1833 | ZSTD_checkContinuity(dctx, dst); |
|
1850 | ZSTD_checkContinuity(dctx, dst); | |
@@ -1919,7 +1936,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D' | |||||
1919 | zds->stage = zdss_loadHeader; |
|
1936 | zds->stage = zdss_loadHeader; | |
1920 | zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; |
|
1937 | zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; | |
1921 | ZSTD_freeDDict(zds->ddictLocal); |
|
1938 | ZSTD_freeDDict(zds->ddictLocal); | |
1922 | if (dict) { |
|
1939 | if (dict && dictSize >= 8) { | |
1923 | zds->ddictLocal = ZSTD_createDDict(dict, dictSize); |
|
1940 | zds->ddictLocal = ZSTD_createDDict(dict, dictSize); | |
1924 | if (zds->ddictLocal == NULL) return ERROR(memory_allocation); |
|
1941 | if (zds->ddictLocal == NULL) return ERROR(memory_allocation); | |
1925 | } else zds->ddictLocal = NULL; |
|
1942 | } else zds->ddictLocal = NULL; | |
@@ -1956,7 +1973,7 b' size_t ZSTD_setDStreamParameter(ZSTD_DSt' | |||||
1956 | switch(paramType) |
|
1973 | switch(paramType) | |
1957 | { |
|
1974 | { | |
1958 | default : return ERROR(parameter_unknown); |
|
1975 | default : return ERROR(parameter_unknown); | |
1959 |
case |
|
1976 | case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; | |
1960 | } |
|
1977 | } | |
1961 | return 0; |
|
1978 | return 0; | |
1962 | } |
|
1979 | } | |
@@ -2007,7 +2024,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||||
2007 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) |
|
2024 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) | |
2008 | { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); |
|
2025 | { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); | |
2009 | if (legacyVersion) { |
|
2026 | if (legacyVersion) { | |
2010 | const void* const dict = zds->ddict ? zds->ddict->dict : NULL; |
|
2027 | const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL; | |
2011 | size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0; |
|
2028 | size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0; | |
2012 | CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, |
|
2029 | CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, | |
2013 | dict, dictSize)); |
|
2030 | dict, dictSize)); |
@@ -36,12 +36,11 b'' | |||||
36 | #include <time.h> /* clock */ |
|
36 | #include <time.h> /* clock */ | |
37 |
|
37 | |||
38 | #include "mem.h" /* read */ |
|
38 | #include "mem.h" /* read */ | |
39 | #include "error_private.h" |
|
|||
40 | #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ |
|
39 | #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ | |
41 | #define HUF_STATIC_LINKING_ONLY |
|
40 | #define HUF_STATIC_LINKING_ONLY | |
42 | #include "huf.h" |
|
41 | #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */ | |
43 | #include "zstd_internal.h" /* includes zstd.h */ |
|
42 | #include "zstd_internal.h" /* includes zstd.h */ | |
44 | #include "xxhash.h" |
|
43 | #include "xxhash.h" /* XXH64 */ | |
45 | #include "divsufsort.h" |
|
44 | #include "divsufsort.h" | |
46 | #ifndef ZDICT_STATIC_LINKING_ONLY |
|
45 | #ifndef ZDICT_STATIC_LINKING_ONLY | |
47 | # define ZDICT_STATIC_LINKING_ONLY |
|
46 | # define ZDICT_STATIC_LINKING_ONLY | |
@@ -61,7 +60,7 b'' | |||||
61 | #define NOISELENGTH 32 |
|
60 | #define NOISELENGTH 32 | |
62 |
|
61 | |||
63 | #define MINRATIO 4 |
|
62 | #define MINRATIO 4 | |
64 |
static const int g_compressionLevel_default = |
|
63 | static const int g_compressionLevel_default = 6; | |
65 | static const U32 g_selectivity_default = 9; |
|
64 | static const U32 g_selectivity_default = 9; | |
66 | static const size_t g_provision_entropySize = 200; |
|
65 | static const size_t g_provision_entropySize = 200; | |
67 | static const size_t g_min_fast_dictContent = 192; |
|
66 | static const size_t g_min_fast_dictContent = 192; | |
@@ -307,13 +306,13 b' static dictItem ZDICT_analyzePos(' | |||||
307 | } while (length >=MINMATCHLENGTH); |
|
306 | } while (length >=MINMATCHLENGTH); | |
308 |
|
307 | |||
309 | /* look backward */ |
|
308 | /* look backward */ | |
310 |
|
|
309 | length = MINMATCHLENGTH; | |
311 |
|
|
310 | while ((length >= MINMATCHLENGTH) & (start > 0)) { | |
312 |
|
|
311 | length = ZDICT_count(b + pos, b + suffix[start - 1]); | |
313 |
|
|
312 | if (length >= LLIMIT) length = LLIMIT - 1; | |
314 |
|
|
313 | lengthList[length]++; | |
315 |
|
|
314 | if (length >= MINMATCHLENGTH) start--; | |
316 | } |
|
315 | } | |
317 |
|
316 | |||
318 | /* largest useful length */ |
|
317 | /* largest useful length */ | |
319 | memset(cumulLength, 0, sizeof(cumulLength)); |
|
318 | memset(cumulLength, 0, sizeof(cumulLength)); | |
@@ -570,7 +569,7 b' static void ZDICT_countEStats(EStats_res' | |||||
570 | if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } |
|
569 | if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } | |
571 | } |
|
570 | } | |
572 | cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize); |
|
571 | cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize); | |
573 |
if (ZSTD_isError(cSize)) { DISPLAYLEVEL( |
|
572 | if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; } | |
574 |
|
573 | |||
575 | if (cSize) { /* if == 0; block is not compressible */ |
|
574 | if (cSize) { /* if == 0; block is not compressible */ | |
576 | const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc); |
|
575 | const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc); | |
@@ -825,6 +824,55 b' static size_t ZDICT_analyzeEntropy(void*' | |||||
825 | } |
|
824 | } | |
826 |
|
825 | |||
827 |
|
826 | |||
|
827 | ||||
|
828 | size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, | |||
|
829 | const void* customDictContent, size_t dictContentSize, | |||
|
830 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
|
831 | ZDICT_params_t params) | |||
|
832 | { | |||
|
833 | size_t hSize; | |||
|
834 | #define HBUFFSIZE 256 | |||
|
835 | BYTE header[HBUFFSIZE]; | |||
|
836 | int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel; | |||
|
837 | U32 const notificationLevel = params.notificationLevel; | |||
|
838 | ||||
|
839 | /* check conditions */ | |||
|
840 | if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall); | |||
|
841 | if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong); | |||
|
842 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); | |||
|
843 | ||||
|
844 | /* dictionary header */ | |||
|
845 | MEM_writeLE32(header, ZSTD_DICT_MAGIC); | |||
|
846 | { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); | |||
|
847 | U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; | |||
|
848 | U32 const dictID = params.dictID ? params.dictID : compliantID; | |||
|
849 | MEM_writeLE32(header+4, dictID); | |||
|
850 | } | |||
|
851 | hSize = 8; | |||
|
852 | ||||
|
853 | /* entropy tables */ | |||
|
854 | DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ | |||
|
855 | DISPLAYLEVEL(2, "statistics ... \n"); | |||
|
856 | { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize, | |||
|
857 | compressionLevel, | |||
|
858 | samplesBuffer, samplesSizes, nbSamples, | |||
|
859 | customDictContent, dictContentSize, | |||
|
860 | notificationLevel); | |||
|
861 | if (ZDICT_isError(eSize)) return eSize; | |||
|
862 | hSize += eSize; | |||
|
863 | } | |||
|
864 | ||||
|
865 | /* copy elements in final buffer ; note : src and dst buffer can overlap */ | |||
|
866 | if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; | |||
|
867 | { size_t const dictSize = hSize + dictContentSize; | |||
|
868 | char* dictEnd = (char*)dictBuffer + dictSize; | |||
|
869 | memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); | |||
|
870 | memcpy(dictBuffer, header, hSize); | |||
|
871 | return dictSize; | |||
|
872 | } | |||
|
873 | } | |||
|
874 | ||||
|
875 | ||||
828 | size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, |
|
876 | size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, | |
829 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
|
877 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |
830 | ZDICT_params_t params) |
|
878 | ZDICT_params_t params) |
@@ -19,15 +19,18 b' extern "C" {' | |||||
19 | #include <stddef.h> /* size_t */ |
|
19 | #include <stddef.h> /* size_t */ | |
20 |
|
20 | |||
21 |
|
21 | |||
22 | /*====== Export for Windows ======*/ |
|
22 | /* ===== ZDICTLIB_API : control library symbols visibility ===== */ | |
23 | /*! |
|
23 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |
24 | * ZSTD_DLL_EXPORT : |
|
24 | # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) | |
25 | * Enable exporting of functions when building a Windows DLL |
|
|||
26 | */ |
|
|||
27 | #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) |
|
|||
28 | # define ZDICTLIB_API __declspec(dllexport) |
|
|||
29 | #else |
|
25 | #else | |
30 |
# define ZDICTLIB_ |
|
26 | # define ZDICTLIB_VISIBILITY | |
|
27 | #endif | |||
|
28 | #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
|
29 | # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY | |||
|
30 | #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |||
|
31 | # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ | |||
|
32 | #else | |||
|
33 | # define ZDICTLIB_API ZDICTLIB_VISIBILITY | |||
31 | #endif |
|
34 | #endif | |
32 |
|
35 | |||
33 |
|
36 | |||
@@ -79,27 +82,114 b' typedef struct {' | |||||
79 | or an error code, which can be tested by ZDICT_isError(). |
|
82 | or an error code, which can be tested by ZDICT_isError(). | |
80 | note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0. |
|
83 | note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0. | |
81 | */ |
|
84 | */ | |
82 | size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, |
|
85 | ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, | |
|
86 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
|
87 | ZDICT_params_t parameters); | |||
|
88 | ||||
|
89 | /*! COVER_params_t : | |||
|
90 | For all values 0 means default. | |||
|
91 | k and d are the only required parameters. | |||
|
92 | */ | |||
|
93 | typedef struct { | |||
|
94 | unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ | |||
|
95 | unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ | |||
|
96 | unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ | |||
|
97 | ||||
|
98 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |||
|
99 | unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ | |||
|
100 | unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */ | |||
|
101 | int compressionLevel; /* 0 means default; target a specific zstd compression level */ | |||
|
102 | } COVER_params_t; | |||
|
103 | ||||
|
104 | ||||
|
105 | /*! COVER_trainFromBuffer() : | |||
|
106 | Train a dictionary from an array of samples using the COVER algorithm. | |||
|
107 | Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
|
108 | supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
|
109 | The resulting dictionary will be saved into `dictBuffer`. | |||
|
110 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
|
111 | or an error code, which can be tested with ZDICT_isError(). | |||
|
112 | Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte. | |||
|
113 | Tips : In general, a reasonable dictionary has a size of ~ 100 KB. | |||
|
114 | It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. | |||
|
115 | In general, it's recommended to provide a few thousands samples, but this can vary a lot. | |||
|
116 | It's recommended that total size of all samples be about ~x100 times the target size of dictionary. | |||
|
117 | */ | |||
|
118 | ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, | |||
|
119 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
|
120 | COVER_params_t parameters); | |||
|
121 | ||||
|
122 | /*! COVER_optimizeTrainFromBuffer() : | |||
|
123 | The same requirements as above hold for all the parameters except `parameters`. | |||
|
124 | This function tries many parameter combinations and picks the best parameters. | |||
|
125 | `*parameters` is filled with the best parameters found, and the dictionary | |||
|
126 | constructed with those parameters is stored in `dictBuffer`. | |||
|
127 | ||||
|
128 | All of the parameters d, k, steps are optional. | |||
|
129 | If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. | |||
|
130 | If steps is zero, its default value (32) is used. | |||
|
131 | If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048]. | |||
|
132 | ||||
|
133 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
|
134 | or an error code, which can be tested with ZDICT_isError(). | |||
|
135 | On success `*parameters` contains the parameters selected. | |||
|
136 | Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. | |||
|
137 | */ | |||
|
138 | ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, | |||
|
139 | const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, | |||
|
140 | COVER_params_t *parameters); | |||
|
141 | ||||
|
142 | /*! ZDICT_finalizeDictionary() : | |||
|
143 | ||||
|
144 | Given a custom content as a basis for dictionary, and a set of samples, | |||
|
145 | finalize dictionary by adding headers and statistics. | |||
|
146 | ||||
|
147 | Samples must be stored concatenated in a flat buffer `samplesBuffer`, | |||
|
148 | supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. | |||
|
149 | ||||
|
150 | dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes. | |||
|
151 | dictBufferCapacity must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes. | |||
|
152 | ||||
|
153 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), | |||
|
154 | or an error code, which can be tested by ZDICT_isError(). | |||
|
155 | note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. | |||
|
156 | note 2 : dictBuffer and customDictContent can overlap | |||
|
157 | */ | |||
|
158 | #define ZDICT_CONTENTSIZE_MIN 256 | |||
|
159 | #define ZDICT_DICTSIZE_MIN 512 | |||
|
160 | ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, | |||
|
161 | const void* customDictContent, size_t dictContentSize, | |||
83 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, |
|
162 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |
84 | ZDICT_params_t parameters); |
|
163 | ZDICT_params_t parameters); | |
85 |
|
164 | |||
86 |
|
165 | |||
87 | /*! ZDICT_addEntropyTablesFromBuffer() : |
|
|||
88 |
|
||||
89 | Given a content-only dictionary (built using any 3rd party algorithm), |
|
|||
90 | add entropy tables computed from an array of samples. |
|
|||
91 | Samples must be stored concatenated in a flat buffer `samplesBuffer`, |
|
|||
92 | supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. |
|
|||
93 |
|
166 | |||
94 | The input dictionary content must be stored *at the end* of `dictBuffer`. |
|
167 | /* Deprecation warnings */ | |
95 | Its size is `dictContentSize`. |
|
168 | /* It is generally possible to disable deprecation warnings from compiler, | |
96 | The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*, |
|
169 | for example with -Wno-deprecated-declarations for gcc | |
97 | starting from its beginning. |
|
170 | or _CRT_SECURE_NO_WARNINGS in Visual. | |
98 | @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`). |
|
171 | Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ | |
99 | */ |
|
172 | #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS | |
|
173 | # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ | |||
|
174 | #else | |||
|
175 | # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) | |||
|
176 | # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ | |||
|
177 | # define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]] | |||
|
178 | # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) | |||
|
179 | # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) | |||
|
180 | # elif (ZDICT_GCC_VERSION >= 301) | |||
|
181 | # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) | |||
|
182 | # elif defined(_MSC_VER) | |||
|
183 | # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) | |||
|
184 | # else | |||
|
185 | # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") | |||
|
186 | # define ZDICT_DEPRECATED(message) ZDICTLIB_API | |||
|
187 | # endif | |||
|
188 | #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ | |||
|
189 | ||||
|
190 | ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") | |||
100 | size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, |
|
191 | size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, | |
101 |
|
|
192 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); | |
102 |
|
||||
103 |
|
193 | |||
104 |
|
194 | |||
105 | #endif /* ZDICT_STATIC_LINKING_ONLY */ |
|
195 | #endif /* ZDICT_STATIC_LINKING_ONLY */ |
@@ -20,13 +20,16 b' extern "C" {' | |||||
20 |
|
20 | |||
21 | /* ===== ZSTDLIB_API : control library symbols visibility ===== */ |
|
21 | /* ===== ZSTDLIB_API : control library symbols visibility ===== */ | |
22 | #if defined(__GNUC__) && (__GNUC__ >= 4) |
|
22 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |
23 |
# define ZSTDLIB_ |
|
23 | # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) | |
24 | #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) |
|
24 | #else | |
25 | # define ZSTDLIB_API __declspec(dllexport) |
|
25 | # define ZSTDLIB_VISIBILITY | |
|
26 | #endif | |||
|
27 | #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
|
28 | # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY | |||
26 | #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) |
|
29 | #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |
27 | # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ |
|
30 | # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ | |
28 | #else |
|
31 | #else | |
29 | # define ZSTDLIB_API |
|
32 | # define ZSTDLIB_API ZSTDLIB_VISIBILITY | |
30 | #endif |
|
33 | #endif | |
31 |
|
34 | |||
32 |
|
35 | |||
@@ -53,7 +56,7 b' extern "C" {' | |||||
53 | /*------ Version ------*/ |
|
56 | /*------ Version ------*/ | |
54 | #define ZSTD_VERSION_MAJOR 1 |
|
57 | #define ZSTD_VERSION_MAJOR 1 | |
55 | #define ZSTD_VERSION_MINOR 1 |
|
58 | #define ZSTD_VERSION_MINOR 1 | |
56 |
#define ZSTD_VERSION_RELEASE |
|
59 | #define ZSTD_VERSION_RELEASE 3 | |
57 |
|
60 | |||
58 | #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE |
|
61 | #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE | |
59 | #define ZSTD_QUOTE(str) #str |
|
62 | #define ZSTD_QUOTE(str) #str | |
@@ -170,8 +173,8 b' typedef struct ZSTD_CDict_s ZSTD_CDict;' | |||||
170 | * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. |
|
173 | * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. | |
171 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. |
|
174 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. | |
172 | * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. |
|
175 | * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. | |
173 |
* `dict` can be released after ZSTD_CDict creation |
|
176 | * `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */ | |
174 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel); |
|
177 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); | |
175 |
|
178 | |||
176 | /*! ZSTD_freeCDict() : |
|
179 | /*! ZSTD_freeCDict() : | |
177 | * Function frees memory allocated by ZSTD_createCDict(). */ |
|
180 | * Function frees memory allocated by ZSTD_createCDict(). */ | |
@@ -191,8 +194,8 b' typedef struct ZSTD_DDict_s ZSTD_DDict;' | |||||
191 |
|
194 | |||
192 | /*! ZSTD_createDDict() : |
|
195 | /*! ZSTD_createDDict() : | |
193 | * Create a digested dictionary, ready to start decompression operation without startup delay. |
|
196 | * Create a digested dictionary, ready to start decompression operation without startup delay. | |
194 |
* |
|
197 | * dictBuffer can be released after DDict creation, as its content is copied inside DDict */ | |
195 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); |
|
198 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); | |
196 |
|
199 | |||
197 | /*! ZSTD_freeDDict() : |
|
200 | /*! ZSTD_freeDDict() : | |
198 | * Function frees memory allocated with ZSTD_createDDict() */ |
|
201 | * Function frees memory allocated with ZSTD_createDDict() */ | |
@@ -325,7 +328,7 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v' | |||||
325 | * ***************************************************************************************/ |
|
328 | * ***************************************************************************************/ | |
326 |
|
329 | |||
327 | /* --- Constants ---*/ |
|
330 | /* --- Constants ---*/ | |
328 | #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */ |
|
331 | #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ | |
329 | #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U |
|
332 | #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U | |
330 |
|
333 | |||
331 | #define ZSTD_WINDOWLOG_MAX_32 25 |
|
334 | #define ZSTD_WINDOWLOG_MAX_32 25 | |
@@ -345,8 +348,9 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v' | |||||
345 | #define ZSTD_TARGETLENGTH_MAX 999 |
|
348 | #define ZSTD_TARGETLENGTH_MAX 999 | |
346 |
|
349 | |||
347 | #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ |
|
350 | #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ | |
|
351 | #define ZSTD_FRAMEHEADERSIZE_MIN 6 | |||
348 | static const size_t ZSTD_frameHeaderSize_prefix = 5; |
|
352 | static const size_t ZSTD_frameHeaderSize_prefix = 5; | |
349 |
static const size_t ZSTD_frameHeaderSize_min = |
|
353 | static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; | |
350 | static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; |
|
354 | static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; | |
351 | static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ |
|
355 | static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ | |
352 |
|
356 | |||
@@ -365,9 +369,9 b' typedef struct {' | |||||
365 | } ZSTD_compressionParameters; |
|
369 | } ZSTD_compressionParameters; | |
366 |
|
370 | |||
367 | typedef struct { |
|
371 | typedef struct { | |
368 |
unsigned contentSizeFlag; /**< 1: content size will be in frame header ( |
|
372 | unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */ | |
369 |
unsigned checksumFlag; /**< 1: |
|
373 | unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */ | |
370 |
unsigned noDictIDFlag; /**< 1: no dict |
|
374 | unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */ | |
371 | } ZSTD_frameParameters; |
|
375 | } ZSTD_frameParameters; | |
372 |
|
376 | |||
373 | typedef struct { |
|
377 | typedef struct { | |
@@ -397,9 +401,23 b' ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_a' | |||||
397 | * Gives the amount of memory used by a given ZSTD_CCtx */ |
|
401 | * Gives the amount of memory used by a given ZSTD_CCtx */ | |
398 | ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); |
|
402 | ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); | |
399 |
|
403 | |||
|
404 | typedef enum { | |||
|
405 | ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/ | |||
|
406 | } ZSTD_CCtxParameter; | |||
|
407 | /*! ZSTD_setCCtxParameter() : | |||
|
408 | * Set advanced parameters, selected through enum ZSTD_CCtxParameter | |||
|
409 | * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ | |||
|
410 | ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); | |||
|
411 | ||||
|
412 | /*! ZSTD_createCDict_byReference() : | |||
|
413 | * Create a digested dictionary for compression | |||
|
414 | * Dictionary content is simply referenced, and therefore stays in dictBuffer. | |||
|
415 | * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */ | |||
|
416 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); | |||
|
417 | ||||
400 | /*! ZSTD_createCDict_advanced() : |
|
418 | /*! ZSTD_createCDict_advanced() : | |
401 | * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ |
|
419 | * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ | |
402 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, |
|
420 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, | |
403 | ZSTD_parameters params, ZSTD_customMem customMem); |
|
421 | ZSTD_parameters params, ZSTD_customMem customMem); | |
404 |
|
422 | |||
405 | /*! ZSTD_sizeof_CDict() : |
|
423 | /*! ZSTD_sizeof_CDict() : | |
@@ -455,6 +473,15 b' ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_a' | |||||
455 | * Gives the amount of memory used by a given ZSTD_DCtx */ |
|
473 | * Gives the amount of memory used by a given ZSTD_DCtx */ | |
456 | ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); |
|
474 | ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); | |
457 |
|
475 | |||
|
476 | /*! ZSTD_createDDict_byReference() : | |||
|
477 | * Create a digested dictionary, ready to start decompression operation without startup delay. | |||
|
478 | * Dictionary content is simply referenced, and therefore stays in dictBuffer. | |||
|
479 | * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */ | |||
|
480 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); | |||
|
481 | ||||
|
482 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, | |||
|
483 | unsigned byReference, ZSTD_customMem customMem); | |||
|
484 | ||||
458 | /*! ZSTD_sizeof_DDict() : |
|
485 | /*! ZSTD_sizeof_DDict() : | |
459 | * Gives the amount of memory used by a given ZSTD_DDict */ |
|
486 | * Gives the amount of memory used by a given ZSTD_DDict */ | |
460 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); |
|
487 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); | |
@@ -463,13 +490,13 b' ZSTDLIB_API size_t ZSTD_sizeof_DDict(con' | |||||
463 | * Provides the dictID stored within dictionary. |
|
490 | * Provides the dictID stored within dictionary. | |
464 | * if @return == 0, the dictionary is not conformant with Zstandard specification. |
|
491 | * if @return == 0, the dictionary is not conformant with Zstandard specification. | |
465 | * It can still be loaded, but as a content-only dictionary. */ |
|
492 | * It can still be loaded, but as a content-only dictionary. */ | |
466 | unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); |
|
493 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); | |
467 |
|
494 | |||
468 | /*! ZSTD_getDictID_fromDDict() : |
|
495 | /*! ZSTD_getDictID_fromDDict() : | |
469 | * Provides the dictID of the dictionary loaded into `ddict`. |
|
496 | * Provides the dictID of the dictionary loaded into `ddict`. | |
470 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. |
|
497 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |
471 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ |
|
498 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |
472 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); |
|
499 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); | |
473 |
|
500 | |||
474 | /*! ZSTD_getDictID_fromFrame() : |
|
501 | /*! ZSTD_getDictID_fromFrame() : | |
475 | * Provides the dictID required to decompress the frame stored within `src`. |
|
502 | * Provides the dictID required to decompress the frame stored within `src`. | |
@@ -481,7 +508,7 b' unsigned ZSTD_getDictID_fromDDict(const ' | |||||
481 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). |
|
508 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). | |
482 | * - This is not a Zstandard frame. |
|
509 | * - This is not a Zstandard frame. | |
483 | * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */ |
|
510 | * When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */ | |
484 | unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); |
|
511 | ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); | |
485 |
|
512 | |||
486 |
|
513 | |||
487 | /******************************************************************** |
|
514 | /******************************************************************** | |
@@ -491,7 +518,7 b' unsigned ZSTD_getDictID_fromFrame(const ' | |||||
491 | /*===== Advanced Streaming compression functions =====*/ |
|
518 | /*===== Advanced Streaming compression functions =====*/ | |
492 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); |
|
519 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); | |
493 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */ |
|
520 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */ | |
494 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); |
|
521 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ | |
495 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, |
|
522 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, | |
496 | ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ |
|
523 | ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ | |
497 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ |
|
524 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ | |
@@ -500,9 +527,9 b' ZSTDLIB_API size_t ZSTD_sizeof_CStream(c' | |||||
500 |
|
527 | |||
501 |
|
528 | |||
502 | /*===== Advanced Streaming decompression functions =====*/ |
|
529 | /*===== Advanced Streaming decompression functions =====*/ | |
503 |
typedef enum { |
|
530 | typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; | |
504 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); |
|
531 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); | |
505 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); |
|
532 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ | |
506 | ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); |
|
533 | ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); | |
507 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ |
|
534 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ | |
508 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ |
|
535 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ | |
@@ -542,10 +569,10 b' ZSTDLIB_API size_t ZSTD_sizeof_DStream(c' | |||||
542 | In which case, it will "discard" the relevant memory section from its history. |
|
569 | In which case, it will "discard" the relevant memory section from its history. | |
543 |
|
570 | |||
544 | Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. |
|
571 | Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. | |
545 |
It's possible to use |
|
572 | It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. | |
546 |
Without last block mark, frames will be considered unfinished ( |
|
573 | Without last block mark, frames will be considered unfinished (corrupted) by decoders. | |
547 |
|
574 | |||
548 |
|
|
575 | `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame. | |
549 | */ |
|
576 | */ | |
550 |
|
577 | |||
551 | /*===== Buffer-less streaming compression functions =====*/ |
|
578 | /*===== Buffer-less streaming compression functions =====*/ | |
@@ -553,6 +580,7 b' ZSTDLIB_API size_t ZSTD_compressBegin(ZS' | |||||
553 | ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); |
|
580 | ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); | |
554 | ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); |
|
581 | ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); | |
555 | ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); |
|
582 | ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); | |
|
583 | ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); | |||
556 | ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); |
|
584 | ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |
557 | ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); |
|
585 | ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |
558 |
|
586 |
This diff has been collapsed as it changes many lines, (1016 lines changed) Show them Hide them | |||||
@@ -8,145 +8,1035 b'' | |||||
8 |
|
8 | |||
9 | from __future__ import absolute_import, unicode_literals |
|
9 | from __future__ import absolute_import, unicode_literals | |
10 |
|
10 | |||
11 |
import |
|
11 | import sys | |
12 |
|
12 | |||
13 | from _zstd_cffi import ( |
|
13 | from _zstd_cffi import ( | |
14 | ffi, |
|
14 | ffi, | |
15 | lib, |
|
15 | lib, | |
16 | ) |
|
16 | ) | |
17 |
|
17 | |||
|
18 | if sys.version_info[0] == 2: | |||
|
19 | bytes_type = str | |||
|
20 | int_type = long | |||
|
21 | else: | |||
|
22 | bytes_type = bytes | |||
|
23 | int_type = int | |||
18 |
|
24 | |||
19 | _CSTREAM_IN_SIZE = lib.ZSTD_CStreamInSize() |
|
25 | ||
20 |
|
|
26 | COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize() | |
|
27 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize() | |||
|
28 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize() | |||
|
29 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize() | |||
|
30 | ||||
|
31 | new_nonzero = ffi.new_allocator(should_clear_after_alloc=False) | |||
|
32 | ||||
|
33 | ||||
|
34 | MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel() | |||
|
35 | MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER | |||
|
36 | FRAME_HEADER = b'\x28\xb5\x2f\xfd' | |||
|
37 | ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE) | |||
|
38 | ||||
|
39 | WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN | |||
|
40 | WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX | |||
|
41 | CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN | |||
|
42 | CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX | |||
|
43 | HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN | |||
|
44 | HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX | |||
|
45 | HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX | |||
|
46 | SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN | |||
|
47 | SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX | |||
|
48 | SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN | |||
|
49 | SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX | |||
|
50 | TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN | |||
|
51 | TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX | |||
|
52 | ||||
|
53 | STRATEGY_FAST = lib.ZSTD_fast | |||
|
54 | STRATEGY_DFAST = lib.ZSTD_dfast | |||
|
55 | STRATEGY_GREEDY = lib.ZSTD_greedy | |||
|
56 | STRATEGY_LAZY = lib.ZSTD_lazy | |||
|
57 | STRATEGY_LAZY2 = lib.ZSTD_lazy2 | |||
|
58 | STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2 | |||
|
59 | STRATEGY_BTOPT = lib.ZSTD_btopt | |||
|
60 | ||||
|
61 | COMPRESSOBJ_FLUSH_FINISH = 0 | |||
|
62 | COMPRESSOBJ_FLUSH_BLOCK = 1 | |||
|
63 | ||||
|
64 | ||||
|
65 | class ZstdError(Exception): | |||
|
66 | pass | |||
21 |
|
67 | |||
22 |
|
68 | |||
23 |
class |
|
69 | class CompressionParameters(object): | |
24 | def __init__(self, cstream, writer): |
|
70 | def __init__(self, window_log, chain_log, hash_log, search_log, | |
25 | self._cstream = cstream |
|
71 | search_length, target_length, strategy): | |
|
72 | if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX: | |||
|
73 | raise ValueError('invalid window log value') | |||
|
74 | ||||
|
75 | if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX: | |||
|
76 | raise ValueError('invalid chain log value') | |||
|
77 | ||||
|
78 | if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX: | |||
|
79 | raise ValueError('invalid hash log value') | |||
|
80 | ||||
|
81 | if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX: | |||
|
82 | raise ValueError('invalid search log value') | |||
|
83 | ||||
|
84 | if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX: | |||
|
85 | raise ValueError('invalid search length value') | |||
|
86 | ||||
|
87 | if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX: | |||
|
88 | raise ValueError('invalid target length value') | |||
|
89 | ||||
|
90 | if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT: | |||
|
91 | raise ValueError('invalid strategy value') | |||
|
92 | ||||
|
93 | self.window_log = window_log | |||
|
94 | self.chain_log = chain_log | |||
|
95 | self.hash_log = hash_log | |||
|
96 | self.search_log = search_log | |||
|
97 | self.search_length = search_length | |||
|
98 | self.target_length = target_length | |||
|
99 | self.strategy = strategy | |||
|
100 | ||||
|
101 | def as_compression_parameters(self): | |||
|
102 | p = ffi.new('ZSTD_compressionParameters *')[0] | |||
|
103 | p.windowLog = self.window_log | |||
|
104 | p.chainLog = self.chain_log | |||
|
105 | p.hashLog = self.hash_log | |||
|
106 | p.searchLog = self.search_log | |||
|
107 | p.searchLength = self.search_length | |||
|
108 | p.targetLength = self.target_length | |||
|
109 | p.strategy = self.strategy | |||
|
110 | ||||
|
111 | return p | |||
|
112 | ||||
|
113 | def get_compression_parameters(level, source_size=0, dict_size=0): | |||
|
114 | params = lib.ZSTD_getCParams(level, source_size, dict_size) | |||
|
115 | return CompressionParameters(window_log=params.windowLog, | |||
|
116 | chain_log=params.chainLog, | |||
|
117 | hash_log=params.hashLog, | |||
|
118 | search_log=params.searchLog, | |||
|
119 | search_length=params.searchLength, | |||
|
120 | target_length=params.targetLength, | |||
|
121 | strategy=params.strategy) | |||
|
122 | ||||
|
123 | ||||
|
124 | def estimate_compression_context_size(params): | |||
|
125 | if not isinstance(params, CompressionParameters): | |||
|
126 | raise ValueError('argument must be a CompressionParameters') | |||
|
127 | ||||
|
128 | cparams = params.as_compression_parameters() | |||
|
129 | return lib.ZSTD_estimateCCtxSize(cparams) | |||
|
130 | ||||
|
131 | ||||
|
132 | def estimate_decompression_context_size(): | |||
|
133 | return lib.ZSTD_estimateDCtxSize() | |||
|
134 | ||||
|
135 | ||||
|
136 | class ZstdCompressionWriter(object): | |||
|
137 | def __init__(self, compressor, writer, source_size, write_size): | |||
|
138 | self._compressor = compressor | |||
26 | self._writer = writer |
|
139 | self._writer = writer | |
|
140 | self._source_size = source_size | |||
|
141 | self._write_size = write_size | |||
|
142 | self._entered = False | |||
27 |
|
143 | |||
28 | def __enter__(self): |
|
144 | def __enter__(self): | |
|
145 | if self._entered: | |||
|
146 | raise ZstdError('cannot __enter__ multiple times') | |||
|
147 | ||||
|
148 | self._cstream = self._compressor._get_cstream(self._source_size) | |||
|
149 | self._entered = True | |||
29 | return self |
|
150 | return self | |
30 |
|
151 | |||
31 | def __exit__(self, exc_type, exc_value, exc_tb): |
|
152 | def __exit__(self, exc_type, exc_value, exc_tb): | |
|
153 | self._entered = False | |||
|
154 | ||||
32 | if not exc_type and not exc_value and not exc_tb: |
|
155 | if not exc_type and not exc_value and not exc_tb: | |
33 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
156 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
34 |
|
|
157 | dst_buffer = ffi.new('char[]', self._write_size) | |
35 |
out_buffer. |
|
158 | out_buffer.dst = dst_buffer | |
|
159 | out_buffer.size = self._write_size | |||
36 | out_buffer.pos = 0 |
|
160 | out_buffer.pos = 0 | |
37 |
|
161 | |||
38 | while True: |
|
162 | while True: | |
39 | res = lib.ZSTD_endStream(self._cstream, out_buffer) |
|
163 | zresult = lib.ZSTD_endStream(self._cstream, out_buffer) | |
40 | if lib.ZSTD_isError(res): |
|
164 | if lib.ZSTD_isError(zresult): | |
41 |
raise |
|
165 | raise ZstdError('error ending compression stream: %s' % | |
|
166 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
42 |
|
167 | |||
43 | if out_buffer.pos: |
|
168 | if out_buffer.pos: | |
44 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
169 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |
45 | out_buffer.pos = 0 |
|
170 | out_buffer.pos = 0 | |
46 |
|
171 | |||
47 | if res == 0: |
|
172 | if zresult == 0: | |
48 | break |
|
173 | break | |
49 |
|
174 | |||
|
175 | self._cstream = None | |||
|
176 | self._compressor = None | |||
|
177 | ||||
50 | return False |
|
178 | return False | |
51 |
|
179 | |||
|
180 | def memory_size(self): | |||
|
181 | if not self._entered: | |||
|
182 | raise ZstdError('cannot determine size of an inactive compressor; ' | |||
|
183 | 'call when a context manager is active') | |||
|
184 | ||||
|
185 | return lib.ZSTD_sizeof_CStream(self._cstream) | |||
|
186 | ||||
52 | def write(self, data): |
|
187 | def write(self, data): | |
|
188 | if not self._entered: | |||
|
189 | raise ZstdError('write() must be called from an active context ' | |||
|
190 | 'manager') | |||
|
191 | ||||
|
192 | total_write = 0 | |||
|
193 | ||||
|
194 | data_buffer = ffi.from_buffer(data) | |||
|
195 | ||||
|
196 | in_buffer = ffi.new('ZSTD_inBuffer *') | |||
|
197 | in_buffer.src = data_buffer | |||
|
198 | in_buffer.size = len(data_buffer) | |||
|
199 | in_buffer.pos = 0 | |||
|
200 | ||||
53 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
201 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
54 |
|
|
202 | dst_buffer = ffi.new('char[]', self._write_size) | |
55 |
out_buffer. |
|
203 | out_buffer.dst = dst_buffer | |
|
204 | out_buffer.size = self._write_size | |||
|
205 | out_buffer.pos = 0 | |||
|
206 | ||||
|
207 | while in_buffer.pos < in_buffer.size: | |||
|
208 | zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer) | |||
|
209 | if lib.ZSTD_isError(zresult): | |||
|
210 | raise ZstdError('zstd compress error: %s' % | |||
|
211 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
212 | ||||
|
213 | if out_buffer.pos: | |||
|
214 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |||
|
215 | total_write += out_buffer.pos | |||
|
216 | out_buffer.pos = 0 | |||
|
217 | ||||
|
218 | return total_write | |||
|
219 | ||||
|
220 | def flush(self): | |||
|
221 | if not self._entered: | |||
|
222 | raise ZstdError('flush must be called from an active context manager') | |||
|
223 | ||||
|
224 | total_write = 0 | |||
|
225 | ||||
|
226 | out_buffer = ffi.new('ZSTD_outBuffer *') | |||
|
227 | dst_buffer = ffi.new('char[]', self._write_size) | |||
|
228 | out_buffer.dst = dst_buffer | |||
|
229 | out_buffer.size = self._write_size | |||
56 | out_buffer.pos = 0 |
|
230 | out_buffer.pos = 0 | |
57 |
|
231 | |||
58 | # TODO can we reuse existing memory? |
|
232 | while True: | |
59 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
233 | zresult = lib.ZSTD_flushStream(self._cstream, out_buffer) | |
60 | in_buffer.src = ffi.new('char[]', data) |
|
234 | if lib.ZSTD_isError(zresult): | |
61 | in_buffer.size = len(data) |
|
235 | raise ZstdError('zstd compress error: %s' % | |
62 | in_buffer.pos = 0 |
|
236 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
63 | while in_buffer.pos < in_buffer.size: |
|
237 | ||
64 | res = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer) |
|
238 | if not out_buffer.pos: | |
65 | if lib.ZSTD_isError(res): |
|
239 | break | |
66 | raise Exception('zstd compress error: %s' % lib.ZSTD_getErrorName(res)) |
|
240 | ||
|
241 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |||
|
242 | total_write += out_buffer.pos | |||
|
243 | out_buffer.pos = 0 | |||
|
244 | ||||
|
245 | return total_write | |||
|
246 | ||||
|
247 | ||||
|
248 | class ZstdCompressionObj(object): | |||
|
249 | def compress(self, data): | |||
|
250 | if self._finished: | |||
|
251 | raise ZstdError('cannot call compress() after compressor finished') | |||
|
252 | ||||
|
253 | data_buffer = ffi.from_buffer(data) | |||
|
254 | source = ffi.new('ZSTD_inBuffer *') | |||
|
255 | source.src = data_buffer | |||
|
256 | source.size = len(data_buffer) | |||
|
257 | source.pos = 0 | |||
|
258 | ||||
|
259 | chunks = [] | |||
|
260 | ||||
|
261 | while source.pos < len(data): | |||
|
262 | zresult = lib.ZSTD_compressStream(self._cstream, self._out, source) | |||
|
263 | if lib.ZSTD_isError(zresult): | |||
|
264 | raise ZstdError('zstd compress error: %s' % | |||
|
265 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
266 | ||||
|
267 | if self._out.pos: | |||
|
268 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) | |||
|
269 | self._out.pos = 0 | |||
|
270 | ||||
|
271 | return b''.join(chunks) | |||
67 |
|
272 | |||
68 | if out_buffer.pos: |
|
273 | def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH): | |
69 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
274 | if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK): | |
70 | out_buffer.pos = 0 |
|
275 | raise ValueError('flush mode not recognized') | |
|
276 | ||||
|
277 | if self._finished: | |||
|
278 | raise ZstdError('compressor object already finished') | |||
|
279 | ||||
|
280 | assert self._out.pos == 0 | |||
|
281 | ||||
|
282 | if flush_mode == COMPRESSOBJ_FLUSH_BLOCK: | |||
|
283 | zresult = lib.ZSTD_flushStream(self._cstream, self._out) | |||
|
284 | if lib.ZSTD_isError(zresult): | |||
|
285 | raise ZstdError('zstd compress error: %s' % | |||
|
286 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
287 | ||||
|
288 | # Output buffer is guaranteed to hold full block. | |||
|
289 | assert zresult == 0 | |||
|
290 | ||||
|
291 | if self._out.pos: | |||
|
292 | result = ffi.buffer(self._out.dst, self._out.pos)[:] | |||
|
293 | self._out.pos = 0 | |||
|
294 | return result | |||
|
295 | else: | |||
|
296 | return b'' | |||
|
297 | ||||
|
298 | assert flush_mode == COMPRESSOBJ_FLUSH_FINISH | |||
|
299 | self._finished = True | |||
|
300 | ||||
|
301 | chunks = [] | |||
|
302 | ||||
|
303 | while True: | |||
|
304 | zresult = lib.ZSTD_endStream(self._cstream, self._out) | |||
|
305 | if lib.ZSTD_isError(zresult): | |||
|
306 | raise ZstdError('error ending compression stream: %s' % | |||
|
307 | ffi.string(lib.ZSTD_getErroName(zresult))) | |||
|
308 | ||||
|
309 | if self._out.pos: | |||
|
310 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) | |||
|
311 | self._out.pos = 0 | |||
|
312 | ||||
|
313 | if not zresult: | |||
|
314 | break | |||
|
315 | ||||
|
316 | # GC compression stream immediately. | |||
|
317 | self._cstream = None | |||
|
318 | ||||
|
319 | return b''.join(chunks) | |||
71 |
|
320 | |||
72 |
|
321 | |||
73 | class ZstdCompressor(object): |
|
322 | class ZstdCompressor(object): | |
74 |
def __init__(self, level=3, dict_data=None, compression_params=None |
|
323 | def __init__(self, level=3, dict_data=None, compression_params=None, | |
75 | if dict_data: |
|
324 | write_checksum=False, write_content_size=False, | |
76 | raise Exception('dict_data not yet supported') |
|
325 | write_dict_id=True): | |
77 | if compression_params: |
|
326 | if level < 1: | |
78 | raise Exception('compression_params not yet supported') |
|
327 | raise ValueError('level must be greater than 0') | |
|
328 | elif level > lib.ZSTD_maxCLevel(): | |||
|
329 | raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel()) | |||
79 |
|
330 | |||
80 | self._compression_level = level |
|
331 | self._compression_level = level | |
|
332 | self._dict_data = dict_data | |||
|
333 | self._cparams = compression_params | |||
|
334 | self._fparams = ffi.new('ZSTD_frameParameters *')[0] | |||
|
335 | self._fparams.checksumFlag = write_checksum | |||
|
336 | self._fparams.contentSizeFlag = write_content_size | |||
|
337 | self._fparams.noDictIDFlag = not write_dict_id | |||
81 |
|
338 | |||
82 | def compress(self, data): |
|
339 | cctx = lib.ZSTD_createCCtx() | |
83 | # Just use the stream API for now. |
|
340 | if cctx == ffi.NULL: | |
84 | output = io.BytesIO() |
|
341 | raise MemoryError() | |
85 | with self.write_to(output) as compressor: |
|
342 | ||
86 | compressor.write(data) |
|
343 | self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx) | |
87 | return output.getvalue() |
|
344 | ||
|
345 | def compress(self, data, allow_empty=False): | |||
|
346 | if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty: | |||
|
347 | raise ValueError('cannot write empty inputs when writing content sizes') | |||
|
348 | ||||
|
349 | # TODO use a CDict for performance. | |||
|
350 | dict_data = ffi.NULL | |||
|
351 | dict_size = 0 | |||
|
352 | ||||
|
353 | if self._dict_data: | |||
|
354 | dict_data = self._dict_data.as_bytes() | |||
|
355 | dict_size = len(self._dict_data) | |||
|
356 | ||||
|
357 | params = ffi.new('ZSTD_parameters *')[0] | |||
|
358 | if self._cparams: | |||
|
359 | params.cParams = self._cparams.as_compression_parameters() | |||
|
360 | else: | |||
|
361 | params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data), | |||
|
362 | dict_size) | |||
|
363 | params.fParams = self._fparams | |||
|
364 | ||||
|
365 | dest_size = lib.ZSTD_compressBound(len(data)) | |||
|
366 | out = new_nonzero('char[]', dest_size) | |||
88 |
|
367 | |||
89 | def copy_stream(self, ifh, ofh): |
|
368 | zresult = lib.ZSTD_compress_advanced(self._cctx, | |
90 | cstream = self._get_cstream() |
|
369 | ffi.addressof(out), dest_size, | |
|
370 | data, len(data), | |||
|
371 | dict_data, dict_size, | |||
|
372 | params) | |||
|
373 | ||||
|
374 | if lib.ZSTD_isError(zresult): | |||
|
375 | raise ZstdError('cannot compress: %s' % | |||
|
376 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
377 | ||||
|
378 | return ffi.buffer(out, zresult)[:] | |||
|
379 | ||||
|
380 | def compressobj(self, size=0): | |||
|
381 | cstream = self._get_cstream(size) | |||
|
382 | cobj = ZstdCompressionObj() | |||
|
383 | cobj._cstream = cstream | |||
|
384 | cobj._out = ffi.new('ZSTD_outBuffer *') | |||
|
385 | cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |||
|
386 | cobj._out.dst = cobj._dst_buffer | |||
|
387 | cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE | |||
|
388 | cobj._out.pos = 0 | |||
|
389 | cobj._compressor = self | |||
|
390 | cobj._finished = False | |||
|
391 | ||||
|
392 | return cobj | |||
|
393 | ||||
|
394 | def copy_stream(self, ifh, ofh, size=0, | |||
|
395 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |||
|
396 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |||
|
397 | ||||
|
398 | if not hasattr(ifh, 'read'): | |||
|
399 | raise ValueError('first argument must have a read() method') | |||
|
400 | if not hasattr(ofh, 'write'): | |||
|
401 | raise ValueError('second argument must have a write() method') | |||
|
402 | ||||
|
403 | cstream = self._get_cstream(size) | |||
91 |
|
404 | |||
92 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
405 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
93 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
406 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
94 |
|
407 | |||
95 |
|
|
408 | dst_buffer = ffi.new('char[]', write_size) | |
96 |
out_buffer. |
|
409 | out_buffer.dst = dst_buffer | |
|
410 | out_buffer.size = write_size | |||
97 | out_buffer.pos = 0 |
|
411 | out_buffer.pos = 0 | |
98 |
|
412 | |||
99 | total_read, total_write = 0, 0 |
|
413 | total_read, total_write = 0, 0 | |
100 |
|
414 | |||
101 | while True: |
|
415 | while True: | |
102 |
data = ifh.read( |
|
416 | data = ifh.read(read_size) | |
103 | if not data: |
|
417 | if not data: | |
104 | break |
|
418 | break | |
105 |
|
419 | |||
106 |
|
|
420 | data_buffer = ffi.from_buffer(data) | |
107 |
|
421 | total_read += len(data_buffer) | ||
108 |
in_buffer.src = |
|
422 | in_buffer.src = data_buffer | |
109 | in_buffer.size = len(data) |
|
423 | in_buffer.size = len(data_buffer) | |
110 | in_buffer.pos = 0 |
|
424 | in_buffer.pos = 0 | |
111 |
|
425 | |||
112 | while in_buffer.pos < in_buffer.size: |
|
426 | while in_buffer.pos < in_buffer.size: | |
113 | res = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer) |
|
427 | zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer) | |
114 | if lib.ZSTD_isError(res): |
|
428 | if lib.ZSTD_isError(zresult): | |
115 |
raise |
|
429 | raise ZstdError('zstd compress error: %s' % | |
116 | lib.ZSTD_getErrorName(res)) |
|
430 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
117 |
|
431 | |||
118 | if out_buffer.pos: |
|
432 | if out_buffer.pos: | |
119 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
433 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) | |
120 | total_write = out_buffer.pos |
|
434 | total_write += out_buffer.pos | |
121 | out_buffer.pos = 0 |
|
435 | out_buffer.pos = 0 | |
122 |
|
436 | |||
123 | # We've finished reading. Flush the compressor. |
|
437 | # We've finished reading. Flush the compressor. | |
124 | while True: |
|
438 | while True: | |
125 | res = lib.ZSTD_endStream(cstream, out_buffer) |
|
439 | zresult = lib.ZSTD_endStream(cstream, out_buffer) | |
126 | if lib.ZSTD_isError(res): |
|
440 | if lib.ZSTD_isError(zresult): | |
127 |
raise |
|
441 | raise ZstdError('error ending compression stream: %s' % | |
128 | lib.ZSTD_getErrorName(res)) |
|
442 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
129 |
|
443 | |||
130 | if out_buffer.pos: |
|
444 | if out_buffer.pos: | |
131 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
445 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) | |
132 | total_write += out_buffer.pos |
|
446 | total_write += out_buffer.pos | |
133 | out_buffer.pos = 0 |
|
447 | out_buffer.pos = 0 | |
134 |
|
448 | |||
135 | if res == 0: |
|
449 | if zresult == 0: | |
136 | break |
|
450 | break | |
137 |
|
451 | |||
138 | return total_read, total_write |
|
452 | return total_read, total_write | |
139 |
|
453 | |||
140 |
def write_to(self, writer |
|
454 | def write_to(self, writer, size=0, | |
141 | return _ZstdCompressionWriter(self._get_cstream(), writer) |
|
455 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
|
456 | ||||
|
457 | if not hasattr(writer, 'write'): | |||
|
458 | raise ValueError('must pass an object with a write() method') | |||
|
459 | ||||
|
460 | return ZstdCompressionWriter(self, writer, size, write_size) | |||
|
461 | ||||
|
462 | def read_from(self, reader, size=0, | |||
|
463 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |||
|
464 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |||
|
465 | if hasattr(reader, 'read'): | |||
|
466 | have_read = True | |||
|
467 | elif hasattr(reader, '__getitem__'): | |||
|
468 | have_read = False | |||
|
469 | buffer_offset = 0 | |||
|
470 | size = len(reader) | |||
|
471 | else: | |||
|
472 | raise ValueError('must pass an object with a read() method or ' | |||
|
473 | 'conforms to buffer protocol') | |||
|
474 | ||||
|
475 | cstream = self._get_cstream(size) | |||
|
476 | ||||
|
477 | in_buffer = ffi.new('ZSTD_inBuffer *') | |||
|
478 | out_buffer = ffi.new('ZSTD_outBuffer *') | |||
|
479 | ||||
|
480 | in_buffer.src = ffi.NULL | |||
|
481 | in_buffer.size = 0 | |||
|
482 | in_buffer.pos = 0 | |||
|
483 | ||||
|
484 | dst_buffer = ffi.new('char[]', write_size) | |||
|
485 | out_buffer.dst = dst_buffer | |||
|
486 | out_buffer.size = write_size | |||
|
487 | out_buffer.pos = 0 | |||
|
488 | ||||
|
489 | while True: | |||
|
490 | # We should never have output data sitting around after a previous | |||
|
491 | # iteration. | |||
|
492 | assert out_buffer.pos == 0 | |||
|
493 | ||||
|
494 | # Collect input data. | |||
|
495 | if have_read: | |||
|
496 | read_result = reader.read(read_size) | |||
|
497 | else: | |||
|
498 | remaining = len(reader) - buffer_offset | |||
|
499 | slice_size = min(remaining, read_size) | |||
|
500 | read_result = reader[buffer_offset:buffer_offset + slice_size] | |||
|
501 | buffer_offset += slice_size | |||
142 |
|
502 | |||
143 | def _get_cstream(self): |
|
503 | # No new input data. Break out of the read loop. | |
|
504 | if not read_result: | |||
|
505 | break | |||
|
506 | ||||
|
507 | # Feed all read data into the compressor and emit output until | |||
|
508 | # exhausted. | |||
|
509 | read_buffer = ffi.from_buffer(read_result) | |||
|
510 | in_buffer.src = read_buffer | |||
|
511 | in_buffer.size = len(read_buffer) | |||
|
512 | in_buffer.pos = 0 | |||
|
513 | ||||
|
514 | while in_buffer.pos < in_buffer.size: | |||
|
515 | zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer) | |||
|
516 | if lib.ZSTD_isError(zresult): | |||
|
517 | raise ZstdError('zstd compress error: %s' % | |||
|
518 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
519 | ||||
|
520 | if out_buffer.pos: | |||
|
521 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] | |||
|
522 | out_buffer.pos = 0 | |||
|
523 | yield data | |||
|
524 | ||||
|
525 | assert out_buffer.pos == 0 | |||
|
526 | ||||
|
527 | # And repeat the loop to collect more data. | |||
|
528 | continue | |||
|
529 | ||||
|
530 | # If we get here, input is exhausted. End the stream and emit what | |||
|
531 | # remains. | |||
|
532 | while True: | |||
|
533 | assert out_buffer.pos == 0 | |||
|
534 | zresult = lib.ZSTD_endStream(cstream, out_buffer) | |||
|
535 | if lib.ZSTD_isError(zresult): | |||
|
536 | raise ZstdError('error ending compression stream: %s' % | |||
|
537 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
538 | ||||
|
539 | if out_buffer.pos: | |||
|
540 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] | |||
|
541 | out_buffer.pos = 0 | |||
|
542 | yield data | |||
|
543 | ||||
|
544 | if zresult == 0: | |||
|
545 | break | |||
|
546 | ||||
|
547 | def _get_cstream(self, size): | |||
144 | cstream = lib.ZSTD_createCStream() |
|
548 | cstream = lib.ZSTD_createCStream() | |
|
549 | if cstream == ffi.NULL: | |||
|
550 | raise MemoryError() | |||
|
551 | ||||
145 | cstream = ffi.gc(cstream, lib.ZSTD_freeCStream) |
|
552 | cstream = ffi.gc(cstream, lib.ZSTD_freeCStream) | |
146 |
|
553 | |||
147 | res = lib.ZSTD_initCStream(cstream, self._compression_level) |
|
554 | dict_data = ffi.NULL | |
148 | if lib.ZSTD_isError(res): |
|
555 | dict_size = 0 | |
|
556 | if self._dict_data: | |||
|
557 | dict_data = self._dict_data.as_bytes() | |||
|
558 | dict_size = len(self._dict_data) | |||
|
559 | ||||
|
560 | zparams = ffi.new('ZSTD_parameters *')[0] | |||
|
561 | if self._cparams: | |||
|
562 | zparams.cParams = self._cparams.as_compression_parameters() | |||
|
563 | else: | |||
|
564 | zparams.cParams = lib.ZSTD_getCParams(self._compression_level, | |||
|
565 | size, dict_size) | |||
|
566 | zparams.fParams = self._fparams | |||
|
567 | ||||
|
568 | zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size, | |||
|
569 | zparams, size) | |||
|
570 | if lib.ZSTD_isError(zresult): | |||
149 | raise Exception('cannot init CStream: %s' % |
|
571 | raise Exception('cannot init CStream: %s' % | |
150 | lib.ZSTD_getErrorName(res)) |
|
572 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
151 |
|
573 | |||
152 | return cstream |
|
574 | return cstream | |
|
575 | ||||
|
576 | ||||
|
577 | class FrameParameters(object): | |||
|
578 | def __init__(self, fparams): | |||
|
579 | self.content_size = fparams.frameContentSize | |||
|
580 | self.window_size = fparams.windowSize | |||
|
581 | self.dict_id = fparams.dictID | |||
|
582 | self.has_checksum = bool(fparams.checksumFlag) | |||
|
583 | ||||
|
584 | ||||
|
585 | def get_frame_parameters(data): | |||
|
586 | if not isinstance(data, bytes_type): | |||
|
587 | raise TypeError('argument must be bytes') | |||
|
588 | ||||
|
589 | params = ffi.new('ZSTD_frameParams *') | |||
|
590 | ||||
|
591 | zresult = lib.ZSTD_getFrameParams(params, data, len(data)) | |||
|
592 | if lib.ZSTD_isError(zresult): | |||
|
593 | raise ZstdError('cannot get frame parameters: %s' % | |||
|
594 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
595 | ||||
|
596 | if zresult: | |||
|
597 | raise ZstdError('not enough data for frame parameters; need %d bytes' % | |||
|
598 | zresult) | |||
|
599 | ||||
|
600 | return FrameParameters(params[0]) | |||
|
601 | ||||
|
602 | ||||
|
603 | class ZstdCompressionDict(object): | |||
|
604 | def __init__(self, data): | |||
|
605 | assert isinstance(data, bytes_type) | |||
|
606 | self._data = data | |||
|
607 | ||||
|
608 | def __len__(self): | |||
|
609 | return len(self._data) | |||
|
610 | ||||
|
611 | def dict_id(self): | |||
|
612 | return int_type(lib.ZDICT_getDictID(self._data, len(self._data))) | |||
|
613 | ||||
|
614 | def as_bytes(self): | |||
|
615 | return self._data | |||
|
616 | ||||
|
617 | ||||
|
618 | def train_dictionary(dict_size, samples, parameters=None): | |||
|
619 | if not isinstance(samples, list): | |||
|
620 | raise TypeError('samples must be a list') | |||
|
621 | ||||
|
622 | total_size = sum(map(len, samples)) | |||
|
623 | ||||
|
624 | samples_buffer = new_nonzero('char[]', total_size) | |||
|
625 | sample_sizes = new_nonzero('size_t[]', len(samples)) | |||
|
626 | ||||
|
627 | offset = 0 | |||
|
628 | for i, sample in enumerate(samples): | |||
|
629 | if not isinstance(sample, bytes_type): | |||
|
630 | raise ValueError('samples must be bytes') | |||
|
631 | ||||
|
632 | l = len(sample) | |||
|
633 | ffi.memmove(samples_buffer + offset, sample, l) | |||
|
634 | offset += l | |||
|
635 | sample_sizes[i] = l | |||
|
636 | ||||
|
637 | dict_data = new_nonzero('char[]', dict_size) | |||
|
638 | ||||
|
639 | zresult = lib.ZDICT_trainFromBuffer(ffi.addressof(dict_data), dict_size, | |||
|
640 | ffi.addressof(samples_buffer), | |||
|
641 | ffi.addressof(sample_sizes, 0), | |||
|
642 | len(samples)) | |||
|
643 | if lib.ZDICT_isError(zresult): | |||
|
644 | raise ZstdError('Cannot train dict: %s' % | |||
|
645 | ffi.string(lib.ZDICT_getErrorName(zresult))) | |||
|
646 | ||||
|
647 | return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:]) | |||
|
648 | ||||
|
649 | ||||
|
650 | class ZstdDecompressionObj(object): | |||
|
651 | def __init__(self, decompressor): | |||
|
652 | self._decompressor = decompressor | |||
|
653 | self._dstream = self._decompressor._get_dstream() | |||
|
654 | self._finished = False | |||
|
655 | ||||
|
656 | def decompress(self, data): | |||
|
657 | if self._finished: | |||
|
658 | raise ZstdError('cannot use a decompressobj multiple times') | |||
|
659 | ||||
|
660 | in_buffer = ffi.new('ZSTD_inBuffer *') | |||
|
661 | out_buffer = ffi.new('ZSTD_outBuffer *') | |||
|
662 | ||||
|
663 | data_buffer = ffi.from_buffer(data) | |||
|
664 | in_buffer.src = data_buffer | |||
|
665 | in_buffer.size = len(data_buffer) | |||
|
666 | in_buffer.pos = 0 | |||
|
667 | ||||
|
668 | dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |||
|
669 | out_buffer.dst = dst_buffer | |||
|
670 | out_buffer.size = len(dst_buffer) | |||
|
671 | out_buffer.pos = 0 | |||
|
672 | ||||
|
673 | chunks = [] | |||
|
674 | ||||
|
675 | while in_buffer.pos < in_buffer.size: | |||
|
676 | zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer) | |||
|
677 | if lib.ZSTD_isError(zresult): | |||
|
678 | raise ZstdError('zstd decompressor error: %s' % | |||
|
679 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
680 | ||||
|
681 | if zresult == 0: | |||
|
682 | self._finished = True | |||
|
683 | self._dstream = None | |||
|
684 | self._decompressor = None | |||
|
685 | ||||
|
686 | if out_buffer.pos: | |||
|
687 | chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |||
|
688 | out_buffer.pos = 0 | |||
|
689 | ||||
|
690 | return b''.join(chunks) | |||
|
691 | ||||
|
692 | ||||
|
693 | class ZstdDecompressionWriter(object): | |||
|
694 | def __init__(self, decompressor, writer, write_size): | |||
|
695 | self._decompressor = decompressor | |||
|
696 | self._writer = writer | |||
|
697 | self._write_size = write_size | |||
|
698 | self._dstream = None | |||
|
699 | self._entered = False | |||
|
700 | ||||
|
701 | def __enter__(self): | |||
|
702 | if self._entered: | |||
|
703 | raise ZstdError('cannot __enter__ multiple times') | |||
|
704 | ||||
|
705 | self._dstream = self._decompressor._get_dstream() | |||
|
706 | self._entered = True | |||
|
707 | ||||
|
708 | return self | |||
|
709 | ||||
|
710 | def __exit__(self, exc_type, exc_value, exc_tb): | |||
|
711 | self._entered = False | |||
|
712 | self._dstream = None | |||
|
713 | ||||
|
714 | def memory_size(self): | |||
|
715 | if not self._dstream: | |||
|
716 | raise ZstdError('cannot determine size of inactive decompressor ' | |||
|
717 | 'call when context manager is active') | |||
|
718 | ||||
|
719 | return lib.ZSTD_sizeof_DStream(self._dstream) | |||
|
720 | ||||
|
721 | def write(self, data): | |||
|
722 | if not self._entered: | |||
|
723 | raise ZstdError('write must be called from an active context manager') | |||
|
724 | ||||
|
725 | total_write = 0 | |||
|
726 | ||||
|
727 | in_buffer = ffi.new('ZSTD_inBuffer *') | |||
|
728 | out_buffer = ffi.new('ZSTD_outBuffer *') | |||
|
729 | ||||
|
730 | data_buffer = ffi.from_buffer(data) | |||
|
731 | in_buffer.src = data_buffer | |||
|
732 | in_buffer.size = len(data_buffer) | |||
|
733 | in_buffer.pos = 0 | |||
|
734 | ||||
|
735 | dst_buffer = ffi.new('char[]', self._write_size) | |||
|
736 | out_buffer.dst = dst_buffer | |||
|
737 | out_buffer.size = len(dst_buffer) | |||
|
738 | out_buffer.pos = 0 | |||
|
739 | ||||
|
740 | while in_buffer.pos < in_buffer.size: | |||
|
741 | zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer) | |||
|
742 | if lib.ZSTD_isError(zresult): | |||
|
743 | raise ZstdError('zstd decompress error: %s' % | |||
|
744 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
745 | ||||
|
746 | if out_buffer.pos: | |||
|
747 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |||
|
748 | total_write += out_buffer.pos | |||
|
749 | out_buffer.pos = 0 | |||
|
750 | ||||
|
751 | return total_write | |||
|
752 | ||||
|
753 | ||||
|
754 | class ZstdDecompressor(object): | |||
|
755 | def __init__(self, dict_data=None): | |||
|
756 | self._dict_data = dict_data | |||
|
757 | ||||
|
758 | dctx = lib.ZSTD_createDCtx() | |||
|
759 | if dctx == ffi.NULL: | |||
|
760 | raise MemoryError() | |||
|
761 | ||||
|
762 | self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx) | |||
|
763 | ||||
|
764 | @property | |||
|
765 | def _ddict(self): | |||
|
766 | if self._dict_data: | |||
|
767 | dict_data = self._dict_data.as_bytes() | |||
|
768 | dict_size = len(self._dict_data) | |||
|
769 | ||||
|
770 | ddict = lib.ZSTD_createDDict(dict_data, dict_size) | |||
|
771 | if ddict == ffi.NULL: | |||
|
772 | raise ZstdError('could not create decompression dict') | |||
|
773 | else: | |||
|
774 | ddict = None | |||
|
775 | ||||
|
776 | self.__dict__['_ddict'] = ddict | |||
|
777 | return ddict | |||
|
778 | ||||
|
779 | def decompress(self, data, max_output_size=0): | |||
|
780 | data_buffer = ffi.from_buffer(data) | |||
|
781 | ||||
|
782 | orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx)) | |||
|
783 | dctx = ffi.cast('ZSTD_DCtx *', orig_dctx) | |||
|
784 | lib.ZSTD_copyDCtx(dctx, self._refdctx) | |||
|
785 | ||||
|
786 | ddict = self._ddict | |||
|
787 | ||||
|
788 | output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer)) | |||
|
789 | if output_size: | |||
|
790 | result_buffer = ffi.new('char[]', output_size) | |||
|
791 | result_size = output_size | |||
|
792 | else: | |||
|
793 | if not max_output_size: | |||
|
794 | raise ZstdError('input data invalid or missing content size ' | |||
|
795 | 'in frame header') | |||
|
796 | ||||
|
797 | result_buffer = ffi.new('char[]', max_output_size) | |||
|
798 | result_size = max_output_size | |||
|
799 | ||||
|
800 | if ddict: | |||
|
801 | zresult = lib.ZSTD_decompress_usingDDict(dctx, | |||
|
802 | result_buffer, result_size, | |||
|
803 | data_buffer, len(data_buffer), | |||
|
804 | ddict) | |||
|
805 | else: | |||
|
806 | zresult = lib.ZSTD_decompressDCtx(dctx, | |||
|
807 | result_buffer, result_size, | |||
|
808 | data_buffer, len(data_buffer)) | |||
|
809 | if lib.ZSTD_isError(zresult): | |||
|
810 | raise ZstdError('decompression error: %s' % | |||
|
811 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
812 | elif output_size and zresult != output_size: | |||
|
813 | raise ZstdError('decompression error: decompressed %d bytes; expected %d' % | |||
|
814 | (zresult, output_size)) | |||
|
815 | ||||
|
816 | return ffi.buffer(result_buffer, zresult)[:] | |||
|
817 | ||||
|
818 | def decompressobj(self): | |||
|
819 | return ZstdDecompressionObj(self) | |||
|
820 | ||||
|
821 | def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |||
|
822 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |||
|
823 | skip_bytes=0): | |||
|
824 | if skip_bytes >= read_size: | |||
|
825 | raise ValueError('skip_bytes must be smaller than read_size') | |||
|
826 | ||||
|
827 | if hasattr(reader, 'read'): | |||
|
828 | have_read = True | |||
|
829 | elif hasattr(reader, '__getitem__'): | |||
|
830 | have_read = False | |||
|
831 | buffer_offset = 0 | |||
|
832 | size = len(reader) | |||
|
833 | else: | |||
|
834 | raise ValueError('must pass an object with a read() method or ' | |||
|
835 | 'conforms to buffer protocol') | |||
|
836 | ||||
|
837 | if skip_bytes: | |||
|
838 | if have_read: | |||
|
839 | reader.read(skip_bytes) | |||
|
840 | else: | |||
|
841 | if skip_bytes > size: | |||
|
842 | raise ValueError('skip_bytes larger than first input chunk') | |||
|
843 | ||||
|
844 | buffer_offset = skip_bytes | |||
|
845 | ||||
|
846 | dstream = self._get_dstream() | |||
|
847 | ||||
|
848 | in_buffer = ffi.new('ZSTD_inBuffer *') | |||
|
849 | out_buffer = ffi.new('ZSTD_outBuffer *') | |||
|
850 | ||||
|
851 | dst_buffer = ffi.new('char[]', write_size) | |||
|
852 | out_buffer.dst = dst_buffer | |||
|
853 | out_buffer.size = len(dst_buffer) | |||
|
854 | out_buffer.pos = 0 | |||
|
855 | ||||
|
856 | while True: | |||
|
857 | assert out_buffer.pos == 0 | |||
|
858 | ||||
|
859 | if have_read: | |||
|
860 | read_result = reader.read(read_size) | |||
|
861 | else: | |||
|
862 | remaining = size - buffer_offset | |||
|
863 | slice_size = min(remaining, read_size) | |||
|
864 | read_result = reader[buffer_offset:buffer_offset + slice_size] | |||
|
865 | buffer_offset += slice_size | |||
|
866 | ||||
|
867 | # No new input. Break out of read loop. | |||
|
868 | if not read_result: | |||
|
869 | break | |||
|
870 | ||||
|
871 | # Feed all read data into decompressor and emit output until | |||
|
872 | # exhausted. | |||
|
873 | read_buffer = ffi.from_buffer(read_result) | |||
|
874 | in_buffer.src = read_buffer | |||
|
875 | in_buffer.size = len(read_buffer) | |||
|
876 | in_buffer.pos = 0 | |||
|
877 | ||||
|
878 | while in_buffer.pos < in_buffer.size: | |||
|
879 | assert out_buffer.pos == 0 | |||
|
880 | ||||
|
881 | zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer) | |||
|
882 | if lib.ZSTD_isError(zresult): | |||
|
883 | raise ZstdError('zstd decompress error: %s' % | |||
|
884 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
885 | ||||
|
886 | if out_buffer.pos: | |||
|
887 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] | |||
|
888 | out_buffer.pos = 0 | |||
|
889 | yield data | |||
|
890 | ||||
|
891 | if zresult == 0: | |||
|
892 | return | |||
|
893 | ||||
|
894 | # Repeat loop to collect more input data. | |||
|
895 | continue | |||
|
896 | ||||
|
897 | # If we get here, input is exhausted. | |||
|
898 | ||||
|
899 | def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |||
|
900 | if not hasattr(writer, 'write'): | |||
|
901 | raise ValueError('must pass an object with a write() method') | |||
|
902 | ||||
|
903 | return ZstdDecompressionWriter(self, writer, write_size) | |||
|
904 | ||||
|
905 | def copy_stream(self, ifh, ofh, | |||
|
906 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |||
|
907 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |||
|
908 | if not hasattr(ifh, 'read'): | |||
|
909 | raise ValueError('first argument must have a read() method') | |||
|
910 | if not hasattr(ofh, 'write'): | |||
|
911 | raise ValueError('second argument must have a write() method') | |||
|
912 | ||||
|
913 | dstream = self._get_dstream() | |||
|
914 | ||||
|
915 | in_buffer = ffi.new('ZSTD_inBuffer *') | |||
|
916 | out_buffer = ffi.new('ZSTD_outBuffer *') | |||
|
917 | ||||
|
918 | dst_buffer = ffi.new('char[]', write_size) | |||
|
919 | out_buffer.dst = dst_buffer | |||
|
920 | out_buffer.size = write_size | |||
|
921 | out_buffer.pos = 0 | |||
|
922 | ||||
|
923 | total_read, total_write = 0, 0 | |||
|
924 | ||||
|
925 | # Read all available input. | |||
|
926 | while True: | |||
|
927 | data = ifh.read(read_size) | |||
|
928 | if not data: | |||
|
929 | break | |||
|
930 | ||||
|
931 | data_buffer = ffi.from_buffer(data) | |||
|
932 | total_read += len(data_buffer) | |||
|
933 | in_buffer.src = data_buffer | |||
|
934 | in_buffer.size = len(data_buffer) | |||
|
935 | in_buffer.pos = 0 | |||
|
936 | ||||
|
937 | # Flush all read data to output. | |||
|
938 | while in_buffer.pos < in_buffer.size: | |||
|
939 | zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer) | |||
|
940 | if lib.ZSTD_isError(zresult): | |||
|
941 | raise ZstdError('zstd decompressor error: %s' % | |||
|
942 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
943 | ||||
|
944 | if out_buffer.pos: | |||
|
945 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) | |||
|
946 | total_write += out_buffer.pos | |||
|
947 | out_buffer.pos = 0 | |||
|
948 | ||||
|
949 | # Continue loop to keep reading. | |||
|
950 | ||||
|
951 | return total_read, total_write | |||
|
952 | ||||
|
953 | def decompress_content_dict_chain(self, frames): | |||
|
954 | if not isinstance(frames, list): | |||
|
955 | raise TypeError('argument must be a list') | |||
|
956 | ||||
|
957 | if not frames: | |||
|
958 | raise ValueError('empty input chain') | |||
|
959 | ||||
|
960 | # First chunk should not be using a dictionary. We handle it specially. | |||
|
961 | chunk = frames[0] | |||
|
962 | if not isinstance(chunk, bytes_type): | |||
|
963 | raise ValueError('chunk 0 must be bytes') | |||
|
964 | ||||
|
965 | # All chunks should be zstd frames and should have content size set. | |||
|
966 | chunk_buffer = ffi.from_buffer(chunk) | |||
|
967 | params = ffi.new('ZSTD_frameParams *') | |||
|
968 | zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer)) | |||
|
969 | if lib.ZSTD_isError(zresult): | |||
|
970 | raise ValueError('chunk 0 is not a valid zstd frame') | |||
|
971 | elif zresult: | |||
|
972 | raise ValueError('chunk 0 is too small to contain a zstd frame') | |||
|
973 | ||||
|
974 | if not params.frameContentSize: | |||
|
975 | raise ValueError('chunk 0 missing content size in frame') | |||
|
976 | ||||
|
977 | dctx = lib.ZSTD_createDCtx() | |||
|
978 | if dctx == ffi.NULL: | |||
|
979 | raise MemoryError() | |||
|
980 | ||||
|
981 | dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx) | |||
|
982 | ||||
|
983 | last_buffer = ffi.new('char[]', params.frameContentSize) | |||
|
984 | ||||
|
985 | zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer), | |||
|
986 | chunk_buffer, len(chunk_buffer)) | |||
|
987 | if lib.ZSTD_isError(zresult): | |||
|
988 | raise ZstdError('could not decompress chunk 0: %s' % | |||
|
989 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
990 | ||||
|
991 | # Special case of chain length of 1 | |||
|
992 | if len(frames) == 1: | |||
|
993 | return ffi.buffer(last_buffer, len(last_buffer))[:] | |||
|
994 | ||||
|
995 | i = 1 | |||
|
996 | while i < len(frames): | |||
|
997 | chunk = frames[i] | |||
|
998 | if not isinstance(chunk, bytes_type): | |||
|
999 | raise ValueError('chunk %d must be bytes' % i) | |||
|
1000 | ||||
|
1001 | chunk_buffer = ffi.from_buffer(chunk) | |||
|
1002 | zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer)) | |||
|
1003 | if lib.ZSTD_isError(zresult): | |||
|
1004 | raise ValueError('chunk %d is not a valid zstd frame' % i) | |||
|
1005 | elif zresult: | |||
|
1006 | raise ValueError('chunk %d is too small to contain a zstd frame' % i) | |||
|
1007 | ||||
|
1008 | if not params.frameContentSize: | |||
|
1009 | raise ValueError('chunk %d missing content size in frame' % i) | |||
|
1010 | ||||
|
1011 | dest_buffer = ffi.new('char[]', params.frameContentSize) | |||
|
1012 | ||||
|
1013 | zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer), | |||
|
1014 | chunk_buffer, len(chunk_buffer), | |||
|
1015 | last_buffer, len(last_buffer)) | |||
|
1016 | if lib.ZSTD_isError(zresult): | |||
|
1017 | raise ZstdError('could not decompress chunk %d' % i) | |||
|
1018 | ||||
|
1019 | last_buffer = dest_buffer | |||
|
1020 | i += 1 | |||
|
1021 | ||||
|
1022 | return ffi.buffer(last_buffer, len(last_buffer))[:] | |||
|
1023 | ||||
|
1024 | def _get_dstream(self): | |||
|
1025 | dstream = lib.ZSTD_createDStream() | |||
|
1026 | if dstream == ffi.NULL: | |||
|
1027 | raise MemoryError() | |||
|
1028 | ||||
|
1029 | dstream = ffi.gc(dstream, lib.ZSTD_freeDStream) | |||
|
1030 | ||||
|
1031 | if self._dict_data: | |||
|
1032 | zresult = lib.ZSTD_initDStream_usingDict(dstream, | |||
|
1033 | self._dict_data.as_bytes(), | |||
|
1034 | len(self._dict_data)) | |||
|
1035 | else: | |||
|
1036 | zresult = lib.ZSTD_initDStream(dstream) | |||
|
1037 | ||||
|
1038 | if lib.ZSTD_isError(zresult): | |||
|
1039 | raise ZstdError('could not initialize DStream: %s' % | |||
|
1040 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
1041 | ||||
|
1042 | return dstream |
@@ -7,7 +7,6 b'' | |||||
7 | contrib/python-zstandard/setup.py not using absolute_import |
|
7 | contrib/python-zstandard/setup.py not using absolute_import | |
8 | contrib/python-zstandard/setup_zstd.py not using absolute_import |
|
8 | contrib/python-zstandard/setup_zstd.py not using absolute_import | |
9 | contrib/python-zstandard/tests/common.py not using absolute_import |
|
9 | contrib/python-zstandard/tests/common.py not using absolute_import | |
10 | contrib/python-zstandard/tests/test_cffi.py not using absolute_import |
|
|||
11 | contrib/python-zstandard/tests/test_compressor.py not using absolute_import |
|
10 | contrib/python-zstandard/tests/test_compressor.py not using absolute_import | |
12 | contrib/python-zstandard/tests/test_data_structures.py not using absolute_import |
|
11 | contrib/python-zstandard/tests/test_data_structures.py not using absolute_import | |
13 | contrib/python-zstandard/tests/test_decompressor.py not using absolute_import |
|
12 | contrib/python-zstandard/tests/test_decompressor.py not using absolute_import |
1 | NO CONTENT: file was removed |
|
NO CONTENT: file was removed |
General Comments 0
You need to be logged in to leave comments.
Login now