##// END OF EJS Templates
contrib: add a partial-merge tool for sorted lists (such as Python imports)...
contrib: add a partial-merge tool for sorted lists (such as Python imports) This is a pretty naive tool that uses a regular expression for matching lines. It is based on a Google-internal tool that worked in a similar way. For now, the regular expression is hard-coded to attempt to match single-line Python imports. The only commit I've found in the hg core repo where the tool helped was commit 9cd6292abfdf. I think that's because we often use multiple imports per import statement. I think this tool is still a decent first step (especially once the regex is made configurable in the next patch). The merging should ideally use a proper Python parser and do the merge at the AST (or CST?) level, but that's significantly harder, especially if you want to preserve comments and whitespace. It's also less generic. Differential Revision: https://phab.mercurial-scm.org/D12380

File last commit:

r47090:e92ca942 default
r49874:681b25ea default
Show More
compressionparams.c
572 lines | 17.1 KiB | text/x-c | CLexer
/**
* Copyright (c) 2016-present, Gregory Szorc
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*/
#include "python-zstandard.h"
extern PyObject* ZstdError;
/**
 * Set a single parameter on a ZSTD_CCtx_params instance.
 *
 * Returns 0 on success. On failure a ZstdError carrying zstd's description
 * of the problem is set as the pending Python exception and 1 is returned.
 */
int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
    const size_t status = ZSTD_CCtxParams_setParameter(params, param, value);

    if (!ZSTD_isError(status)) {
        return 0;
    }

    PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
        ZSTD_getErrorName(status));
    return 1;
}
/*
 * Set `param` to `value` on `params` via set_parameter() and return -1 from
 * the enclosing function if that fails (set_parameter() has already set the
 * Python exception in that case).
 *
 * The body is wrapped in do { } while (0) so that the expansion followed by
 * the caller's `;` is exactly one statement. A bare `if (...) return -1;`
 * expansion would silently capture a following `else` and leave a stray
 * empty statement behind.
 */
#define TRY_SET_PARAMETER(params, param, value) do { \
    if (set_parameter(params, param, value)) { \
        return -1; \
    } \
} while (0)
/*
 * Copy the value of `param` from the `source` ZSTD_CCtx_params to `dest`.
 *
 * On failure a ZstdError is set as the pending Python exception and the
 * enclosing function returns 1. Setting the exception matters: the result of
 * set_parameters() is propagated by reset_params() into the type's tp_init,
 * which reports the failure to the interpreter, and a failure reported
 * without a pending exception is itself an error.
 *
 * Wrapped in do { } while (0) so that the expansion plus the caller's `;`
 * is a single statement.
 */
#define TRY_COPY_PARAMETER(source, dest, param) do { \
    int result; \
    size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
    if (ZSTD_isError(zresult)) { \
        PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \
            ZSTD_getErrorName(zresult)); \
        return 1; \
    } \
    zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
    if (ZSTD_isError(zresult)) { \
        PyErr_Format(ZstdError, "unable to set compression context parameter: %s", \
            ZSTD_getErrorName(zresult)); \
        return 1; \
    } \
} while (0)
/**
 * Copy every supported compression parameter from `obj` into `params`.
 *
 * Parameters are copied one at a time, in the order listed below, using
 * TRY_COPY_PARAMETER. Returns 0 when all of them were copied and 1 as soon
 * as one of them cannot be read from `obj` or written to `params`.
 */
int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
    static const ZSTD_cParameter copyOrder[] = {
        ZSTD_c_nbWorkers,
        ZSTD_c_format,
        ZSTD_c_compressionLevel,
        ZSTD_c_windowLog,
        ZSTD_c_hashLog,
        ZSTD_c_chainLog,
        ZSTD_c_searchLog,
        ZSTD_c_minMatch,
        ZSTD_c_targetLength,
        ZSTD_c_strategy,
        ZSTD_c_contentSizeFlag,
        ZSTD_c_checksumFlag,
        ZSTD_c_dictIDFlag,
        ZSTD_c_jobSize,
        ZSTD_c_overlapLog,
        ZSTD_c_forceMaxWindow,
        ZSTD_c_enableLongDistanceMatching,
        ZSTD_c_ldmHashLog,
        ZSTD_c_ldmMinMatch,
        ZSTD_c_ldmBucketSizeLog,
        ZSTD_c_ldmHashRateLog
    };
    size_t i;

    for (i = 0; i < sizeof(copyOrder) / sizeof(copyOrder[0]); i++) {
        TRY_COPY_PARAMETER(obj->params, params, copyOrder[i]);
    }

    return 0;
}
/**
 * Put the ZSTD_CCtx_params owned by `params` into a freshly reset state.
 *
 * If the object does not own a ZSTD_CCtx_params yet, one is created; if that
 * allocation fails, MemoryError is set and 1 is returned. An existing
 * instance is reset in place. Either way the function finishes by running
 * set_parameters() with the object's own params as both source and
 * destination and returns its result.
 */
int reset_params(ZstdCompressionParametersObject* params) {
    if (!params->params) {
        params->params = ZSTD_createCCtxParams();
        if (!params->params) {
            PyErr_NoMemory();
            return 1;
        }
    }
    else {
        ZSTD_CCtxParams_reset(params->params);
    }

    return set_parameters(params->params, params);
}
/*
 * Read `param` from `params` into the int pointed to by `value`.
 *
 * On failure a ZstdError is set as the pending Python exception and the
 * enclosing function returns 1.
 *
 * Wrapped in do { } while (0) so that the expansion plus the caller's `;`
 * is a single statement; a bare `{ ... }` block followed by `;` cannot be
 * used as the body of an `if` that has an `else`.
 */
#define TRY_GET_PARAMETER(params, param, value) do { \
    size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
    if (ZSTD_isError(zresult)) { \
        PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
        return 1; \
    } \
} while (0)
/**
 * Fill a ZSTD_compressionParameters struct from the values held by a
 * ZstdCompressionParameters instance.
 *
 * Returns 0 on success. If a value cannot be read, TRY_GET_PARAMETER sets a
 * ZstdError and this function returns 1; fields of `cparams` that were
 * assigned before the failure keep their new values.
 */
int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
    ZSTD_CCtx_params* stored = params->params;
    int current;

    TRY_GET_PARAMETER(stored, ZSTD_c_windowLog, &current);
    cparams->windowLog = current;

    TRY_GET_PARAMETER(stored, ZSTD_c_chainLog, &current);
    cparams->chainLog = current;

    TRY_GET_PARAMETER(stored, ZSTD_c_hashLog, &current);
    cparams->hashLog = current;

    TRY_GET_PARAMETER(stored, ZSTD_c_searchLog, &current);
    cparams->searchLog = current;

    TRY_GET_PARAMETER(stored, ZSTD_c_minMatch, &current);
    cparams->minMatch = current;

    TRY_GET_PARAMETER(stored, ZSTD_c_targetLength, &current);
    cparams->targetLength = current;

    TRY_GET_PARAMETER(stored, ZSTD_c_strategy, &current);
    cparams->strategy = current;

    return 0;
}
static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"format",
"compression_level",
"window_log",
"hash_log",
"chain_log",
"search_log",
"min_match",
"target_length",
"compression_strategy",
"strategy",
"write_content_size",
"write_checksum",
"write_dict_id",
"job_size",
"overlap_log",
"overlap_size_log",
"force_max_window",
"enable_ldm",
"ldm_hash_log",
"ldm_min_match",
"ldm_bucket_size_log",
"ldm_hash_rate_log",
"ldm_hash_every_log",
"threads",
NULL
};
int format = 0;
int compressionLevel = 0;
int windowLog = 0;
int hashLog = 0;
int chainLog = 0;
int searchLog = 0;
int minMatch = 0;
int targetLength = 0;
int compressionStrategy = -1;
int strategy = -1;
int contentSizeFlag = 1;
int checksumFlag = 0;
int dictIDFlag = 0;
int jobSize = 0;
int overlapLog = -1;
int overlapSizeLog = -1;
int forceMaxWindow = 0;
int enableLDM = 0;
int ldmHashLog = 0;
int ldmMinMatch = 0;
int ldmBucketSizeLog = 0;
int ldmHashRateLog = -1;
int ldmHashEveryLog = -1;
int threads = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
"|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
&searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
&contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
&overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
&ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
return -1;
}
if (reset_params(self)) {
return -1;
}
if (threads < 0) {
threads = cpu_count();
}
/* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
* because setting ZSTD_c_nbWorkers resets the other parameters. */
TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
if (compressionStrategy != -1 && strategy != -1) {
PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
return -1;
}
if (compressionStrategy != -1) {
strategy = compressionStrategy;
}
else if (strategy == -1) {
strategy = 0;
}
TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
if (overlapLog != -1 && overlapSizeLog != -1) {
PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
return -1;
}
if (overlapSizeLog != -1) {
overlapLog = overlapSizeLog;
}
else if (overlapLog == -1) {
overlapLog = 0;
}
TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
return -1;
}
if (ldmHashEveryLog != -1) {
ldmHashRateLog = ldmHashEveryLog;
}
else if (ldmHashRateLog == -1) {
ldmHashRateLog = 0;
}
TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
return 0;
}
PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
"Create a CompressionParameters from a compression level and target sizes\n"
);

/**
 * Insert `key` -> `value` into `kwargs` unless `key` is already present.
 *
 * Returns 0 on success (including when the key already existed) and -1 with
 * a Python exception set if the value could not be created or stored.
 */
static int set_default_kwarg(PyObject* kwargs, const char* key, unsigned long value) {
    PyObject* item;
    int status;

    if (PyDict_GetItemString(kwargs, key)) {
        return 0;
    }

    item = PyLong_FromUnsignedLong(value);
    if (!item) {
        return -1;
    }

    status = PyDict_SetItemString(kwargs, key, item);
    Py_DECREF(item);

    return status;
}

/**
 * Static method: build a ZstdCompressionParameters from a compression level.
 *
 * Takes one positional int (the level). The optional keyword arguments
 * `source_size` and `dict_size` are consumed here and fed to
 * ZSTD_getCParams(); the values it derives become defaults for window_log,
 * chain_log, hash_log, search_log, min_match, target_length and the
 * strategy. Every other keyword argument, and any explicit value for one of
 * those settings, is forwarded unchanged to the type's initializer.
 *
 * Returns a new reference, or NULL with a Python exception set.
 */
ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
    int level;
    PyObject* ctorKwargs;
    PyObject* sourceSize;
    PyObject* dictSize;
    PyObject* emptyArgs;
    unsigned PY_LONG_LONG iSourceSize = 0;
    Py_ssize_t iDictSize = 0;
    ZSTD_compressionParameters params;
    ZstdCompressionParametersObject* result = NULL;

    if (!PyArg_ParseTuple(args, "i:from_level", &level)) {
        return NULL;
    }

    /* Keys are removed from and added to the dict below. Work on a private
     * dict so that a dict object owned by the caller is never modified. */
    ctorKwargs = kwargs ? PyDict_Copy(kwargs) : PyDict_New();
    if (!ctorKwargs) {
        return NULL;
    }

    sourceSize = PyDict_GetItemString(ctorKwargs, "source_size");
    if (sourceSize) {
#if PY_MAJOR_VERSION >= 3
        iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
#else
        iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
#endif
        /* (unsigned long long)-1 is also a legitimate value, so it only
         * signals failure when an exception is actually pending. */
        if (iSourceSize == (unsigned PY_LONG_LONG)(-1) && PyErr_Occurred()) {
            goto cleanup;
        }

        if (PyDict_DelItemString(ctorKwargs, "source_size")) {
            goto cleanup;
        }
    }

    dictSize = PyDict_GetItemString(ctorKwargs, "dict_size");
    if (dictSize) {
#if PY_MAJOR_VERSION >= 3
        iDictSize = PyLong_AsSsize_t(dictSize);
#else
        iDictSize = PyInt_AsSsize_t(dictSize);
#endif
        /* As above: -1 alone does not prove that the conversion failed. */
        if (iDictSize == -1 && PyErr_Occurred()) {
            goto cleanup;
        }

        if (PyDict_DelItemString(ctorKwargs, "dict_size")) {
            goto cleanup;
        }
    }

    params = ZSTD_getCParams(level, iSourceSize, iDictSize);

    /* Values derived from the input level and sizes are passed along to the
       constructor. But only if a value doesn't already exist. */
    if (set_default_kwarg(ctorKwargs, "window_log", params.windowLog)
        || set_default_kwarg(ctorKwargs, "chain_log", params.chainLog)
        || set_default_kwarg(ctorKwargs, "hash_log", params.hashLog)
        || set_default_kwarg(ctorKwargs, "search_log", params.searchLog)
        || set_default_kwarg(ctorKwargs, "min_match", params.minMatch)
        || set_default_kwarg(ctorKwargs, "target_length", params.targetLength)) {
        goto cleanup;
    }

    /* The initializer accepts the strategy under two names and rejects calls
     * that supply both, so the derived default must be skipped when the
     * caller used either name. */
    if (!PyDict_GetItemString(ctorKwargs, "strategy")
        && set_default_kwarg(ctorKwargs, "compression_strategy", (unsigned long)params.strategy)) {
        goto cleanup;
    }

    result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
    if (!result) {
        goto cleanup;
    }

    /* PyObject_New() leaves the fields uninitialized; both the initializer
     * and the destructor inspect this pointer. */
    result->params = NULL;

    emptyArgs = PyTuple_New(0);
    if (!emptyArgs) {
        Py_CLEAR(result);
        goto cleanup;
    }

    if (ZstdCompressionParameters_init(result, emptyArgs, ctorKwargs)) {
        Py_CLEAR(result);
    }
    Py_DECREF(emptyArgs);

cleanup:
    Py_DECREF(ctorKwargs);

    return result;
}
PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
"Estimate the size in bytes of a compression context for compression parameters\n"
);

/**
 * Return, as a Python int, the value zstd reports from
 * ZSTD_estimateCCtxSize_usingCCtxParams() for this object's parameters.
 */
PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
    const size_t estimate = ZSTD_estimateCCtxSize_usingCCtxParams(self->params);

    return PyLong_FromSize_t(estimate);
}
/* Docstring of the type itself; installed as tp_doc of
 * ZstdCompressionParametersType. */
PyDoc_STRVAR(ZstdCompressionParameters__doc__,
"ZstdCompressionParameters: low-level control over zstd compression");
/**
 * tp_dealloc: release the owned ZSTD_CCtx_params, then the object itself.
 *
 * The type is declared with Py_TPFLAGS_BASETYPE, so `self` may be an
 * instance of a subclass whose memory came from a different allocator than
 * the base type's. The object is therefore released through the tp_free
 * slot of its actual type rather than by calling PyObject_Del() directly.
 */
static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
    if (self->params) {
        ZSTD_freeCCtxParams(self->params);
        self->params = NULL;
    }

    Py_TYPE(self)->tp_free((PyObject*)self);
}
/*
 * Define the attribute getter ZstdCompressionParameters_get_<name>.
 *
 * The generated function reads `param` from the object's ZSTD_CCtx_params
 * and returns it as a Python int. If zstd reports an error, ZstdError is set
 * and NULL is returned. The second argument of the generated function is
 * not used.
 */
#define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
    ZstdCompressionParametersObject* owner = (ZstdCompressionParametersObject*)(self); \
    int current; \
    size_t status = ZSTD_CCtxParams_getParameter(owner->params, param, &current); \
    if (!ZSTD_isError(status)) { \
        return PyLong_FromLong(current); \
    } \
    PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
        ZSTD_getErrorName(status)); \
    return NULL; \
}
/* Instantiate one getter function per exposed attribute. Each line defines
 * ZstdCompressionParameters_get_<first argument>, which reads the zstd
 * parameter given as the second argument. */
PARAM_GETTER(format, ZSTD_c_format)
PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
PARAM_GETTER(window_log, ZSTD_c_windowLog)
PARAM_GETTER(hash_log, ZSTD_c_hashLog)
PARAM_GETTER(chain_log, ZSTD_c_chainLog)
PARAM_GETTER(search_log, ZSTD_c_searchLog)
PARAM_GETTER(min_match, ZSTD_c_minMatch)
PARAM_GETTER(target_length, ZSTD_c_targetLength)
PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
PARAM_GETTER(job_size, ZSTD_c_jobSize)
PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
/* The Python-level name "threads" maps to zstd's worker count. */
PARAM_GETTER(threads, ZSTD_c_nbWorkers)
/* Method table installed as tp_methods of ZstdCompressionParametersType. */
static PyMethodDef ZstdCompressionParameters_methods[] = {
{
/* Registered with METH_STATIC: callable on the type without an instance. */
"from_level",
(PyCFunction)CompressionParameters_from_level,
METH_VARARGS | METH_KEYWORDS | METH_STATIC,
ZstdCompressionParameters_from_level__doc__
},
{
"estimated_compression_context_size",
(PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
METH_NOARGS,
ZstdCompressionParameters_estimated_compression_context_size__doc__
},
/* Sentinel terminating the table. */
{ NULL, NULL }
};
/* Build a PyGetSetDef entry named `name` backed by the getter generated by
 * PARAM_GETTER(name, ...). The setter slot is NULL, so no setter is
 * registered for the attribute. */
#define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
/* Attribute table installed as tp_getset of ZstdCompressionParametersType. */
static PyGetSetDef ZstdCompressionParameters_getset[] = {
GET_SET_ENTRY(format),
GET_SET_ENTRY(compression_level),
GET_SET_ENTRY(window_log),
GET_SET_ENTRY(hash_log),
GET_SET_ENTRY(chain_log),
GET_SET_ENTRY(search_log),
GET_SET_ENTRY(min_match),
GET_SET_ENTRY(target_length),
GET_SET_ENTRY(compression_strategy),
GET_SET_ENTRY(write_content_size),
GET_SET_ENTRY(write_checksum),
GET_SET_ENTRY(write_dict_id),
GET_SET_ENTRY(threads),
GET_SET_ENTRY(job_size),
GET_SET_ENTRY(overlap_log),
/* TODO remove this deprecated attribute */
/* Alias: served by the same getter as overlap_log. */
{ "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
GET_SET_ENTRY(force_max_window),
GET_SET_ENTRY(enable_ldm),
GET_SET_ENTRY(ldm_hash_log),
GET_SET_ENTRY(ldm_min_match),
GET_SET_ENTRY(ldm_bucket_size_log),
GET_SET_ENTRY(ldm_hash_rate_log),
/* TODO remove this deprecated attribute */
/* Alias: served by the same getter as ldm_hash_rate_log. */
{ "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
/* Sentinel terminating the table. */
{ NULL }
};
/* Type object for ZstdCompressionParameters.
 *
 * Instances are created through PyType_GenericNew and initialized by
 * ZstdCompressionParameters_init. The type sets Py_TPFLAGS_BASETYPE, so it
 * can be subclassed. Slots left at 0 are not provided by this file. */
PyTypeObject ZstdCompressionParametersType = {
PyVarObject_HEAD_INIT(NULL, 0)
"ZstdCompressionParameters", /* tp_name */
sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
ZstdCompressionParameters__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
ZstdCompressionParameters_methods, /* tp_methods */
0, /* tp_members */
ZstdCompressionParameters_getset, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)ZstdCompressionParameters_init, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
/**
 * Finalize ZstdCompressionParametersType and publish it on `mod`.
 *
 * The type is added under its current name and under a deprecated alias.
 * The function returns nothing; if readying the type or adding a name
 * fails, it stops early and leaves the Python exception pending.
 */
void compressionparams_module_init(PyObject* mod) {
    static const char* const exportedNames[] = {
        "ZstdCompressionParameters",
        /* TODO remove deprecated alias. */
        "CompressionParameters"
    };
    size_t i;

    Py_SET_TYPE(&ZstdCompressionParametersType, &PyType_Type);
    if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
        return;
    }

    for (i = 0; i < sizeof(exportedNames) / sizeof(exportedNames[0]); i++) {
        Py_INCREF(&ZstdCompressionParametersType);
        /* PyModule_AddObject() takes over the reference only when it
         * succeeds, so the reference is released here on failure. */
        if (PyModule_AddObject(mod, exportedNames[i],
            (PyObject*)&ZstdCompressionParametersType) < 0) {
            Py_DECREF((PyObject*)&ZstdCompressionParametersType);
            return;
        }
    }
}