##// END OF EJS Templates
rebase: move actual rebase into a single transaction...
rebase: move actual rebase into a single transaction Previously, rebasing would open several transaction over the course of rebasing several commits. Opening a transaction can have notable overhead (like copying the dirstate) which can add up when rebasing many commits. This patch adds a single large transaction around the actual commit rebase operation, with a catch for intervention which serializes the current state if we need to drop back to the terminal for user intervention. Amazingly, almost all the tests seem to pass. On large repos with large working copies, this can speed up rebasing 7 commits by 25%. I'd expect the percentage to be a bit larger for rebasing even more commits. There are minor test changes because we're rolling back the entire transaction during unexpected exceptions instead of just stopping mid-rebase, so there's no more backup bundle. It also leave an unknown file in the working copy, since our clean up 'hg update' doesn't delete unknown files.

File last commit:

r30895:c32454d6 default
r31226:cf8ad0e6 default
Show More
compressiondict.c
248 lines | 7.8 KiB | text/x-c | CLexer
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 /**
* Copyright (c) 2016-present, Gregory Szorc
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*/
#include "python-zstandard.h"
extern PyObject* ZstdError;
ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
size_t capacity;
PyObject* samples;
Py_ssize_t samplesLen;
PyObject* parameters = NULL;
ZDICT_params_t zparams;
Py_ssize_t sampleIndex;
Py_ssize_t sampleSize;
PyObject* sampleItem;
size_t zresult;
void* sampleBuffer;
void* sampleOffset;
size_t samplesSize = 0;
size_t* sampleSizes;
void* dict;
ZstdCompressionDict* result;
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary",
kwlist,
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 &capacity,
&PyList_Type, &samples,
(PyObject*)&DictParametersType, &parameters)) {
return NULL;
}
/* Validate parameters first since it is easiest. */
zparams.selectivityLevel = 0;
zparams.compressionLevel = 0;
zparams.notificationLevel = 0;
zparams.dictID = 0;
zparams.reserved[0] = 0;
zparams.reserved[1] = 0;
if (parameters) {
/* TODO validate data ranges */
zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
}
/* Figure out the size of the raw samples */
samplesLen = PyList_Size(samples);
for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
sampleItem = PyList_GetItem(samples, sampleIndex);
if (!PyBytes_Check(sampleItem)) {
PyErr_SetString(PyExc_ValueError, "samples must be bytes");
return NULL;
}
samplesSize += PyBytes_GET_SIZE(sampleItem);
}
/* Now that we know the total size of the raw simples, we can allocate
a buffer for the raw data */
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 sampleBuffer = PyMem_Malloc(samplesSize);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 if (!sampleBuffer) {
PyErr_NoMemory();
return NULL;
}
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 if (!sampleSizes) {
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 PyMem_Free(sampleBuffer);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 PyErr_NoMemory();
return NULL;
}
sampleOffset = sampleBuffer;
/* Now iterate again and assemble the samples in the buffer */
for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
sampleItem = PyList_GetItem(samples, sampleIndex);
sampleSize = PyBytes_GET_SIZE(sampleItem);
sampleSizes[sampleIndex] = sampleSize;
memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
sampleOffset = (char*)sampleOffset + sampleSize;
}
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 dict = PyMem_Malloc(capacity);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 if (!dict) {
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 PyMem_Free(sampleSizes);
PyMem_Free(sampleBuffer);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 PyErr_NoMemory();
return NULL;
}
zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
sampleBuffer, sampleSizes, (unsigned int)samplesLen,
zparams);
if (ZDICT_isError(zresult)) {
PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 PyMem_Free(dict);
PyMem_Free(sampleSizes);
PyMem_Free(sampleBuffer);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 return NULL;
}
result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
if (!result) {
return NULL;
}
result->dictData = dict;
result->dictSize = zresult;
return result;
}
PyDoc_STRVAR(ZstdCompressionDict__doc__,
"ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
"\n"
"This type holds the results of a computed Zstandard compression dictionary.\n"
"Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
"obtained from another source into the constructor.\n"
);
static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
const char* source;
Py_ssize_t sourceSize;
self->dictData = NULL;
self->dictSize = 0;
#if PY_MAJOR_VERSION >= 3
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 #else
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 #endif
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 &source, &sourceSize)) {
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 return -1;
}
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 self->dictData = PyMem_Malloc(sourceSize);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 if (!self->dictData) {
PyErr_NoMemory();
return -1;
}
memcpy(self->dictData, source, sourceSize);
self->dictSize = sourceSize;
return 0;
}
static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
if (self->dictData) {
Gregory Szorc
zstd: vendor python-zstandard 0.6.0...
r30822 PyMem_Free(self->dictData);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 self->dictData = NULL;
}
PyObject_Del(self);
}
static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
return PyLong_FromLong(dictID);
}
static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
}
static PyMethodDef ZstdCompressionDict_methods[] = {
{ "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
{ "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
{ NULL, NULL }
};
static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
return self->dictSize;
}
static PySequenceMethods ZstdCompressionDict_sq = {
(lenfunc)ZstdCompressionDict_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
0, /* sq_item */
0, /* sq_ass_item */
0, /* sq_contains */
0, /* sq_inplace_concat */
0 /* sq_inplace_repeat */
};
PyTypeObject ZstdCompressionDictType = {
PyVarObject_HEAD_INIT(NULL, 0)
"zstd.ZstdCompressionDict", /* tp_name */
sizeof(ZstdCompressionDict), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&ZstdCompressionDict_sq, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
ZstdCompressionDict__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
ZstdCompressionDict_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)ZstdCompressionDict_init, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
void compressiondict_module_init(PyObject* mod) {
Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
if (PyType_Ready(&ZstdCompressionDictType) < 0) {
return;
}
Py_INCREF((PyObject*)&ZstdCompressionDictType);
PyModule_AddObject(mod, "ZstdCompressionDict",
(PyObject*)&ZstdCompressionDictType);
}