##// END OF EJS Templates
copies: move from a copy on branchpoint to a copy on write approach...
copies: move from a copy on branchpoint to a copy on write approach Before this changes, any branch points results in a copy of the dictionary containing the copy information. This can be very costly for branchy history with few rename information. Instead, we take a "copy on write" approach. Copying the input data only when we are about to update them. In practice we where already doing the copying in half of these case (because `_chain` makes a copy), so we don't add a significant cost here even in the linear case. However the speed up in branchy case is very significant. Here are some timing on the pypy repository. revision: large amount; added files: large amount; rename small amount; c3b14617fbd7 9ba6ab77fd29 before: ! wall 1.399863 comb 1.400000 user 1.370000 sys 0.030000 (median of 10) after: ! wall 0.766453 comb 0.770000 user 0.750000 sys 0.020000 (median of 11) revision: large amount; added files: small amount; rename small amount; c3b14617fbd7 f650a9b140d2 before: ! wall 1.876748 comb 1.890000 user 1.870000 sys 0.020000 (median of 10) after: ! wall 1.167223 comb 1.170000 user 1.150000 sys 0.020000 (median of 10) revision: large amount; added files: large amount; rename large amount; 08ea3258278e d9fa043f30c0 before: ! wall 0.242457 comb 0.240000 user 0.240000 sys 0.000000 (median of 39) after: ! wall 0.211476 comb 0.210000 user 0.210000 sys 0.000000 (median of 45) revision: small amount; added files: large amount; rename large amount; df6f7a526b60 a83dc6a2d56f before: ! wall 0.013193 comb 0.020000 user 0.020000 sys 0.000000 (median of 224) after: ! wall 0.013290 comb 0.010000 user 0.010000 sys 0.000000 (median of 222) revision: small amount; added files: large amount; rename small amount; 4aa4e1f8e19a 169138063d63 before: ! wall 0.001673 comb 0.000000 user 0.000000 sys 0.000000 (median of 1000) after: ! wall 0.001677 comb 0.000000 user 0.000000 sys 0.000000 (median of 1000) revision: small amount; added files: small amount; rename small amount; 4bc173b045a6 964879152e2e before: ! wall 0.000119 comb 0.000000 user 0.000000 sys 0.000000 (median of 8023) after: ! wall 0.000119 comb 0.000000 user 0.000000 sys 0.000000 (median of 7997) revision: medium amount; added files: large amount; rename medium amount; c95f1ced15f2 2c68e87c3efe before: ! wall 0.201898 comb 0.210000 user 0.200000 sys 0.010000 (median of 48) after: ! wall 0.167415 comb 0.170000 user 0.160000 sys 0.010000 (median of 58) revision: medium amount; added files: medium amount; rename small amount; d343da0c55a8 d7746d32bf9d before: ! wall 0.036820 comb 0.040000 user 0.040000 sys 0.000000 (median of 100) after: ! wall 0.035797 comb 0.040000 user 0.040000 sys 0.000000 (median of 100) The extra cost in the linear case can be reclaimed later with some extra logic. Differential Revision: https://phab.mercurial-scm.org/D7124

File last commit:

r42237:675775c3 default
r43594:ffd04bc9 default
Show More
compressionwriter.c
372 lines | 10.6 KiB | text/x-c | CLexer
/**
* Copyright (c) 2016-present, Gregory Szorc
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*/
#include "python-zstandard.h"
extern PyObject* ZstdError;
PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
"""A context manager used for writing compressed output to a writer.\n"
);
static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
Py_XDECREF(self->compressor);
Py_XDECREF(self->writer);
PyMem_Free(self->output.dst);
self->output.dst = NULL;
PyObject_Del(self);
}
static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
if (self->closed) {
PyErr_SetString(PyExc_ValueError, "stream is closed");
return NULL;
}
if (self->entered) {
PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
return NULL;
}
self->entered = 1;
Py_INCREF(self);
return (PyObject*)self;
}
static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
PyObject* exc_type;
PyObject* exc_value;
PyObject* exc_tb;
if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
return NULL;
}
self->entered = 0;
if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
if (NULL == result) {
return NULL;
}
}
Py_RETURN_FALSE;
}
static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
}
static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"data",
NULL
};
PyObject* result = NULL;
Py_buffer source;
size_t zresult;
ZSTD_inBuffer input;
PyObject* res;
Py_ssize_t totalWrite = 0;
#if PY_MAJOR_VERSION >= 3
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
#else
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
#endif
kwlist, &source)) {
return NULL;
}
if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
PyErr_SetString(PyExc_ValueError,
"data buffer should be contiguous and have at most one dimension");
goto finally;
}
if (self->closed) {
PyErr_SetString(PyExc_ValueError, "stream is closed");
return NULL;
}
self->output.pos = 0;
input.src = source.buf;
input.size = source.len;
input.pos = 0;
while (input.pos < (size_t)source.len) {
Py_BEGIN_ALLOW_THREADS
zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
goto finally;
}
/* Copy data from output buffer to writer. */
if (self->output.pos) {
#if PY_MAJOR_VERSION >= 3
res = PyObject_CallMethod(self->writer, "write", "y#",
#else
res = PyObject_CallMethod(self->writer, "write", "s#",
#endif
self->output.dst, self->output.pos);
Py_XDECREF(res);
totalWrite += self->output.pos;
self->bytesCompressed += self->output.pos;
}
self->output.pos = 0;
}
if (self->writeReturnRead) {
result = PyLong_FromSize_t(input.pos);
}
else {
result = PyLong_FromSsize_t(totalWrite);
}
finally:
PyBuffer_Release(&source);
return result;
}
static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"flush_mode",
NULL
};
size_t zresult;
ZSTD_inBuffer input;
PyObject* res;
Py_ssize_t totalWrite = 0;
unsigned flush_mode = 0;
ZSTD_EndDirective flush;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
kwlist, &flush_mode)) {
return NULL;
}
switch (flush_mode) {
case 0:
flush = ZSTD_e_flush;
break;
case 1:
flush = ZSTD_e_end;
break;
default:
PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
return NULL;
}
if (self->closed) {
PyErr_SetString(PyExc_ValueError, "stream is closed");
return NULL;
}
self->output.pos = 0;
input.src = NULL;
input.size = 0;
input.pos = 0;
while (1) {
Py_BEGIN_ALLOW_THREADS
zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
return NULL;
}
/* Copy data from output buffer to writer. */
if (self->output.pos) {
#if PY_MAJOR_VERSION >= 3
res = PyObject_CallMethod(self->writer, "write", "y#",
#else
res = PyObject_CallMethod(self->writer, "write", "s#",
#endif
self->output.dst, self->output.pos);
Py_XDECREF(res);
totalWrite += self->output.pos;
self->bytesCompressed += self->output.pos;
}
self->output.pos = 0;
if (!zresult) {
break;
}
}
return PyLong_FromSsize_t(totalWrite);
}
static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
PyObject* result;
if (self->closed) {
Py_RETURN_NONE;
}
result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
self->closed = 1;
if (NULL == result) {
return NULL;
}
/* Call close on underlying stream as well. */
if (PyObject_HasAttrString(self->writer, "close")) {
return PyObject_CallMethod(self->writer, "close", NULL);
}
Py_RETURN_NONE;
}
static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
if (PyObject_HasAttrString(self->writer, "fileno")) {
return PyObject_CallMethod(self->writer, "fileno", NULL);
}
else {
PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
return NULL;
}
}
static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
return PyLong_FromUnsignedLongLong(self->bytesCompressed);
}
static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) {
PyErr_SetNone(PyExc_NotImplementedError);
return NULL;
}
static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) {
Py_RETURN_FALSE;
}
static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) {
Py_RETURN_TRUE;
}
static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
PyObject* iomod;
PyObject* exc;
iomod = PyImport_ImportModule("io");
if (NULL == iomod) {
return NULL;
}
exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
if (NULL == exc) {
Py_DECREF(iomod);
return NULL;
}
PyErr_SetNone(exc);
Py_DECREF(exc);
Py_DECREF(iomod);
return NULL;
}
static PyMethodDef ZstdCompressionWriter_methods[] = {
{ "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
PyDoc_STR("Enter a compression context.") },
{ "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
PyDoc_STR("Exit a compression context.") },
{ "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
{ "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
{ "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
{ "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
{ "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
{ "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
{ "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL },
{ "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
{ "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
PyDoc_STR("Obtain the memory size of the underlying compressor") },
{ "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
PyDoc_STR("Compress data") },
{ "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
PyDoc_STR("Flush data and finish a zstd frame") },
{ "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
PyDoc_STR("Returns current number of bytes compressed") },
{ NULL, NULL }
};
static PyMemberDef ZstdCompressionWriter_members[] = {
{ "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL },
{ NULL }
};
PyTypeObject ZstdCompressionWriterType = {
PyVarObject_HEAD_INIT(NULL, 0)
"zstd.ZstdCompressionWriter", /* tp_name */
sizeof(ZstdCompressionWriter), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
ZstdCompresssionWriter__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
ZstdCompressionWriter_methods, /* tp_methods */
ZstdCompressionWriter_members, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
void compressionwriter_module_init(PyObject* mod) {
Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
return;
}
}