##// END OF EJS Templates
subrepo: handle 'C:' style paths on the command line (issue5770)...
subrepo: handle 'C:' style paths on the command line (issue5770) If you think 'C:' and 'C:\' are equivalent paths, see the inline comment before proceeding. The problem here was that several commands that take a URL argument (incoming, outgoing, pull, and push) will use that value to set 'repo._subtoppath' on the repository object after command specific manipulation of it, but before converting it to an absolute path. When an operation is performed on a relative subrepo, subrepo._abssource() will posixpath.join() this value with the relative subrepo path. That adds a '/' after the drive letter, changing how it is evaluated by abspath()/realpath() in vfsmod.vfs(..., realpath=True) as the subrepo is instantiated. I initially tried sanitizing the path in url.localpath(), because url.isabs() only checks that it starts with a drive letter. By the sample behavior, this is clearly not an absolute path. (Though the comment in isabs() is weasely- this style path can't be joined either.) But not everything funnels through there, and it required explicitly calling localpath() in hg.parseurl() and assigning to url.path to fix. But then tests failed with urls like 'a#0'. Next up was sanitizing the path in the url constructor. That caused doctest failures, because there are drive letter tests, so those got expanded in system specific ways. Yuya correctly pointed out that util.url is a parser, and shouldn't be substituting the path too. Rather than fixing every command call site, just convert it in the common subrepo location. I don't see any sanitizing on the path config options, so I fixed those too. Note that while the behavior is fixed here, there are still places where 'comparing with C:' gets printed out, and that's not great for debugging purposes. (Specifically I saw it in `hg incoming -B C:`, without subrepos.) While clone will write out an absolute default path, I wonder what would happen if a user edited that path to be 'C:'. 
(I don't think supporting relative paths in .hgrc is a sane thing to do, but while we're poking holes in things...) Since this is such an oddball case, since it still leaks through in places, and since there seems to be a lot of duplicate url parsing, maybe the url parsing should be moved to dispatch, which would then provide the command with a url object? Then we could convert this to an absolute path once, and not have to worry about it in the rest of the code. I also checked '--cwd C:' on the command line, and it was previously working because os.chdir() will DTRT. Finally, one other note from the url.localpath() experimenting: I don't see any cases where 'self._hostport' can hold a drive letter, so I'm wondering if that is wrong/old code.

File last commit:

r31796:e0dc4053 default
r35795:0c0689a7 default
Show More
compressiondict.c
392 lines | 10.8 KiB | text/x-c | CLexer
/**
* Copyright (c) 2016-present, Gregory Szorc
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*/
#include "python-zstandard.h"
extern PyObject* ZstdError;
/**
 * Train a Zstandard compression dictionary from a list of bytes samples.
 *
 * Python-level arguments (see kwlist): ``dict_size`` and ``samples`` are
 * required; ``selectivity``, ``level``, ``notifications`` and ``dict_id``
 * are optional and default to 0. ``samples`` must be a list whose items are
 * all bytes, otherwise ValueError is raised.
 *
 * Returns a new ZstdCompressionDict that takes ownership of the trained
 * dictionary buffer (its ``d`` and ``k`` members are set to 0), or NULL with
 * a Python exception set.
 */
ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "dict_size",
        "samples",
        "selectivity",
        "level",
        "notifications",
        "dict_id",
        NULL
    };
    size_t capacity;
    PyObject* samples;
    Py_ssize_t samplesLen;
    unsigned selectivity = 0;
    int level = 0;
    unsigned notifications = 0;
    unsigned dictID = 0;
    ZDICT_params_t zparams;
    Py_ssize_t sampleIndex;
    Py_ssize_t sampleSize;
    PyObject* sampleItem;
    size_t zresult;
    void* sampleBuffer = NULL;
    void* sampleOffset;
    size_t samplesSize = 0;
    size_t* sampleSizes = NULL;
    void* dict = NULL;
    ZstdCompressionDict* result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary",
        kwlist,
        &capacity,
        &PyList_Type, &samples,
        &selectivity, &level, &notifications, &dictID)) {
        return NULL;
    }

    memset(&zparams, 0, sizeof(zparams));
    zparams.selectivityLevel = selectivity;
    zparams.compressionLevel = level;
    zparams.notificationLevel = notifications;
    zparams.dictID = dictID;

    /* Figure out the size of the raw samples */
    samplesLen = PyList_Size(samples);
    for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
        sampleItem = PyList_GetItem(samples, sampleIndex);
        if (!PyBytes_Check(sampleItem)) {
            PyErr_SetString(PyExc_ValueError, "samples must be bytes");
            /* Nothing has been allocated yet, so the shared cleanup below
               only sees NULL pointers. */
            goto finally;
        }
        samplesSize += PyBytes_GET_SIZE(sampleItem);
    }

    /* Now that we know the total size of the raw samples, we can allocate
       a buffer for the raw data */
    sampleBuffer = PyMem_Malloc(samplesSize);
    if (!sampleBuffer) {
        PyErr_NoMemory();
        goto finally;
    }

    sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
    if (!sampleSizes) {
        PyErr_NoMemory();
        goto finally;
    }

    sampleOffset = sampleBuffer;
    /* Now iterate again and assemble the samples in the buffer */
    for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
        sampleItem = PyList_GetItem(samples, sampleIndex);
        sampleSize = PyBytes_GET_SIZE(sampleItem);
        sampleSizes[sampleIndex] = sampleSize;
        memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
        sampleOffset = (char*)sampleOffset + sampleSize;
    }

    dict = PyMem_Malloc(capacity);
    if (!dict) {
        PyErr_NoMemory();
        goto finally;
    }

    /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */
    /* Training only touches the C buffers assembled above, so it runs with
       the GIL released. */
    Py_BEGIN_ALLOW_THREADS
    zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
        sampleBuffer, sampleSizes, (unsigned int)samplesLen,
        zparams);
    Py_END_ALLOW_THREADS

    if (ZDICT_isError(zresult)) {
        PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
        goto finally;
    }

    result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
    if (!result) {
        goto finally;
    }

    /* Ownership of the dictionary buffer moves to the new object. */
    result->dictData = dict;
    result->dictSize = zresult;
    result->d = 0;
    result->k = 0;

finally:
    /* The dictionary buffer is only handed over to a successfully created
       result; on every failure path (including a failed PyObject_New) it is
       still ours to release. */
    if (!result) {
        PyMem_Free(dict);
    }
    PyMem_Free(sampleBuffer);
    PyMem_Free(sampleSizes);

    return result;
}
/**
 * Train a Zstandard compression dictionary using the COVER trainer.
 *
 * Python-level arguments (see kwlist): ``dict_size`` and ``samples`` are
 * required; ``k``, ``d``, ``notifications``, ``dict_id``, ``level``,
 * ``steps`` and ``threads`` are optional and default to 0; ``optimize`` is
 * optional and selects COVER_optimizeTrainFromBuffer when truthy. A negative
 * ``threads`` value is replaced by cpu_count(). ``samples`` must be a list
 * whose items are all bytes, otherwise ValueError is raised.
 *
 * Returns a new ZstdCompressionDict that takes ownership of the trained
 * dictionary buffer, with ``d`` and ``k`` copied from the parameter struct
 * after training, or NULL with a Python exception set.
 */
ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "dict_size",
        "samples",
        "k",
        "d",
        "notifications",
        "dict_id",
        "level",
        "optimize",
        "steps",
        "threads",
        NULL
    };
    size_t capacity;
    PyObject* samples;
    unsigned k = 0;
    unsigned d = 0;
    unsigned notifications = 0;
    unsigned dictID = 0;
    int level = 0;
    PyObject* optimize = NULL;
    int doOptimize = 0;
    unsigned steps = 0;
    int threads = 0;
    COVER_params_t params;
    Py_ssize_t samplesLen;
    Py_ssize_t i;
    size_t samplesSize = 0;
    void* sampleBuffer = NULL;
    size_t* sampleSizes = NULL;
    void* sampleOffset;
    Py_ssize_t sampleSize;
    void* dict = NULL;
    size_t zresult;
    ZstdCompressionDict* result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary",
        kwlist, &capacity, &PyList_Type, &samples,
        &k, &d, &notifications, &dictID, &level, &optimize, &steps, &threads)) {
        return NULL;
    }

    if (threads < 0) {
        threads = cpu_count();
    }

    memset(&params, 0, sizeof(params));
    params.k = k;
    params.d = d;
    params.steps = steps;
    params.nbThreads = threads;
    params.notificationLevel = notifications;
    params.dictID = dictID;
    params.compressionLevel = level;

    /* Figure out total size of input samples. */
    samplesLen = PyList_Size(samples);
    for (i = 0; i < samplesLen; i++) {
        PyObject* sampleItem = PyList_GET_ITEM(samples, i);

        if (!PyBytes_Check(sampleItem)) {
            PyErr_SetString(PyExc_ValueError, "samples must be bytes");
            /* Nothing has been allocated yet, so the shared cleanup below
               only sees NULL pointers. */
            goto finally;
        }
        samplesSize += PyBytes_GET_SIZE(sampleItem);
    }

    sampleBuffer = PyMem_Malloc(samplesSize);
    if (!sampleBuffer) {
        PyErr_NoMemory();
        goto finally;
    }

    sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
    if (!sampleSizes) {
        PyErr_NoMemory();
        goto finally;
    }

    /* Pack every sample back to back and record each one's length. */
    sampleOffset = sampleBuffer;
    for (i = 0; i < samplesLen; i++) {
        PyObject* sampleItem = PyList_GET_ITEM(samples, i);

        sampleSize = PyBytes_GET_SIZE(sampleItem);
        sampleSizes[i] = sampleSize;
        memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
        sampleOffset = (char*)sampleOffset + sampleSize;
    }

    dict = PyMem_Malloc(capacity);
    if (!dict) {
        PyErr_NoMemory();
        goto finally;
    }

    /* Truth-testing an arbitrary object may run Python code (__bool__ /
       __len__), so it must happen while the GIL is still held. It can also
       fail, in which case the exception is propagated instead of being
       mistaken for "true". */
    if (optimize) {
        doOptimize = PyObject_IsTrue(optimize);
        if (doOptimize < 0) {
            goto finally;
        }
    }

    Py_BEGIN_ALLOW_THREADS
    if (doOptimize) {
        /* Takes the parameters by pointer; presumably the selected values
           are written back, which is why d and k are read from params
           after training. */
        zresult = COVER_optimizeTrainFromBuffer(dict, capacity,
            sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
    }
    else {
        zresult = COVER_trainFromBuffer(dict, capacity,
            sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
    }
    Py_END_ALLOW_THREADS

    if (ZDICT_isError(zresult)) {
        PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
        goto finally;
    }

    result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
    if (!result) {
        goto finally;
    }

    /* Ownership of the dictionary buffer moves to the new object. */
    result->dictData = dict;
    result->dictSize = zresult;
    result->d = params.d;
    result->k = params.k;

finally:
    /* The dictionary buffer is only handed over to a successfully created
       result; on every failure path it is still ours to release. */
    if (!result) {
        PyMem_Free(dict);
    }
    PyMem_Free(sampleBuffer);
    PyMem_Free(sampleSizes);

    return result;
}
/* Docstring for the ZstdCompressionDict type; installed as tp_doc in
   ZstdCompressionDictType. The text is user-visible at runtime. */
PyDoc_STRVAR(ZstdCompressionDict__doc__,
"ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
"\n"
"This type holds the results of a computed Zstandard compression dictionary.\n"
"Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
"obtained from another source into the constructor.\n"
);
/**
 * tp_init for ZstdCompressionDict: copy the bytes passed to the constructor
 * into a buffer owned by the object.
 *
 * Expects a single bytes-like argument ("y#" on Python 3, "s#" on Python 2).
 * Returns 0 on success, or -1 with a Python exception set. On failure the
 * object's existing dictionary data (if any) is left untouched.
 */
static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
    const char* source;
    Py_ssize_t sourceSize;
    void* copy;

#if PY_MAJOR_VERSION >= 3
    if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
#else
    if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
#endif
        &source, &sourceSize)) {
        return -1;
    }

    /* Build the private copy first so a failure cannot leave the object
       half-initialized. */
    copy = PyMem_Malloc(sourceSize);
    if (!copy) {
        PyErr_NoMemory();
        return -1;
    }
    memcpy(copy, source, sourceSize);

    /* __init__ can be invoked again on an existing instance (including one
       produced by the training functions); release the buffer it already
       owns instead of leaking it. On first construction the slot is NULL
       (PyType_GenericNew allocates zeroed memory), so this is a no-op. */
    PyMem_Free(self->dictData);

    self->dictData = copy;
    self->dictSize = sourceSize;

    return 0;
}
/**
 * tp_dealloc for ZstdCompressionDict: release the owned dictionary buffer
 * and then the object itself.
 */
static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
    /* PyMem_Free(NULL) is a no-op, so no guard is needed. */
    PyMem_Free(self->dictData);
    self->dictData = NULL;

    /* The type is declared with Py_TPFLAGS_BASETYPE, so `self` may be an
       instance of a subclass whose memory came from a different allocator
       (e.g. a GC-tracked heap type). Free through the instance's own
       tp_free rather than hard-coding PyObject_Del. */
    Py_TYPE(self)->tp_free((PyObject*)self);
}
/**
 * dict_id() -- return the numeric dictionary ID reported by
 * ZDICT_getDictID() for the stored dictionary data, as a Python integer.
 */
static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
    unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);

    /* The ID is unsigned; converting through a signed long would turn
       values >= 2^31 negative on platforms where long is 32 bits wide. */
    return PyLong_FromUnsignedLong(dictID);
}
/**
 * as_bytes() -- return a new bytes object holding a copy of the raw
 * dictionary data (NULL with an exception set if the allocation fails).
 */
static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
    PyObject* rawBytes;

    rawBytes = PyBytes_FromStringAndSize(self->dictData, self->dictSize);

    return rawBytes;
}
/* Method table for ZstdCompressionDict; both methods take no arguments. */
static PyMethodDef ZstdCompressionDict_methods[] = {
    {
        "dict_id",
        (PyCFunction)ZstdCompressionDict_dict_id,
        METH_NOARGS,
        PyDoc_STR("dict_id() -- obtain the numeric dictionary ID")
    },
    {
        "as_bytes",
        (PyCFunction)ZstdCompressionDict_as_bytes,
        METH_NOARGS,
        PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data")
    },
    /* Sentinel entry terminating the table. */
    { NULL, NULL, 0, NULL }
};
/* Read-only attributes exposing the k and d fields of the struct. */
static PyMemberDef ZstdCompressionDict_members[] = {
    {
        "k",
        T_UINT,
        offsetof(ZstdCompressionDict, k),
        READONLY,
        "segment size"
    },
    {
        "d",
        T_UINT,
        offsetof(ZstdCompressionDict, d),
        READONLY,
        "dmer size"
    },
    /* Sentinel entry terminating the table. */
    { NULL, 0, 0, 0, NULL }
};
/**
 * sq_length implementation: len(dict) is the stored dictionary size.
 */
static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
    Py_ssize_t length = self->dictSize;

    return length;
}
/*
 * Sequence protocol table for ZstdCompressionDict. Only the length slot is
 * populated, so instances support len() and nothing else from the sequence
 * protocol.
 *
 * PySequenceMethods has ten slots, including two slice slots that are
 * unused placeholders on Python 3. All ten are listed so every label lines
 * up with the slot it actually initializes.
 */
static PySequenceMethods ZstdCompressionDict_sq = {
    (lenfunc)ZstdCompressionDict_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    0, /* sq_item */
    0, /* sq_slice (Python 2) / was_sq_slice (Python 3) */
    0, /* sq_ass_item */
    0, /* sq_ass_slice (Python 2) / was_sq_ass_slice (Python 3) */
    0, /* sq_contains */
    0, /* sq_inplace_concat */
    0  /* sq_inplace_repeat */
};
/*
 * Type object for zstd.ZstdCompressionDict.
 *
 * The initializer is positional, so the order of entries below must match
 * the PyTypeObject slot layout exactly. Populated slots: deallocator,
 * sequence methods (length only), flags (default + subclassable), docstring,
 * method and member tables, tp_init and tp_new (PyType_GenericNew).
 *
 * Note: ZstdCompressionDict_init is declared with two parameters
 * (self, args) and is cast to initproc here.
 */
PyTypeObject ZstdCompressionDictType = {
PyVarObject_HEAD_INIT(NULL, 0)
"zstd.ZstdCompressionDict", /* tp_name */
sizeof(ZstdCompressionDict), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&ZstdCompressionDict_sq, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
ZstdCompressionDict__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
ZstdCompressionDict_methods, /* tp_methods */
ZstdCompressionDict_members, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)ZstdCompressionDict_init, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
/**
 * Finalize the ZstdCompressionDict type and publish it on `mod` under the
 * name "ZstdCompressionDict". Returns silently if PyType_Ready() fails.
 */
void compressiondict_module_init(PyObject* mod) {
    PyObject* typeObject = (PyObject*)&ZstdCompressionDictType;

    Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;

    if (PyType_Ready(&ZstdCompressionDictType) >= 0) {
        /* The extra reference is taken for the PyModule_AddObject() call. */
        Py_INCREF(typeObject);
        PyModule_AddObject(mod, "ZstdCompressionDict", typeObject);
    }
}