upstream/mercurial-mirror Commit - r49066:1730b2fc

dirstate-v2: adds a flag to mark a file as modified...

Simon Sapin -

r49066:1730b2fc default

parent child

mercurial/cext/parsers.c

0 +11 -8

              /*
               parsers.c - efficient content parsing
               Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
               This software may be used and distributed according to the terms of
               the GNU General Public License, incorporated herein by reference.
              */
              #define PY_SSIZE_T_CLEAN
              #include <Python.h>
              #include <ctype.h>
              #include <stddef.h>
              #include <string.h>
              #include "bitmanipulation.h"
              #include "charencode.h"
              #include "util.h"
              #ifdef IS_PY3K
              /* The mapping of Python types is meant to be temporary to get Python
               * 3 to compile. We should remove this once Python 3 support is fully
               * supported and proper types are used in the extensions themselves. */
              #define PyInt_Check PyLong_Check
              #define PyInt_FromLong PyLong_FromLong
              #define PyInt_FromSsize_t PyLong_FromSsize_t
              #define PyInt_AsLong PyLong_AsLong
              #endif
              /* Error text for a Python version mismatch; its use is not visible in
               * this part of the file. */
              static const char *const versionerrortext = "Python minor version mismatch";
              /* Sentinel values carried in the v1 "size" field: dirstate_item_c_v1_size()
               * emits them and dirstate_item_from_v1_data() compares the incoming size
               * against them to rebuild the flags. */
              static const int dirstate_v1_from_p2 = -2;
              static const int dirstate_v1_nonnormal = -1;
              /* Sentinel carried in the v1 "mtime" field: returned by
               * dirstate_item_c_v1_mtime() when the stored mtime is not usable, and
               * recognised by dirstate_item_from_v1_data(). */
              static const int ambiguous_time = -1;
              /* Python-callable wrapper: reads a single ssize_t argument and forwards
               * it to _dict_new_presized(). Returns NULL (with the parser's exception
               * set) when the argument cannot be parsed. */
              static PyObject *dict_new_presized(PyObject *self, PyObject *args)
              {
              	Py_ssize_t size_hint;
              	if (PyArg_ParseTuple(args, "n:make_presized_dict", &size_hint)) {
              		return _dict_new_presized(size_hint);
              	}
              	return NULL;
              }
              /*
               * tp_new implementation for the dirstate item type.
               *
               * Accepted (all optional, positional or keyword) arguments:
               *   wc_tracked, p1_tracked, p2_info  - ints, default 0
               *   has_meaningful_data              - int, default 1
               *   has_meaningful_mtime             - int, default 1
               *   parentfiledata                   - None (default) or an exact tuple
               *                                      whose first three items are the
               *                                      integers (mode, size, mtime)
               *
               * When parentfiledata is None both has_meaningful_* values are forced
               * to 0 and mode/size/mtime are stored as 0.
               *
               * Returns a new reference, or NULL with an exception set when argument
               * parsing, tuple validation, integer conversion or allocation fails.
               */
              static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
                                                 PyObject *kwds)
              {
              	/* We do all the initialization here and not a tp_init function because
              	 * dirstate_item is immutable. */
              	dirstateItemObject *t;
              	int wc_tracked = 0;
              	int p1_tracked = 0;
              	int p2_info = 0;
              	int has_meaningful_data = 1;
              	int has_meaningful_mtime = 1;
              	/* fields[0..2] hold mode, size and mtime once unpacked */
              	int fields[3] = {0, 0, 0};
              	Py_ssize_t i;
              	PyObject *parentfiledata = Py_None;
              	static char *keywords_name[] = {
              	    "wc_tracked",
              	    "p1_tracked",
              	    "p2_info",
              	    "has_meaningful_data",
              	    "has_meaningful_mtime",
              	    "parentfiledata",
              	    NULL,
              	};
              	if (!PyArg_ParseTupleAndKeywords(
              	        args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
              	        &p2_info, &has_meaningful_data, &has_meaningful_mtime,
              	        &parentfiledata)) {
              		return NULL;
              	}
              	/* Validate and unpack parentfiledata BEFORE allocating the object so
              	 * that no failure path has to release a half-initialized item. */
              	if (parentfiledata != Py_None) {
              		if (!PyTuple_CheckExact(parentfiledata)) {
              			PyErr_SetString(
              			    PyExc_TypeError,
              			    "parentfiledata should be a Tuple or None");
              			return NULL;
              		}
              		for (i = 0; i < 3; i++) {
              			long value;
              			/* borrowed reference; NULL + IndexError if too short */
              			PyObject *item = PyTuple_GetItem(parentfiledata, i);
              			if (item == NULL) {
              				return NULL;
              			}
              			value = PyLong_AsLong(item);
              			/* -1 is also a legitimate value, so check the error
              			 * indicator to tell the two apart */
              			if (value == -1 && PyErr_Occurred()) {
              				return NULL;
              			}
              			fields[i] = (int)value;
              		}
              	} else {
              		has_meaningful_data = 0;
              		has_meaningful_mtime = 0;
              	}
              	t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
              	if (!t) {
              		return NULL;
              	}
              	t->flags = 0;
              	if (wc_tracked) {
              		t->flags |= dirstate_flag_wc_tracked;
              	}
              	if (p1_tracked) {
              		t->flags |= dirstate_flag_p1_tracked;
              	}
              	if (p2_info) {
              		t->flags |= dirstate_flag_p2_info;
              	}
              	if (has_meaningful_data) {
              		t->flags |= dirstate_flag_has_meaningful_data;
              		t->mode = fields[0];
              		t->size = fields[1];
              	} else {
              		t->mode = 0;
              		t->size = 0;
              	}
              	if (has_meaningful_mtime) {
              		t->flags |= dirstate_flag_has_file_mtime;
              		t->mtime = fields[2];
              	} else {
              		t->mtime = 0;
              	}
              	return (PyObject *)t;
              }
              /* tp_dealloc slot: hands the object's memory back through PyObject_Del().
               * The item holds no references of its own that would need dropping here. */
              static void dirstate_item_dealloc(PyObject *item)
              {
              	PyObject_Del(item);
              }
              /* True when the wc_tracked flag is set on the item. */
              static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
              {
              	return (self->flags & dirstate_flag_wc_tracked) != 0;
              }
              /* Tests self->flags against a mask built from the wc_tracked, p1_tracked
               * and p2_info flags and returns the result as a bool (true when any masked
               * bit is set).
               * NOTE(review): the declaration of `mask` below still carries diff markers
               * ('-' old lines typed `unsigned char`, '+' new line typed `int`). */
              static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
              {
-             	const unsigned char mask = dirstate_flag_wc_tracked |
-             	                           dirstate_flag_p1_tracked |
+             	const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
-             	                           dirstate_flag_p2_info;
              	return (self->flags & mask);
              }
              /* True when, among the wc_tracked / p1_tracked / p2_info flags, exactly
               * wc_tracked is set: the masked flags must equal `target`.
               * NOTE(review): the `mask` and `target` declarations still carry diff
               * markers ('-' lines typed `unsigned char`, '+' lines typed `int`). */
              static inline bool dirstate_item_c_added(dirstateItemObject *self)
              {
-             	const unsigned char mask =
-             	    (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
+             	const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
              	     dirstate_flag_p2_info);
-             	const unsigned char target = dirstate_flag_wc_tracked;
+             	const int target = dirstate_flag_wc_tracked;
              	return (self->flags & mask) == target;
              }
              /* True when the item is not tracked in the working copy but still has
               * the p1_tracked or p2_info flag set. */
              static inline bool dirstate_item_c_removed(dirstateItemObject *self)
              {
              	const bool in_working_copy =
              	    (self->flags & dirstate_flag_wc_tracked) != 0;
              	const bool known_to_parent =
              	    (self->flags &
              	     (dirstate_flag_p1_tracked | dirstate_flag_p2_info)) != 0;
              	return !in_working_copy && known_to_parent;
              }
              /* True when wc_tracked, p1_tracked and p2_info are all set. */
              static inline bool dirstate_item_c_merged(dirstateItemObject *self)
              {
              	if (!(self->flags & dirstate_flag_wc_tracked)) {
              		return false;
              	}
              	if (!(self->flags & dirstate_flag_p1_tracked)) {
              		return false;
              	}
              	return (self->flags & dirstate_flag_p2_info) != 0;
              }
              /* True when wc_tracked and p2_info are set while p1_tracked is clear. */
              static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
              {
              	if (!(self->flags & dirstate_flag_wc_tracked)) {
              		return false;
              	}
              	if (self->flags & dirstate_flag_p1_tracked) {
              		return false;
              	}
              	return (self->flags & dirstate_flag_p2_info) != 0;
              }
              /* Maps the item's flags to the single-character v1 state. The checks
               * run in priority order: removed ('r'), merged ('m'), added ('a');
               * everything else is reported as normal ('n'). */
              static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
              {
              	if (dirstate_item_c_removed(self)) {
              		return 'r';
              	}
              	if (dirstate_item_c_merged(self)) {
              		return 'm';
              	}
              	if (dirstate_item_c_added(self)) {
              		return 'a';
              	}
              	return 'n';
              }
              /* Returns the stored mode when the has_meaningful_data flag is set,
               * 0 otherwise. */
              static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
              {
              	return (self->flags & dirstate_flag_has_meaningful_data) ? self->mode
              	                                                         : 0;
              }
              /* Computes the value of the v1 "size" field, which doubles as a carrier
               * for the dirstate_v1_from_p2 / dirstate_v1_nonnormal sentinels. The
               * checks below are order-sensitive. */
              static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
              {
              	const bool in_wc = (self->flags & dirstate_flag_wc_tracked) != 0;
              	const bool has_p2 = (self->flags & dirstate_flag_p2_info) != 0;

              	/* untracked in the working copy but carrying p2 information */
              	if (!in_wc && has_p2) {
              		return (self->flags & dirstate_flag_p1_tracked)
              		           ? dirstate_v1_nonnormal
              		           : dirstate_v1_from_p2;
              	}
              	if (dirstate_item_c_removed(self)) {
              		return 0;
              	}
              	if (has_p2) {
              		return dirstate_v1_from_p2;
              	}
              	if (dirstate_item_c_added(self)) {
              		return dirstate_v1_nonnormal;
              	}
              	if (self->flags & dirstate_flag_has_meaningful_data) {
              		return self->size;
              	}
              	return dirstate_v1_nonnormal;
              }
              /* Computes the value of the v1 "mtime" field: 0 for removed items, the
               * stored mtime when it is flagged as present and the item is tracked in
               * both the working copy and p1 without p2 information, and
               * ambiguous_time in every other case. */
              static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
              {
              	if (dirstate_item_c_removed(self)) {
              		return 0;
              	}
              	if ((self->flags & dirstate_flag_has_file_mtime) &&
              	    (self->flags & dirstate_flag_p1_tracked) &&
              	    (self->flags & dirstate_flag_wc_tracked) &&
              	    !(self->flags & dirstate_flag_p2_info)) {
              		return self->mtime;
              	}
              	return ambiguous_time;
              }
              /* Builds the tuple (flags, size, mtime) used for v2 serialization.
               * The flags start as a copy of self->flags; the exec-permission and
               * symlink flags are then set or cleared according to the v1 mode
               * (S_IXUSR bit / S_ISLNK test). size and mtime are taken directly from
               * the struct fields, not from the v1 accessors.
               * NOTE(review): the `flags` declaration and the Py_BuildValue call still
               * carry diff markers ('-' lines use `unsigned char` / "Bii", '+' lines
               * use `int` / "iii"). */
              static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
              {
-             	unsigned char flags = self->flags;
+             	int flags = self->flags;
              	int mode = dirstate_item_c_v1_mode(self);
              	if ((mode & S_IXUSR) != 0) {
              		flags |= dirstate_flag_mode_exec_perm;
              	} else {
              		flags &= ~dirstate_flag_mode_exec_perm;
              	}
              	if (S_ISLNK(mode)) {
              		flags |= dirstate_flag_mode_is_symlink;
              	} else {
              		flags &= ~dirstate_flag_mode_is_symlink;
              	}
-             	return Py_BuildValue("Bii", flags, self->size, self->mtime);
+             	return Py_BuildValue("iii", flags, self->size, self->mtime);
              };
              /* Method `v1_state`: returns the v1 state character as a 1-byte bytes
               * object. */
              static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
              {
              	const char code = dirstate_item_c_v1_state(self);
              	return PyBytes_FromStringAndSize(&code, 1);
              }
              /* Method `v1_mode`: returns the v1 mode as a Python integer. */
              static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
              {
              	const int v1_mode = dirstate_item_c_v1_mode(self);
              	return PyInt_FromLong(v1_mode);
              }
              /* Method `v1_size`: returns the v1 size (possibly a sentinel) as a
               * Python integer. */
              static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
              {
              	const int v1_size = dirstate_item_c_v1_size(self);
              	return PyInt_FromLong(v1_size);
              }
              /* Method `v1_mtime`: returns the v1 mtime (possibly ambiguous_time) as
               * a Python integer. */
              static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
              {
              	const int v1_mtime = dirstate_item_c_v1_mtime(self);
              	return PyInt_FromLong(v1_mtime);
              }
              /* Method `need_delay(now)`: True only when the item's v1 state is 'n'
               * and its v1 mtime equals `now`. Returns NULL when `value` cannot be
               * converted by pylong_to_long(). */
              static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
                                                        PyObject *value)
              {
              	long now;
              	if (!pylong_to_long(value, &now)) {
              		return NULL;
              	}
              	if (dirstate_item_c_v1_state(self) != 'n') {
              		Py_RETURN_FALSE;
              	}
              	if (dirstate_item_c_v1_mtime(self) != now) {
              		Py_RETURN_FALSE;
              	}
              	Py_RETURN_TRUE;
              }
              /* Builds a new item from the four v1 fields. The v1 format is fixed, so
               * this translation is not expected to change.
               *
               * The flags are derived from `state` and, for 'r' and 'n', from the
               * sentinel values that `size` and `mtime` may carry. mode/size/mtime are
               * only stored for 'n' entries with a regular size.
               *
               * Returns a new reference, or NULL with an exception set on allocation
               * failure or when `state` is not one of 'm', 'a', 'r', 'n'. */
              static inline dirstateItemObject *
              dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
              {
              	dirstateItemObject *item =
              	    PyObject_New(dirstateItemObject, &dirstateItemType);
              	if (item == NULL) {
              		return NULL;
              	}
              	item->flags = 0;
              	item->mode = 0;
              	item->size = 0;
              	item->mtime = 0;
              	switch (state) {
              	case 'm':
              		item->flags =
              		    (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
              		     dirstate_flag_p2_info);
              		break;
              	case 'a':
              		item->flags = dirstate_flag_wc_tracked;
              		break;
              	case 'r':
              		if (size == dirstate_v1_nonnormal) {
              			item->flags =
              			    dirstate_flag_p1_tracked | dirstate_flag_p2_info;
              		} else if (size == dirstate_v1_from_p2) {
              			item->flags = dirstate_flag_p2_info;
              		} else {
              			item->flags = dirstate_flag_p1_tracked;
              		}
              		break;
              	case 'n':
              		if (size == dirstate_v1_from_p2) {
              			item->flags =
              			    dirstate_flag_wc_tracked | dirstate_flag_p2_info;
              			break;
              		}
              		if (size == dirstate_v1_nonnormal) {
              			item->flags =
              			    dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
              			break;
              		}
              		/* regular entry: mode and size are always kept, the mtime
              		 * only when it is not the ambiguous sentinel */
              		item->flags = (dirstate_flag_wc_tracked |
              		               dirstate_flag_p1_tracked |
              		               dirstate_flag_has_meaningful_data);
              		item->mode = mode;
              		item->size = size;
              		if (mtime != ambiguous_time) {
              			item->flags |= dirstate_flag_has_file_mtime;
              			item->mtime = mtime;
              		}
              		break;
              	default:
              		PyErr_Format(PyExc_RuntimeError,
              		             "unknown state: `%c` (%d, %d, %d)", state, mode,
              		             size, mtime, NULL);
              		Py_DECREF(item);
              		return NULL;
              	}
              	return item;
              }
              /* Class method `from_v1_data(state, mode, size, mtime)`: parses the four
               * v1 fields and delegates to dirstate_item_from_v1_data(). Bound to the
               * v1 format, so (unlike `dirstate_item_new`) it is not expected to
               * change. Returns NULL when parsing or construction fails. */
              static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
                                                          PyObject *args)
              {
              	char state;
              	int mode;
              	int size;
              	int mtime;
              	dirstateItemObject *item;
              	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
              		return NULL;
              	}
              	item = dirstate_item_from_v1_data(state, mode, size, mtime);
              	return (PyObject *)item;
              }
              /* Class method `from_v2_data(flags, size, mtime)`: allocates a new item
               * and parses the three values straight into its fields.
               *
               * In the '+' lines, when the expected_state_is_modified flag is present,
               * that flag is cleared together with has_meaningful_data and
               * has_file_mtime.
               *
               * The mode is not part of the v2 data: it is rebuilt as 0755 or 0644
               * (exec-permission flag) combined with S_IFLNK or S_IFREG (symlink
               * flag) when has_meaningful_data is set, and left at 0 otherwise.
               *
               * NOTE(review): when PyArg_ParseTuple fails the function returns NULL
               * without releasing `t`, so the freshly allocated item is leaked.
               * NOTE(review): the PyArg_ParseTuple line still carries diff markers
               * ('-' uses "bii", '+' uses "iii"). */
              static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
                                                          PyObject *args)
              {
              	dirstateItemObject *t =
              	    PyObject_New(dirstateItemObject, &dirstateItemType);
              	if (!t) {
              		return NULL;
              	}
-             	if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
+             	if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
              		return NULL;
              	}
+             	if (t->flags & dirstate_flag_expected_state_is_modified) {
+             		t->flags &= ~(dirstate_flag_expected_state_is_modified |
+             		              dirstate_flag_has_meaningful_data |
+             		              dirstate_flag_has_file_mtime);
+             	}
              	t->mode = 0;
              	if (t->flags & dirstate_flag_has_meaningful_data) {
              		if (t->flags & dirstate_flag_mode_exec_perm) {
              			t->mode = 0755;
              		} else {
              			t->mode = 0644;
              		}
              		if (t->flags & dirstate_flag_mode_is_symlink) {
              			t->mode |= S_IFLNK;
              		} else {
              			t->mode |= S_IFREG;
              		}
              	}
              	return (PyObject *)t;
              };
              /* Method `set_possibly_dirty`: drops the has_file_mtime flag, so the next
               * status call will have to actually check the file's content to make
               * sure it is correct. Always returns None. */
              static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
              {
              	self->flags = self->flags & ~dirstate_flag_has_file_mtime;
              	Py_RETURN_NONE;
              }
              /* Method `set_clean(mode, size, mtime)`: see the docstring of the python
               * implementation for details. Replaces the flags with wc_tracked,
               * p1_tracked, has_meaningful_data and has_file_mtime, and stores the
               * three values. Returns None, or NULL when the arguments do not parse. */
              static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
                                                       PyObject *args)
              {
              	int new_mode;
              	int new_size;
              	int new_mtime;
              	if (!PyArg_ParseTuple(args, "iii", &new_mode, &new_size,
              	                      &new_mtime)) {
              		return NULL;
              	}
              	self->mode = new_mode;
              	self->size = new_size;
              	self->mtime = new_mtime;
              	self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
              	              dirstate_flag_has_meaningful_data |
              	              dirstate_flag_has_file_mtime;
              	Py_RETURN_NONE;
              }
              /* Method `set_tracked`: sets wc_tracked and clears has_file_mtime.
               * Always returns None. */
              static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
              {
              	self->flags = (self->flags | dirstate_flag_wc_tracked) &
              	              ~dirstate_flag_has_file_mtime;
              	Py_RETURN_NONE;
              }
              /* Method `set_untracked`: clears wc_tracked and zeroes the stored mode,
               * size and mtime. Other flags are left untouched. Always returns None. */
              static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
              {
              	self->mode = 0;
              	self->size = 0;
              	self->mtime = 0;
              	self->flags = self->flags & ~dirstate_flag_wc_tracked;
              	Py_RETURN_NONE;
              }
              /* Method `drop_merge_data`: when p2_info is set, clears it together with
               * has_meaningful_data and has_file_mtime and zeroes mode, size and mtime.
               * Items without p2_info are left unchanged. Always returns None. */
              static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
              {
              	if (!(self->flags & dirstate_flag_p2_info)) {
              		Py_RETURN_NONE;
              	}
              	self->flags &= ~(dirstate_flag_p2_info |
              	                 dirstate_flag_has_meaningful_data |
              	                 dirstate_flag_has_file_mtime);
              	self->mode = 0;
              	self->size = 0;
              	self->mtime = 0;
              	Py_RETURN_NONE;
              }
              /* Method table for the dirstate item type (installed as tp_methods in
               * dirstateItemType). Each entry is {python name, C function, calling
               * convention, docstring}. The two `from_*_data` constructors are class
               * methods (METH_CLASS); `need_delay` takes exactly one argument (METH_O);
               * the remaining entries take either no argument or an argument tuple. */
              static PyMethodDef dirstate_item_methods[] = {
                  {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
                   "return data suitable for v2 serialization"},
                  {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
                   "return a \"state\" suitable for v1 serialization"},
                  {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
                   "return a \"mode\" suitable for v1 serialization"},
                  {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
                   "return a \"size\" suitable for v1 serialization"},
                  {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
                   "return a \"mtime\" suitable for v1 serialization"},
                  {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
                   "True if the stored mtime would be ambiguous with the current time"},
                  {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
                   METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
                  {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
                   METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
                  {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
                   METH_NOARGS, "mark a file as \"possibly dirty\""},
                  {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
                   "mark a file as \"clean\""},
                  {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
                   "mark a file as \"tracked\""},
                  {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
                   "mark a file as \"untracked\""},
                  {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
                   "remove all \"merge-only\" from a DirstateItem"},
                  {NULL} /* Sentinel */
              };
              /* Getter for `mode`: the v1 mode as a Python integer. */
              static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
              {
              	const int v1_mode = dirstate_item_c_v1_mode(self);
              	return PyInt_FromLong(v1_mode);
              }
              /* Getter for `size`: the v1 size (possibly a sentinel) as a Python
               * integer. */
              static PyObject *dirstate_item_get_size(dirstateItemObject *self)
              {
              	const int v1_size = dirstate_item_c_v1_size(self);
              	return PyInt_FromLong(v1_size);
              }
              /* Getter for `mtime`: the v1 mtime (possibly ambiguous_time) as a Python
               * integer. */
              static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
              {
              	const int v1_mtime = dirstate_item_c_v1_mtime(self);
              	return PyInt_FromLong(v1_mtime);
              }
              /* Getter for `state`: the v1 state character as a 1-byte bytes object. */
              static PyObject *dirstate_item_get_state(dirstateItemObject *self)
              {
              	const char code = dirstate_item_c_v1_state(self);
              	return PyBytes_FromStringAndSize(&code, 1);
              }
              /* Getter for `tracked`: True when wc_tracked is set. */
              static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
              {
              	return PyBool_FromLong(dirstate_item_c_tracked(self) ? 1 : 0);
              }
              /* Getter for `p1_tracked`: True when the p1_tracked flag is set. */
              static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
              {
              	const bool p1 = (self->flags & dirstate_flag_p1_tracked) != 0;
              	return PyBool_FromLong(p1 ? 1 : 0);
              }
              /* Getter for `added`: result of dirstate_item_c_added() as a bool. */
              static PyObject *dirstate_item_get_added(dirstateItemObject *self)
              {
              	return PyBool_FromLong(dirstate_item_c_added(self) ? 1 : 0);
              }
              /* Getter for `p2_info`: True only when both wc_tracked and p2_info are
               * set, so an untracked item reports False even if the flag is present. */
              static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
              {
              	const bool in_wc = (self->flags & dirstate_flag_wc_tracked) != 0;
              	const bool has_p2 = (self->flags & dirstate_flag_p2_info) != 0;
              	return PyBool_FromLong((in_wc && has_p2) ? 1 : 0);
              }
              /* Getter for `merged`: result of dirstate_item_c_merged() as a bool. */
              static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
              {
              	return PyBool_FromLong(dirstate_item_c_merged(self) ? 1 : 0);
              }
              /* Getter for `from_p2`: result of dirstate_item_c_from_p2() as a bool. */
              static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
              {
              	return PyBool_FromLong(dirstate_item_c_from_p2(self) ? 1 : 0);
              }
              /* Getter for `maybe_clean`: True only when the item is tracked in both
               * the working copy and p1 and carries no p2 information. */
              static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
              {
              	const bool in_wc = (self->flags & dirstate_flag_wc_tracked) != 0;
              	const bool in_p1 = (self->flags & dirstate_flag_p1_tracked) != 0;
              	const bool has_p2 = (self->flags & dirstate_flag_p2_info) != 0;
              	return PyBool_FromLong((in_wc && in_p1 && !has_p2) ? 1 : 0);
              }
              /* Getter for `any_tracked`: result of dirstate_item_c_any_tracked() as a
               * bool. */
              static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
              {
              	return PyBool_FromLong(dirstate_item_c_any_tracked(self) ? 1 : 0);
              }
              /* Getter for `removed`: result of dirstate_item_c_removed() as a bool. */
              static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
              {
              	return PyBool_FromLong(dirstate_item_c_removed(self) ? 1 : 0);
              }
              /* Property table for the dirstate item type (installed as tp_getset in
               * dirstateItemType). Each entry is {name, getter, setter, docstring,
               * closure}; every setter is NULL, so all of these attributes are
               * read-only from Python. */
              static PyGetSetDef dirstate_item_getset[] = {
                  {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
                  {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
                  {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
                  {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
                  {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
                  {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
                   NULL},
                  {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
                  {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
                  {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
                  {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
                  {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
                   NULL},
                  {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
                   NULL},
                  {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
                  {NULL} /* Sentinel */
              };
              /* Type object for dirstate items. Only the slots this file implements
               * are filled in (deallocation, method table, property table and tp_new);
               * every other slot is explicitly 0 so the positional initializer stays
               * aligned with the slot named in each trailing comment. */
              PyTypeObject dirstateItemType = {
                  PyVarObject_HEAD_INIT(NULL, 0)     /* header */
                  "dirstate_tuple",                  /* tp_name */
                  sizeof(dirstateItemObject),        /* tp_basicsize */
                  0,                                 /* tp_itemsize */
                  (destructor)dirstate_item_dealloc, /* tp_dealloc */
                  0,                                 /* tp_print */
                  0,                                 /* tp_getattr */
                  0,                                 /* tp_setattr */
                  0,                                 /* tp_compare */
                  0,                                 /* tp_repr */
                  0,                                 /* tp_as_number */
                  0,                                 /* tp_as_sequence */
                  0,                                 /* tp_as_mapping */
                  0,                                 /* tp_hash  */
                  0,                                 /* tp_call */
                  0,                                 /* tp_str */
                  0,                                 /* tp_getattro */
                  0,                                 /* tp_setattro */
                  0,                                 /* tp_as_buffer */
                  Py_TPFLAGS_DEFAULT,                /* tp_flags */
                  "dirstate tuple",                  /* tp_doc */
                  0,                                 /* tp_traverse */
                  0,                                 /* tp_clear */
                  0,                                 /* tp_richcompare */
                  0,                                 /* tp_weaklistoffset */
                  0,                                 /* tp_iter */
                  0,                                 /* tp_iternext */
                  dirstate_item_methods,             /* tp_methods */
                  0,                                 /* tp_members */
                  dirstate_item_getset,              /* tp_getset */
                  0,                                 /* tp_base */
                  0,                                 /* tp_dict */
                  0,                                 /* tp_descr_get */
                  0,                                 /* tp_descr_set */
                  0,                                 /* tp_dictoffset */
                  0,                                 /* tp_init */
                  0,                                 /* tp_alloc */
                  dirstate_item_new,                 /* tp_new */
              };
              /*
               * Parse a v1 dirstate buffer.
               *
               * Python arguments: a dict to fill with filename -> dirstate item
               * (dmap), a dict to fill with filename -> copy source (cmap), and the
               * raw dirstate bytes.
               *
               * Layout consumed here: 40 bytes of parents (two 20-byte values),
               * then a sequence of entries made of a 17-byte header (1 byte state,
               * then mode, size, mtime and filename length, each read with getbe32)
               * followed by `flen` bytes of name. If the name contains a NUL byte, the
               * part before it is the filename and the part after it is stored in cmap.
               *
               * Returns a 2-tuple with the two parents, or NULL with an exception set.
               * All temporaries are released through the shared `quit` label.
               */
              static PyObject *parse_dirstate(PyObject *self, PyObject *args)
              {
              	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
              	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
              	char state, *cur, *str, *cpos;
              	int mode, size, mtime;
              	unsigned int flen, pos = 40;
              	Py_ssize_t len = 40;
              	Py_ssize_t readlen;
              	if (!PyArg_ParseTuple(
              	        args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
              	        &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
              		goto quit;
              	}
              	len = readlen;
              	/* read parents */
              	if (len < 40) {
              		PyErr_SetString(PyExc_ValueError,
              		                "too little data for parents");
              		goto quit;
              	}
              	parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
              	                        str + 20, (Py_ssize_t)20);
              	if (!parents) {
              		goto quit;
              	}
              	/* read filenames */
              	/* NOTE(review): `pos` is unsigned, so the `pos >= 40` test presumably
              	 * stops the loop if `pos += flen` ever wraps around - confirm. */
              	while (pos >= 40 && pos < len) {
              		/* a full 17-byte entry header must be available */
              		if (pos + 17 > len) {
              			PyErr_SetString(PyExc_ValueError,
              			                "overflow in dirstate");
              			goto quit;
              		}
              		cur = str + pos;
              		/* unpack header */
              		state = *cur;
              		mode = getbe32(cur + 1);
              		size = getbe32(cur + 5);
              		mtime = getbe32(cur + 9);
              		flen = getbe32(cur + 13);
              		pos += 17;
              		cur += 17;
              		/* the declared name length must fit in what is left */
              		if (flen > len - pos) {
              			PyErr_SetString(PyExc_ValueError,
              			                "overflow in dirstate");
              			goto quit;
              		}
              		entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
              		                                               size, mtime);
              		if (!entry)
              			goto quit;
              		/* a NUL byte separates the filename from its copy source */
              		cpos = memchr(cur, 0, flen);
              		if (cpos) {
              			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
              			cname = PyBytes_FromStringAndSize(
              			    cpos + 1, flen - (cpos - cur) - 1);
              			if (!fname || !cname ||
              			    PyDict_SetItem(cmap, fname, cname) == -1 ||
              			    PyDict_SetItem(dmap, fname, entry) == -1) {
              				goto quit;
              			}
              			Py_DECREF(cname);
              		} else {
              			fname = PyBytes_FromStringAndSize(cur, flen);
              			if (!fname ||
              			    PyDict_SetItem(dmap, fname, entry) == -1) {
              				goto quit;
              			}
              		}
              		/* the dicts hold their own references now; drop ours and reset
              		 * the pointers so the cleanup at `quit` does not release them
              		 * a second time */
              		Py_DECREF(fname);
              		Py_DECREF(entry);
              		fname = cname = entry = NULL;
              		pos += flen;
              	}
              	/* success: hand out an extra reference, since `parents` itself is
              	 * released below */
              	ret = parents;
              	Py_INCREF(ret);
              quit:
              	Py_XDECREF(fname);
              	Py_XDECREF(cname);
              	Py_XDECREF(entry);
              	Py_XDECREF(parents);
              	return ret;
              }
              /*
               * Efficiently pack a dirstate object into its on-disk format.
               *
               * Python-level arguments (parsed from `args`):
               *   map     - dict: bytes filename -> DirstateItem
               *   copymap - dict: bytes filename -> bytes copy source
               *   pl      - 2-tuple of 20-byte parent hashes
               *   now     - int timestamp; entries in state 'n' whose mtime equals
               *             `now` are written with mtime -1 and replaced in `map`
               *             by a fresh item carrying that mtime
               *
               * Output layout: the two 20-byte parent hashes, then for each entry one
               * state byte, four big-endian 32-bit fields (mode, size, mtime, name
               * length) and the name, optionally followed by '\0' and the copy source.
               *
               * Returns a new bytes object, or NULL with an exception set.
               */
              static PyObject *pack_dirstate(PyObject *self, PyObject *args)
              {
              	PyObject *packobj = NULL;
              	PyObject *map, *copymap, *pl, *mtime_unset = NULL;
              	Py_ssize_t nbytes, pos, l;
              	PyObject *k, *v, *pn;
              	char *p, *s;
              	int now;
              	if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
              	                      &PyDict_Type, &copymap, &PyTuple_Type, &pl,
              	                      &now)) {
              		return NULL;
              	}
              	if (PyTuple_Size(pl) != 2) {
              		PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
              		return NULL;
              	}
              	/* Figure out how much we need to allocate: 40 bytes of parents,
              	 * then 17 bytes of fixed fields plus the name per entry, plus a
              	 * NUL separator and the copy source when there is one. */
              	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
              		PyObject *c;
              		if (!PyBytes_Check(k)) {
              			PyErr_SetString(PyExc_TypeError, "expected string key");
              			goto bail;
              		}
              		nbytes += PyBytes_GET_SIZE(k) + 17;
              		c = PyDict_GetItem(copymap, k);
              		if (c) {
              			if (!PyBytes_Check(c)) {
              				PyErr_SetString(PyExc_TypeError,
              				                "expected string key");
              				goto bail;
              			}
              			nbytes += PyBytes_GET_SIZE(c) + 1;
              		}
              	}
              	packobj = PyBytes_FromStringAndSize(NULL, nbytes);
              	if (packobj == NULL) {
              		goto bail;
              	}
              	p = PyBytes_AS_STRING(packobj);
              	/* First parent hash. */
              	pn = PyTuple_GET_ITEM(pl, 0);
              	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
              		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
              		goto bail;
              	}
              	memcpy(p, s, l);
              	p += 20;
              	/* Second parent hash. */
              	pn = PyTuple_GET_ITEM(pl, 1);
              	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
              		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
              		goto bail;
              	}
              	memcpy(p, s, l);
              	p += 20;
              	for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
              		dirstateItemObject *tuple;
              		char state;
              		int mode, size, mtime;
              		Py_ssize_t len, l;
              		PyObject *o;
              		char *t;
              		if (!dirstate_tuple_check(v)) {
              			PyErr_SetString(PyExc_TypeError,
              			                "expected a dirstate tuple");
              			goto bail;
              		}
              		tuple = (dirstateItemObject *)v;
              		state = dirstate_item_c_v1_state(tuple);
              		mode = dirstate_item_c_v1_mode(tuple);
              		size = dirstate_item_c_v1_size(tuple);
              		mtime = dirstate_item_c_v1_mtime(tuple);
              		if (state == 'n' && mtime == now) {
              			/* See pure/parsers.py:pack_dirstate for why we do
              			 * this. */
              			mtime = -1;
              			mtime_unset = (PyObject *)dirstate_item_from_v1_data(
              			    state, mode, size, mtime);
              			if (!mtime_unset) {
              				goto bail;
              			}
              			/* Only the value of an existing key is replaced, so
              			 * the iteration stays valid; the old value (v) may
              			 * be freed here and must not be used afterwards. */
              			if (PyDict_SetItem(map, k, mtime_unset) == -1) {
              				goto bail;
              			}
              			Py_DECREF(mtime_unset);
              			mtime_unset = NULL;
              		}
              		*p++ = state;
              		putbe32((uint32_t)mode, p);
              		putbe32((uint32_t)size, p + 4);
              		putbe32((uint32_t)mtime, p + 8);
              		/* Remember where the length field goes; it is only known
              		 * once the optional copy source has been appended. */
              		t = p + 12;
              		p += 16;
              		len = PyBytes_GET_SIZE(k);
              		memcpy(p, PyBytes_AS_STRING(k), len);
              		p += len;
              		o = PyDict_GetItem(copymap, k);
              		if (o) {
              			*p++ = '\0';
              			l = PyBytes_GET_SIZE(o);
              			memcpy(p, PyBytes_AS_STRING(o), l);
              			p += l;
              			len += l + 1;
              		}
              		putbe32((uint32_t)len, t);
              	}
              	/* Sanity check: the bytes written must match the precomputed size. */
              	pos = p - PyBytes_AS_STRING(packobj);
              	if (pos != nbytes) {
              		PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
              		             (long)pos, (long)nbytes);
              		goto bail;
              	}
              	return packobj;
              bail:
              	Py_XDECREF(mtime_unset);
              	Py_XDECREF(packobj);
              	/* k and v are borrowed references handed out by PyDict_Next (and v
              	 * may already be stale after PyDict_SetItem), so they are
              	 * deliberately not released here. */
              	return NULL;
              }
              /* Bit values for the 16-bit `flags` field of an obsolescence marker.
               * USING_SHA_256 is tested by fm1readmarker() to switch the hash width
               * from 20 to 32 bytes; BUMPED_FIX is not referenced in this part of the
               * file. */
              #define BUMPED_FIX 1
              #define USING_SHA_256 2
              /* Byte size of the fixed marker header, in the order fm1readmarker() reads
               * it: size (4), mtime (8), tz (2), flags (2), then the successor, parent
               * and metadata counts (1 byte each). */
              #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
              /*
               * Build a tuple of `num` bytes objects, each holding `hashwidth` bytes taken
               * consecutively from `source`. The caller is responsible for making sure
               * that `num * hashwidth` bytes are readable.
               *
               * Returns a new reference, or NULL with an exception set.
               */
              static PyObject *readshas(const char *source, unsigned char num,
                                        Py_ssize_t hashwidth)
              {
              	const char *cursor = source;
              	int idx;
              	PyObject *hashes = PyTuple_New(num);
              	if (!hashes) {
              		return NULL;
              	}
              	for (idx = 0; idx < num; idx++, cursor += hashwidth) {
              		PyObject *item = PyBytes_FromStringAndSize(cursor, hashwidth);
              		if (!item) {
              			Py_DECREF(hashes);
              			return NULL;
              		}
              		/* The tuple takes over the reference to item. */
              		PyTuple_SET_ITEM(hashes, idx, item);
              	}
              	return hashes;
              }
              /*
               * Parse a single obsolescence marker starting at `databegin`.
               *
               * databegin - first byte of the marker
               * dataend   - one past the last readable byte of the enclosing buffer
               * msize     - out: total size of the marker as recorded in its header
               *             (left untouched if the fixed header does not fit)
               *
               * Returns a new tuple
               *   (prec, succs, flags, metadata, (mtime, tz * 60), parents)
               * where `parents` is None unless the marker records 1 or 2 parents.
               * On failure returns NULL with an exception set; reads that would run
               * past the available data raise ValueError("overflow in obsstore").
               */
              static PyObject *fm1readmarker(const char *databegin, const char *dataend,
                                             uint32_t *msize)
              {
              	const char *data = databegin;
              	const char *meta;
              	double mtime;
              	int16_t tz;
              	uint16_t flags;
              	unsigned char nsuccs, nparents, nmetadata;
              	Py_ssize_t hashwidth = 20;
              	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
              	PyObject *metadata = NULL, *ret = NULL;
              	int i;
              	/* The fixed-size header must fit before any field is read. */
              	if (data + FM1_HEADER_SIZE > dataend) {
              		goto overflow;
              	}
              	*msize = getbe32(data);
              	data += 4;
              	mtime = getbefloat64(data);
              	data += 8;
              	tz = getbeint16(data);
              	data += 2;
              	flags = getbeuint16(data);
              	data += 2;
              	if (flags & USING_SHA_256) {
              		hashwidth = 32;
              	}
              	nsuccs = (unsigned char)(*data++);
              	nparents = (unsigned char)(*data++);
              	nmetadata = (unsigned char)(*data++);
              	/* The declared marker size must not exceed the remaining buffer. */
              	if (databegin + *msize > dataend) {
              		goto overflow;
              	}
              	dataend = databegin + *msize; /* narrow down to marker size */
              	/* Precursor hash. */
              	if (data + hashwidth > dataend) {
              		goto overflow;
              	}
              	prec = PyBytes_FromStringAndSize(data, hashwidth);
              	data += hashwidth;
              	if (prec == NULL) {
              		goto bail;
              	}
              	/* Successor hashes. */
              	if (data + nsuccs * hashwidth > dataend) {
              		goto overflow;
              	}
              	succs = readshas(data, nsuccs, hashwidth);
              	if (succs == NULL) {
              		goto bail;
              	}
              	data += nsuccs * hashwidth;
              	/* Parent hashes are only read for a count of 1 or 2; any other
              	 * count yields None. */
              	if (nparents == 1 || nparents == 2) {
              		if (data + nparents * hashwidth > dataend) {
              			goto overflow;
              		}
              		parents = readshas(data, nparents, hashwidth);
              		if (parents == NULL) {
              			goto bail;
              		}
              		data += nparents * hashwidth;
              	} else {
              		parents = Py_None;
              		Py_INCREF(parents);
              	}
              	/* Metadata: a table of nmetadata (left size, right size) byte pairs,
              	 * immediately followed by the concatenated left/right payloads. */
              	if (data + 2 * nmetadata > dataend) {
              		goto overflow;
              	}
              	meta = data + (2 * nmetadata);
              	metadata = PyTuple_New(nmetadata);
              	if (metadata == NULL) {
              		goto bail;
              	}
              	for (i = 0; i < nmetadata; i++) {
              		PyObject *tmp, *left = NULL, *right = NULL;
              		Py_ssize_t leftsize = (unsigned char)(*data++);
              		Py_ssize_t rightsize = (unsigned char)(*data++);
              		if (meta + leftsize + rightsize > dataend) {
              			goto overflow;
              		}
              		left = PyBytes_FromStringAndSize(meta, leftsize);
              		meta += leftsize;
              		right = PyBytes_FromStringAndSize(meta, rightsize);
              		meta += rightsize;
              		tmp = PyTuple_New(2);
              		if (!left || !right || !tmp) {
              			Py_XDECREF(left);
              			Py_XDECREF(right);
              			Py_XDECREF(tmp);
              			goto bail;
              		}
              		/* The tuples take over the references stored into them. */
              		PyTuple_SET_ITEM(tmp, 0, left);
              		PyTuple_SET_ITEM(tmp, 1, right);
              		PyTuple_SET_ITEM(metadata, i, tmp);
              	}
              	/* The "O" format codes add their own references, so the local
              	 * references are dropped in `bail` on success as well. */
              	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
              	                    (int)tz * 60, parents);
              	goto bail; /* return successfully */
              overflow:
              	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
              bail:
              	Py_XDECREF(prec);
              	Py_XDECREF(succs);
              	Py_XDECREF(metadata);
              	Py_XDECREF(parents);
              	return ret;
              }
              /*
               * Python entry point: fm1readmarkers(data, offset, stop).
               *
               * Parses consecutive markers from `data`, starting at byte `offset` and
               * continuing while the running offset is below `stop`. Returns a new list
               * of marker tuples as produced by fm1readmarker(), or NULL with an
               * exception set.
               */
              static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
              {
              	const char *cursor, *limit;
              	Py_ssize_t buflen, offset, stop;
              	PyObject *result;
              	if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &cursor, &buflen,
              	                      &offset, &stop)) {
              		return NULL;
              	}
              	if (offset < 0) {
              		PyErr_SetString(PyExc_ValueError,
              		                "invalid negative offset in fm1readmarkers");
              		return NULL;
              	}
              	if (stop > buflen) {
              		PyErr_SetString(
              		    PyExc_ValueError,
              		    "stop longer than data length in fm1readmarkers");
              		return NULL;
              	}
              	limit = cursor + buflen;
              	cursor += offset;
              	result = PyList_New(0);
              	if (result == NULL) {
              		return NULL;
              	}
              	while (offset < stop) {
              		uint32_t consumed;
              		int status;
              		PyObject *marker = fm1readmarker(cursor, limit, &consumed);
              		if (marker == NULL) {
              			Py_DECREF(result);
              			return NULL;
              		}
              		/* The list holds its own reference; ours is dropped whether
              		 * or not the append succeeded. */
              		status = PyList_Append(result, marker);
              		Py_DECREF(marker);
              		if (status) {
              			Py_DECREF(result);
              			return NULL;
              		}
              		cursor += consumed;
              		offset += consumed;
              	}
              	return result;
              }
              /* Module docstring handed to the module definition / Py_InitModule3. */
              static char parsers_doc[] = "Efficient content parsing.";
              /* Entry points that are only declared here; their bodies are not in this
               * part of the file (presumably in other translation units). */
              PyObject *encodedir(PyObject *self, PyObject *args);
              PyObject *pathencode(PyObject *self, PyObject *args);
              PyObject *lowerencode(PyObject *self, PyObject *args);
              PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
              /* Method table of the `parsers` module: Python-visible name, C function,
               * calling convention and docstring. The {NULL, NULL} entry terminates the
               * table. */
              static PyMethodDef methods[] = {
                  {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
                  {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
                  {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
                   "parse a revlog index\n"},
                  {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
                  {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
                  {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
                  {"dict_new_presized", dict_new_presized, METH_VARARGS,
                   "construct a dict with an expected size\n"},
                  {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
                   "make file foldmap\n"},
                  {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
                   "escape a UTF-8 byte string to JSON (fast path)\n"},
                  {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
                  {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
                  {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
                  {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
                   "parse v1 obsolete markers\n"},
                  {NULL, NULL}};
              /* Initialisers of the other components registered into this module; they
               * are only declared here and are called from module_init(). */
              void dirs_module_init(PyObject *mod);
              void manifest_module_init(PyObject *mod);
              void revlog_module_init(PyObject *mod);
              /* Exported to Python as the module attribute "version" by module_init(). */
              static const int version = 20;
              /*
               * Populate the freshly created module `mod`: add the "version" and
               * "versionerrortext" constants, run the initialisers of the dirs, manifest
               * and revlog components, and register the DirstateItem type.
               *
               * `mod` must be a valid module object.
               */
              static void module_init(PyObject *mod)
              {
              	PyModule_AddIntConstant(mod, "version", version);
              	/* This module constant has two purposes.  First, it lets us unit test
              	 * the ImportError raised without hard-coding any error text.  This
              	 * means we can change the text in the future without breaking tests,
              	 * even across changesets without a recompile.  Second, its presence
              	 * can be used to determine whether the version-checking logic is
              	 * present, which also helps in testing across changesets without a
              	 * recompile.  Note that this means the pure-Python version of parsers
              	 * should not have this module constant. */
              	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
              	dirs_module_init(mod);
              	manifest_module_init(mod);
              	revlog_module_init(mod);
              	if (PyType_Ready(&dirstateItemType) < 0) {
              		return;
              	}
              	Py_INCREF(&dirstateItemType);
              	if (PyModule_AddObject(mod, "DirstateItem",
              	                       (PyObject *)&dirstateItemType) < 0) {
              		/* PyModule_AddObject only steals the reference on success;
              		 * give back the one taken above when it fails. */
              		Py_DECREF(&dirstateItemType);
              	}
              }
              static int check_python_version(void)
              {
              	PyObject *sys = PyImport_ImportModule("sys"), *ver;
              	long hexversion;
              	if (!sys) {
              		return -1;
              	}
              	ver = PyObject_GetAttrString(sys, "hexversion");
              	Py_DECREF(sys);
              	if (!ver) {
              		return -1;
              	}
              	hexversion = PyInt_AsLong(ver);
              	Py_DECREF(ver);
              	/* sys.hexversion is a 32-bit number by default, so the -1 case
              	 * should only occur in unusual circumstances (e.g. if sys.hexversion
              	 * is manually set to an invalid value). */
              	if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
              		PyErr_Format(PyExc_ImportError,
              		             "%s: The Mercurial extension "
              		             "modules were compiled with Python " PY_VERSION
              		             ", but "
              		             "Mercurial is currently using Python with "
              		             "sys.hexversion=%ld: "
              		             "Python %s\n at: %s",
              		             versionerrortext, hexversion, Py_GetVersion(),
              		             Py_GetProgramFullPath());
              		return -1;
              	}
              	return 0;
              }
              #ifdef IS_PY3K
              static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
                                                          parsers_doc, -1, methods};
              PyMODINIT_FUNC PyInit_parsers(void)
              {
              	PyObject *mod;
              	if (check_python_version() == -1)
              		return NULL;
              	mod = PyModule_Create(&parsers_module);
              	module_init(mod);
              	return mod;
              }
              #else
              PyMODINIT_FUNC initparsers(void)
              {
              	PyObject *mod;
              	if (check_python_version() == -1) {
              		return;
              	}
              	mod = Py_InitModule3("parsers", methods, parsers_doc);
              	module_init(mod);
              }
              #endif

mercurial/cext/util.h

0 +10 -9

              /*
               util.h - utility functions for interfacing with the various python APIs.
               This software may be used and distributed according to the terms of
               the GNU General Public License, incorporated herein by reference.
              */
              #ifndef _HG_UTIL_H_
              #define _HG_UTIL_H_
              #include "compat.h"
              #if PY_MAJOR_VERSION >= 3
              #define IS_PY3K
              #endif
              /* helper to switch things like string literal depending on Python version */
              #ifdef IS_PY3K
              #define PY23(py2, py3) py3
              #else
              #define PY23(py2, py3) py2
              #endif
              /* clang-format off */
              typedef struct {
              	PyObject_HEAD
-             	unsigned char flags;
+             	int flags;
              	int mode;
              	int size;
              	int mtime;
              } dirstateItemObject;
              /* clang-format on */
-             static const unsigned char dirstate_flag_wc_tracked = 1;
-             static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
-             static const unsigned char dirstate_flag_p2_info = 1 << 2;
-             static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
-             static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
-             static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
-             static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
-             static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
+             static const int dirstate_flag_wc_tracked = 1;
+             static const int dirstate_flag_p1_tracked = 1 << 1;
+             static const int dirstate_flag_p2_info = 1 << 2;
+             static const int dirstate_flag_has_meaningful_data = 1 << 3;
+             static const int dirstate_flag_has_file_mtime = 1 << 4;
+             static const int dirstate_flag_has_directory_mtime = 1 << 5;
+             static const int dirstate_flag_mode_exec_perm = 1 << 6;
+             static const int dirstate_flag_mode_is_symlink = 1 << 7;
+             static const int dirstate_flag_expected_state_is_modified = 1 << 8;
              extern PyTypeObject dirstateItemType;
              #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
              #ifndef MIN
              #define MIN(a, b) (((a) < (b)) ? (a) : (b))
              #endif
              /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
              #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
              #define true 1
              #define false 0
              typedef unsigned char bool;
              #else
              #include <stdbool.h>
              #endif
              /* Create a dict presized so that `expected_size` entries should fit without
                 an early resize. Returns whatever _PyDict_NewPresized returns. */
              static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
              {
              	/* _PyDict_NewPresized takes a "minused" parameter but actually
              	   creates a dictionary sized to the nearest larger power of two
              	   (e.g. minused = 1000 gives a size of 1024). That can often exceed
              	   the maximum load factor Python's dict expects (= 2/3), which
              	   would trigger a resize as soon as the threshold is crossed.
              	   Ask for at least 3/2 of the expected size to avoid that. */
              	const Py_ssize_t minused = ((1 + expected_size) / 2) * 3;
              	return _PyDict_NewPresized(minused);
              }
              /* Convert a PyInt or PyLong to a long, storing the result in *out.
                 Returns false on error, in which case an exception has already been
                 set. */
              static inline bool pylong_to_long(PyObject *pylong, long *out)
              {
              	const long converted = PyLong_AsLong(pylong);
              	*out = converted;
              	/* Only -1 can signal an error, so PyErr_Occurred is consulted
              	 * solely in that case; any other value is a success right away. */
              	return converted != -1 || PyErr_Occurred() == NULL;
              }
              #endif /* _HG_UTIL_H_ */

mercurial/helptext/internals/dirstate-v2.txt

0 +20 -4

              The *dirstate* is what Mercurial uses internally to track
              the state of files in the working directory,
              such as set by commands like `hg add` and `hg rm`.
              It also contains some cached data that help make `hg status` faster.
              The name refers both to `.hg/dirstate` on the filesystem
              and the corresponding data structure in memory while a Mercurial process
              is running.
              The original file format, retroactively dubbed `dirstate-v1`,
              is described at https://www.mercurial-scm.org/wiki/DirState.
              It is made of a flat sequence of unordered variable-size entries,
              so accessing any information in it requires parsing all of it.
              Similarly, saving changes requires rewriting the entire file.
              The newer `dirstate-v2` file format is designed to fix these limitations
              and make `hg status` faster.
              User guide
              ==========
              Compatibility
              -------------
              The file format is experimental and may still change.
              Different versions of Mercurial may not be compatible with each other
              when working on a local repository that uses this format.
              When using an incompatible version with the experimental format,
              anything can happen including data corruption.
              Since the dirstate is entirely local and not relevant to the wire protocol,
              `dirstate-v2` does not affect compatibility with remote Mercurial versions.
              When `share-safe` is enabled, different repositories sharing the same store
              can use different dirstate formats.
              Enabling `dirstate-v2` for new local repositories
              -------------------------------------------------
              When creating a new local repository such as with `hg init` or `hg clone`,
              the `exp-dirstate-v2` boolean in the `format` configuration section
              controls whether to use this file format.
              This is disabled by default as of this writing.
              To enable it for a single repository, run for example::
                  $ hg init my-project --config format.exp-dirstate-v2=1
              Checking the format of an existing local repository
              ---------------------------------------------------
              The `debugformat` command prints information about
              which of multiple optional formats are used in the current repository,
              including `dirstate-v2`::
                  $ hg debugformat
                  format-variant     repo
                  fncache:            yes
                  dirstate-v2:        yes
                  […]
              Upgrading or downgrading an existing local repository
              -----------------------------------------------------
              The `debugupgrade` command does various upgrades or downgrades
              on a local repository
              based on the current Mercurial version and on configuration.
              The same `format.exp-dirstate-v2` configuration is used again.
              Example to upgrade::
                  $ hg debugupgrade --config format.exp-dirstate-v2=1
              Example to downgrade to `dirstate-v1`::
                  $ hg debugupgrade --config format.exp-dirstate-v2=0
              Both of these commands do nothing but print a list of proposed changes,
              which may include changes unrelated to the dirstate.
              Those other changes are controlled by their own configuration keys.
              Add `--run` to a command to actually apply the proposed changes.
              Backups of `.hg/requires` and `.hg/dirstate` are created
              in a `.hg/upgradebackup.*` directory.
              If something goes wrong, restoring those files should undo the change.
              Note that upgrading affects compatibility with older versions of Mercurial
              as noted above.
              This can be relevant when a repository’s files are on a USB drive
              or some other removable media, or shared over the network, etc.
              Internal filesystem representation
              ==================================
              Requirements file
              -----------------
              The `.hg/requires` file indicates which of various optional file formats
              are used by a given repository.
              Mercurial aborts when seeing a requirement it does not know about,
              which prevents older versions from accidentally messing up a repository
              that uses a format that was introduced later.
              For versions that do support a format, the presence or absence of
              the corresponding requirement indicates whether to use that format.
              When the file contains a `exp-dirstate-v2` line,
              the `dirstate-v2` format is used.
              With no such line `dirstate-v1` is used.
              High level description
              ----------------------
              Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
              in `dirstate-v2` that file is a "docket" file
              that only contains some metadata
              and points to a separate data file named `.hg/dirstate.{ID}`,
              where `{ID}` is a random identifier.
              This separation allows making data files append-only
              and therefore safer to memory-map.
              Creating a new data file (occasionally to clean up unused data)
              can be done with a different ID
              without disrupting another Mercurial process
              that could still be using the previous data file.
              Both files have a format designed to reduce the need for parsing,
              by using fixed-size binary components as much as possible.
              For data that is not fixed-size,
              references to other parts of a file can be made by storing "pseudo-pointers":
              integers counted in bytes from the start of a file.
              For read-only access no data structure is needed,
              only a bytes buffer (possibly memory-mapped directly from the filesystem)
              with specific parts read on demand.
              The data file contains "nodes" organized in a tree.
              Each node represents a file or directory inside the working directory
              or its parent changeset.
              This tree has the same structure as the filesystem,
              so a node representing a directory has child nodes representing
              the files and subdirectories contained directly in that directory.
              The docket file format
              ----------------------
              This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
              and `mercurial/dirstateutils/docket.py`.
              Components of the docket file are found at fixed offsets,
              counted in bytes from the start of the file:
              * Offset 0:
                The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
                This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
                although it is not strictly necessary
                since `.hg/requires` determines which format to use.
              * Offset 12:
                The changeset node ID on the first parent of the working directory,
                as up to 32 binary bytes.
                If a node ID is shorter (20 bytes for SHA-1),
                it is start-aligned and the rest of the bytes are set to zero.
              * Offset 44:
                The changeset node ID on the second parent of the working directory,
                or all zeros if there isn’t one.
                Also 32 binary bytes.
              * Offset 76:
                Tree metadata on 44 bytes, described below.
                Its separation in this documentation from the rest of the docket
                reflects a detail of the current implementation.
                Since tree metadata is also made of fields at fixed offsets, those could
                be inlined here by adding 76 bytes to each offset.
              * Offset 120:
                The used size of the data file, as a 32-bit big-endian integer.
                The actual size of the data file may be larger
                (if another Mercurial process is appending to it
                but has not updated the docket yet).
                That extra data must be ignored.
              * Offset 124:
                The length of the data file identifier, as a 8-bit integer.
              * Offset 125:
                The data file identifier.
              * Any additional data is currently ignored, and dropped when updating the file.
              Tree metadata in the docket file
              --------------------------------
              Tree metadata is similarly made of components at fixed offsets.
              These offsets are counted in bytes from the start of tree metadata,
              which is 76 bytes after the start of the docket file.
              This metadata can be thought of as the singular root of the tree
              formed by nodes in the data file.
              * Offset 0:
                Pseudo-pointer to the start of root nodes,
                counted in bytes from the start of the data file,
                as a 32-bit big-endian integer.
                These nodes describe files and directories found directly
                at the root of the working directory.
              * Offset 4:
                Number of root nodes, as a 32-bit big-endian integer.
              * Offset 8:
                Total number of nodes in the entire tree that "have a dirstate entry",
                as a 32-bit big-endian integer.
                Those nodes represent files that would be present at all in `dirstate-v1`.
                This is typically less than the total number of nodes.
                This counter is used to implement `len(dirstatemap)`.
              * Offset 12:
                Number of nodes in the entire tree that have a copy source,
                as a 32-bit big-endian integer.
                At the next commit, these files are recorded
                as having been copied or moved/renamed from that source.
                (A move is recorded as a copy and separate removal of the source.)
                This counter is used to implement `len(dirstatemap.copymap)`.
              * Offset 16:
                An estimation of how many bytes of the data file
                (within its used size) are unused, as a 32-bit big-endian integer.
                When appending to an existing data file,
                some existing nodes or paths can be unreachable from the new root
                but they still take up space.
                This counter is used to decide when to write a new data file from scratch
                instead of appending to an existing one,
                in order to get rid of that unreachable data
                and avoid unbounded file size growth.
              * Offset 20:
                These four bytes are currently ignored
                and reset to zero when updating a docket file.
                This is an attempt at forward compatibility:
                future Mercurial versions could use this as a bit field
                to indicate that a dirstate has additional data or constraints.
                Finding a dirstate file with the relevant bit unset indicates that
                it was written by a then-older version
                which is not aware of that future change.
              * Offset 24:
                Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
                When present, the hash is of ignore patterns
                that were used for some previous run of the `status` algorithm.
              * (Offset 44: end of tree metadata)
              Optional hash of ignore patterns
              --------------------------------
              The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
              has been optimized such that its run time is dominated by calls
              to `stat` for reading the filesystem metadata of a file or directory,
              and to `readdir` for listing the contents of a directory.
              In some cases the algorithm can skip calls to `readdir`
              (saving significant time)
              because the dirstate already contains enough of the relevant information
              to build the correct `status` results.
              The default configuration of `hg status` is to list unknown files
              but not ignored files.
              In this case, it matters for the `readdir`-skipping optimization
              if a given file used to be ignored but became unknown
              because `.hgignore` changed.
              To detect the possibility of such a change,
              the tree metadata contains an optional hash of all ignore patterns.
              We define:
              * "Root" ignore files as:
                - `.hgignore` at the root of the repository if it exists
                - And all files from `ui.ignore.*` config.
                This set of files is sorted by the string representation of their path.
              * The "expanded contents" of an ignore file is the byte string made
                by the concatenation of its contents followed by the "expanded contents"
                of other files included with `include:` or `subinclude:` directives,
                in inclusion order. This definition is recursive, as included files can
                themselves include more files.
              This hash is defined as the SHA-1 of the concatenation (in sorted
              order) of the "expanded contents" of each "root" ignore file.
              (Note that computing this does not require actually concatenating
              into a single contiguous byte sequence.
              Instead a SHA-1 hasher object can be created
              and fed separate chunks one by one.)
              The data file format
              --------------------
              This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
              and `mercurial/dirstateutils/v2.py`.
              The data file contains two types of data: paths and nodes.
              Paths and nodes can be organized in any order in the file, except that sibling
              nodes must be next to each other and sorted by their path.
              Contiguity lets the parent refer to them all
              by their count and a single pseudo-pointer,
              instead of storing one pseudo-pointer per child node.
              Sorting allows using binary search to find a child node with a given name
              in `O(log(n))` byte sequence comparisons.
              The current implementation writes paths and child nodes before a given node
              for ease of figuring out the value of pseudo-pointers by the time they are to be
              written, but this is not an obligation and readers must not rely on it.
              A path is stored as a byte string anywhere in the file, without delimiter.
              It is referred to by one or more nodes by a pseudo-pointer to its start, and its
              length in bytes. Since there is no delimiter,
              when a path is a substring of another the same bytes could be reused,
              although the implementation does not exploit this as of this writing.
              A node is stored on 44 bytes with components at fixed offsets. Paths and
              child nodes relevant to a node are stored externally and referenced through
              pseudo-pointers.
              All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
              counting bytes from the start of the data file. Path lengths and positions
              are 16-bit integers, also counted in bytes.
              Node components are:
              * Offset 0:
                Pseudo-pointer to the full path of this node,
                from the working directory root.
              * Offset 4:
                Length of the full path.
              * Offset 6:
                Position of the last `/` path separator within the full path,
                in bytes from the start of the full path,
                or zero if there isn’t one.
                The part of the full path after this position is the "base name".
                Since sibling nodes have the same parent, only their base names vary
                and need to be considered when doing binary search to find a given path.
              * Offset 8:
                Pseudo-pointer to the "copy source" path for this node,
                or zero if there is no copy source.
              * Offset 12:
                Length of the copy source path, or zero if there isn’t one.
              * Offset 14:
                Pseudo-pointer to the start of child nodes.
              * Offset 18:
                Number of child nodes, as a 32-bit integer.
                They occupy 44 times this number of bytes
                (not counting space for paths, and further descendants).
              * Offset 22:
                Number as a 32-bit integer of descendant nodes in this subtree,
                not including this node itself,
                that "have a dirstate entry".
                Those nodes represent files that would be present at all in `dirstate-v1`.
                This is typically less than the total number of descendants.
                This counter is used to implement `has_dir`.
              * Offset 26:
                Number as a 32-bit integer of descendant nodes in this subtree,
                not including this node itself,
                that represent files tracked in the working directory.
                (For example, `hg rm` makes a file untracked.)
                This counter is used to implement `has_tracked_dir`.
              * Offset 30:
                A `flags` field that packs some boolean values as bits of a 16-bit integer.
                Starting from least-significant, bit masks are::
                  WDIR_TRACKED = 1 << 0
                  P1_TRACKED = 1 << 1
                  P2_INFO = 1 << 2
                  HAS_MODE_AND_SIZE = 1 << 3
                  HAS_FILE_MTIME = 1 << 4
                  HAS_DIRECTORY_MTIME = 1 << 5
                  MODE_EXEC_PERM = 1 << 6
                  MODE_IS_SYMLINK = 1 << 7
+                 EXPECTED_STATE_IS_MODIFIED = 1 << 8
                The meaning of each bit is described below.
                Other bits are unset.
                They may be assigned meaning in the future,
                with the limitation that Mercurial versions that pre-date such meaning
                will always reset those bits to unset when writing nodes.
                (A new node is written for any mutation in its subtree,
                leaving the bytes of the old node unreachable
                until the data file is rewritten entirely.)
              * Offset 32:
                A `size` field described below, as a 32-bit integer.
                Unlike in dirstate-v1, negative values are not used.
              * Offset 36:
                The seconds component of an `mtime` field described below,
                as a 32-bit integer.
                Unlike in dirstate-v1, negative values are not used.
                When `mtime` is used, this is number of seconds since the Unix epoch
                truncated to its lower 31 bits.
              * Offset 40:
                The nanoseconds component of an `mtime` field described below,
                as a 32-bit integer.
                When `mtime` is used,
                this is the number of nanoseconds since `mtime.seconds`,
                always strictly less than one billion.
                This may be zero if more precision is not available.
                (This can happen because of limitations in any of Mercurial, Python,
                libc, the operating system, …)
                When comparing two mtimes and either has this component set to zero,
                the sub-second precision of both should be ignored.
                False positives when checking mtime equality due to clock resolution
                are always possible and the status algorithm needs to deal with them,
                but having too many false negatives could be harmful too.
              * (Offset 44: end of this node)
              The meaning of the boolean values packed in `flags` is:
              `WDIR_TRACKED`
                  Set if the working directory contains a tracked file at this node’s path.
                  This is typically set and unset by `hg add` and `hg rm`.
              `P1_TRACKED`
                  Set if the working directory’s first parent changeset
                  (whose node identifier is found in tree metadata)
                  contains a tracked file at this node’s path.
                  This is a cache to reduce manifest lookups.
              `P2_INFO`
                  Set if the file has been involved in some merge operation.
                  Either because it was actually merged,
                  or because the version in the second parent (p2) was ahead,
                  or because some rename moved it there.
                  In either case `hg status` will want it displayed as modified.
              Files that would be mentioned at all in the `dirstate-v1` file format
              have a node with at least one of the above three bits set in `dirstate-v2`.
              Let’s call these files "tracked anywhere",
              and "untracked" the nodes with all three of these bits unset.
              Untracked nodes are typically for directories:
              they hold child nodes and form the tree structure.
              Additional untracked nodes may also exist.
              Although implementations should strive to clean up nodes
              that are entirely unused, other untracked nodes may also exist.
              For example, a future version of Mercurial might in some cases
              add nodes for untracked files or/and ignored files in the working directory
              in order to optimize `hg status`
              by enabling it to skip `readdir` in more cases.
              `HAS_MODE_AND_SIZE`
                  Must be unset for untracked nodes.
                  For files tracked anywhere, if this is set:
                  - The `size` field is the expected file size,
-                   in bytes truncated its lower to 31 bits,
-                   for the file to be clean.
+                   in bytes, truncated to its lower 31 bits.
                  - The expected execute permission for the file’s owner
                    is given by `MODE_EXEC_PERM`
                  - The expected file type is given by `MODE_IS_SYMLINK`:
                    a symbolic link if set, or a normal file if unset.
                  If this is unset the expected size, permission, and file type are unknown.
                  The `size` field is unused (set to zero).
              `HAS_FILE_MTIME`
                  Must be unset for untracked nodes.
                  If this and `HAS_DIRECTORY_MTIME` are both unset,
                  the `mtime` field is unused (set to zero).
-                 If this is set, `mtime` is the modification time
-                 expected for the file to be considered clean.
+                 If this is set, `mtime` is the expected modification time.
              `HAS_DIRECTORY_MTIME`
                  Must be unset for files tracked anywhere.
                  If this and `HAS_FILE_MTIME` are both unset,
                  the `mtime` field is unused (set to zero).
                  If this is set, at some point,
                  this path in the working directory was observed:
                  - To be a directory
                  - With the modification time given in `mtime`
                  - That time was already strictly in the past when observed,
                    meaning that later changes cannot happen in the same clock tick
                    and must cause a different modification time
                    (unless the system clock jumps back and we get unlucky,
                    which is not impossible but deemed unlikely enough).
                  - All direct children of this directory
                    (as returned by `std::fs::read_dir`)
                    either have a corresponding dirstate node,
                    or are ignored by ignore patterns whose hash is in tree metadata.
                  This means that if `std::fs::symlink_metadata` later reports
                  the same modification time
                  and ignored patterns haven’t changed,
                  a run of status that is not listing ignored files
                  can skip calling `std::fs::read_dir` again for this directory,
                  and iterate child dirstate nodes instead.
              `MODE_EXEC_PERM`
                  Must be unset if `HAS_MODE_AND_SIZE` is unset.
                  If `HAS_MODE_AND_SIZE` is set,
                  this indicates whether the file’s owner is expected
                  to have execute permission.
              `MODE_IS_SYMLINK`
                  Must be unset if `HAS_MODE_AND_SIZE` is unset.
                  If `HAS_MODE_AND_SIZE` is set,
                  this indicates whether the file is expected to be a symlink
                  as opposed to a normal file.
+             `EXPECTED_STATE_IS_MODIFIED`
+                 Must be unset for untracked nodes.
+                 For:
+                 - a file tracked anywhere
+                 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
+                 - if that metadata matches
+                   metadata found in the working directory with `stat`
+                 This bit indicates the status of the file.
+                 If set, the status is modified. If unset, it is clean.
+                 In cases where `hg status` needs to read the contents of a file
+                 because metadata is ambiguous, this bit lets it record the result
+                 if the result is modified so that a future run of `hg status`
+                 does not need to do the same again.
+                 It is valid to never set this bit,
+                 and consider expected metadata ambiguous if it is set.

mercurial/pure/parsers.py

0 +10 -1

              # parsers.py - Python implementation of parsers.c
              #
              # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              from __future__ import absolute_import
              import stat
              import struct
              import zlib
              from ..node import (
                  nullrev,
                  sha1nodeconstants,
              )
              from ..thirdparty import attr
              from .. import (
                  error,
                  pycompat,
                  revlogutils,
                  util,
              )
              from ..revlogutils import nodemap as nodemaputil
              from ..revlogutils import constants as revlog_constants
              # Short module-level aliases for helpers used by the parsers below.
              stringio = pycompat.bytesio
              _pack = struct.pack
              _unpack = struct.unpack
              _compress = zlib.compress
              _decompress = zlib.decompress
              # a special value used internally for `size` if the file comes from the other parent
              FROM_P2 = -2
              # a special value used internally for `size` if the file is modified/merged/added
              NONNORMAL = -1
              # a special value used internally for `time` if the time is ambiguous
              AMBIGUOUS_TIME = -1
              # Bit masks for the `flags` field inside a node in the dirstate-v2 file
              # format. The highest mask below is `1 << 8`, so the field is wider than
              # a single byte.
              DIRSTATE_V2_WDIR_TRACKED = 1 << 0
              DIRSTATE_V2_P1_TRACKED = 1 << 1
              DIRSTATE_V2_P2_INFO = 1 << 2
              DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
              DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
              # NOTE(review): the name below is spelled "DIRCTORY"; it is left untouched
              # here because other code may refer to it -- confirm before renaming.
              _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
              DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
              DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
+             DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
              @attr.s(slots=True, init=False)
              class DirstateItem(object):
                  """represent a dirstate entry
                  It hold multiple attributes
                  # about file tracking
                  - wc_tracked: is the file tracked by the working copy
                  - p1_tracked: is the file tracked in working copy first parent
                  - p2_info: the file has been involved in some merge operation. Either
                             because it was actually merged, or because the p2 version was
                             ahead, or because some rename moved it there. In either case
                             `hg status` will want it displayed as modified.
                  # about the file state expected from p1 manifest:
                  - mode: the file mode in p1
                  - size: the file size in p1
                  These value can be set to None, which mean we don't have a meaningful value
                  to compare with. Either because we don't really care about them as there
                  `status` is known without having to look at the disk or because we don't
                  know these right now and a full comparison will be needed to find out if
                  the file is clean.
                  # about the file state on disk last time we saw it:
                  - mtime: the last known clean mtime for the file.
                  This value can be set to None if no cachable state exist. Either because we
                  do not care (see previous section) or because we could not cache something
                  yet.
                  """
                  _wc_tracked = attr.ib()
                  _p1_tracked = attr.ib()
                  _p2_info = attr.ib()
                  _mode = attr.ib()
                  _size = attr.ib()
                  _mtime = attr.ib()
                  def __init__(
                      self,
                      wc_tracked=False,
                      p1_tracked=False,
                      p2_info=False,
                      has_meaningful_data=True,
                      has_meaningful_mtime=True,
                      parentfiledata=None,
                  ):
                      self._wc_tracked = wc_tracked
                      self._p1_tracked = p1_tracked
                      self._p2_info = p2_info
                      self._mode = None
                      self._size = None
                      self._mtime = None
                      if parentfiledata is None:
                          has_meaningful_mtime = False
                          has_meaningful_data = False
                      if has_meaningful_data:
                          self._mode = parentfiledata[0]
                          self._size = parentfiledata[1]
                      if has_meaningful_mtime:
                          self._mtime = parentfiledata[2]
                  @classmethod
                  def from_v2_data(cls, flags, size, mtime):
                      """Build a new DirstateItem object from V2 data"""
                      has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
+                     has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
                      mode = None
+                     if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
+                         # we do not have support for this flag in the code yet,
+                         # force a lookup for this file.
+                         has_mode_size = False
+                         has_meaningful_mtime = False
                      if has_mode_size:
                          assert stat.S_IXUSR == 0o100
                          if flags & DIRSTATE_V2_MODE_EXEC_PERM:
                              mode = 0o755
                          else:
                              mode = 0o644
                          if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
                              mode |= stat.S_IFLNK
                          else:
                              mode |= stat.S_IFREG
                      return cls(
                          wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
                          p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
                          p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
                          has_meaningful_data=has_mode_size,
-                         has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
+                         has_meaningful_mtime=has_meaningful_mtime,
                          parentfiledata=(mode, size, mtime),
                      )
                  @classmethod
                  def from_v1_data(cls, state, mode, size, mtime):
                      """Build a new DirstateItem object from V1 data
                      Since the dirstate-v1 format is frozen, the signature of this function
                      is not expected to change, unlike the __init__ one.
                      """
                      if state == b'm':
                          return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
                      elif state == b'a':
                          return cls(wc_tracked=True)
                      elif state == b'r':
                          if size == NONNORMAL:
                              p1_tracked = True
                              p2_info = True
                          elif size == FROM_P2:
                              p1_tracked = False
                              p2_info = True
                          else:
                              p1_tracked = True
                              p2_info = False
                          return cls(p1_tracked=p1_tracked, p2_info=p2_info)
                      elif state == b'n':
                          if size == FROM_P2:
                              return cls(wc_tracked=True, p2_info=True)
                          elif size == NONNORMAL:
                              return cls(wc_tracked=True, p1_tracked=True)
                          elif mtime == AMBIGUOUS_TIME:
                              return cls(
                                  wc_tracked=True,
                                  p1_tracked=True,
                                  has_meaningful_mtime=False,
                                  parentfiledata=(mode, size, 42),
                              )
                          else:
                              return cls(
                                  wc_tracked=True,
                                  p1_tracked=True,
                                  parentfiledata=(mode, size, mtime),
                              )
                      else:
                          raise RuntimeError(b'unknown state: %s' % state)
                  def set_possibly_dirty(self):
                      """Mark a file as "possibly dirty"
                      This means the next status call will have to actually check its content
                      to make sure it is correct.
                      """
                      self._mtime = None
                  def set_clean(self, mode, size, mtime):
                      """mark a file as "clean" cancelling potential "possibly dirty call"
                      Note: this function is a descendant of `dirstate.normal` and is
                      currently expected to be call on "normal" entry only. There are not
                      reason for this to not change in the future as long as the ccode is
                      updated to preserve the proper state of the non-normal files.
                      """
                      self._wc_tracked = True
                      self._p1_tracked = True
                      self._mode = mode
                      self._size = size
                      self._mtime = mtime
                  def set_tracked(self):
                      """mark a file as tracked in the working copy
                      This will ultimately be called by command like `hg add`.
                      """
                      self._wc_tracked = True
                      # `set_tracked` is replacing various `normallookup` call. So we mark
                      # the files as needing lookup
                      #
                      # Consider dropping this in the future in favor of something less broad.
                      self._mtime = None
                  def set_untracked(self):
                      """mark a file as untracked in the working copy
                      This will ultimately be called by command like `hg remove`.
                      """
                      self._wc_tracked = False
                      self._mode = None
                      self._size = None
                      self._mtime = None
                  def drop_merge_data(self):
                      """remove all "merge-only" from a DirstateItem
                      This is to be call by the dirstatemap code when the second parent is dropped
                      """
                      if self._p2_info:
                          self._p2_info = False
                          self._mode = None
                          self._size = None
                          self._mtime = None
                  @property
                  def mode(self):
                      return self.v1_mode()
                  @property
                  def size(self):
                      return self.v1_size()
                  @property
                  def mtime(self):
                      return self.v1_mtime()
                  @property
                  def state(self):
                      """
                      States are:
                        n  normal
                        m  needs merging
                        r  marked for removal
                        a  marked for addition
                      XXX This "state" is a bit obscure and mostly a direct expression of the
                      dirstatev1 format. It would make sense to ultimately deprecate it in
                      favor of the more "semantic" attributes.
                      """
                      if not self.any_tracked:
                          return b'?'
                      return self.v1_state()
                  @property
                  def tracked(self):
                      """True is the file is tracked in the working copy"""
                      return self._wc_tracked
                  @property
                  def any_tracked(self):
                      """True is the file is tracked anywhere (wc or parents)"""
                      return self._wc_tracked or self._p1_tracked or self._p2_info
                  @property
                  def added(self):
                      """True if the file has been added"""
                      return self._wc_tracked and not (self._p1_tracked or self._p2_info)
                  @property
                  def maybe_clean(self):
                      """True if the file has a chance to be in the "clean" state"""
                      if not self._wc_tracked:
                          return False
                      elif not self._p1_tracked:
                          return False
                      elif self._p2_info:
                          return False
                      return True
                  @property
                  def p1_tracked(self):
                      """True if the file is tracked in the first parent manifest"""
                      return self._p1_tracked
                  @property
                  def p2_info(self):
                      """True if the file needed to merge or apply any input from p2
                      See the class documentation for details.
                      """
                      return self._wc_tracked and self._p2_info
                  @property
                  def removed(self):
                      """True if the file has been removed"""
                      return not self._wc_tracked and (self._p1_tracked or self._p2_info)
                  def v2_data(self):
                      """Returns (flags, mode, size, mtime) for v2 serialization"""
                      flags = 0
                      if self._wc_tracked:
                          flags |= DIRSTATE_V2_WDIR_TRACKED
                      if self._p1_tracked:
                          flags |= DIRSTATE_V2_P1_TRACKED
                      if self._p2_info:
                          flags |= DIRSTATE_V2_P2_INFO
                      if self._mode is not None and self._size is not None:
                          flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
                          if self.mode & stat.S_IXUSR:
                              flags |= DIRSTATE_V2_MODE_EXEC_PERM
                          if stat.S_ISLNK(self.mode):
                              flags |= DIRSTATE_V2_MODE_IS_SYMLINK
                      if self._mtime is not None:
                          flags |= DIRSTATE_V2_HAS_FILE_MTIME
                      return (flags, self._size or 0, self._mtime or 0)
                  def v1_state(self):
                      """return a "state" suitable for v1 serialization"""
                      if not self.any_tracked:
                          # the object has no state to record, this is -currently-
                          # unsupported
                          raise RuntimeError('untracked item')
                      elif self.removed:
                          return b'r'
                      elif self._p1_tracked and self._p2_info:
                          return b'm'
                      elif self.added:
                          return b'a'
                      else:
                          return b'n'
                  def v1_mode(self):
                      """return a "mode" suitable for v1 serialization"""
                      return self._mode if self._mode is not None else 0
                  def v1_size(self):
                      """return a "size" suitable for v1 serialization"""
                      if not self.any_tracked:
                          # the object has no state to record, this is -currently-
                          # unsupported
                          raise RuntimeError('untracked item')
                      elif self.removed and self._p1_tracked and self._p2_info:
                          return NONNORMAL
                      elif self._p2_info:
                          return FROM_P2
                      elif self.removed:
                          return 0
                      elif self.added:
                          return NONNORMAL
                      elif self._size is None:
                          return NONNORMAL
                      else:
                          return self._size
                  def v1_mtime(self):
                      """Return the "mtime" to serialize in dirstate-v1.

                      Removed items yield 0.  AMBIGUOUS_TIME is used when no mtime is
                      recorded, when the item carries p2 information, or when it is not
                      tracked in p1.  Otherwise the recorded mtime is returned.  An item
                      that is not tracked anywhere raises RuntimeError.
                      """
                      if not self.any_tracked:
                          # nothing to record for such an item; serializing it is
                          # -currently- unsupported
                          raise RuntimeError('untracked item')
                      if self.removed:
                          return 0
                      if self._mtime is None or self._p2_info or not self._p1_tracked:
                          return AMBIGUOUS_TIME
                      return self._mtime
                  def need_delay(self, now):
                      """True if the stored mtime would be ambiguous with the current time

                      Only items whose v1 state is b'n' are considered; for those the
                      v1 mtime is compared with `now`.
                      """
                      if self.v1_state() != b'n':
                          return False
                      return self.v1_mtime() == now
              def gettype(q):
                  """Return the low 16 bits of `q` as an int."""
                  low_bits = q & 0xFFFF
                  return int(low_bits)
              class BaseIndexObject(object):
                  """Common behavior of the pure-Python revlog index objects.

                  The index is seen as `_lgt` entries read from `_data` followed by
                  the entries appended in memory (`_extra`).  Subclasses are expected
                  to set `_data`, `_lgt` and `_extra` and to implement
                  `_calculate_index`.
                  """

                  # Can I be passed to an algorithm implemented in Rust ?
                  rust_ext_compat = 0
                  # Format of an index entry according to Python's `struct` language
                  index_format = revlog_constants.INDEX_ENTRY_V1
                  # Size of a C unsigned long long int, platform independent
                  big_int_size = struct.calcsize(b'>Q')
                  # Size of a C long int, platform independent
                  int_size = struct.calcsize(b'>i')
                  # An empty index entry, used as a default value to be overridden, or nullrev
                  null_item = (
                      0,
                      0,
                      0,
                      -1,
                      -1,
                      -1,
                      -1,
                      sha1nodeconstants.nullid,
                      0,
                      0,
                      revlog_constants.COMP_MODE_INLINE,
                      revlog_constants.COMP_MODE_INLINE,
                  )

                  @util.propertycache
                  def entry_size(self):
                      """size in bytes of one packed entry for `index_format`"""
                      return self.index_format.size

                  @property
                  def nodemap(self):
                      """deprecated accessor to the node -> rev mapping"""
                      msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
                      util.nouideprecwarn(msg, b'5.3', stacklevel=2)
                      return self._nodemap

                  @util.propertycache
                  def _nodemap(self):
                      """node -> rev mapping built from every entry of the index"""
                      nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
                      for r in range(0, len(self)):
                          # item 7 of an entry is its node id
                          n = self[r][7]
                          nodemap[n] = r
                      return nodemap

                  def has_node(self, node):
                      """return True if the node exist in the index"""
                      return node in self._nodemap

                  def rev(self, node):
                      """return a revision for a node

                      If the node is unknown, raise a RevlogError"""
                      return self._nodemap[node]

                  def get_rev(self, node):
                      """return a revision for a node

                      If the node is unknown, return None"""
                      return self._nodemap.get(node)

                  def _stripnodes(self, start):
                      """forget the nodes of revisions >= `start` in the cached nodemap"""
                      # only touch the nodemap if it has already been computed
                      if '_nodemap' in vars(self):
                          for r in range(start, len(self)):
                              n = self[r][7]
                              del self._nodemap[n]

                  def clearcaches(self):
                      """drop the cached nodemap, if any"""
                      self.__dict__.pop('_nodemap', None)

                  def __len__(self):
                      return self._lgt + len(self._extra)

                  def append(self, tup):
                      """add the entry `tup` at the end of the index"""
                      if '_nodemap' in vars(self):
                          self._nodemap[tup[7]] = len(self)
                      data = self._pack_entry(len(self), tup)
                      self._extra.append(data)

                  def _pack_entry(self, rev, entry):
                      """return the binary form of `entry` (first 8 items only)"""
                      # items 8 and 9 are not part of this format and must be unset
                      assert entry[8] == 0
                      assert entry[9] == 0
                      return self.index_format.pack(*entry[:8])

                  def _check_index(self, i):
                      """raise TypeError/IndexError unless `i` is a valid revision"""
                      if not isinstance(i, int):
                          raise TypeError(b"expecting int indexes")
                      if i < 0 or i >= len(self):
                          raise IndexError

                  def __getitem__(self, i):
                      if i == -1:
                          return self.null_item
                      self._check_index(i)
                      if i >= self._lgt:
                          # entry appended in memory
                          data = self._extra[i - self._lgt]
                      else:
                          index = self._calculate_index(i)
                          data = self._data[index : index + self.entry_size]
                      r = self._unpack_entry(i, data)
                      if self._lgt and i == 0:
                          # revision 0: force the offset to 0 and keep only the
                          # low 16 bits of the first field
                          offset = revlogutils.offset_type(0, gettype(r[0]))
                          r = (offset,) + r[1:]
                      return r

                  def _unpack_entry(self, rev, data):
                      """return the entry tuple for the binary `data`

                      The unpacked fields are completed with two zeroed items and two
                      COMP_MODE_INLINE compression modes."""
                      r = self.index_format.unpack(data)
                      r = r + (
                          0,
                          0,
                          revlog_constants.COMP_MODE_INLINE,
                          revlog_constants.COMP_MODE_INLINE,
                      )
                      return r

                  def pack_header(self, header):
                      """pack header information as binary"""
                      v_fmt = revlog_constants.INDEX_HEADER
                      return v_fmt.pack(header)

                  def entry_binary(self, rev):
                      """return the raw binary string representing a revision"""
                      entry = self[rev]
                      p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
                      if rev == 0:
                          # skip the leading bytes that are taken by the header
                          p = p[revlog_constants.INDEX_HEADER.size :]
                      return p
              class IndexObject(BaseIndexObject):
                  """Index over a buffer made only of fixed-size entries."""

                  def __init__(self, data):
                      total = len(data)
                      leftover = total % self.entry_size
                      # the buffer must hold a whole number of entries
                      assert leftover == 0, (total, self.entry_size, leftover)
                      self._data = data
                      self._lgt = total // self.entry_size
                      self._extra = []

                  def _calculate_index(self, i):
                      """byte offset of entry `i` inside `_data`"""
                      return self.entry_size * i

                  def __delitem__(self, i):
                      """strip revisions; only `del index[a:-1]` is supported"""
                      supported = (
                          isinstance(i, slice) and i.stop == -1 and i.step is None
                      )
                      if not supported:
                          raise ValueError(b"deleting slices only supports a:-1 with step 1")
                      first = i.start
                      self._check_index(first)
                      self._stripnodes(first)
                      if first >= self._lgt:
                          # only in-memory entries are dropped
                          self._extra = self._extra[: first - self._lgt]
                          return
                      self._data = self._data[: first * self.entry_size]
                      self._lgt = first
                      self._extra = []
              class PersistentNodeMapIndexObject(IndexObject):
                  """a Debug oriented class to test persistent nodemap

                  We need a simple python object to test API and higher level behavior. See
                  the Rust implementation for more serious usage. This should be used only
                  through the dedicated `devel.persistent-nodemap` config.
                  """

                  # State filled by `update_nodemap_data`.  Defaulting it at class
                  # level lets `nodemap_data_incremental` answer None (instead of
                  # failing with AttributeError) when no data has been provided yet.
                  _nm_root = None
                  _nm_max_idx = None
                  _nm_docket = None

                  def nodemap_data_all(self):
                      """Return bytes containing a full serialization of a nodemap

                      The nodemap should be valid for the full set of revisions in the
                      index."""
                      return nodemaputil.persistent_data(self)

                  def nodemap_data_incremental(self):
                      """Return bytes containing an incremental update to persistent nodemap

                      This contains the data for an append-only update of the data provided
                      in the last call to `update_nodemap_data`.

                      Return None when there is no such data, otherwise a
                      `(docket, changed, data)` tuple.  The stored state is consumed by
                      the call.
                      """
                      if self._nm_root is None:
                          return None
                      docket = self._nm_docket
                      changed, data = nodemaputil.update_persistent_data(
                          self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
                      )
                      # the state is single-use: reset it once the update is computed
                      self._nm_root = self._nm_max_idx = self._nm_docket = None
                      return docket, changed, data

                  def update_nodemap_data(self, docket, nm_data):
                      """provide full block of persisted binary data for a nodemap

                      The data are expected to come from disk. See `nodemap_data_all` for a
                      producer of such data."""
                      if nm_data is not None:
                          self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
                          if self._nm_root:
                              self._nm_docket = docket
                          else:
                              # nothing usable was parsed: forget everything
                              self._nm_root = self._nm_max_idx = self._nm_docket = None
              class InlinedIndexObject(BaseIndexObject):
                  """Index over a buffer where each entry is followed by its data.

                  Entries are not at regular positions, so the offset of each of them
                  is computed once and kept in `_offsets`.
                  """

                  def __init__(self, data, inline=0):
                      self._data = data
                      # first pass counts the entries, second pass records offsets
                      self._lgt = self._inline_scan(None)
                      self._inline_scan(self._lgt)
                      self._extra = []

                  def _inline_scan(self, lgt):
                      """walk the entries of `_data` and return how many there are

                      When `lgt` is not None, `_offsets` is (re)built with the start of
                      each entry; `lgt` must then be the number of entries.

                      Raise ValueError if the buffer does not end exactly after the
                      last entry, or if an entry announces a negative data length.
                      """
                      off = 0
                      if lgt is not None:
                          self._offsets = [0] * lgt
                      count = 0
                      while off <= len(self._data) - self.entry_size:
                          # the length of the inlined data follows the first
                          # `big_int_size` bytes of the entry
                          start = off + self.big_int_size
                          (s,) = struct.unpack(
                              b'>i',
                              self._data[start : start + self.int_size],
                          )
                          if s < 0:
                              # a negative length would make the scan stall or walk
                              # backwards: only corrupted data can produce it
                              raise ValueError(b"corrupted data")
                          if lgt is not None:
                              self._offsets[count] = off
                          count += 1
                          off += self.entry_size + s
                      if off != len(self._data):
                          raise ValueError(b"corrupted data")
                      return count

                  def __delitem__(self, i):
                      """strip revisions; only `del index[a:-1]` is supported"""
                      if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
                          raise ValueError(b"deleting slices only supports a:-1 with step 1")
                      i = i.start
                      self._check_index(i)
                      self._stripnodes(i)
                      if i < self._lgt:
                          self._offsets = self._offsets[:i]
                          self._lgt = i
                          self._extra = []
                      else:
                          self._extra = self._extra[: i - self._lgt]

                  def _calculate_index(self, i):
                      """byte offset of entry `i` inside `_data`"""
                      return self._offsets[i]
              def parse_index2(data, inline, revlogv2=False):
                  """Build the index object for `data`.

                  Return `(index, cache)`: `cache` is `(0, data)` for an inline
                  revlog and None otherwise.
                  """
                  if inline:
                      return InlinedIndexObject(data, inline), (0, data)
                  if revlogv2:
                      index_cls = IndexObject2
                  else:
                      index_cls = IndexObject
                  return index_cls(data), None
              def parse_index_cl_v2(data):
                  """Return `(index, None)` with an IndexChangelogV2 built from `data`."""
                  index = IndexChangelogV2(data)
                  return index, None
              class IndexObject2(IndexObject):
                  """Index using the INDEX_ENTRY_V2 format.

                  Packed entries hold ten fields plus one field combining the two
                  compression modes (two bits each).
                  """

                  index_format = revlog_constants.INDEX_ENTRY_V2

                  def replace_sidedata_info(
                      self,
                      rev,
                      sidedata_offset,
                      sidedata_length,
                      offset_flags,
                      compression_mode,
                  ):
                      """
                      Replace an existing index entry's sidedata offset and length with new
                      ones.
                      This cannot be used outside of the context of sidedata rewriting,
                      inside the transaction that creates the revision `rev`.
                      """
                      if rev < 0:
                          raise KeyError
                      self._check_index(rev)
                      if rev < self._lgt:
                          # only entries still held in memory may be rewritten
                          msg = b"cannot rewrite entries outside of this transaction"
                          raise KeyError(msg)
                      fields = list(self[rev])
                      fields[0] = offset_flags
                      fields[8] = sidedata_offset
                      fields[9] = sidedata_length
                      fields[11] = compression_mode
                      packed = self._pack_entry(rev, tuple(fields))
                      self._extra[rev - self._lgt] = packed

                  def _unpack_entry(self, rev, data):
                      """return the 12-item entry tuple for the binary `data`"""
                      fields = self.index_format.unpack(data)
                      comp_bits = fields[10]
                      # bits 0-1: data compression mode, bits 2-3: sidedata one
                      return fields[:10] + (comp_bits & 3, (comp_bits >> 2) & 3)

                  def _pack_entry(self, rev, entry):
                      """return the binary form of the 12-item `entry`"""
                      comp_bits = (entry[10] & 3) | ((entry[11] & 3) << 2)
                      fields = entry[:10] + (comp_bits,)
                      return self.index_format.pack(*fields)

                  def entry_binary(self, rev):
                      """return the raw binary string representing a revision"""
                      return self._pack_entry(rev, self[rev])

                  def pack_header(self, header):
                      """pack header information as binary"""
                      # always an error for this format
                      raise error.ProgrammingError(
                          'version header should go in the docket, not the index: %d'
                          % header
                      )
              class IndexChangelogV2(IndexObject2):
                  """Index using the INDEX_ENTRY_CL_V2 format.

                  Items 3 and 4 of an entry are not stored: they always equal the
                  revision number and are re-created when unpacking.
                  """

                  index_format = revlog_constants.INDEX_ENTRY_CL_V2

                  def _unpack_entry(self, rev, data, r=True):
                      """return the 12-item entry tuple for the binary `data`"""
                      raw = self.index_format.unpack(data)
                      comp_bits = raw[8]
                      head = raw[:3] + (rev, rev) + raw[3:8]
                      # bits 0-1: data compression mode, bits 2-3: sidedata one
                      return head + (comp_bits & 3, (comp_bits >> 2) & 3)

                  def _pack_entry(self, rev, entry):
                      """return the binary form of the 12-item `entry`"""
                      assert entry[3] == rev, entry[3]
                      assert entry[4] == rev, entry[4]
                      comp_bits = (entry[10] & 3) | ((entry[11] & 3) << 2)
                      fields = entry[:3] + entry[5:10] + (comp_bits,)
                      return self.index_format.pack(*fields)
              def parse_index_devel_nodemap(data, inline):
                  """like parse_index2, but always return a PersistentNodeMapIndexObject"""
                  index = PersistentNodeMapIndexObject(data)
                  return index, None
              def parse_dirstate(dmap, copymap, st):
                  """Fill `dmap` and `copymap` from the dirstate-v1 bytes `st`.

                  The first 40 bytes are the two parents, returned as a list.  Each
                  following record is a fixed-size header followed by a file name,
                  itself optionally followed by NUL and a copy source.
                  """
                  parents = [st[:20], st[20:40]]
                  header_size = struct.calcsize(b">cllll")
                  end = len(st)
                  cursor = 40
                  while cursor < end:
                      name_start = cursor + header_size
                      # a literal here is faster
                      fields = _unpack(b">cllll", st[cursor:name_start])
                      # the last header field is the length of the name that follows
                      cursor = name_start + fields[4]
                      name = st[name_start:cursor]
                      if b'\0' in name:
                          name, source = name.split(b'\0')
                          copymap[name] = source
                      dmap[name] = DirstateItem.from_v1_data(*fields[:4])
                  return parents
              def pack_dirstate(dmap, copymap, pl, now):
                  """Serialize a dirstate in the v1 format and return the bytes.

                  `pl` holds the parents, written first.  Each item of `dmap` is then
                  written as a fixed-size header followed by its file name (and NUL
                  plus the copy source when `copymap` has one).  Items for which
                  `need_delay(now)` holds are marked possibly dirty beforehand.
                  """
                  now = int(now)
                  out = stringio()
                  emit = out.write
                  emit(b"".join(pl))
                  for path, item in pycompat.iteritems(dmap):
                      if item.need_delay(now):
                          # The file was last modified "simultaneously" with the current
                          # write to dirstate (i.e. within the same second for file-
                          # systems with a granularity of 1 sec). This commonly happens
                          # for at least a couple of files on 'update'.
                          # The user could change the file without changing its size
                          # within the same second. Invalidate the file's mtime in
                          # dirstate, forcing future 'status' calls to compare the
                          # contents of the file if the size is the same. This prevents
                          # mistakenly treating such files as clean.
                          item.set_possibly_dirty()
                      name = path
                      if path in copymap:
                          name = b"%s\0%s" % (path, copymap[path])
                      header = _pack(
                          b">cllll",
                          item.v1_state(),
                          item.v1_mode(),
                          item.v1_size(),
                          item.v1_mtime(),
                          len(name),
                      )
                      emit(header)
                      emit(name)
                  return out.getvalue()

rust/hg-core/src/dirstate_tree/on_disk.rs

0 +7 -2

              //! The "version 2" disk representation of the dirstate
              //!
              //! See `mercurial/helptext/internals/dirstate-v2.txt`
              use crate::dirstate::TruncatedTimestamp;
              use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
              use crate::dirstate_tree::path_with_basename::WithBasename;
              use crate::errors::HgError;
              use crate::utils::hg_path::HgPath;
              use crate::DirstateEntry;
              use crate::DirstateError;
              use crate::DirstateParents;
              use bitflags::bitflags;
              use bytes_cast::unaligned::{U16Be, U32Be};
              use bytes_cast::BytesCast;
              use format_bytes::format_bytes;
              use std::borrow::Cow;
              use std::convert::{TryFrom, TryInto};
              /// Added at the start of `.hg/dirstate` when the "v2" format is used.
              /// This is a redundant sanity check more than an actual "magic number" since
              /// `.hg/requires` already governs which format should be used.
              pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
              /// Keep space for 256-bit hashes
              const STORED_NODE_ID_BYTES: usize = 32;
              /// … even though only 160 bits are used for now, with SHA-1
              const USED_NODE_ID_BYTES: usize = 20;
              /// Length in bytes of the stored hash of ignore patterns
              pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
              /// Raw bytes of the hash of ignore patterns
              pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
              /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
              const TREE_METADATA_SIZE: usize = 44;
              // Expected size in bytes of `Node`, checked in `static_assert_size_of`
              const NODE_SIZE: usize = 44;
              /// Make sure that size-affecting changes are made knowingly
              ///
              /// This function is never meant to be called: naming `transmute` with
              /// explicit source and destination types is enough for the compiler to
              /// reject the build if the two types do not have the same size.
              #[allow(unused)]
              fn static_assert_size_of() {
                  let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
                  // 81 = marker (12) + two stored parents (2 * 32) + data_size (4)
                  // + uuid_size (1)
                  let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
                  let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
              }
              // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
              //
              // Fixed-size start of the docket file; the bytes that follow it are the
              // UUID (see `read_docket`).
              #[derive(BytesCast)]
              #[repr(C)]
              struct DocketHeader {
                  /// Expected to be equal to `V2_FORMAT_MARKER`
                  marker: [u8; V2_FORMAT_MARKER.len()],
                  /// Only the first `USED_NODE_ID_BYTES` bytes are read
                  parent_1: [u8; STORED_NODE_ID_BYTES],
                  /// Only the first `USED_NODE_ID_BYTES` bytes are read
                  parent_2: [u8; STORED_NODE_ID_BYTES],
                  metadata: TreeMetadata,
                  /// Counted in bytes
                  data_size: Size,
                  /// Number of bytes expected after the header
                  uuid_size: u8,
              }
              /// Parsed view of a docket file, borrowing from the bytes it was read
              /// from. Built by `read_docket`.
              pub struct Docket<'on_disk> {
                  /// The fixed-size part at the start of the docket
                  header: &'on_disk DocketHeader,
                  /// The bytes following the header, used to name the data file
                  uuid: &'on_disk [u8],
              }
              /// Fields are documented in the *Tree metadata in the docket file*
              /// section of `mercurial/helptext/internals/dirstate-v2.txt`
              ///
              /// Its size is expected to be `TREE_METADATA_SIZE` bytes.
              #[derive(BytesCast)]
              #[repr(C)]
              struct TreeMetadata {
                  /// Where to find the nodes at the root of the tree
                  root_nodes: ChildNodes,
                  nodes_with_entry_count: Size,
                  nodes_with_copy_source_count: Size,
                  unreachable_bytes: Size,
                  // Not read by the code in this file
                  unused: [u8; 4],
                  /// See *Optional hash of ignore patterns* section of
                  /// `mercurial/helptext/internals/dirstate-v2.txt`
                  ignore_patterns_hash: IgnorePatternsHash,
              }
              /// Fields are documented in the *The data file format*
              /// section of `mercurial/helptext/internals/dirstate-v2.txt`
              ///
              /// Its size is expected to be `NODE_SIZE` bytes.
              #[derive(BytesCast)]
              #[repr(C)]
              pub(super) struct Node {
                  full_path: PathSlice,
                  /// In bytes from `self.full_path.start`
                  base_name_start: PathSize,
                  /// No copy source when its `start` is zero
                  copy_source: OptPathSlice,
                  children: ChildNodes,
                  pub(super) descendants_with_entry_count: Size,
                  pub(super) tracked_descendants_count: Size,
                  /// Raw bits, interpreted through `Flags`
                  flags: U16Be,
                  size: U32Be,
                  /// Either a file mtime or a directory mtime, depending on `flags`
                  mtime: PackedTruncatedTimestamp,
              }
              // Meaning of the bits of the `flags` field of `Node`. Bits that are not
              // listed here are dropped when reading (see `Node::flags`).
              bitflags! {
                  #[repr(C)]
                  struct Flags: u16 {
                      // A node has an entry if any of the three following bits is set
                      const WDIR_TRACKED = 1 << 0;
                      const P1_TRACKED = 1 << 1;
                      const P2_INFO = 1 << 2;
                      const HAS_MODE_AND_SIZE = 1 << 3;
                      // The two mtime bits are mutually exclusive
                      const HAS_FILE_MTIME = 1 << 4;
                      const HAS_DIRECTORY_MTIME = 1 << 5;
                      const MODE_EXEC_PERM = 1 << 6;
                      const MODE_IS_SYMLINK = 1 << 7;
+                     const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
                  }
              }
              /// Duration since the Unix epoch
              ///
              /// On-disk form of a timestamp; converted with `try_into` where a
              /// `TruncatedTimestamp` is needed.
              #[derive(BytesCast, Copy, Clone)]
              #[repr(C)]
              struct PackedTruncatedTimestamp {
                  // NOTE(review): presumably the seconds truncated to 32 bits — confirm
                  // against `TruncatedTimestamp`
                  truncated_seconds: U32Be,
                  nanoseconds: U32Be,
              }
              /// Counted in bytes from the start of the file
              ///
              /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
              type Offset = U32Be;
              /// Counted in number of items
              ///
              /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
              type Size = U32Be;
              /// Counted in bytes
              ///
              /// NOTE: we choose not to support file names/paths longer than 64 KiB.
              type PathSize = U16Be;
              /// A contiguous sequence of `len` times `Node`, representing the child nodes
              /// of either some other node or of the repository root.
              ///
              /// Always sorted by ascending `full_path`, to allow binary search.
              /// Since nodes with the same parent nodes also have the same parent path,
              /// only the `base_name`s need to be compared during binary search.
              #[derive(BytesCast, Copy, Clone)]
              #[repr(C)]
              struct ChildNodes {
                  /// Counted in bytes from the start of the file
                  start: Offset,
                  /// Counted in number of nodes
                  len: Size,
              }
              /// A `HgPath` of `len` bytes
              #[derive(BytesCast, Copy, Clone)]
              #[repr(C)]
              struct PathSlice {
                  /// Counted in bytes from the start of the file
                  start: Offset,
                  /// Counted in bytes
                  len: PathSize,
              }
              /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
              type OptPathSlice = PathSlice;
              /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
              ///
              /// This should only happen if Mercurial is buggy or a repository is corrupted.
              #[derive(Debug)]
              pub struct DirstateV2ParseError;
              /// A dirstate-v2 parse error is reported as a corrupted repository.
              impl From<DirstateV2ParseError> for HgError {
                  fn from(_parse_error: DirstateV2ParseError) -> Self {
                      let explanation = "dirstate-v2 parse error";
                      HgError::corrupted(explanation)
                  }
              }
              impl From<DirstateV2ParseError> for crate::DirstateError {
                  fn from(error: DirstateV2ParseError) -> Self {
                      HgError::from(error).into()
                  }
              }
              impl<'on_disk> Docket<'on_disk> {
                  /// The two parents recorded in the header.
                  ///
                  /// Only the first `USED_NODE_ID_BYTES` bytes of each stored parent
                  /// are used.
                  pub fn parents(&self) -> DirstateParents {
                      use crate::Node;
                      let stored_p1 = &self.header.parent_1[..USED_NODE_ID_BYTES];
                      let stored_p2 = &self.header.parent_2[..USED_NODE_ID_BYTES];
                      let p1 = Node::try_from(stored_p1).unwrap().clone();
                      let p2 = Node::try_from(stored_p2).unwrap().clone();
                      DirstateParents { p1, p2 }
                  }

                  /// The raw bytes of the tree metadata stored in the header.
                  pub fn tree_metadata(&self) -> &[u8] {
                      let metadata = &self.header.metadata;
                      metadata.as_bytes()
                  }

                  /// The size in bytes recorded in the header for the data file.
                  pub fn data_size(&self) -> usize {
                      let stored = self.header.data_size.get();
                      // This `unwrap` could only panic on a 16-bit CPU
                      usize::try_from(stored).unwrap()
                  }

                  /// Name of the data file: `dirstate.` followed by the UUID.
                  ///
                  /// Panics if the result is not valid UTF-8.
                  pub fn data_filename(&self) -> String {
                      let name_bytes = format_bytes!(b"dirstate.{}", self.uuid);
                      String::from_utf8(name_bytes).unwrap()
                  }
              }
              /// Parse the bytes of a docket file.
              ///
              /// Fails if the header cannot be read, if the format marker is not the
              /// expected one, or if the number of bytes after the header differs from
              /// the UUID size announced by the header.
              pub fn read_docket(
                  on_disk: &[u8],
              ) -> Result<Docket<'_>, DirstateV2ParseError> {
                  let (header, uuid) = match DocketHeader::from_bytes(on_disk) {
                      Ok(parsed) => parsed,
                      Err(_) => return Err(DirstateV2ParseError),
                  };
                  let marker_matches = header.marker == *V2_FORMAT_MARKER;
                  let uuid_matches = uuid.len() == header.uuid_size as usize;
                  if !(marker_matches && uuid_matches) {
                      return Err(DirstateV2ParseError);
                  }
                  Ok(Docket { header, uuid })
              }
              /// Build a `DirstateMap` borrowing from the data file bytes `on_disk`,
              /// using the tree metadata bytes taken from the docket.
              ///
              /// Empty data gives an empty map, without looking at `metadata`.
              pub(super) fn read<'on_disk>(
                  on_disk: &'on_disk [u8],
                  metadata: &[u8],
              ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
                  if on_disk.is_empty() {
                      return Ok(DirstateMap::empty(on_disk));
                  }
                  let (meta, _) = TreeMetadata::from_bytes(metadata)
                      .map_err(|_| DirstateV2ParseError)?;
                  let root_nodes = read_nodes(on_disk, meta.root_nodes)?;
                  Ok(DirstateMap {
                      on_disk,
                      root: dirstate_map::ChildNodes::OnDisk(root_nodes),
                      nodes_with_entry_count: meta.nodes_with_entry_count.get(),
                      nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
                      ignore_patterns_hash: meta.ignore_patterns_hash,
                      unreachable_bytes: meta.unreachable_bytes.get(),
                  })
              }
              impl Node {
                  /// The full path of this node, read from `on_disk`.
                  pub(super) fn full_path<'on_disk>(
                      &self,
                      on_disk: &'on_disk [u8],
                  ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                      let slice = self.full_path;
                      read_hg_path(on_disk, slice)
                  }
                  /// Offset of the base name inside the full path, as a `usize`.
                  ///
                  /// Fails unless the stored offset is strictly smaller than the
                  /// length of the full path.
                  pub(super) fn base_name_start<'on_disk>(
                      &self,
                  ) -> Result<usize, DirstateV2ParseError> {
                      let stored = self.base_name_start.get();
                      if stored >= self.full_path.len.get() {
                          return Err(DirstateV2ParseError);
                      }
                      // Widening a `PathSize` value to `usize`: not expected to fail
                      let offset = usize::try_from(stored)
                          .expect("dirstate-v2 base_name_start out of bounds");
                      Ok(offset)
                  }
                  /// The part of the full path that starts at `base_name_start`.
                  pub(super) fn base_name<'on_disk>(
                      &self,
                      on_disk: &'on_disk [u8],
                  ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                      let path_bytes = self.full_path(on_disk)?.as_bytes();
                      let start = self.base_name_start()?;
                      let name_bytes = &path_bytes[start..];
                      Ok(HgPath::new(name_bytes))
                  }
                  /// The full path of this node, borrowed from `on_disk`, together with
                  /// the position of its base name.
                  pub(super) fn path<'on_disk>(
                      &self,
                      on_disk: &'on_disk [u8],
                  ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
                      let full_path = Cow::Borrowed(self.full_path(on_disk)?);
                      let base_name_start = self.base_name_start()?;
                      let key = WithBasename::from_raw_parts(full_path, base_name_start);
                      Ok(key)
                  }
                  /// Whether a copy source is recorded, i.e. its `start` is not zero.
                  pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
                      let start = self.copy_source.start.get();
                      start != 0
                  }
                  /// The copy source of this node read from `on_disk`, or `None` when
                  /// there is none.
                  pub(super) fn copy_source<'on_disk>(
                      &self,
                      on_disk: &'on_disk [u8],
                  ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
                      if !self.has_copy_source() {
                          return Ok(None);
                      }
                      let source = read_hg_path(on_disk, self.copy_source)?;
                      Ok(Some(source))
                  }
                  /// The stored flag bits as `Flags`; bits not known to `Flags` are
                  /// dropped.
                  fn flags(&self) -> Flags {
                      let raw_bits = self.flags.get();
                      Flags::from_bits_truncate(raw_bits)
                  }
                  /// Whether at least one of `WDIR_TRACKED`, `P1_TRACKED` and `P2_INFO`
                  /// is set.
                  fn has_entry(&self) -> bool {
                      let entry_bits =
                          Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO;
                      self.flags().intersects(entry_bits)
                  }
                  /// The data of this node: an entry if it has one, otherwise a cached
                  /// directory mtime if there is one, otherwise nothing.
                  pub(super) fn node_data(
                      &self,
                  ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
                      if self.has_entry() {
                          return Ok(dirstate_map::NodeData::Entry(self.assume_entry()));
                      }
                      let data = match self.cached_directory_mtime()? {
                          Some(mtime) => dirstate_map::NodeData::CachedDirectory { mtime },
                          None => dirstate_map::NodeData::None,
                      };
                      Ok(data)
                  }
                  /// The stored mtime if `HAS_DIRECTORY_MTIME` is set, `None` otherwise.
                  ///
                  /// Having both `HAS_DIRECTORY_MTIME` and `HAS_FILE_MTIME` set is a
                  /// parse error.
                  pub(super) fn cached_directory_mtime(
                      &self,
                  ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
                      let flags = self.flags();
                      if !flags.contains(Flags::HAS_DIRECTORY_MTIME) {
                          return Ok(None);
                      }
                      if flags.contains(Flags::HAS_FILE_MTIME) {
                          return Err(DirstateV2ParseError);
                      }
                      Ok(Some(self.mtime.try_into()?))
                  }
                  fn synthesize_unix_mode(&self) -> u32 {
                      let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
                          libc::S_IFLNK
                      } else {
                          libc::S_IFREG
                      };
                      let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
 o755
                      } else {
 o644
                      };
                      file_type | permisions
                  }
                  /// Builds a `DirstateEntry` from this node's flags, size and mtime via
                  /// `DirstateEntry::from_v2_data`.
                  ///
                  /// This does not check `has_entry()` itself; the callers visible here
                  /// (`node_data` and `entry`) do so before calling it.
                  ///
                  /// With the conditions on the `+` lines below, mode/size and mtime are
                  /// both reported as `None` whenever `EXPECTED_STATE_IS_MODIFIED` is set,
                  /// even if `HAS_MODE_AND_SIZE` / `HAS_FILE_MTIME` are present.
                  fn assume_entry(&self) -> DirstateEntry {
                      // TODO: convert through raw bits instead?
                      let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
                      let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
                      let p2_info = self.flags().contains(Flags::P2_INFO);
-                     let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
+                     let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
+                         && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
+                     {
                          Some((self.synthesize_unix_mode(), self.size.into()))
                      } else {
                          None
                      };
-                     let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
+                     let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
+                         && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
+                     {
                          // Only the seconds component is passed on; `nanoseconds` is not
                          // read here.
                          Some(self.mtime.truncated_seconds.into())
                      } else {
                          None
                      };
                      DirstateEntry::from_v2_data(
                          wdir_tracked,
                          p1_tracked,
                          p2_info,
                          mode_size,
                          mtime,
                      )
                  }
                  /// Returns the `DirstateEntry` for this node, or `None` when
                  /// `has_entry()` is false.
                  pub(super) fn entry(
                      &self,
                  ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
                      if !self.has_entry() {
                          return Ok(None);
                      }
                      Ok(Some(self.assume_entry()))
                  }
                  /// Reads this node's child nodes out of `on_disk`.
                  pub(super) fn children<'on_disk>(
                      &self,
                      on_disk: &'on_disk [u8],
                  ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
                      let child_nodes = self.children;
                      read_nodes(on_disk, child_nodes)
                  }
                  /// Converts this on-disk node into a `dirstate_map::Node` whose children
                  /// and copy source still borrow from `on_disk`.
                  pub(super) fn to_in_memory_node<'on_disk>(
                      &self,
                      on_disk: &'on_disk [u8],
                  ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
                      // Fallible parts are evaluated in field order: children, copy
                      // source, then node data.
                      let children =
                          dirstate_map::ChildNodes::OnDisk(self.children(on_disk)?);
                      let copy_source = self.copy_source(on_disk)?.map(Cow::Borrowed);
                      let data = self.node_data()?;
                      let descendants_with_entry_count =
                          self.descendants_with_entry_count.get();
                      let tracked_descendants_count = self.tracked_descendants_count.get();
                      Ok(dirstate_map::Node {
                          children,
                          copy_source,
                          data,
                          descendants_with_entry_count,
                          tracked_descendants_count,
                      })
                  }
                  /// Converts a `DirstateEntry` into the `(flags, size, mtime)` triple
                  /// stored in an on-disk node.
                  ///
                  /// The three tracking flags are always copied from the entry. When the
                  /// entry has mode/size data, `HAS_MODE_AND_SIZE` is set together with
                  /// the exec-permission and symlink flags derived from the mode;
                  /// otherwise the size is written as zero. When the entry has an mtime,
                  /// `HAS_FILE_MTIME` is set and the seconds are stored with zero
                  /// nanoseconds; otherwise the null timestamp is used.
                  fn from_dirstate_entry(
                      entry: &DirstateEntry,
                  ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
                      let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
                          entry.v2_data();
                      // TODO: convert through raw flag bits instead?
                      let mut flags = Flags::empty();
                      flags.set(Flags::WDIR_TRACKED, wdir_tracked);
                      flags.set(Flags::P1_TRACKED, p1_tracked);
                      flags.set(Flags::P2_INFO, p2_info);
                      let size = if let Some((m, s)) = mode_size_opt {
                          let exec_perm = m & libc::S_IXUSR != 0;
                          let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
                          flags.set(Flags::MODE_EXEC_PERM, exec_perm);
                          flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
                          flags.insert(Flags::HAS_MODE_AND_SIZE);
                          s.into()
                      } else {
                          // No mode/size information: store a zero size.
                          0.into()
                      };
                      let mtime = if let Some(m) = mtime_opt {
                          flags.insert(Flags::HAS_FILE_MTIME);
                          PackedTruncatedTimestamp {
                              truncated_seconds: m.into(),
                              nanoseconds: 0.into(),
                          }
                      } else {
                          PackedTruncatedTimestamp::null()
                      };
                      (flags, size, mtime)
                  }
              }
              /// Reads the bytes designated by `slice` from `on_disk` and wraps them as
              /// an `HgPath`.
              fn read_hg_path(
                  on_disk: &[u8],
                  slice: PathSlice,
              ) -> Result<&HgPath, DirstateV2ParseError> {
                  let path_bytes: &[u8] =
                      read_slice(on_disk, slice.start, slice.len.get())?;
                  Ok(HgPath::new(path_bytes))
              }
              fn read_nodes(
                  on_disk: &[u8],
                  slice: ChildNodes,
              ) -> Result<&[Node], DirstateV2ParseError> {
                  read_slice(on_disk, slice.start, slice.len.get())
              }
              /// Interprets `len` items of type `T` starting at byte offset `start` of
              /// `on_disk`.
              ///
              /// Returns `DirstateV2ParseError` when `start` is past the end of
              /// `on_disk` or when `T::slice_from_bytes` fails on the remaining bytes.
              fn read_slice<T, Len>(
                  on_disk: &[u8],
                  start: Offset,
                  len: Len,
              ) -> Result<&[T], DirstateV2ParseError>
              where
                  T: BytesCast,
                  Len: TryInto<usize>,
              {
                  // A value that does not fit in `usize` is replaced by `usize::MAX`,
                  // which ends up as an "out of bounds" error below since a single
                  // `&[u8]` cannot occupy the entire address space.
                  let first_byte = start.get().try_into().unwrap_or(std::usize::MAX);
                  let item_count = len.try_into().unwrap_or(std::usize::MAX);
                  let tail = on_disk.get(first_byte..).ok_or(DirstateV2ParseError)?;
                  match T::slice_from_bytes(tail, item_count) {
                      Ok((items, _rest)) => Ok(items),
                      Err(_) => Err(DirstateV2ParseError),
                  }
              }
              /// Calls `f` with the full path of every node in the tree whose entry
              /// reports a tracked state.
              ///
              /// `metadata` is parsed as `TreeMetadata` to find the root nodes; the tree
              /// is then walked recursively, each node before its children.
              pub(crate) fn for_each_tracked_path<'on_disk>(
                  on_disk: &'on_disk [u8],
                  metadata: &[u8],
                  mut f: impl FnMut(&'on_disk HgPath),
              ) -> Result<(), DirstateV2ParseError> {
                  // Visits `nodes` in order, recursing into each node's children right
                  // after the node itself.
                  fn visit<'on_disk>(
                      on_disk: &'on_disk [u8],
                      nodes: ChildNodes,
                      callback: &mut impl FnMut(&'on_disk HgPath),
                  ) -> Result<(), DirstateV2ParseError> {
                      for node in read_nodes(on_disk, nodes)? {
                          let is_tracked = match node.entry()? {
                              Some(entry) => entry.state().is_tracked(),
                              None => false,
                          };
                          if is_tracked {
                              callback(node.full_path(on_disk)?);
                          }
                          visit(on_disk, node.children, callback)?;
                      }
                      Ok(())
                  }

                  let tree_metadata = match TreeMetadata::from_bytes(metadata) {
                      Ok((parsed, _rest)) => parsed,
                      Err(_) => return Err(DirstateV2ParseError),
                  };
                  visit(on_disk, tree_metadata.root_nodes, &mut f)
              }
              /// Serializes `dirstate_map` and returns `(data, metadata, append)`.
              ///
              /// `append` tells the caller whether `data` should be appended to the
              /// existing data file whose content is at `dirstate_map.on_disk` (true),
              /// or written to a new data file (false). It can only be true when
              /// `can_append` is true.
              pub(super) fn write(
                  dirstate_map: &mut DirstateMap,
                  can_append: bool,
              ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
                  let should_append = can_append && dirstate_map.write_should_append();

                  // This ignores the space for paths, and for nodes without an entry.
                  // TODO: better estimate? Skip the `Vec` and write to a file directly?
                  let node_size = std::mem::size_of::<Node>();
                  let capacity_hint =
                      node_size * dirstate_map.nodes_with_entry_count as usize;

                  let mut writer = Writer {
                      dirstate_map,
                      append: should_append,
                      out: Vec::with_capacity(capacity_hint),
                  };
                  let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

                  let tree_metadata = TreeMetadata {
                      root_nodes,
                      nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
                      nodes_with_copy_source_count: dirstate_map
                          .nodes_with_copy_source_count
                          .into(),
                      unreachable_bytes: dirstate_map.unreachable_bytes.into(),
                      unused: [0; 4],
                      ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
                  };
                  let data = writer.out;
                  let metadata_bytes = tree_metadata.as_bytes().to_vec();
                  Ok((data, metadata_bytes, should_append))
              }
              /// State for one serialization pass started by `write`.
              struct Writer<'dmap, 'on_disk> {
                  /// The map being serialized. Its `on_disk` bytes are consulted to reuse
                  /// already-written nodes and paths when appending.
                  dirstate_map: &'dmap DirstateMap<'on_disk>,
                  /// When true, offsets of newly written data are shifted by the length
                  /// of the existing `on_disk` bytes, and existing data may be referenced
                  /// instead of being written again.
                  append: bool,
                  /// The bytes produced so far.
                  out: Vec<u8>,
              }
              impl Writer<'_, '_> {
                  fn write_nodes(
                      &mut self,
                      nodes: dirstate_map::ChildNodesRef,
                  ) -> Result<ChildNodes, DirstateError> {
                      // Reuse already-written nodes if possible
                      if self.append {
                          if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                              let start = self.on_disk_offset_of(nodes_slice).expect(
                                  "dirstate-v2 OnDisk nodes not found within on_disk",
                              );
                              let len = child_nodes_len_from_usize(nodes_slice.len());
                              return Ok(ChildNodes { start, len });
                          }
                      }
                      // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
                      // undefined iteration order. Sort to enable binary search in the
                      // written file.
                      let nodes = nodes.sorted();
                      let nodes_len = nodes.len();
                      // First accumulate serialized nodes in a `Vec`
                      let mut on_disk_nodes = Vec::with_capacity(nodes_len);
                      for node in nodes {
                          let children =
                              self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
                          let full_path = node.full_path(self.dirstate_map.on_disk)?;
                          let full_path = self.write_path(full_path.as_bytes());
                          let copy_source = if let Some(source) =
                              node.copy_source(self.dirstate_map.on_disk)?
                          {
                              self.write_path(source.as_bytes())
                          } else {
                              PathSlice {
                                  start: 0.into(),
                                  len: 0.into(),
                              }
                          };
                          on_disk_nodes.push(match node {
                              NodeRef::InMemory(path, node) => {
                                  let (flags, size, mtime) = match &node.data {
                                      dirstate_map::NodeData::Entry(entry) => {
                                          Node::from_dirstate_entry(entry)
                                      }
                                      dirstate_map::NodeData::CachedDirectory { mtime } => (
                                          Flags::HAS_DIRECTORY_MTIME,
 .into(),
                                          (*mtime).into(),
                                      ),
                                      dirstate_map::NodeData::None => (
                                          Flags::empty(),
 .into(),
                                          PackedTruncatedTimestamp::null(),
                                      ),
                                  };
                                  Node {
                                      children,
                                      copy_source,
                                      full_path,
                                      base_name_start: u16::try_from(path.base_name_start())
                                          // Could only panic for paths over 64 KiB
                                          .expect("dirstate-v2 path length overflow")
                                          .into(),
                                      descendants_with_entry_count: node
                                          .descendants_with_entry_count
                                          .into(),
                                      tracked_descendants_count: node
                                          .tracked_descendants_count
                                          .into(),
                                      flags: flags.bits().into(),
                                      size,
                                      mtime,
                                  }
                              }
                              NodeRef::OnDisk(node) => Node {
                                  children,
                                  copy_source,
                                  full_path,
                                  ..*node
                              },
                          })
                      }
                      // … so we can write them contiguously, after writing everything else
                      // they refer to.
                      let start = self.current_offset();
                      let len = child_nodes_len_from_usize(nodes_len);
                      self.out.extend(on_disk_nodes.as_bytes());
                      Ok(ChildNodes { start, len })
                  }
                  /// If the given slice of items is within `on_disk`, returns its offset
                  /// from the start of `on_disk`.
                  fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
                  where
                      T: BytesCast,
                  {
                      fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
                          let start = slice.as_ptr() as usize;
                          let end = start + slice.len();
                          start..=end
                      }
                      let slice_addresses = address_range(slice.as_bytes());
                      let on_disk_addresses = address_range(self.dirstate_map.on_disk);
                      if on_disk_addresses.contains(slice_addresses.start())
                          && on_disk_addresses.contains(slice_addresses.end())
                      {
                          let offset = slice_addresses.start() - on_disk_addresses.start();
                          Some(offset_from_usize(offset))
                      } else {
                          None
                      }
                  }
                  fn current_offset(&mut self) -> Offset {
                      let mut offset = self.out.len();
                      if self.append {
                          offset += self.dirstate_map.on_disk.len()
                      }
                      offset_from_usize(offset)
                  }
                  fn write_path(&mut self, slice: &[u8]) -> PathSlice {
                      let len = path_len_from_usize(slice.len());
                      // Reuse an already-written path if possible
                      if self.append {
                          if let Some(start) = self.on_disk_offset_of(slice) {
                              return PathSlice { start, len };
                          }
                      }
                      let start = self.current_offset();
                      self.out.extend(slice.as_bytes());
                      PathSlice { start, len }
                  }
              }
              /// Converts a byte offset to the on-disk `Offset` type, panicking when it
              /// does not fit in a `u32`.
              fn offset_from_usize(x: usize) -> Offset {
                  // Could only panic for a dirstate file larger than 4 GiB
                  let narrowed = u32::try_from(x).expect("dirstate-v2 offset overflow");
                  narrowed.into()
              }
              /// Converts a node count to the on-disk `Size` type, panicking when it
              /// does not fit in a `u32`.
              fn child_nodes_len_from_usize(x: usize) -> Size {
                  // Could only panic with over 4 billion nodes
                  let narrowed =
                      u32::try_from(x).expect("dirstate-v2 slice length overflow");
                  narrowed.into()
              }
              /// Converts a path length to the on-disk `PathSize` type, panicking when
              /// it does not fit in a `u16`.
              fn path_len_from_usize(x: usize) -> PathSize {
                  // Could only panic for paths over 64 KiB
                  let narrowed =
                      u16::try_from(x).expect("dirstate-v2 path length overflow");
                  narrowed.into()
              }
              impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
                  /// Packs the truncated seconds and the nanoseconds of `timestamp` into
                  /// the on-disk representation.
                  fn from(timestamp: TruncatedTimestamp) -> Self {
                      let truncated_seconds = timestamp.truncated_seconds().into();
                      let nanoseconds = timestamp.nanoseconds().into();
                      Self {
                          truncated_seconds,
                          nanoseconds,
                      }
                  }
              }
              impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
                  type Error = DirstateV2ParseError;
                  /// Unpacks an on-disk timestamp through
                  /// `TruncatedTimestamp::from_already_truncated`, forwarding its result.
                  fn try_from(
                      timestamp: PackedTruncatedTimestamp,
                  ) -> Result<Self, Self::Error> {
                      let seconds = timestamp.truncated_seconds.get();
                      let nanos = timestamp.nanoseconds.get();
                      Self::from_already_truncated(seconds, nanos)
                  }
              }
              impl PackedTruncatedTimestamp {
                  /// The all-zero timestamp, written when a node has no mtime to store.
                  fn null() -> Self {
                      let truncated_seconds = 0.into();
                      let nanoseconds = 0.into();
                      Self {
                          truncated_seconds,
                          nanoseconds,
                      }
                  }
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages