# HG changeset patch # User Simon Sapin # Date 2021-10-18 09:23:07 # Node ID 269ff8978086005153f854f4917ff45fd711618c # Parent 84f6b0c41b9093f3726a0b69735735c5f110b7ba dirstate: store mtimes with nanosecond precision in memory Keep integer seconds since the Unix epoch, together with integer nanoseconds in the `0 <= n < 1e9` range. For now, nanoseconds are still always zero. This commit is about data structure changes. Differential Revision: https://phab.mercurial-scm.org/D11684 diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -57,7 +57,8 @@ static PyObject *dirstate_item_new(PyTyp int has_meaningful_mtime; int mode; int size; - int mtime; + int mtime_s; + int mtime_ns; PyObject *parentfiledata; PyObject *fallback_exec; PyObject *fallback_symlink; @@ -111,15 +112,10 @@ static PyObject *dirstate_item_new(PyTyp } if (parentfiledata != Py_None) { - if (!PyTuple_CheckExact(parentfiledata)) { - PyErr_SetString( - PyExc_TypeError, - "parentfiledata should be a Tuple or None"); + if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size, + &mtime_s, &mtime_ns)) { return NULL; } - mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0)); - size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1)); - mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2)); } else { has_meaningful_data = 0; has_meaningful_mtime = 0; @@ -134,9 +130,11 @@ static PyObject *dirstate_item_new(PyTyp } if (has_meaningful_mtime) { t->flags |= dirstate_flag_has_file_mtime; - t->mtime = mtime; + t->mtime_s = mtime_s; + t->mtime_ns = mtime_ns; } else { - t->mtime = 0; + t->mtime_s = 0; + t->mtime_ns = 0; } return (PyObject *)t; } @@ -254,7 +252,7 @@ static inline int dirstate_item_c_v1_mti (self->flags & dirstate_flag_p2_info)) { return ambiguous_time; } else { - return self->mtime; + return self->mtime_s; } } @@ -272,7 +270,8 @@ static PyObject *dirstate_item_v2_data(d } else { flags &= 
~dirstate_flag_mode_is_symlink; } - return Py_BuildValue("iii", flags, self->size, self->mtime); + return Py_BuildValue("iiii", flags, self->size, self->mtime_s, + self->mtime_ns); }; static PyObject *dirstate_item_v1_state(dirstateItemObject *self) @@ -297,14 +296,30 @@ static PyObject *dirstate_item_v1_mtime( }; static PyObject *dirstate_item_need_delay(dirstateItemObject *self, - PyObject *value) + PyObject *now) { - long now; - if (!pylong_to_long(value, &now)) { + int now_s; + int now_ns; + if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) { return NULL; } - if (dirstate_item_c_v1_state(self) == 'n' && - dirstate_item_c_v1_mtime(self) == now) { + if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +}; + +static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self, + PyObject *other) +{ + int other_s; + int other_ns; + if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) { + return NULL; + } + if ((self->flags & dirstate_flag_has_file_mtime) && + self->mtime_s == other_s && self->mtime_ns == other_ns) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; @@ -324,7 +339,8 @@ dirstate_item_from_v1_data(char state, i t->flags = 0; t->mode = 0; t->size = 0; - t->mtime = 0; + t->mtime_s = 0; + t->mtime_ns = 0; if (state == 'm') { t->flags = (dirstate_flag_wc_tracked | @@ -360,7 +376,7 @@ dirstate_item_from_v1_data(char state, i dirstate_flag_has_file_mtime); t->mode = mode; t->size = size; - t->mtime = mtime; + t->mtime_s = mtime; } } else { PyErr_Format(PyExc_RuntimeError, @@ -395,7 +411,8 @@ static PyObject *dirstate_item_from_v2_m if (!t) { return NULL; } - if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) { + if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s, + &t->mtime_ns)) { return NULL; } if (t->flags & dirstate_flag_expected_state_is_modified) { @@ -431,8 +448,9 @@ static PyObject *dirstate_item_set_possi static PyObject 
*dirstate_item_set_clean(dirstateItemObject *self, PyObject *args) { - int size, mode, mtime; - if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) { + int size, mode, mtime_s, mtime_ns; + if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s, + &mtime_ns)) { return NULL; } self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked | @@ -440,7 +458,8 @@ static PyObject *dirstate_item_set_clean dirstate_flag_has_file_mtime; self->mode = mode; self->size = size; - self->mtime = mtime; + self->mtime_s = mtime_s; + self->mtime_ns = mtime_ns; Py_RETURN_NONE; } @@ -455,8 +474,9 @@ static PyObject *dirstate_item_set_untra { self->flags &= ~dirstate_flag_wc_tracked; self->mode = 0; - self->mtime = 0; self->size = 0; + self->mtime_s = 0; + self->mtime_ns = 0; Py_RETURN_NONE; } @@ -467,8 +487,9 @@ static PyObject *dirstate_item_drop_merg dirstate_flag_has_meaningful_data | dirstate_flag_has_file_mtime); self->mode = 0; - self->mtime = 0; self->size = 0; + self->mtime_s = 0; + self->mtime_ns = 0; } Py_RETURN_NONE; } @@ -485,6 +506,8 @@ static PyMethodDef dirstate_item_methods "return a \"mtime\" suitable for v1 serialization"}, {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O, "True if the stored mtime would be ambiguous with the current time"}, + {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to, + METH_O, "True if the stored mtime is likely equal to the given mtime"}, {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth, METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"}, {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth, @@ -855,11 +878,12 @@ static PyObject *pack_dirstate(PyObject Py_ssize_t nbytes, pos, l; PyObject *k, *v = NULL, *pn; char *p, *s; - int now; + int now_s; + int now_ns; - if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map, - &PyDict_Type, &copymap, &PyTuple_Type, &pl, - &now)) { + if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type, 
+ &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl, + &now_s, &now_ns)) { return NULL; } @@ -928,7 +952,7 @@ static PyObject *pack_dirstate(PyObject mode = dirstate_item_c_v1_mode(tuple); size = dirstate_item_c_v1_size(tuple); mtime = dirstate_item_c_v1_mtime(tuple); - if (state == 'n' && mtime == now) { + if (state == 'n' && tuple->mtime_s == now_s) { /* See pure/parsers.py:pack_dirstate for why we do * this. */ mtime = -1; diff --git a/mercurial/cext/util.h b/mercurial/cext/util.h --- a/mercurial/cext/util.h +++ b/mercurial/cext/util.h @@ -27,7 +27,8 @@ typedef struct { int flags; int mode; int size; - int mtime; + int mtime_s; + int mtime_ns; } dirstateItemObject; /* clang-format on */ diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -31,6 +31,10 @@ from . import ( util, ) +from .dirstateutils import ( + timestamp, +) + from .interfaces import ( dirstate as intdirstate, util as interfaceutil, @@ -66,7 +70,7 @@ def _getfsnow(vfs): '''Get "now" timestamp on filesystem''' tmpfd, tmpname = vfs.mkstemp() try: - return os.fstat(tmpfd)[stat.ST_MTIME] + return timestamp.mtime_of(os.fstat(tmpfd)) finally: os.close(tmpfd) vfs.unlink(tmpname) @@ -122,7 +126,7 @@ class dirstate(object): # UNC path pointing to root share (issue4557) self._rootdir = pathutil.normasprefix(root) self._dirty = False - self._lastnormaltime = 0 + self._lastnormaltime = timestamp.zero() self._ui = ui self._filecache = {} self._parentwriters = 0 @@ -440,7 +444,7 @@ class dirstate(object): for a in ("_map", "_branch", "_ignore"): if a in self.__dict__: delattr(self, a) - self._lastnormaltime = 0 + self._lastnormaltime = timestamp.zero() self._dirty = False self._parentwriters = 0 self._origpl = None @@ -639,7 +643,7 @@ class dirstate(object): s = os.lstat(self._join(filename)) mode = s.st_mode size = s.st_size - mtime = s[stat.ST_MTIME] + mtime = timestamp.mtime_of(s) return (mode, size, mtime) def _discoverpath(self, path, normed, 
ignoremissing, exists, storemap): @@ -720,7 +724,7 @@ class dirstate(object): def clear(self): self._map.clear() - self._lastnormaltime = 0 + self._lastnormaltime = timestamp.zero() self._dirty = True def rebuild(self, parent, allfiles, changedfiles=None): @@ -823,7 +827,7 @@ class dirstate(object): if now is None: # use the modification time of the newly created temporary file as the # filesystem's notion of 'now' - now = util.fstat(st)[stat.ST_MTIME] & _rangemask + now = timestamp.mtime_of(util.fstat(st)) # enough 'delaywrite' prevents 'pack_dirstate' from dropping # timestamp of each entries in dirstate, because of 'now > mtime' @@ -840,11 +844,12 @@ class dirstate(object): start = int(clock) - (int(clock) % delaywrite) end = start + delaywrite time.sleep(end - clock) - now = end # trust our estimate that the end is near now + # trust our estimate that the end is near now + now = timestamp.timestamp((end, 0)) break self._map.write(tr, st, now) - self._lastnormaltime = 0 + self._lastnormaltime = timestamp.zero() self._dirty = False def _dirignore(self, f): @@ -1377,17 +1382,9 @@ class dirstate(object): uadd(fn) continue - # This is equivalent to 'state, mode, size, time = dmap[fn]' but not - # written like that for performance reasons. dmap[fn] is not a - # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE - # opcode has fast paths when the value to be unpacked is a tuple or - # a list, but falls back to creating a full-fledged iterator in - # general. That is much slower than simply accessing and storing the - # tuple members one by one. 
t = dget(fn) mode = t.mode size = t.size - time = t.mtime if not st and t.tracked: dadd(fn) @@ -1412,12 +1409,9 @@ class dirstate(object): ladd(fn) else: madd(fn) - elif ( - time != st[stat.ST_MTIME] - and time != st[stat.ST_MTIME] & _rangemask - ): + elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)): ladd(fn) - elif st[stat.ST_MTIME] == lastnormaltime: + elif timestamp.mtime_of(st) == lastnormaltime: # fn may have just been marked as normal and it may have # changed in the same second without changing its size. # This can happen if we quickly do multiple commits. diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py --- a/mercurial/dirstatemap.py +++ b/mercurial/dirstatemap.py @@ -127,7 +127,6 @@ class _dirstatemapcommon(object): def set_clean(self, filename, mode, size, mtime): """mark a file as back to a clean state""" entry = self[filename] - mtime = mtime & rangemask size = size & rangemask entry.set_clean(mode, size, mtime) self._refresh_entry(filename, entry) diff --git a/mercurial/dirstateutils/timestamp.py b/mercurial/dirstateutils/timestamp.py new file mode 100644 --- /dev/null +++ b/mercurial/dirstateutils/timestamp.py @@ -0,0 +1,53 @@ +# Copyright Mercurial Contributors +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +import stat + + +rangemask = 0x7FFFFFFF + + +class timestamp(tuple): + """ + A Unix timestamp with nanoseconds precision, + modulo 2**31 seconds. + + A 2-tuple containing: + + `truncated_seconds`: seconds since the Unix epoch, + truncated to its lower 31 bits + + `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`. + """ + + def __new__(cls, value): + truncated_seconds, subsec_nanos = value + value = (truncated_seconds & rangemask, subsec_nanos) + return super(timestamp, cls).__new__(cls, value) + + +def zero(): + """ + Returns the `timestamp` at the Unix epoch. 
+ """ + return tuple.__new__(timestamp, (0, 0)) + + +def mtime_of(stat_result): + """ + Takes an `os.stat_result`-like object and returns a `timestamp` object + for its modification time. + """ + # https://docs.python.org/2/library/os.html#os.stat_float_times + # "For compatibility with older Python versions, + # accessing stat_result as a tuple always returns integers." + secs = stat_result[stat.ST_MTIME] + + # For now + subsec_nanos = 0 + + return timestamp((secs, subsec_nanos)) diff --git a/mercurial/dirstateutils/v2.py b/mercurial/dirstateutils/v2.py --- a/mercurial/dirstateutils/v2.py +++ b/mercurial/dirstateutils/v2.py @@ -107,7 +107,10 @@ def parse_nodes(map, copy_map, data, sta # Parse child nodes of this node recursively parse_nodes(map, copy_map, data, children_start, children_count) - item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s) + # Don’t yet use sub-second precision if it exists in the file, + # since other parts of the code still set it to zero. + mtime_ns = 0 + item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns) if not item.any_tracked: continue path = slice_with_len(data, path_start, path_len) @@ -147,8 +150,7 @@ class Node(object): copy_source_start = 0 copy_source_len = 0 if entry is not None: - flags, size, mtime_s = entry.v2_data() - mtime_ns = 0 + flags, size, mtime_s, mtime_ns = entry.v2_data() else: # There are no mtime-cached directories in the Python implementation flags = 0 @@ -249,7 +251,6 @@ def pack_dirstate(map, copy_map, now): written to the docket. Again, see more details on the on-disk format in `mercurial/helptext/internals/dirstate-v2`. 
""" - now = int(now) data = bytearray() root_nodes_start = 0 root_nodes_len = 0 diff --git a/mercurial/merge.py b/mercurial/merge.py --- a/mercurial/merge.py +++ b/mercurial/merge.py @@ -9,13 +9,13 @@ from __future__ import absolute_import import collections import errno -import stat import struct from .i18n import _ from .node import nullrev from .thirdparty import attr from .utils import stringutil +from .dirstateutils import timestamp from . import ( copies, encoding, @@ -1406,8 +1406,9 @@ def batchget(repo, mctx, wctx, wantfiled if wantfiledata: s = wfctx.lstat() mode = s.st_mode - mtime = s[stat.ST_MTIME] - filedata[f] = (mode, size, mtime) # for dirstate.normal + mtime = timestamp.mtime_of(s) + # for dirstate.update_file's parentfiledata argument: + filedata[f] = (mode, size, mtime) if i == 100: yield False, (i, f) i = 0 diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -99,7 +99,8 @@ class DirstateItem(object): _p2_info = attr.ib() _mode = attr.ib() _size = attr.ib() - _mtime = attr.ib() + _mtime_s = attr.ib() + _mtime_ns = attr.ib() _fallback_exec = attr.ib() _fallback_symlink = attr.ib() @@ -123,7 +124,8 @@ class DirstateItem(object): self._mode = None self._size = None - self._mtime = None + self._mtime_s = None + self._mtime_ns = None if parentfiledata is None: has_meaningful_mtime = False has_meaningful_data = False @@ -131,10 +133,10 @@ class DirstateItem(object): self._mode = parentfiledata[0] self._size = parentfiledata[1] if has_meaningful_mtime: - self._mtime = parentfiledata[2] + self._mtime_s, self._mtime_ns = parentfiledata[2] @classmethod - def from_v2_data(cls, flags, size, mtime): + def from_v2_data(cls, flags, size, mtime_s, mtime_ns): """Build a new DirstateItem object from V2 data""" has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE) has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME) @@ -170,7 +172,7 @@ class DirstateItem(object): 
p2_info=bool(flags & DIRSTATE_V2_P2_INFO), has_meaningful_data=has_mode_size, has_meaningful_mtime=has_meaningful_mtime, - parentfiledata=(mode, size, mtime), + parentfiledata=(mode, size, (mtime_s, mtime_ns)), fallback_exec=fallback_exec, fallback_symlink=fallback_symlink, ) @@ -207,13 +209,13 @@ class DirstateItem(object): wc_tracked=True, p1_tracked=True, has_meaningful_mtime=False, - parentfiledata=(mode, size, 42), + parentfiledata=(mode, size, (42, 0)), ) else: return cls( wc_tracked=True, p1_tracked=True, - parentfiledata=(mode, size, mtime), + parentfiledata=(mode, size, (mtime, 0)), ) else: raise RuntimeError(b'unknown state: %s' % state) @@ -224,7 +226,8 @@ class DirstateItem(object): This means the next status call will have to actually check its content to make sure it is correct. """ - self._mtime = None + self._mtime_s = None + self._mtime_ns = None def set_clean(self, mode, size, mtime): """mark a file as "clean" cancelling potential "possibly dirty call" @@ -238,7 +241,7 @@ class DirstateItem(object): self._p1_tracked = True self._mode = mode self._size = size - self._mtime = mtime + self._mtime_s, self._mtime_ns = mtime def set_tracked(self): """mark a file as tracked in the working copy @@ -250,7 +253,8 @@ class DirstateItem(object): # the files as needing lookup # # Consider dropping this in the future in favor of something less broad. 
- self._mtime = None + self._mtime_s = None + self._mtime_ns = None def set_untracked(self): """mark a file as untracked in the working copy @@ -260,7 +264,8 @@ class DirstateItem(object): self._wc_tracked = False self._mode = None self._size = None - self._mtime = None + self._mtime_s = None + self._mtime_ns = None def drop_merge_data(self): """remove all "merge-only" from a DirstateItem @@ -271,7 +276,8 @@ class DirstateItem(object): self._p2_info = False self._mode = None self._size = None - self._mtime = None + self._mtime_s = None + self._mtime_ns = None @property def mode(self): @@ -285,6 +291,14 @@ class DirstateItem(object): def mtime(self): return self.v1_mtime() + def mtime_likely_equal_to(self, other_mtime): + self_sec = self._mtime_s + if self_sec is None: + return False + self_ns = self._mtime_ns + other_sec, other_ns = other_mtime + return self_sec == other_sec and self_ns == other_ns + @property def state(self): """ @@ -440,7 +454,7 @@ class DirstateItem(object): flags |= DIRSTATE_V2_MODE_EXEC_PERM if stat.S_ISLNK(self.mode): flags |= DIRSTATE_V2_MODE_IS_SYMLINK - if self._mtime is not None: + if self._mtime_s is not None: flags |= DIRSTATE_V2_HAS_FILE_MTIME if self._fallback_exec is not None: @@ -456,7 +470,7 @@ class DirstateItem(object): # Note: we do not need to do anything regarding # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED # since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME - return (flags, self._size or 0, self._mtime or 0) + return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0) def v1_state(self): """return a "state" suitable for v1 serialization""" @@ -504,18 +518,18 @@ class DirstateItem(object): raise RuntimeError('untracked item') elif self.removed: return 0 - elif self._mtime is None: + elif self._mtime_s is None: return AMBIGUOUS_TIME elif self._p2_info: return AMBIGUOUS_TIME elif not self._p1_tracked: return AMBIGUOUS_TIME else: - return self._mtime + return self._mtime_s def 
need_delay(self, now): """True if the stored mtime would be ambiguous with the current time""" - return self.v1_state() == b'n' and self.v1_mtime() == now + return self.v1_state() == b'n' and self._mtime_s == now[0] def gettype(q): @@ -883,7 +897,6 @@ def parse_dirstate(dmap, copymap, st): def pack_dirstate(dmap, copymap, pl, now): - now = int(now) cs = stringio() write = cs.write write(b"".join(pl)) diff --git a/rust/hg-core/src/dirstate/entry.rs b/rust/hg-core/src/dirstate/entry.rs --- a/rust/hg-core/src/dirstate/entry.rs +++ b/rust/hg-core/src/dirstate/entry.rs @@ -14,14 +14,15 @@ pub enum EntryState { Merged, } -/// The C implementation uses all signed types. This will be an issue -/// either when 4GB+ source files are commonplace or in 2038, whichever -/// comes first. -#[derive(Debug, PartialEq, Copy, Clone)] +/// `size` and `mtime.seconds` are truncated to 31 bits. +/// +/// TODO: double-check status algorithm correctness for files +/// larger than 2 GiB or modified after 2038. +#[derive(Debug, Copy, Clone)] pub struct DirstateEntry { pub(crate) flags: Flags, mode_size: Option<(u32, u32)>, - mtime: Option, + mtime: Option, } bitflags! { @@ -37,7 +38,7 @@ bitflags! { } /// A Unix timestamp with nanoseconds precision -#[derive(Copy, Clone)] +#[derive(Debug, Copy, Clone)] pub struct TruncatedTimestamp { truncated_seconds: u32, /// Always in the `0 .. 1_000_000_000` range. @@ -90,6 +91,11 @@ impl TruncatedTimestamp { } } + pub fn to_integer_second(mut self) -> Self { + self.nanoseconds = 0; + self + } + /// The lower 31 bits of the number of seconds since the epoch. 
pub fn truncated_seconds(&self) -> u32 { self.truncated_seconds @@ -182,7 +188,7 @@ impl DirstateEntry { p1_tracked: bool, p2_info: bool, mode_size: Option<(u32, u32)>, - mtime: Option, + mtime: Option, fallback_exec: Option, fallback_symlink: Option, ) -> Self { @@ -191,9 +197,6 @@ impl DirstateEntry { assert!(mode & !RANGE_MASK_31BIT == 0); assert!(size & !RANGE_MASK_31BIT == 0); } - if let Some(mtime) = mtime { - assert!(mtime & !RANGE_MASK_31BIT == 0); - } let mut flags = Flags::empty(); flags.set(Flags::WDIR_TRACKED, wdir_tracked); flags.set(Flags::P1_TRACKED, p1_tracked); @@ -252,6 +255,9 @@ impl DirstateEntry { let mode = u32::try_from(mode).unwrap(); let size = u32::try_from(size).unwrap(); let mtime = u32::try_from(mtime).unwrap(); + let mtime = + TruncatedTimestamp::from_already_truncated(mtime, 0) + .unwrap(); Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: Some((mode, size)), @@ -344,7 +350,7 @@ impl DirstateEntry { bool, bool, Option<(u32, u32)>, - Option, + Option, Option, Option, ) { @@ -429,7 +435,7 @@ impl DirstateEntry { } else if !self.flags.contains(Flags::P1_TRACKED) { MTIME_UNSET } else if let Some(mtime) = self.mtime { - i32::try_from(mtime).unwrap() + i32::try_from(mtime.truncated_seconds()).unwrap() } else { MTIME_UNSET } @@ -501,6 +507,10 @@ impl DirstateEntry { } } + pub fn truncated_mtime(&self) -> Option { + self.mtime + } + pub fn drop_merge_data(&mut self) { if self.flags.contains(Flags::P2_INFO) { self.flags.remove(Flags::P2_INFO); @@ -513,9 +523,13 @@ impl DirstateEntry { self.mtime = None } - pub fn set_clean(&mut self, mode: u32, size: u32, mtime: u32) { + pub fn set_clean( + &mut self, + mode: u32, + size: u32, + mtime: TruncatedTimestamp, + ) { let size = size & RANGE_MASK_31BIT; - let mtime = mtime & RANGE_MASK_31BIT; self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED); self.mode_size = Some((mode, size)); self.mtime = Some(mtime); @@ -577,8 +591,13 @@ impl DirstateEntry { } /// True if the stored mtime 
would be ambiguous with the current time - pub fn need_delay(&self, now: i32) -> bool { - self.state() == EntryState::Normal && self.mtime() == now + pub fn need_delay(&self, now: TruncatedTimestamp) -> bool { + if let Some(mtime) = self.mtime { + self.state() == EntryState::Normal + && mtime.truncated_seconds() == now.truncated_seconds() + } else { + false + } } } diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs --- a/rust/hg-core/src/dirstate/parsers.rs +++ b/rust/hg-core/src/dirstate/parsers.rs @@ -135,6 +135,3 @@ pub fn pack_entry( packed.extend(source.as_bytes()); } } - -/// Seconds since the Unix epoch -pub struct Timestamp(pub i64); diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs --- a/rust/hg-core/src/dirstate/status.rs +++ b/rust/hg-core/src/dirstate/status.rs @@ -12,6 +12,7 @@ use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::{ + dirstate::TruncatedTimestamp, utils::hg_path::{HgPath, HgPathError}, PatternError, }; @@ -64,7 +65,7 @@ pub struct StatusOptions { /// Remember the most recent modification timeslot for status, to make /// sure we won't miss future size-preserving file content modifications /// that happen within the same timeslot. 
- pub last_normal_time: i64, + pub last_normal_time: TruncatedTimestamp, /// Whether we are on a filesystem with UNIX-like exec flags pub check_exec: bool, pub list_clean: bool, diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -1,7 +1,6 @@ use bytes_cast::BytesCast; use micro_timer::timed; use std::borrow::Cow; -use std::convert::TryInto; use std::path::PathBuf; use super::on_disk; @@ -11,7 +10,6 @@ use super::path_with_basename::WithBasen use crate::dirstate::parsers::pack_entry; use crate::dirstate::parsers::packed_entry_size; use crate::dirstate::parsers::parse_dirstate_entries; -use crate::dirstate::parsers::Timestamp; use crate::dirstate::CopyMapIter; use crate::dirstate::StateMapIter; use crate::dirstate::TruncatedTimestamp; @@ -932,10 +930,9 @@ impl OwningDirstateMap { pub fn pack_v1( &mut self, parents: DirstateParents, - now: Timestamp, + now: TruncatedTimestamp, ) -> Result, DirstateError> { let map = self.get_map_mut(); - let now: i32 = now.0.try_into().expect("time overflow"); let mut ambiguous_mtimes = Vec::new(); // Optizimation (to be measured?): pre-compute size to avoid `Vec` // reallocations @@ -981,12 +978,10 @@ impl OwningDirstateMap { #[timed] pub fn pack_v2( &mut self, - now: Timestamp, + now: TruncatedTimestamp, can_append: bool, ) -> Result<(Vec, Vec, bool), DirstateError> { let map = self.get_map_mut(); - // TODO: how do we want to handle this in 2038? 
- let now: i32 = now.0.try_into().expect("time overflow"); let mut paths = Vec::new(); for node in map.iter_nodes() { let node = node?; diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -317,7 +317,7 @@ impl Node { &self, ) -> Result { if self.has_entry() { - Ok(dirstate_map::NodeData::Entry(self.assume_entry())) + Ok(dirstate_map::NodeData::Entry(self.assume_entry()?)) } else if let Some(mtime) = self.cached_directory_mtime()? { Ok(dirstate_map::NodeData::CachedDirectory { mtime }) } else { @@ -357,7 +357,7 @@ impl Node { file_type | permisions } - fn assume_entry(&self) -> DirstateEntry { + fn assume_entry(&self) -> Result { // TODO: convert through raw bits instead? let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED); let p1_tracked = self.flags().contains(Flags::P1_TRACKED); @@ -372,11 +372,19 @@ impl Node { let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) { - Some(self.mtime.truncated_seconds.into()) + // TODO: replace this by `self.mtime.try_into()?` to use + // sub-second precision from the file. + // We don’t do this yet because other parts of the code + // always set it to zero. 
+ let mtime = TruncatedTimestamp::from_already_truncated( + self.mtime.truncated_seconds.get(), + 0, + )?; + Some(mtime) } else { None }; - DirstateEntry::from_v2_data( + Ok(DirstateEntry::from_v2_data( wdir_tracked, p1_tracked, p2_info, @@ -384,14 +392,14 @@ impl Node { mtime, None, None, - ) + )) } pub(super) fn entry( &self, ) -> Result, DirstateV2ParseError> { if self.has_entry() { - Ok(Some(self.assume_entry())) + Ok(Some(self.assume_entry()?)) } else { Ok(None) } @@ -450,10 +458,7 @@ impl Node { }; let mtime = if let Some(m) = mtime_opt { flags.insert(Flags::HAS_FILE_MTIME); - PackedTruncatedTimestamp { - truncated_seconds: m.into(), - nanoseconds: 0.into(), - } + m.into() } else { PackedTruncatedTimestamp::null() }; diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs +++ b/rust/hg-core/src/dirstate_tree/status.rs @@ -501,9 +501,6 @@ impl<'a, 'tree, 'on_disk> StatusCommon<' fn truncate_u64(value: u64) -> i32 { (value & 0x7FFF_FFFF) as i32 } - fn truncate_i64(value: i64) -> i32 { - (value & 0x7FFF_FFFF) as i32 - } let entry = dirstate_node .entry()? 
@@ -531,10 +528,19 @@ impl<'a, 'tree, 'on_disk> StatusCommon<' .modified .push(hg_path.detach_from_tree()) } else { - let mtime = mtime_seconds(fs_metadata); - if truncate_i64(mtime) != entry.mtime() - || mtime == self.options.last_normal_time - { + let mtime_looks_clean; + if let Some(dirstate_mtime) = entry.truncated_mtime() { + let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata) + .expect("OS/libc does not support mtime?") + // For now don’t use sub-second precision for file mtimes + .to_integer_second(); + mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime) + && !fs_mtime.likely_equal(self.options.last_normal_time) + } else { + // No mtime in the dirstate entry + mtime_looks_clean = false + }; + if !mtime_looks_clean { self.outcome .lock() .unwrap() @@ -690,15 +696,6 @@ impl<'a, 'tree, 'on_disk> StatusCommon<' } } -#[cfg(unix)] // TODO -fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 { - // Going through `Metadata::modified()` would be portable, but would take - // care to construct a `SystemTime` value with sub-second precision just - // for us to throw that away here. 
- use std::os::unix::fs::MetadataExt; - metadata.mtime() -} - struct DirEntry { base_name: HgPathBuf, full_path: PathBuf, diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs --- a/rust/hg-cpython/src/dirstate.rs +++ b/rust/hg-cpython/src/dirstate.rs @@ -54,7 +54,7 @@ pub fn init_module(py: Python, package: matcher: PyObject, ignorefiles: PyList, check_exec: bool, - last_normal_time: i64, + last_normal_time: (u32, u32), list_clean: bool, list_ignored: bool, list_unknown: bool, diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -18,11 +18,10 @@ use cpython::{ use crate::{ dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator}, - dirstate::item::DirstateItem, + dirstate::item::{timestamp, DirstateItem}, pybytes_deref::PyBytesDeref, }; use hg::{ - dirstate::parsers::Timestamp, dirstate::StateMapIter, dirstate_tree::dirstate_map::DirstateMap as TreeDirstateMap, dirstate_tree::on_disk::DirstateV2ParseError, @@ -195,9 +194,9 @@ py_class!(pub class DirstateMap |py| { &self, p1: PyObject, p2: PyObject, - now: PyObject + now: (u32, u32) ) -> PyResult { - let now = Timestamp(now.extract(py)?); + let now = timestamp(py, now)?; let mut inner = self.inner(py).borrow_mut(); let parents = DirstateParents { @@ -219,10 +218,10 @@ py_class!(pub class DirstateMap |py| { /// instead of written to a new data file (False). 
def write_v2( &self, - now: PyObject, + now: (u32, u32), can_append: bool, ) -> PyResult { - let now = Timestamp(now.extract(py)?); + let now = timestamp(py, now)?; let mut inner = self.inner(py).borrow_mut(); let result = inner.pack_v2(now, can_append); diff --git a/rust/hg-cpython/src/dirstate/item.rs b/rust/hg-cpython/src/dirstate/item.rs --- a/rust/hg-cpython/src/dirstate/item.rs +++ b/rust/hg-cpython/src/dirstate/item.rs @@ -9,6 +9,7 @@ use cpython::Python; use cpython::PythonObject; use hg::dirstate::DirstateEntry; use hg::dirstate::EntryState; +use hg::dirstate::TruncatedTimestamp; use std::cell::Cell; use std::convert::TryFrom; @@ -22,7 +23,7 @@ py_class!(pub class DirstateItem |py| { p2_info: bool = false, has_meaningful_data: bool = true, has_meaningful_mtime: bool = true, - parentfiledata: Option<(u32, u32, u32)> = None, + parentfiledata: Option<(u32, u32, (u32, u32))> = None, fallback_exec: Option = None, fallback_symlink: Option = None, @@ -34,7 +35,7 @@ py_class!(pub class DirstateItem |py| { mode_size_opt = Some((mode, size)) } if has_meaningful_mtime { - mtime_opt = Some(mtime) + mtime_opt = Some(timestamp(py, mtime)?) 
} } let entry = DirstateEntry::from_v2_data( @@ -191,10 +192,19 @@ py_class!(pub class DirstateItem |py| { Ok(mtime) } - def need_delay(&self, now: i32) -> PyResult { + def need_delay(&self, now: (u32, u32)) -> PyResult { + let now = timestamp(py, now)?; Ok(self.entry(py).get().need_delay(now)) } + def mtime_likely_equal_to(&self, other: (u32, u32)) -> PyResult { + if let Some(mtime) = self.entry(py).get().truncated_mtime() { + Ok(mtime.likely_equal(timestamp(py, other)?)) + } else { + Ok(false) + } + } + @classmethod def from_v1_data( _cls, @@ -220,8 +230,9 @@ py_class!(pub class DirstateItem |py| { &self, mode: u32, size: u32, - mtime: u32, + mtime: (u32, u32), ) -> PyResult { + let mtime = timestamp(py, mtime)?; self.update(py, |entry| entry.set_clean(mode, size, mtime)); Ok(PyNone) } @@ -261,3 +272,15 @@ impl DirstateItem { self.entry(py).set(entry) } } + +pub(crate) fn timestamp( + py: Python<'_>, + (s, ns): (u32, u32), +) -> PyResult { + TruncatedTimestamp::from_already_truncated(s, ns).map_err(|_| { + PyErr::new::( + py, + "expected mtime truncated to 31 bits", + ) + }) +} diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs --- a/rust/hg-cpython/src/dirstate/status.rs +++ b/rust/hg-cpython/src/dirstate/status.rs @@ -9,6 +9,7 @@ //! `hg-core` crate. From Python, this will be seen as //! `rustext.dirstate.status`. 
+use crate::dirstate::item::timestamp; use crate::{dirstate::DirstateMap, exceptions::FallbackError}; use cpython::exc::OSError; use cpython::{ @@ -102,12 +103,13 @@ pub fn status_wrapper( root_dir: PyObject, ignore_files: PyList, check_exec: bool, - last_normal_time: i64, + last_normal_time: (u32, u32), list_clean: bool, list_ignored: bool, list_unknown: bool, collect_traversed_dirs: bool, ) -> PyResult { + let last_normal_time = timestamp(py, last_normal_time)?; let bytes = root_dir.extract::(py)?; let root_dir = get_path_from_bytes(bytes.data(py)); diff --git a/rust/rhg/src/commands/status.rs b/rust/rhg/src/commands/status.rs --- a/rust/rhg/src/commands/status.rs +++ b/rust/rhg/src/commands/status.rs @@ -11,6 +11,7 @@ use crate::utils::path_utils::relativize use clap::{Arg, SubCommand}; use hg; use hg::config::Config; +use hg::dirstate::TruncatedTimestamp; use hg::errors::HgError; use hg::manifest::Manifest; use hg::matchers::AlwaysMatcher; @@ -180,7 +181,7 @@ pub fn run(invocation: &crate::CliInvoca // hence be stored on dmap. Using a value that assumes we aren't // below the time resolution granularity of the FS and the // dirstate. 
- last_normal_time: 0, + last_normal_time: TruncatedTimestamp::new_truncate(0, 0), // we're currently supporting file systems with exec flags only // anyway check_exec: true, diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py --- a/tests/fakedirstatewritetime.py +++ b/tests/fakedirstatewritetime.py @@ -15,6 +15,7 @@ from mercurial import ( policy, registrar, ) +from mercurial.dirstateutils import timestamp from mercurial.utils import dateutil try: @@ -40,9 +41,8 @@ has_rust_dirstate = policy.importrust('d def pack_dirstate(fakenow, orig, dmap, copymap, pl, now): # execute what original parsers.pack_dirstate should do actually # for consistency - actualnow = int(now) for f, e in dmap.items(): - if e.need_delay(actualnow): + if e.need_delay(now): e.set_possibly_dirty() return orig(dmap, copymap, pl, fakenow) @@ -62,6 +62,7 @@ def fakewrite(ui, func): # parsing 'fakenow' in YYYYmmddHHMM format makes comparison between # 'fakenow' value and 'touch -t YYYYmmddHHMM' argument easy fakenow = dateutil.parsedate(fakenow, [b'%Y%m%d%H%M'])[0] + fakenow = timestamp.timestamp((fakenow, 0)) if has_rust_dirstate: # The Rust implementation does not use public parse/pack dirstate