# HG changeset patch # User Simon Sapin # Date 2021-10-15 14:12:00 # Node ID 1730b2fceaa1d75eb6b63bd2e7c556138fee5ad7 # Parent e9faae0f445c9f22757a3ded3265c4c273c33d5c dirstate-v2: adds a flag to mark a file as modified Right now, a files with a file system state that requires a lookup (same size, different mtime) will requires a lookup. If the result of that lookup is a modified files, it will remains ambiguous, requiring a lookup on the next status run too. To fix this, we introduce a dedicated flag in the new format. Such flag will allow to record such file as "known modified" avoiding an extra lookup later. As None of the associate code currently exist in the status code, we do the minimal implementation: if we read a dirstate entry with this flag set, we make it as "ambiguous" so that the next status code has to look it up. The same as it would have to without this flag existing anyway. Differential Revision: https://phab.mercurial-scm.org/D11681 diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -139,18 +139,16 @@ static inline bool dirstate_item_c_track static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self) { - const unsigned char mask = dirstate_flag_wc_tracked | - dirstate_flag_p1_tracked | - dirstate_flag_p2_info; + const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked | + dirstate_flag_p2_info; return (self->flags & mask); } static inline bool dirstate_item_c_added(dirstateItemObject *self) { - const unsigned char mask = - (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked | - dirstate_flag_p2_info); - const unsigned char target = dirstate_flag_wc_tracked; + const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked | + dirstate_flag_p2_info); + const int target = dirstate_flag_wc_tracked; return (self->flags & mask) == target; } @@ -237,7 +235,7 @@ static inline int dirstate_item_c_v1_mti static PyObject *dirstate_item_v2_data(dirstateItemObject *self) { - unsigned char flags = self->flags; + int flags = self->flags; int mode = dirstate_item_c_v1_mode(self); if ((mode & S_IXUSR) != 0) { flags |= dirstate_flag_mode_exec_perm; @@ -249,7 +247,7 @@ static PyObject *dirstate_item_v2_data(d } else { flags &= ~dirstate_flag_mode_is_symlink; } - return Py_BuildValue("Bii", flags, self->size, self->mtime); + return Py_BuildValue("iii", flags, self->size, self->mtime); }; static PyObject *dirstate_item_v1_state(dirstateItemObject *self) @@ -372,9 +370,14 @@ static PyObject *dirstate_item_from_v2_m if (!t) { return NULL; } - if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) { + if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) { return NULL; } + if (t->flags & dirstate_flag_expected_state_is_modified) { + t->flags &= ~(dirstate_flag_expected_state_is_modified | + dirstate_flag_has_meaningful_data | + dirstate_flag_has_file_mtime); + } t->mode = 0; if (t->flags & dirstate_flag_has_meaningful_data) { if (t->flags & dirstate_flag_mode_exec_perm) { diff --git a/mercurial/cext/util.h b/mercurial/cext/util.h --- a/mercurial/cext/util.h +++ b/mercurial/cext/util.h @@ -24,21 +24,22 @@ /* clang-format off */ typedef struct { PyObject_HEAD - unsigned char flags; + int flags; int mode; int size; int mtime; } dirstateItemObject; /* clang-format on */ -static const unsigned char dirstate_flag_wc_tracked = 1; -static const unsigned char dirstate_flag_p1_tracked = 1 << 1; -static const unsigned char dirstate_flag_p2_info = 1 << 2; -static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3; -static const unsigned char dirstate_flag_has_file_mtime = 1 << 4; -static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5; -static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6; -static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7; +static const int dirstate_flag_wc_tracked = 1; +static const int dirstate_flag_p1_tracked = 1 << 1; +static const int dirstate_flag_p2_info = 1 << 2; +static const int dirstate_flag_has_meaningful_data = 1 << 3; +static const int dirstate_flag_has_file_mtime = 1 << 4; +static const int dirstate_flag_has_directory_mtime = 1 << 5; +static const int dirstate_flag_mode_exec_perm = 1 << 6; +static const int dirstate_flag_mode_is_symlink = 1 << 7; +static const int dirstate_flag_expected_state_is_modified = 1 << 8; extern PyTypeObject dirstateItemType; #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType) diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt --- a/mercurial/helptext/internals/dirstate-v2.txt +++ b/mercurial/helptext/internals/dirstate-v2.txt @@ -383,6 +383,7 @@ Node components are: HAS_DIRECTORY_MTIME = 1 << 5 MODE_EXEC_PERM = 1 << 6 MODE_IS_SYMLINK = 1 << 7 + EXPECTED_STATE_IS_MODIFIED = 1 << 8 The meaning of each bit is described below. @@ -461,8 +462,7 @@ by enabling it to skip `readdir` in more Must be unset for untracked nodes. For files tracked anywhere, if this is set: - The `size` field is the expected file size, - in bytes truncated its lower to 31 bits, - for the file to be clean. + in bytes truncated its lower to 31 bits. - The expected execute permission for the file’s owner is given by `MODE_EXEC_PERM` - The expected file type is given by `MODE_IS_SIMLINK`: @@ -474,8 +474,7 @@ by enabling it to skip `readdir` in more Must be unset for untracked nodes. If this and `HAS_DIRECTORY_MTIME` are both unset, the `mtime` field is unused (set to zero). - If this is set, `mtime` is the modification time - expected for the file to be considered clean. + If this is set, `mtime` is the expected modification time. `HAS_DIRECTORY_MTIME` Must be unset for file tracked anywhere. @@ -514,3 +513,20 @@ by enabling it to skip `readdir` in more If `HAS_MODE_AND_SIZE` is set, this indicates whether the file is expected to be a symlink as opposed to a normal file. + +`EXPECTED_STATE_IS_MODIFIED` + Must be unset for untracked nodes. + For: + - a file tracked anywhere + - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`) + - if that metadata matches + metadata found in the working directory with `stat` + This bit indicates the status of the file. + If set, the status is modified. If unset, it is clean. + + In cases where `hg status` needs to read the contents of a file + because metadata is ambiguous, this bit lets it record the result + if the result is modified so that a future run of `hg status` + does not need to do the same again. + It is valid to never set this bit, + and consider expected metadata ambiguous if it is set. diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -53,6 +53,7 @@ DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7 +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8 @attr.s(slots=True, init=False) @@ -123,7 +124,15 @@ class DirstateItem(object): def from_v2_data(cls, flags, size, mtime): """Build a new DirstateItem object from V2 data""" has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE) + has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME) mode = None + + if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED: + # we do not have support for this flag in the code yet, + # force a lookup for this file. + has_mode_size = False + has_meaningful_mtime = False + if has_mode_size: assert stat.S_IXUSR == 0o100 if flags & DIRSTATE_V2_MODE_EXEC_PERM: @@ -139,7 +148,7 @@ class DirstateItem(object): p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED), p2_info=bool(flags & DIRSTATE_V2_P2_INFO), has_meaningful_data=has_mode_size, - has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME), + has_meaningful_mtime=has_meaningful_mtime, parentfiledata=(mode, size, mtime), ) diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -110,6 +110,7 @@ bitflags! { const HAS_DIRECTORY_MTIME = 1 << 5; const MODE_EXEC_PERM = 1 << 6; const MODE_IS_SYMLINK = 1 << 7; + const EXPECTED_STATE_IS_MODIFIED = 1 << 8; } } @@ -351,12 +352,16 @@ impl Node { let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED); let p1_tracked = self.flags().contains(Flags::P1_TRACKED); let p2_info = self.flags().contains(Flags::P2_INFO); - let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) { + let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) + && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) + { Some((self.synthesize_unix_mode(), self.size.into())) } else { None }; - let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) { + let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) + && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) + { Some(self.mtime.truncated_seconds.into()) } else { None