# HG changeset patch # User Spencer Baugh # Date 2023-08-02 14:33:11 # Node ID 76387f79befe2cd8d55bd32ac5ec19f3a275e467 # Parent 26407210710544ff7f14ac6135369fd249265578 rust-status: only visit parts of the tree requested by the matcher This is an optimization that the matcher is designed to support, but we weren't doing it until now. This is primarily relevant for supporting "hg status [FILES]", where this optimization is crucial for getting good performance (without this optimization, that command will still scan the entire tree, and just filter it down after the fact). When this optimization fires we have to return false from traverse_fs_directory_and_dirstate, representing that that part of the tree *might* have new files which we didn't see because we skipped parts of it. This only affects the cached result of the status, and is necessary to make future status operations (which might use a different matcher) work properly. diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs +++ b/rust/hg-core/src/dirstate_tree/status.rs @@ -8,7 +8,7 @@ use crate::dirstate_tree::dirstate_map:: use crate::dirstate_tree::dirstate_map::NodeRef; use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::matchers::get_ignore_function; -use crate::matchers::Matcher; +use crate::matchers::{Matcher, VisitChildrenSet}; use crate::utils::files::get_bytes_from_os_string; use crate::utils::files::get_bytes_from_path; use crate::utils::files::get_path_from_bytes; @@ -382,6 +382,16 @@ impl<'a, 'tree, 'on_disk> StatusCommon<' false } + fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool { + match set { + VisitChildrenSet::This | VisitChildrenSet::Recursive => true, + VisitChildrenSet::Empty => false, + VisitChildrenSet::Set(children_to_visit) => { + children_to_visit.contains(basename) + } + } + } + /// Returns whether all child entries of the filesystem directory have a /// 
corresponding dirstate node or are ignored. fn traverse_fs_directory_and_dirstate<'ancestor>( @@ -393,14 +403,24 @@ impl<'a, 'tree, 'on_disk> StatusCommon<' cached_directory_mtime: Option<TruncatedTimestamp>, is_at_repo_root: bool, ) -> Result<bool, DirstateV2ParseError> { + let children_set = self.matcher.visit_children_set(directory_hg_path); + if let VisitChildrenSet::Empty = children_set { + return Ok(false); + } if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) { dirstate_nodes .par_iter() .map(|dirstate_node| { let fs_path = &directory_entry.fs_path; - let fs_path = fs_path.join(get_path_from_bytes( - dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), - )); + let basename = + dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(); + let fs_path = fs_path.join(get_path_from_bytes(basename)); + if !Self::should_visit( + &children_set, + HgPath::new(basename), + ) { + return Ok(()); + } match std::fs::symlink_metadata(&fs_path) { Ok(fs_metadata) => { let file_type = fs_metadata.file_type().into(); @@ -483,6 +503,15 @@ impl<'a, 'tree, 'on_disk> StatusCommon<' .par_bridge() .map(|pair| { use itertools::EitherOrBoth::*; + let basename = match &pair { + Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new( + dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), + ), + Right(fs_entry) => &fs_entry.hg_path, + }; + if !Self::should_visit(&children_set, basename) { + return Ok(false); + } let has_dirstate_node_or_is_ignored = match pair { Both(dirstate_node, fs_entry) => { self.traverse_fs_and_dirstate(