# HG changeset patch # User Raphaël Gomès # Date 2020-05-12 09:36:52 # Node ID c802ec4f71967b8775ae8f48fd3a0961c1338f68 # Parent dc60ba32d43b19938c952dec6942a1ece03068e9 rust-status: collect traversed directories if required Some commands (`hg purge` notably) register the `traversedir` callback on their matcher to run said callback every time a directory is traversed. This is the first of three patches, further broadening Rust support for status. Unfortunately, there is no way around collecting a full `Vec` (or any other owned data structure, like a radix tree) and pushing it back up to the Python layer since keeping the Python callback in a closure would mean giving up multithreading because of the GIL, which is obviously unacceptable. Performance is still a lot better than the Python+C path. Running `hg clean/purge` on Netbeans' repo (100k files): ``` | No-op | 30% unknown -------------------------- Rust | 1.0s | 1.67s C | 2.0s | 2.87s ``` Differential Revision: https://phab.mercurial-scm.org/D8518 diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs --- a/rust/hg-core/src/dirstate/status.rs +++ b/rust/hg-core/src/dirstate/status.rs @@ -221,6 +221,7 @@ fn walk_explicit<'a>( dmap: &'a DirstateMap, root_dir: impl AsRef + Sync + Send + 'a, options: StatusOptions, + traversed_sender: crossbeam::Sender, ) -> impl ParallelIterator> { files .unwrap_or(&DEFAULT_WORK) @@ -255,6 +256,13 @@ fn walk_explicit<'a>( Some(Ok((normalized, Dispatch::Unknown))) } else { if file_type.is_dir() { + if options.collect_traversed_dirs { + // The receiver always outlives the sender, + // so unwrap. + traversed_sender + .send(normalized.to_owned()) + .unwrap() + } Some(Ok(( normalized, Dispatch::Directory { @@ -302,6 +310,9 @@ pub struct StatusOptions { pub list_clean: bool, pub list_unknown: bool, pub list_ignored: bool, + /// Whether to collect traversed dirs for applying a callback later. + /// Used by `hg purge` for example. 
+ pub collect_traversed_dirs: bool, } /// Dispatch a single entry (file, folder, symlink...) found during `traverse`. @@ -319,6 +330,7 @@ fn handle_traversed_entry<'a>( options: StatusOptions, filename: HgPathBuf, dir_entry: DirEntry, + traversed_sender: crossbeam::Sender, ) -> IoResult<()> { let file_type = dir_entry.file_type()?; let entry_option = dmap.get(&filename); @@ -341,6 +353,7 @@ fn handle_traversed_entry<'a>( options, entry_option, filename, + traversed_sender, ); } else if file_type.is_file() || file_type.is_symlink() { if let Some(entry) = entry_option { @@ -407,6 +420,7 @@ fn handle_traversed_dir<'a>( options: StatusOptions, entry_option: Option<&'a DirstateEntry>, directory: HgPathBuf, + traversed_sender: crossbeam::Sender, ) { scope.spawn(move |_| { // Nested `if` until `rust-lang/rust#53668` is stable @@ -433,6 +447,7 @@ fn handle_traversed_dir<'a>( ignore_fn, dir_ignore_fn, options, + traversed_sender, ) .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap()) } @@ -451,9 +466,15 @@ fn traverse_dir<'a>( ignore_fn: &IgnoreFnType, dir_ignore_fn: &IgnoreFnType, options: StatusOptions, + traversed_sender: crossbeam::Sender, ) -> IoResult<()> { let directory = directory.as_ref(); + if options.collect_traversed_dirs { + // The receiver always outlives the sender, so unwrap. 
+ traversed_sender.send(directory.to_owned()).unwrap() + } + let visit_entries = match matcher.visit_children_set(directory) { VisitChildrenSet::Empty => return Ok(()), VisitChildrenSet::This | VisitChildrenSet::Recursive => None, @@ -510,6 +531,7 @@ fn traverse_dir<'a>( options, filename, dir_entry, + traversed_sender.clone(), )?; } } @@ -533,6 +555,7 @@ fn traverse<'a>( dir_ignore_fn: &IgnoreFnType, options: StatusOptions, results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, + traversed_sender: crossbeam::Sender, ) -> IoResult<()> { let root_dir = root_dir.as_ref(); @@ -550,6 +573,7 @@ fn traverse<'a>( &ignore_fn, &dir_ignore_fn, options, + traversed_sender, )?; // Disconnect the channel so the receiver stops waiting @@ -640,11 +664,14 @@ pub struct DirstateStatus<'a> { pub ignored: Vec>, pub unknown: Vec>, pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>, + /// Only filled if `collect_traversed_dirs` is `true` + pub traversed: Vec, } #[timed] fn build_response<'a>( results: impl IntoIterator, Dispatch)>, + traversed: Vec, ) -> (Vec>, DirstateStatus<'a>) { let mut lookup = vec![]; let mut modified = vec![]; @@ -683,6 +710,7 @@ fn build_response<'a>( ignored, unknown, bad, + traversed, }, ) } @@ -849,8 +877,17 @@ pub fn status<'a: 'c, 'b: 'c, 'c>( let files = matcher.file_set(); + // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not. + let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded(); + // Step 1: check the files explicitly mentioned by the user - let explicit = walk_explicit(files, &dmap, root_dir, options); + let explicit = walk_explicit( + files, + &dmap, + root_dir, + options, + traversed_sender.clone(), + ); // Collect results into a `Vec` because we do very few lookups in most // cases. 
@@ -888,6 +925,7 @@ pub fn status<'a: 'c, 'b: 'c, 'c>( &dir_ignore_fn, options, &mut results, + traversed_sender.clone(), )?; } } @@ -911,5 +949,9 @@ pub fn status<'a: 'c, 'b: 'c, 'c>( } } - Ok((build_response(results), warnings)) + // Close the channel + drop(traversed_sender); + let traversed_dirs = traversed_recv.into_iter().collect(); + + Ok((build_response(results, traversed_dirs), warnings)) }