# HG changeset patch # User Simon Sapin # Date 2021-03-30 12:15:23 # Node ID 787ff5d21bcdd68720931767b0b07ce3d0a9aa40 # Parent 102a50746bc55342e39c4823b5de42a25b6c0132 dirstate-tree: Make Rust DirstateMap bindings go through a trait object This changeset starts a series that adds an experiment to make status faster by changing the dirstate (first only in memory and later also on disk) to be shaped as a tree matching the directory structure, instead of the current flat collection of entries. The status algorithm can then traverse this tree dirstate at the same time as it traverses the filesystem. We (Octobus) have made prototypes that show promising results but are prone to bitrot. We would like to start upstreaming some experimental Rust code that goes in this direction, but to avoid disrupting users it should only be enabled by some run-time opt-in while keeping the existing dirstate structure and status algorithm as-is. The `DirstateMap` type and `status` function look like the appropriate boundary. This adds a new trait that abstracts everything Python bindings need and makes those bindings go through a `dyn` trait object. Later we’ll have two implementations of this trait, and the same bindings can use either. Differential Revision: https://phab.mercurial-scm.org/D10362 diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs --- a/rust/hg-core/src/dirstate.rs +++ b/rust/hg-core/src/dirstate.rs @@ -9,7 +9,6 @@ use crate::errors::HgError; use crate::revlog::Node; use crate::{utils::hg_path::HgPathBuf, FastHashMap}; use bytes_cast::{unaligned, BytesCast}; -use std::collections::hash_map; use std::convert::TryFrom; pub mod dirs_multiset; @@ -51,10 +50,12 @@ struct RawEntry { pub const SIZE_FROM_OTHER_PARENT: i32 = -2; pub type StateMap = FastHashMap; -pub type StateMapIter<'a> = hash_map::Iter<'a, HgPathBuf, DirstateEntry>; +pub type StateMapIter<'a> = + Box + Send + 'a>; pub type CopyMap = FastHashMap; -pub type CopyMapIter<'a> = hash_map::Iter<'a, HgPathBuf, HgPathBuf>; +pub type CopyMapIter<'a> = + Box + Send + 'a>; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum EntryState { diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs --- a/rust/hg-core/src/dirstate/dirs_multiset.rs +++ b/rust/hg-core/src/dirstate/dirs_multiset.rs @@ -14,7 +14,7 @@ use crate::{ files, hg_path::{HgPath, HgPathBuf, HgPathError}, }, - DirstateEntry, DirstateMapError, FastHashMap, StateMap, + DirstateEntry, DirstateMapError, FastHashMap, }; use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet}; @@ -30,14 +30,14 @@ impl DirsMultiset { /// Initializes the multiset from a dirstate. /// /// If `skip_state` is provided, skips dirstate entries with equal state. - pub fn from_dirstate( - dirstate: &StateMap, + pub fn from_dirstate<'a>( + dirstate: impl IntoIterator, skip_state: Option, ) -> Result { let mut multiset = DirsMultiset { inner: FastHashMap::default(), }; - for (filename, DirstateEntry { state, .. }) in dirstate.iter() { + for (filename, DirstateEntry { state, .. }) in dirstate { // This `if` is optimized out of the loop if let Some(skip) = skip_state { if skip != *state { @@ -207,6 +207,7 @@ impl<'a> DirsChildrenMultiset<'a> { #[cfg(test)] mod tests { use super::*; + use crate::StateMap; #[test] fn test_delete_path_path_not_found() { @@ -356,7 +357,7 @@ mod tests { }; assert_eq!(expected, new); - let input_map = ["b/x", "a/c", "a/d/x"] + let input_map: HashMap<_, _> = ["b/x", "a/c", "a/d/x"] .iter() .map(|f| { ( @@ -384,7 +385,7 @@ mod tests { #[test] fn test_dirsmultiset_new_skip() { - let input_map = [ + let input_map: HashMap<_, _> = [ ("a/", EntryState::Normal), ("a/b", EntryState::Normal), ("a/c", EntryState::Removed), diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs --- a/rust/hg-core/src/dirstate/dirstate_map.rs +++ b/rust/hg-core/src/dirstate/dirstate_map.rs @@ -289,8 +289,10 @@ impl DirstateMap { /// good idea. pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { if self.all_dirs.is_none() { - self.all_dirs = - Some(DirsMultiset::from_dirstate(&self.state_map, None)?); + self.all_dirs = Some(DirsMultiset::from_dirstate( + self.state_map.iter(), + None, + )?); } Ok(()) } diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs --- a/rust/hg-core/src/dirstate/status.rs +++ b/rust/hg-core/src/dirstate/status.rs @@ -292,7 +292,7 @@ impl fmt::Display for StatusError { /// Gives information about which files are changed in the working directory /// and how, compared to the revision we're based on -pub struct Status<'a, M: Matcher + Sync> { +pub struct Status<'a, M: ?Sized + Matcher + Sync> { dmap: &'a DirstateMap, pub(crate) matcher: &'a M, root_dir: PathBuf, @@ -302,7 +302,7 @@ pub struct Status<'a, M: Matcher + Sync> impl<'a, M> Status<'a, M> where - M: Matcher + Sync, + M: ?Sized + Matcher + Sync, { pub fn new( dmap: &'a DirstateMap, @@ -898,7 +898,7 @@ pub fn build_response<'a>( #[timed] pub fn status<'a>( dmap: &'a DirstateMap, - matcher: &'a (impl Matcher + Sync), + matcher: &'a (dyn Matcher + Sync), root_dir: PathBuf, ignore_files: Vec, options: StatusOptions, diff --git a/rust/hg-core/src/dirstate_tree.rs b/rust/hg-core/src/dirstate_tree.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/dirstate_tree.rs @@ -0,0 +1,1 @@ +pub mod dispatch; diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs @@ -0,0 +1,310 @@ +use std::collections::HashSet; +use std::path::PathBuf; +use std::time::Duration; + +use crate::matchers::Matcher; +use crate::utils::hg_path::{HgPath, HgPathBuf}; +use crate::CopyMapIter; +use crate::DirstateEntry; +use crate::DirstateError; +use crate::DirstateMap; +use crate::DirstateMapError; +use crate::DirstateParents; +use crate::DirstateStatus; +use crate::EntryState; +use crate::FastHashMap; +use crate::HgPathCow; +use crate::PatternFileWarning; +use crate::StateMapIter; +use crate::StatusError; +use crate::StatusOptions; + +pub trait DirstateMapMethods { + fn clear(&mut self); + + fn add_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + entry: DirstateEntry, + ) -> Result<(), DirstateMapError>; + + fn remove_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + size: i32, + ) -> Result<(), DirstateMapError>; + + fn drop_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + ) -> Result; + + fn clear_ambiguous_times(&mut self, filenames: Vec, now: i32); + + fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool; + + fn non_normal_entries_union( + &mut self, + other: HashSet, + ) -> Vec; + + fn set_non_normal_other_parent_entries(&mut self, force: bool); + + fn get_non_normal_other_parent_entries_panic( + &self, + ) -> (&HashSet, &HashSet); + + fn get_non_normal_other_parent_entries( + &mut self, + ) -> (&mut HashSet, &mut HashSet); + + fn has_tracked_dir( + &mut self, + directory: &HgPath, + ) -> Result; + + fn has_dir( + &mut self, + directory: &HgPath, + ) -> Result; + + fn parents( + &mut self, + file_contents: &[u8], + ) -> Result<&DirstateParents, DirstateError>; + + fn set_parents(&mut self, parents: &DirstateParents); + + fn read<'a>( + &mut self, + file_contents: &'a [u8], + ) -> Result, DirstateError>; + + fn pack( + &mut self, + parents: DirstateParents, + now: Duration, + ) -> Result, DirstateError>; + + fn build_file_fold_map(&mut self) -> &FastHashMap; + + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError>; + + fn set_dirs(&mut self) -> Result<(), DirstateMapError>; + + fn status<'a>( + &'a self, + matcher: &'a (dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec, + options: StatusOptions, + ) -> Result< + ( + (Vec>, DirstateStatus<'a>), + Vec, + ), + StatusError, + >; + + fn copy_map_len(&self) -> usize; + + fn copy_map_iter(&self) -> CopyMapIter<'_>; + + fn copy_map_contains_key(&self, key: &HgPath) -> bool; + + fn copy_map_get(&self, key: &HgPath) -> Option<&HgPathBuf>; + + fn copy_map_remove(&mut self, key: &HgPath) -> Option; + + fn copy_map_insert( + &mut self, + key: HgPathBuf, + value: HgPathBuf, + ) -> Option; + + fn len(&self) -> usize; + + fn contains_key(&self, key: &HgPath) -> bool; + + fn get(&self, key: &HgPath) -> Option<&DirstateEntry>; + + fn iter(&self) -> StateMapIter<'_>; +} + +impl DirstateMapMethods for DirstateMap { + fn clear(&mut self) { + self.clear() + } + + fn add_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + entry: DirstateEntry, + ) -> Result<(), DirstateMapError> { + self.add_file(filename, old_state, entry) + } + + fn remove_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + size: i32, + ) -> Result<(), DirstateMapError> { + self.remove_file(filename, old_state, size) + } + + fn drop_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + ) -> Result { + self.drop_file(filename, old_state) + } + + fn clear_ambiguous_times(&mut self, filenames: Vec, now: i32) { + self.clear_ambiguous_times(filenames, now) + } + + fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool { + self.non_normal_entries_remove(key) + } + + fn non_normal_entries_union( + &mut self, + other: HashSet, + ) -> Vec { + self.non_normal_entries_union(other) + } + + fn set_non_normal_other_parent_entries(&mut self, force: bool) { + self.set_non_normal_other_parent_entries(force) + } + + fn get_non_normal_other_parent_entries_panic( + &self, + ) -> (&HashSet, &HashSet) { + self.get_non_normal_other_parent_entries_panic() + } + + fn get_non_normal_other_parent_entries( + &mut self, + ) -> (&mut HashSet, &mut HashSet) { + self.get_non_normal_other_parent_entries() + } + + fn has_tracked_dir( + &mut self, + directory: &HgPath, + ) -> Result { + self.has_tracked_dir(directory) + } + + fn has_dir( + &mut self, + directory: &HgPath, + ) -> Result { + self.has_dir(directory) + } + + fn parents( + &mut self, + file_contents: &[u8], + ) -> Result<&DirstateParents, DirstateError> { + self.parents(file_contents) + } + + fn set_parents(&mut self, parents: &DirstateParents) { + self.set_parents(parents) + } + + fn read<'a>( + &mut self, + file_contents: &'a [u8], + ) -> Result, DirstateError> { + self.read(file_contents) + } + + fn pack( + &mut self, + parents: DirstateParents, + now: Duration, + ) -> Result, DirstateError> { + self.pack(parents, now) + } + + fn build_file_fold_map(&mut self) -> &FastHashMap { + self.build_file_fold_map() + } + + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + self.set_all_dirs() + } + + fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + self.set_dirs() + } + + fn status<'a>( + &'a self, + matcher: &'a (dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec, + options: StatusOptions, + ) -> Result< + ( + (Vec>, DirstateStatus<'a>), + Vec, + ), + StatusError, + > { + crate::status(self, matcher, root_dir, ignore_files, options) + } + + fn copy_map_len(&self) -> usize { + self.copy_map.len() + } + + fn copy_map_iter(&self) -> CopyMapIter<'_> { + Box::new(self.copy_map.iter()) + } + + fn copy_map_contains_key(&self, key: &HgPath) -> bool { + self.copy_map.contains_key(key) + } + + fn copy_map_get(&self, key: &HgPath) -> Option<&HgPathBuf> { + self.copy_map.get(key) + } + + fn copy_map_remove(&mut self, key: &HgPath) -> Option { + self.copy_map.remove(key) + } + + fn copy_map_insert( + &mut self, + key: HgPathBuf, + value: HgPathBuf, + ) -> Option { + self.copy_map.insert(key, value) + } + + fn len(&self) -> usize { + (&**self).len() + } + + fn contains_key(&self, key: &HgPath) -> bool { + (&**self).contains_key(key) + } + + fn get(&self, key: &HgPath) -> Option<&DirstateEntry> { + (&**self).get(key) + } + + fn iter(&self) -> StateMapIter<'_> { + Box::new((&**self).iter()) + } +} diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -9,6 +9,7 @@ pub mod dagops; pub mod errors; pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; mod dirstate; +pub mod dirstate_tree; pub mod discovery; pub mod requirements; pub mod testing; // unconditionally built, for use from integration tests diff --git a/rust/hg-core/src/operations/dirstate_status.rs b/rust/hg-core/src/operations/dirstate_status.rs --- a/rust/hg-core/src/operations/dirstate_status.rs +++ b/rust/hg-core/src/operations/dirstate_status.rs @@ -14,7 +14,7 @@ use crate::{DirstateStatus, StatusError} /// files. pub type LookupAndStatus<'a> = (Vec>, DirstateStatus<'a>); -impl<'a, M: Matcher + Sync> Status<'a, M> { +impl<'a, M: ?Sized + Matcher + Sync> Status<'a, M> { pub(crate) fn run(&self) -> Result, StatusError> { let (traversed_sender, traversed_receiver) = crossbeam_channel::unbounded(); diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -27,6 +27,7 @@ use crate::{ parsers::dirstate_parents_to_pytuple, }; use hg::{ + dirstate_tree::dispatch::DirstateMapMethods, errors::HgError, revlog::Node, utils::hg_path::{HgPath, HgPathBuf}, @@ -47,10 +48,10 @@ use hg::{ // All attributes also have to have a separate refcount data attribute for // leaks, with all methods that go along for reference sharing. py_class!(pub class DirstateMap |py| { - @shared data inner: RustDirstateMap; + @shared data inner: Box; def __new__(_cls, _root: PyObject) -> PyResult { - let inner = RustDirstateMap::default(); + let inner = Box::new(RustDirstateMap::default()); Self::create_instance(py, inner) } @@ -404,7 +405,7 @@ py_class!(pub class DirstateMap |py| { Dirs::from_inner( py, DirsMultiset::from_dirstate( - &self.inner(py).borrow(), + self.inner(py).borrow().iter(), Some(EntryState::Removed), ) .map_err(|e| { @@ -421,7 +422,7 @@ py_class!(pub class DirstateMap |py| { Dirs::from_inner( py, DirsMultiset::from_dirstate( - &self.inner(py).borrow(), + self.inner(py).borrow().iter(), None, ).map_err(|e| { PyErr::new::(py, e.to_string()) @@ -432,7 +433,7 @@ py_class!(pub class DirstateMap |py| { // TODO all copymap* methods, see docstring above def copymapcopy(&self) -> PyResult { let dict = PyDict::new(py); - for (key, value) in self.inner(py).borrow().copy_map.iter() { + for (key, value) in self.inner(py).borrow().copy_map_iter() { dict.set_item( py, PyBytes::new(py, key.as_bytes()), @@ -444,7 +445,7 @@ py_class!(pub class DirstateMap |py| { def copymapgetitem(&self, key: PyObject) -> PyResult { let key = key.extract::(py)?; - match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) { + match self.inner(py).borrow().copy_map_get(HgPath::new(key.data(py))) { Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())), None => Err(PyErr::new::( py, @@ -457,15 +458,14 @@ py_class!(pub class DirstateMap |py| { } def copymaplen(&self) -> PyResult { - Ok(self.inner(py).borrow().copy_map.len()) + Ok(self.inner(py).borrow().copy_map_len()) } def copymapcontains(&self, key: PyObject) -> PyResult { let key = key.extract::(py)?; Ok(self .inner(py) .borrow() - .copy_map - .contains_key(HgPath::new(key.data(py)))) + .copy_map_contains_key(HgPath::new(key.data(py)))) } def copymapget( &self, @@ -476,8 +476,7 @@ py_class!(pub class DirstateMap |py| { match self .inner(py) .borrow() - .copy_map - .get(HgPath::new(key.data(py))) + .copy_map_get(HgPath::new(key.data(py))) { Some(copy) => Ok(Some( PyBytes::new(py, copy.as_bytes()).into_object(), @@ -492,7 +491,7 @@ py_class!(pub class DirstateMap |py| { ) -> PyResult { let key = key.extract::(py)?; let value = value.extract::(py)?; - self.inner(py).borrow_mut().copy_map.insert( + self.inner(py).borrow_mut().copy_map_insert( HgPathBuf::from_bytes(key.data(py)), HgPathBuf::from_bytes(value.data(py)), ); @@ -507,8 +506,7 @@ py_class!(pub class DirstateMap |py| { match self .inner(py) .borrow_mut() - .copy_map - .remove(HgPath::new(key.data(py))) + .copy_map_remove(HgPath::new(key.data(py))) { Some(_) => Ok(None), None => Ok(default), @@ -519,7 +517,7 @@ py_class!(pub class DirstateMap |py| { let leaked_ref = self.inner(py).leak_immutable(); CopyMapKeysIterator::from_inner( py, - unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) }, + unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) }, ) } @@ -527,7 +525,7 @@ py_class!(pub class DirstateMap |py| { let leaked_ref = self.inner(py).leak_immutable(); CopyMapItemsIterator::from_inner( py, - unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) }, + unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) }, ) } @@ -537,7 +535,7 @@ impl DirstateMap { pub fn get_inner<'a>( &'a self, py: Python<'a>, - ) -> Ref<'a, RustDirstateMap> { + ) -> Ref<'a, Box> { self.inner(py).borrow() } fn translate_key( diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs --- a/rust/hg-cpython/src/dirstate/status.rs +++ b/rust/hg-cpython/src/dirstate/status.rs @@ -17,7 +17,7 @@ use cpython::{ }; use hg::{ matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher}, - parse_pattern_syntax, status, + parse_pattern_syntax, utils::{ files::{get_bytes_from_path, get_path_from_bytes}, hg_path::{HgPath, HgPathBuf}, @@ -126,21 +126,21 @@ pub fn status_wrapper( match matcher.get_type(py).name(py).borrow() { "alwaysmatcher" => { let matcher = AlwaysMatcher; - let ((lookup, status_res), warnings) = status( - &dmap, - &matcher, - root_dir.to_path_buf(), - ignore_files, - StatusOptions { - check_exec, - last_normal_time, - list_clean, - list_ignored, - list_unknown, - collect_traversed_dirs, - }, - ) - .map_err(|e| handle_fallback(py, e))?; + let ((lookup, status_res), warnings) = dmap + .status( + &matcher, + root_dir.to_path_buf(), + ignore_files, + StatusOptions { + check_exec, + last_normal_time, + list_clean, + list_ignored, + list_unknown, + collect_traversed_dirs, + }, + ) + .map_err(|e| handle_fallback(py, e))?; build_response(py, lookup, status_res, warnings) } "exactmatcher" => { @@ -163,21 +163,21 @@ pub fn status_wrapper( let files = files?; let matcher = FileMatcher::new(files.as_ref()) .map_err(|e| PyErr::new::(py, e.to_string()))?; - let ((lookup, status_res), warnings) = status( - &dmap, - &matcher, - root_dir.to_path_buf(), - ignore_files, - StatusOptions { - check_exec, - last_normal_time, - list_clean, - list_ignored, - list_unknown, - collect_traversed_dirs, - }, - ) - .map_err(|e| handle_fallback(py, e))?; + let ((lookup, status_res), warnings) = dmap + .status( + &matcher, + root_dir.to_path_buf(), + ignore_files, + StatusOptions { + check_exec, + last_normal_time, + list_clean, + list_ignored, + list_unknown, + collect_traversed_dirs, + }, + ) + .map_err(|e| handle_fallback(py, e))?; build_response(py, lookup, status_res, warnings) } "includematcher" => { @@ -218,21 +218,21 @@ pub fn status_wrapper( .map_err(|e| handle_fallback(py, e.into()))?; all_warnings.extend(warnings); - let ((lookup, status_res), warnings) = status( - &dmap, - &matcher, - root_dir.to_path_buf(), - ignore_files, - StatusOptions { - check_exec, - last_normal_time, - list_clean, - list_ignored, - list_unknown, - collect_traversed_dirs, - }, - ) - .map_err(|e| handle_fallback(py, e))?; + let ((lookup, status_res), warnings) = dmap + .status( + &matcher, + root_dir.to_path_buf(), + ignore_files, + StatusOptions { + check_exec, + last_normal_time, + list_clean, + list_ignored, + list_unknown, + collect_traversed_dirs, + }, + ) + .map_err(|e| handle_fallback(py, e))?; all_warnings.extend(warnings);