# HG changeset patch # User Raphaël Gomès # Date 2020-09-30 16:10:29 # Node ID 496537c9c1b4b250406f2cf423cf3cb99479346e # Parent e604a3c03ab90d0c49264c75314c535f0cff96b2 rust: start plugging the dirstate tree behind a feature gate The previous patch added the `dirstate-tree` feature gate to enable the two dirstate implementations to co-habit while the tree-based one gets better. This patch copies over the code that differs, be it because the algorithm changed or because the borrowing rules are different. Indeed, `DirstateTree` is not observationally equivalent to the std `HashMap` in the APIs we use: it does not have the `Entry` API (yet?) and its iterator returns owned values instead of references. This last point is because the implementation needs to be changed to a more clever and efficient solution. Differential Revision: https://phab.mercurial-scm.org/D9133 diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs --- a/rust/hg-core/src/dirstate.rs +++ b/rust/hg-core/src/dirstate.rs @@ -38,8 +38,15 @@ pub struct DirstateEntry { /// merge. pub const SIZE_FROM_OTHER_PARENT: i32 = -2; +#[cfg(not(feature = "dirstate-tree"))] pub type StateMap = FastHashMap; +#[cfg(not(feature = "dirstate-tree"))] pub type StateMapIter<'a> = hash_map::Iter<'a, HgPathBuf, DirstateEntry>; + +#[cfg(feature = "dirstate-tree")] +pub type StateMap = dirstate_tree::tree::Tree; +#[cfg(feature = "dirstate-tree")] +pub type StateMapIter<'a> = dirstate_tree::iter::Iter<'a>; pub type CopyMap = FastHashMap; pub type CopyMapIter<'a> = hash_map::Iter<'a, HgPathBuf, HgPathBuf>; diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs --- a/rust/hg-core/src/dirstate/dirs_multiset.rs +++ b/rust/hg-core/src/dirstate/dirs_multiset.rs @@ -14,7 +14,7 @@ use crate::{ files, hg_path::{HgPath, HgPathBuf, HgPathError}, }, - DirstateEntry, DirstateMapError, FastHashMap, + DirstateEntry, DirstateMapError, FastHashMap, StateMap, }; use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet}; @@ -30,15 +30,15 @@ impl DirsMultiset { /// Initializes the multiset from a dirstate. /// /// If `skip_state` is provided, skips dirstate entries with equal state. + #[cfg(not(feature = "dirstate-tree"))] pub fn from_dirstate( - dirstate: &FastHashMap, + dirstate: &StateMap, skip_state: Option, ) -> Result { let mut multiset = DirsMultiset { inner: FastHashMap::default(), }; - - for (filename, DirstateEntry { state, .. }) in dirstate { + for (filename, DirstateEntry { state, .. }) in dirstate.iter() { // This `if` is optimized out of the loop if let Some(skip) = skip_state { if skip != *state { @@ -51,6 +51,30 @@ impl DirsMultiset { Ok(multiset) } + /// Initializes the multiset from a dirstate. + /// + /// If `skip_state` is provided, skips dirstate entries with equal state. + #[cfg(feature = "dirstate-tree")] + pub fn from_dirstate( + dirstate: &StateMap, + skip_state: Option, + ) -> Result { + let mut multiset = DirsMultiset { + inner: FastHashMap::default(), + }; + for (filename, DirstateEntry { state, .. }) in dirstate.iter() { + // This `if` is optimized out of the loop + if let Some(skip) = skip_state { + if skip != state { + multiset.add_path(filename)?; + } + } else { + multiset.add_path(filename)?; + } + } + + Ok(multiset) + } /// Initializes the multiset from a manifest. pub fn from_manifest( @@ -332,8 +356,8 @@ mod tests { }; assert_eq!(expected, new); - let new = DirsMultiset::from_dirstate(&FastHashMap::default(), None) - .unwrap(); + let new = + DirsMultiset::from_dirstate(&StateMap::default(), None).unwrap(); let expected = DirsMultiset { inner: FastHashMap::default(), }; @@ -357,7 +381,7 @@ mod tests { }; assert_eq!(expected, new); - let input_map = ["a/", "b/", "a/c", "a/d/"] + let input_map = ["b/x", "a/c", "a/d/x"] .iter() .map(|f| { ( @@ -371,7 +395,7 @@ mod tests { ) }) .collect(); - let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)] + let expected_inner = [("", 2), ("a", 2), ("b", 1), ("a/d", 1)] .iter() .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) .collect(); @@ -387,9 +411,9 @@ mod tests { fn test_dirsmultiset_new_skip() { let input_map = [ ("a/", EntryState::Normal), - ("a/b/", EntryState::Normal), + ("a/b", EntryState::Normal), ("a/c", EntryState::Removed), - ("a/d/", EntryState::Merged), + ("a/d", EntryState::Merged), ] .iter() .map(|(f, state)| { @@ -406,7 +430,7 @@ mod tests { .collect(); // "a" incremented with "a/c" and "a/d/" - let expected_inner = [("", 1), ("a", 2), ("a/d", 1)] + let expected_inner = [("", 1), ("a", 2)] .iter() .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) .collect(); diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs --- a/rust/hg-core/src/dirstate/dirstate_map.rs +++ b/rust/hg-core/src/dirstate/dirstate_map.rs @@ -16,7 +16,6 @@ use crate::{ CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError, DirstateParents, DirstateParseError, FastHashMap, StateMap, }; -use core::borrow::Borrow; use micro_timer::timed; use std::collections::HashSet; use std::convert::TryInto; @@ -67,7 +66,7 @@ impl DirstateMap { } pub fn clear(&mut self) { - self.state_map.clear(); + self.state_map = StateMap::default(); self.copy_map.clear(); self.file_fold_map = None; self.non_normal_set = None; @@ -189,18 +188,15 @@ impl DirstateMap { ) { for filename in filenames { let mut changed = false; - self.state_map - .entry(filename.to_owned()) - .and_modify(|entry| { - if entry.state == EntryState::Normal && entry.mtime == now - { - changed = true; - *entry = DirstateEntry { - mtime: MTIME_UNSET, - ..*entry - }; - } - }); + if let Some(entry) = self.state_map.get_mut(&filename) { + if entry.state == EntryState::Normal && entry.mtime == now { + changed = true; + *entry = DirstateEntry { + mtime: MTIME_UNSET, + ..*entry + }; + } + } if changed { self.get_non_normal_other_parent_entries() .0 @@ -257,6 +253,7 @@ impl DirstateMap { ) } + #[cfg(not(feature = "dirstate-tree"))] pub fn set_non_normal_other_parent_entries(&mut self, force: bool) { if !force && self.non_normal_set.is_some() @@ -285,6 +282,34 @@ impl DirstateMap { self.non_normal_set = Some(non_normal); self.other_parent_set = Some(other_parent); } + #[cfg(feature = "dirstate-tree")] + pub fn set_non_normal_other_parent_entries(&mut self, force: bool) { + if !force + && self.non_normal_set.is_some() + && self.other_parent_set.is_some() + { + return; + } + let mut non_normal = HashSet::new(); + let mut other_parent = HashSet::new(); + + for ( + filename, + DirstateEntry { + state, size, mtime, .. + }, + ) in self.state_map.iter() + { + if state != EntryState::Normal || mtime == MTIME_UNSET { + non_normal.insert(filename.to_owned()); + } + if state == EntryState::Normal && size == SIZE_FROM_OTHER_PARENT { + other_parent.insert(filename.to_owned()); + } + } + self.non_normal_set = Some(non_normal); + self.other_parent_set = Some(other_parent); + } /// Both of these setters and their uses appear to be the simplest way to /// emulate a Python lazy property, but it is ugly and unidiomatic. @@ -398,17 +423,33 @@ impl DirstateMap { self.set_non_normal_other_parent_entries(true); Ok(packed) } - + #[cfg(not(feature = "dirstate-tree"))] pub fn build_file_fold_map(&mut self) -> &FileFoldMap { if let Some(ref file_fold_map) = self.file_fold_map { return file_fold_map; } let mut new_file_fold_map = FileFoldMap::default(); - for (filename, DirstateEntry { state, .. }) in self.state_map.borrow() - { + + for (filename, DirstateEntry { state, .. }) in self.state_map.iter() { if *state == EntryState::Removed { new_file_fold_map - .insert(normalize_case(filename), filename.to_owned()); + .insert(normalize_case(&filename), filename.to_owned()); + } + } + self.file_fold_map = Some(new_file_fold_map); + self.file_fold_map.as_ref().unwrap() + } + #[cfg(feature = "dirstate-tree")] + pub fn build_file_fold_map(&mut self) -> &FileFoldMap { + if let Some(ref file_fold_map) = self.file_fold_map { + return file_fold_map; + } + let mut new_file_fold_map = FileFoldMap::default(); + + for (filename, DirstateEntry { state, .. }) in self.state_map.iter() { + if state == EntryState::Removed { + new_file_fold_map + .insert(normalize_case(&filename), filename.to_owned()); } } self.file_fold_map = Some(new_file_fold_map); diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs --- a/rust/hg-core/src/dirstate/parsers.rs +++ b/rust/hg-core/src/dirstate/parsers.rs @@ -80,11 +80,11 @@ pub fn parse_dirstate( )); curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); } - Ok((parents, entries, copies)) } /// `now` is the duration in seconds since the Unix epoch +#[cfg(not(feature = "dirstate-tree"))] pub fn pack_dirstate( state_map: &mut StateMap, copy_map: &CopyMap, @@ -156,15 +156,89 @@ pub fn pack_dirstate( Ok(packed) } +/// `now` is the duration in seconds since the Unix epoch +#[cfg(feature = "dirstate-tree")] +pub fn pack_dirstate( + state_map: &mut StateMap, + copy_map: &CopyMap, + parents: DirstateParents, + now: Duration, +) -> Result, DirstatePackError> { + // TODO move away from i32 before 2038. + let now: i32 = now.as_secs().try_into().expect("time overflow"); + + let expected_size: usize = state_map + .iter() + .map(|(filename, _)| { + let mut length = MIN_ENTRY_SIZE + filename.len(); + if let Some(copy) = copy_map.get(&filename) { + length += copy.len() + 1; + } + length + }) + .sum(); + let expected_size = expected_size + PARENT_SIZE * 2; + + let mut packed = Vec::with_capacity(expected_size); + let mut new_state_map = vec![]; + + packed.extend(&parents.p1); + packed.extend(&parents.p2); + + for (filename, entry) in state_map.iter() { + let new_filename = filename.to_owned(); + let mut new_mtime: i32 = entry.mtime; + if entry.state == EntryState::Normal && entry.mtime == now { + // The file was last modified "simultaneously" with the current + // write to dirstate (i.e. within the same second for file- + // systems with a granularity of 1 sec). This commonly happens + // for at least a couple of files on 'update'. + // The user could change the file without changing its size + // within the same second. Invalidate the file's mtime in + // dirstate, forcing future 'status' calls to compare the + // contents of the file if the size is the same. This prevents + // mistakenly treating such files as clean. + new_mtime = -1; + new_state_map.push(( + filename.to_owned(), + DirstateEntry { + mtime: new_mtime, + ..entry + }, + )); + } + let mut new_filename = new_filename.into_vec(); + if let Some(copy) = copy_map.get(&filename) { + new_filename.push(b'\0'); + new_filename.extend(copy.bytes()); + } + + packed.write_u8(entry.state.into())?; + packed.write_i32::(entry.mode)?; + packed.write_i32::(entry.size)?; + packed.write_i32::(new_mtime)?; + packed.write_i32::(new_filename.len() as i32)?; + packed.extend(new_filename) + } + + if packed.len() != expected_size { + return Err(DirstatePackError::BadSize(expected_size, packed.len())); + } + + state_map.extend(new_state_map); + + Ok(packed) +} #[cfg(test)] mod tests { use super::*; use crate::{utils::hg_path::HgPathBuf, FastHashMap}; + use pretty_assertions::assert_eq; #[test] fn test_pack_dirstate_empty() { - let mut state_map: StateMap = FastHashMap::default(); + let mut state_map = StateMap::default(); let copymap = FastHashMap::default(); let parents = DirstateParents { p1: *b"12345678910111213141", diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs --- a/rust/hg-core/src/dirstate/status.rs +++ b/rust/hg-core/src/dirstate/status.rs @@ -9,6 +9,10 @@ //! It is currently missing a lot of functionality compared to the Python one //! and will only be triggered in narrow cases. +#[cfg(feature = "dirstate-tree")] +use crate::dirstate::dirstate_tree::iter::StatusShortcut; +#[cfg(not(feature = "dirstate-tree"))] +use crate::utils::path_auditor::PathAuditor; use crate::{ dirstate::SIZE_FROM_OTHER_PARENT, filepatterns::PatternFileWarning, @@ -19,7 +23,6 @@ use crate::{ hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf, HgPathError, }, - path_auditor::PathAuditor, }, CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap, PatternError, @@ -701,12 +704,131 @@ where }) } + /// Add the files in the dirstate to the results. + /// + /// This takes a mutable reference to the results to account for the + /// `extend` in timings + #[cfg(feature = "dirstate-tree")] + #[timed] + pub fn extend_from_dmap(&self, results: &mut Vec>) { + results.par_extend( + self.dmap + .fs_iter(self.root_dir.clone()) + .par_bridge() + .filter(|(path, _)| self.matcher.matches(path)) + .flat_map(move |(filename, shortcut)| { + let entry = match shortcut { + StatusShortcut::Entry(e) => e, + StatusShortcut::Dispatch(d) => { + return Ok((Cow::Owned(filename), d)) + } + }; + let filename_as_path = hg_path_to_path_buf(&filename)?; + let meta = self + .root_dir + .join(filename_as_path) + .symlink_metadata(); + + match meta { + Ok(ref m) + if !(m.file_type().is_file() + || m.file_type().is_symlink()) => + { + Ok(( + Cow::Owned(filename), + dispatch_missing(entry.state), + )) + } + Ok(m) => { + let dispatch = dispatch_found( + &filename, + entry, + HgMetadata::from_metadata(m), + &self.dmap.copy_map, + self.options, + ); + Ok((Cow::Owned(filename), dispatch)) + } + Err(ref e) + if e.kind() == ErrorKind::NotFound + || e.raw_os_error() == Some(20) => + { + // Rust does not yet have an `ErrorKind` for + // `NotADirectory` (errno 20) + // It happens if the dirstate contains `foo/bar` + // and foo is not a + // directory + Ok(( + Cow::Owned(filename), + dispatch_missing(entry.state), + )) + } + Err(e) => Err(e), + } + }), + ); + } + + /// Add the files in the dirstate to the results. + /// + /// This takes a mutable reference to the results to account for the + /// `extend` in timings + #[cfg(not(feature = "dirstate-tree"))] + #[timed] + pub fn extend_from_dmap(&self, results: &mut Vec>) { + results.par_extend(self.dmap.par_iter().flat_map( + move |(filename, entry)| { + let filename: &HgPath = filename; + let filename_as_path = hg_path_to_path_buf(filename)?; + let meta = + self.root_dir.join(filename_as_path).symlink_metadata(); + match meta { + Ok(ref m) + if !(m.file_type().is_file() + || m.file_type().is_symlink()) => + { + Ok(( + Cow::Borrowed(filename), + dispatch_missing(entry.state), + )) + } + Ok(m) => Ok(( + Cow::Borrowed(filename), + dispatch_found( + filename, + *entry, + HgMetadata::from_metadata(m), + &self.dmap.copy_map, + self.options, + ), + )), + Err(ref e) + if e.kind() == ErrorKind::NotFound + || e.raw_os_error() == Some(20) => + { + // Rust does not yet have an `ErrorKind` for + // `NotADirectory` (errno 20) + // It happens if the dirstate contains `foo/bar` + // and foo is not a + // directory + Ok(( + Cow::Borrowed(filename), + dispatch_missing(entry.state), + )) + } + Err(e) => Err(e), + } + }, + )); + } + /// Checks all files that are in the dirstate but were not found during the /// working directory traversal. This means that the rest must /// be either ignored, under a symlink or under a new nested repo. /// /// This takes a mutable reference to the results to account for the /// `extend` in timings + #[cfg(not(feature = "dirstate-tree"))] #[timed] pub fn handle_unknowns( &self, @@ -781,59 +903,6 @@ where Ok(()) } - - /// Add the files in the dirstate to the results. - /// - /// This takes a mutable reference to the results to account for the - /// `extend` in timings - #[timed] - pub fn extend_from_dmap(&self, results: &mut Vec>) { - results.par_extend(self.dmap.par_iter().flat_map( - move |(filename, entry)| { - let filename: &HgPath = filename; - let filename_as_path = hg_path_to_path_buf(filename)?; - let meta = - self.root_dir.join(filename_as_path).symlink_metadata(); - - match meta { - Ok(ref m) - if !(m.file_type().is_file() - || m.file_type().is_symlink()) => - { - Ok(( - Cow::Borrowed(filename), - dispatch_missing(entry.state), - )) - } - Ok(m) => Ok(( - Cow::Borrowed(filename), - dispatch_found( - filename, - *entry, - HgMetadata::from_metadata(m), - &self.dmap.copy_map, - self.options, - ), - )), - Err(ref e) - if e.kind() == ErrorKind::NotFound - || e.raw_os_error() == Some(20) => - { - // Rust does not yet have an `ErrorKind` for - // `NotADirectory` (errno 20) - // It happens if the dirstate contains `foo/bar` - // and foo is not a - // directory - Ok(( - Cow::Borrowed(filename), - dispatch_missing(entry.state), - )) - } - Err(e) => Err(e), - } - }, - )); - } } #[timed] diff --git a/rust/hg-core/src/operations/dirstate_status.rs b/rust/hg-core/src/operations/dirstate_status.rs --- a/rust/hg-core/src/operations/dirstate_status.rs +++ b/rust/hg-core/src/operations/dirstate_status.rs @@ -14,6 +14,66 @@ use crate::{DirstateStatus, StatusError} /// files. pub type LookupAndStatus<'a> = (Vec>, DirstateStatus<'a>); +#[cfg(feature = "dirstate-tree")] +impl<'a, M: Matcher + Sync> Status<'a, M> { + pub(crate) fn run(&self) -> Result, StatusError> { + let (traversed_sender, traversed_receiver) = + crossbeam::channel::unbounded(); + + // Step 1: check the files explicitly mentioned by the user + let (work, mut results) = self.walk_explicit(traversed_sender.clone()); + + // Step 2: Check files in the dirstate + if !self.matcher.is_exact() { + self.extend_from_dmap(&mut results); + } + // Step 3: Check the working directory if listing unknowns + if !work.is_empty() { + // Hashmaps are quite a bit slower to build than vecs, so only + // build it if needed. + let mut old_results = None; + + // Step 2: recursively check the working directory for changes if + // needed + for (dir, dispatch) in work { + match dispatch { + Dispatch::Directory { was_file } => { + if was_file { + results.push((dir.to_owned(), Dispatch::Removed)); + } + if self.options.list_ignored + || self.options.list_unknown + && !self.dir_ignore(&dir) + { + if old_results.is_none() { + old_results = + Some(results.iter().cloned().collect()); + } + self.traverse( + &dir, + old_results + .as_ref() + .expect("old results should exist"), + &mut results, + traversed_sender.clone(), + )?; + } + } + _ => { + unreachable!("There can only be directories in `work`") + } + } + } + } + + drop(traversed_sender); + let traversed = traversed_receiver.into_iter().collect(); + + Ok(build_response(results, traversed)) + } +} + +#[cfg(not(feature = "dirstate-tree"))] impl<'a, M: Matcher + Sync> Status<'a, M> { pub(crate) fn run(&self) -> Result, StatusError> { let (traversed_sender, traversed_receiver) = diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -142,10 +142,10 @@ py_class!(pub class DirstateMap |py| { })?, ) .and_then(|b| Ok(b.to_py_object(py))) - .or_else(|_| { + .or_else(|e| { Err(PyErr::new::( py, - "Dirstate error".to_string(), + format!("Dirstate error: {}", e.to_string()), )) }) } @@ -549,12 +549,14 @@ impl DirstateMap { ) -> Ref<'a, RustDirstateMap> { self.inner(py).borrow() } + #[cfg(not(feature = "dirstate-tree"))] fn translate_key( py: Python, res: (&HgPathBuf, &DirstateEntry), ) -> PyResult> { Ok(Some(PyBytes::new(py, res.0.as_bytes()))) } + #[cfg(not(feature = "dirstate-tree"))] fn translate_key_value( py: Python, res: (&HgPathBuf, &DirstateEntry), @@ -562,7 +564,25 @@ impl DirstateMap { let (f, entry) = res; Ok(Some(( PyBytes::new(py, f.as_bytes()), - make_dirstate_tuple(py, entry)?, + make_dirstate_tuple(py, &entry)?, + ))) + } + #[cfg(feature = "dirstate-tree")] + fn translate_key( + py: Python, + res: (HgPathBuf, DirstateEntry), + ) -> PyResult> { + Ok(Some(PyBytes::new(py, res.0.as_bytes()))) + } + #[cfg(feature = "dirstate-tree")] + fn translate_key_value( + py: Python, + res: (HgPathBuf, DirstateEntry), + ) -> PyResult> { + let (f, entry) = res; + Ok(Some(( + PyBytes::new(py, f.as_bytes()), + make_dirstate_tuple(py, &entry)?, ))) } } diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs --- a/rust/hg-cpython/src/dirstate/status.rs +++ b/rust/hg-cpython/src/dirstate/status.rs @@ -159,7 +159,7 @@ pub fn status_wrapper( .collect(); let files = files?; - let matcher = FileMatcher::new(&files) + let matcher = FileMatcher::new(files.as_ref()) .map_err(|e| PyErr::new::(py, e.to_string()))?; let ((lookup, status_res), warnings) = status( &dmap, diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs --- a/rust/hg-cpython/src/parsers.rs +++ b/rust/hg-cpython/src/parsers.rs @@ -119,11 +119,11 @@ fn pack_dirstate_wrapper( Duration::from_secs(now.as_object().extract::(py)?), ) { Ok(packed) => { - for (filename, entry) in &dirstate_map { + for (filename, entry) in dirstate_map.iter() { dmap.set_item( py, PyBytes::new(py, filename.as_bytes()), - make_dirstate_tuple(py, entry)?, + make_dirstate_tuple(py, &entry)?, )?; } Ok(PyBytes::new(py, &packed))