// tree.rs // // Copyright 2020, Raphaël Gomès // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. use super::iter::Iter; use super::node::{Directory, Node, NodeKind}; use crate::dirstate::dirstate_tree::iter::FsIter; use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult}; use crate::utils::hg_path::{HgPath, HgPathBuf}; use crate::DirstateEntry; use std::path::PathBuf; /// A specialized tree to represent the Mercurial dirstate. /// /// # Advantages over a flat structure /// /// The dirstate is inherently hierarchical, since it's a representation of the /// file structure of the project. The current dirstate format is flat, and /// while that affords us potentially great (unordered) iteration speeds, the /// need to retrieve a given path is great enough that you need some kind of /// hashmap or tree in a lot of cases anyway. /// /// Going with a tree allows us to be smarter: /// - Skipping an ignored directory means we don't visit its entire subtree /// - Security auditing does not need to reconstruct paths backwards to check /// for symlinked directories, this can be done during the iteration in a /// very efficient fashion /// - We don't need to build the directory information in another struct, /// simplifying the code a lot, reducing the memory footprint and /// potentially going faster depending on the implementation. /// - We can use it to store a (platform-dependent) caching mechanism [1] /// - And probably other types of optimizations. /// /// Only the first two items in this list are implemented as of this commit. /// /// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan /// /// /// # Structure /// /// It's a prefix (radix) tree with no fixed arity, with a granularity of a /// folder, allowing it to mimic a filesystem hierarchy: /// /// ```text /// foo/bar /// foo/baz /// test /// ``` /// Will be represented (simplified) by: /// /// ```text /// Directory(root): /// - File("test") /// - Directory("foo"): /// - File("bar") /// - File("baz") /// ``` /// /// Moreover, it is special-cased for storing the dirstate and as such handles /// cases that a simple `HashMap` would handle, but while preserving the /// hierarchy. /// For example: /// /// ```shell /// $ touch foo /// $ hg add foo /// $ hg commit -m "foo" /// $ hg remove foo /// $ rm foo /// $ mkdir foo /// $ touch foo/a /// $ hg add foo/a /// $ hg status /// R foo /// A foo/a /// ``` /// To represent this in a tree, one needs to keep track of whether any given /// file was a directory and whether any given directory was a file at the last /// dirstate update. This tree stores that information, but only in the right /// circumstances by respecting the high-level rules that prevent nonsensical /// structures to exist: /// - a file can only be added as a child of another file if the latter is /// marked as `Removed` /// - a file cannot replace a folder unless all its descendents are removed /// /// This second rule is not checked by the tree for performance reasons, and /// because high-level logic already prevents that state from happening. /// /// # Ordering /// /// It makes no guarantee of ordering for now. #[derive(Debug, Default, Clone, PartialEq)] pub struct Tree { pub root: Node, files_count: usize, } impl Tree { pub fn new() -> Self { Self { root: Node { kind: NodeKind::Directory(Directory { was_file: None, children: Default::default(), }), }, files_count: 0, } } /// How many files (not directories) are stored in the tree, including ones /// marked as `Removed`. pub fn len(&self) -> usize { self.files_count } pub fn is_empty(&self) -> bool { self.len() == 0 } /// Inserts a file in the tree and returns the previous entry if any. pub fn insert( &mut self, path: impl AsRef, kind: DirstateEntry, ) -> Option { let old = self.insert_node(path, kind); match old?.kind { NodeKind::Directory(_) => None, NodeKind::File(f) => Some(f.entry), } } /// Low-level insertion method that returns the previous node (directories /// included). fn insert_node( &mut self, path: impl AsRef, kind: DirstateEntry, ) -> Option { let InsertResult { did_insert, old_entry, } = self.root.insert(path.as_ref().as_bytes(), kind); self.files_count += if did_insert { 1 } else { 0 }; old_entry } /// Returns a reference to a node if it exists. pub fn get_node(&self, path: impl AsRef) -> Option<&Node> { self.root.get(path.as_ref().as_bytes()) } /// Returns a reference to the entry corresponding to `path` if it exists. pub fn get(&self, path: impl AsRef) -> Option<&DirstateEntry> { if let Some(node) = self.get_node(&path) { return match &node.kind { NodeKind::Directory(d) => { d.was_file.as_ref().map(|f| &f.entry) } NodeKind::File(f) => Some(&f.entry), }; } None } /// Returns `true` if an entry is found for the given `path`. pub fn contains_key(&self, path: impl AsRef) -> bool { self.get(path).is_some() } /// Returns a mutable reference to the entry corresponding to `path` if it /// exists. pub fn get_mut( &mut self, path: impl AsRef, ) -> Option<&mut DirstateEntry> { if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) { return match kind { NodeKind::Directory(d) => { d.was_file.as_mut().map(|f| &mut f.entry) } NodeKind::File(f) => Some(&mut f.entry), }; } None } /// Returns an iterator over the paths and corresponding entries in the /// tree. pub fn iter(&self) -> Iter { Iter::new(&self.root) } /// Returns an iterator of all entries in the tree, with a special /// filesystem handling for the directories containing said entries. See /// the documentation of `FsIter` for more. pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter { FsIter::new(&self.root, root_dir) } /// Remove the entry at `path` and returns it, if it exists. pub fn remove( &mut self, path: impl AsRef, ) -> Option { let RemoveResult { old_entry, .. } = self.root.remove(path.as_ref().as_bytes()); self.files_count = self .files_count .checked_sub(if old_entry.is_some() { 1 } else { 0 }) .expect("removed too many files"); old_entry } } impl> Extend<(P, DirstateEntry)> for Tree { fn extend>(&mut self, iter: T) { for (path, entry) in iter { self.insert(path, entry); } } } impl<'a> IntoIterator for &'a Tree { type Item = (HgPathBuf, DirstateEntry); type IntoIter = Iter<'a>; fn into_iter(self) -> Self::IntoIter { self.iter() } } #[cfg(test)] mod tests { use super::*; use crate::dirstate::dirstate_tree::node::File; use crate::{EntryState, FastHashMap}; use pretty_assertions::assert_eq; impl Node { /// Shortcut for getting children of a node in tests. fn children(&self) -> Option<&FastHashMap, Node>> { match &self.kind { NodeKind::Directory(d) => Some(&d.children), NodeKind::File(_) => None, } } } #[test] fn test_dirstate_tree() { let mut tree = Tree::new(); assert_eq!( tree.insert_node( HgPath::new(b"we/p"), DirstateEntry { state: EntryState::Normal, mode: 0, mtime: 0, size: 0 } ), None ); dbg!(&tree); assert!(tree.get_node(HgPath::new(b"we")).is_some()); let entry = DirstateEntry { state: EntryState::Merged, mode: 41, mtime: 42, size: 43, }; assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None); assert_eq!( tree.get_node(HgPath::new(b"foo/bar")), Some(&Node { kind: NodeKind::File(File { was_directory: None, entry }) }) ); // We didn't override the first entry we made assert!(tree.get_node(HgPath::new(b"we")).is_some(),); // Inserting the same key again assert_eq!( tree.insert_node(HgPath::new(b"foo/bar"), entry), Some(Node { kind: NodeKind::File(File { was_directory: None, entry }), }) ); // Inserting the two levels deep assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None); // Getting a file "inside a file" should return `None` assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None); assert_eq!( tree.insert_node(HgPath::new(b"wasdir/subfile"), entry), None, ); let removed_entry = DirstateEntry { state: EntryState::Removed, mode: 0, mtime: 0, size: 0, }; assert!(tree .insert_node(HgPath::new(b"wasdir"), removed_entry) .is_some()); assert_eq!( tree.get_node(HgPath::new(b"wasdir")), Some(&Node { kind: NodeKind::File(File { was_directory: Some(Box::new(Directory { was_file: None, children: [( b"subfile".to_vec(), Node { kind: NodeKind::File(File { was_directory: None, entry, }) } )] .to_vec() .into_iter() .collect() })), entry: removed_entry }) }) ); assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some()) } #[test] fn test_insert_removed() { let mut tree = Tree::new(); let entry = DirstateEntry { state: EntryState::Merged, mode: 1, mtime: 2, size: 3, }; let removed_entry = DirstateEntry { state: EntryState::Removed, mode: 10, mtime: 20, size: 30, }; assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None); assert_eq!( tree.insert_node(HgPath::new(b"foo/a"), removed_entry), None ); // The insert should not turn `foo` into a directory as `foo` is not // `Removed`. match tree.get_node(HgPath::new(b"foo")).unwrap().kind { NodeKind::Directory(_) => panic!("should be a file"), NodeKind::File(_) => {} } let mut tree = Tree::new(); let entry = DirstateEntry { state: EntryState::Merged, mode: 1, mtime: 2, size: 3, }; let removed_entry = DirstateEntry { state: EntryState::Removed, mode: 10, mtime: 20, size: 30, }; // The insert *should* turn `foo` into a directory as it is `Removed`. assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None); assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None); match tree.get_node(HgPath::new(b"foo")).unwrap().kind { NodeKind::Directory(_) => {} NodeKind::File(_) => panic!("should be a directory"), } } #[test] fn test_get() { let mut tree = Tree::new(); let entry = DirstateEntry { state: EntryState::Merged, mode: 1, mtime: 2, size: 3, }; assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); assert_eq!(tree.files_count, 1); assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry)); assert_eq!(tree.get(HgPath::new(b"a/b")), None); assert_eq!(tree.get(HgPath::new(b"a")), None); assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None); let entry2 = DirstateEntry { state: EntryState::Removed, mode: 0, mtime: 5, size: 1, }; // was_directory assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None); assert_eq!(tree.files_count, 2); assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2)); assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry)); let mut tree = Tree::new(); // was_file assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None); assert_eq!(tree.files_count, 1); assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None); assert_eq!(tree.files_count, 2); assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2)); } #[test] fn test_get_mut() { let mut tree = Tree::new(); let mut entry = DirstateEntry { state: EntryState::Merged, mode: 1, mtime: 2, size: 3, }; assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); assert_eq!(tree.files_count, 1); assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry)); assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None); assert_eq!(tree.get_mut(HgPath::new(b"a")), None); assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None); let mut entry2 = DirstateEntry { state: EntryState::Removed, mode: 0, mtime: 5, size: 1, }; // was_directory assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None); assert_eq!(tree.files_count, 2); assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2)); assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry)); let mut tree = Tree::new(); // was_file assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None); assert_eq!(tree.files_count, 1); assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None); assert_eq!(tree.files_count, 2); assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2)); } #[test] fn test_remove() { let mut tree = Tree::new(); assert_eq!(tree.files_count, 0); assert_eq!(tree.remove(HgPath::new(b"foo")), None); assert_eq!(tree.files_count, 0); let entry = DirstateEntry { state: EntryState::Normal, mode: 0, mtime: 0, size: 0, }; assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); assert_eq!(tree.files_count, 1); assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry)); assert_eq!(tree.files_count, 0); assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None); assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None); assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None); assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None); assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None); assert_eq!(tree.files_count, 5); assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry)); assert_eq!(tree.files_count, 4); assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None); assert_eq!(tree.files_count, 4); assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry)); assert_eq!(tree.files_count, 3); assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry)); assert_eq!(tree.files_count, 2); assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry)); assert_eq!(tree.files_count, 1); assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry)); assert_eq!(tree.files_count, 0); // `a` should have been cleaned up, no more files anywhere in its // descendents assert_eq!(tree.get_node(HgPath::new(b"a")), None); assert_eq!(tree.root.children().unwrap().len(), 0); let removed_entry = DirstateEntry { state: EntryState::Removed, ..entry }; assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None); assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None); assert_eq!(tree.files_count, 2); dbg!(&tree); assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry)); assert_eq!(tree.files_count, 1); dbg!(&tree); assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry)); assert_eq!(tree.files_count, 0); // The entire tree should have been cleaned up, no more files anywhere // in its descendents assert_eq!(tree.root.children().unwrap().len(), 0); let removed_entry = DirstateEntry { state: EntryState::Removed, ..entry }; assert_eq!(tree.insert(HgPath::new(b"a"), entry), None); assert_eq!( tree.insert_node(HgPath::new(b"a/b/x"), removed_entry), None ); assert_eq!(tree.files_count, 2); dbg!(&tree); assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry)); assert_eq!(tree.files_count, 1); dbg!(&tree); assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry)); assert_eq!(tree.files_count, 0); dbg!(&tree); // The entire tree should have been cleaned up, no more files anywhere // in its descendents assert_eq!(tree.root.children().unwrap().len(), 0); assert_eq!(tree.insert(HgPath::new(b"d"), entry), None); assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None); assert_eq!(tree.files_count, 2); // Deleting the nested file should not delete the top directory as it // used to be a file assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry)); assert_eq!(tree.files_count, 1); assert!(tree.get_node(HgPath::new(b"d")).is_some()); assert!(tree.remove(HgPath::new(b"d")).is_some()); assert_eq!(tree.files_count, 0); // Deleting the nested file should not delete the top file (other way // around from the last case) assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None); assert_eq!(tree.files_count, 1); assert_eq!(tree.insert(HgPath::new(b"a"), entry), None); assert_eq!(tree.files_count, 2); dbg!(&tree); assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry)); assert_eq!(tree.files_count, 1); dbg!(&tree); assert!(tree.get_node(HgPath::new(b"a")).is_some()); assert!(tree.get_node(HgPath::new(b"a/a")).is_none()); } #[test] fn test_was_directory() { let mut tree = Tree::new(); let entry = DirstateEntry { state: EntryState::Removed, mode: 0, mtime: 0, size: 0, }; assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); assert_eq!(tree.files_count, 1); assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some()); let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap(); match &new_a.kind { NodeKind::Directory(_) => panic!(), NodeKind::File(f) => { let dir = f.was_directory.clone().unwrap(); let c = dir .children .get(&b"b".to_vec()) .unwrap() .children() .unwrap() .get(&b"c".to_vec()) .unwrap(); assert_eq!( match &c.kind { NodeKind::Directory(_) => panic!(), NodeKind::File(f) => f.entry, }, entry ); } } assert_eq!(tree.files_count, 2); dbg!(&tree); assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry)); assert_eq!(tree.files_count, 1); dbg!(&tree); let a = tree.get_node(HgPath::new(b"a")).unwrap(); match &a.kind { NodeKind::Directory(_) => panic!(), NodeKind::File(f) => { // Directory in `was_directory` was emptied, should be removed assert_eq!(f.was_directory, None); } } } #[test] fn test_extend() { let insertions = [ ( HgPathBuf::from_bytes(b"d"), DirstateEntry { state: EntryState::Added, mode: 0, mtime: -1, size: -1, }, ), ( HgPathBuf::from_bytes(b"b"), DirstateEntry { state: EntryState::Normal, mode: 33188, mtime: 1599647984, size: 2, }, ), ( HgPathBuf::from_bytes(b"a/a"), DirstateEntry { state: EntryState::Normal, mode: 33188, mtime: 1599647984, size: 2, }, ), ( HgPathBuf::from_bytes(b"d/d/d"), DirstateEntry { state: EntryState::Removed, mode: 0, mtime: 0, size: 0, }, ), ] .to_vec(); let mut tree = Tree::new(); tree.extend(insertions.clone().into_iter()); for (path, _) in &insertions { assert!(tree.contains_key(path), true); } assert_eq!(tree.files_count, 4); } }