##// END OF EJS Templates
tests: add some more debugmergestate calls in `test-merge-criss-cross.t`...
tests: add some more debugmergestate calls in `test-merge-criss-cross.t` Differential Revision: https://phab.mercurial-scm.org/D9029

File last commit:

r46136:b51167d7 default
r46160:8c02c626 default
Show More
tree.rs
661 lines | 22.2 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 // tree.rs
//
// Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
use super::iter::Iter;
use super::node::{Directory, Node, NodeKind};
use crate::dirstate::dirstate_tree::iter::FsIter;
use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult};
use crate::utils::hg_path::{HgPath, HgPathBuf};
use crate::DirstateEntry;
use std::path::PathBuf;
/// A specialized tree to represent the Mercurial dirstate.
///
/// # Advantages over a flat structure
///
/// The dirstate is inherently hierarchical, since it's a representation of the
/// file structure of the project. The current dirstate format is flat, and
/// while that affords us potentially great (unordered) iteration speeds, the
/// need to retrieve a given path is great enough that you need some kind of
/// hashmap or tree in a lot of cases anyway.
///
/// Going with a tree allows us to be smarter:
/// - Skipping an ignored directory means we don't visit its entire subtree
/// - Security auditing does not need to reconstruct paths backwards to check
/// for symlinked directories, this can be done during the iteration in a
/// very efficient fashion
/// - We don't need to build the directory information in another struct,
/// simplifying the code a lot, reducing the memory footprint and
/// potentially going faster depending on the implementation.
/// - We can use it to store a (platform-dependent) caching mechanism [1]
/// - And probably other types of optimizations.
///
/// Only the first two items in this list are implemented as of this commit.
///
/// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan
///
///
/// # Structure
///
/// It's a prefix (radix) tree with no fixed arity, with a granularity of a
/// folder, allowing it to mimic a filesystem hierarchy:
///
/// ```text
/// foo/bar
/// foo/baz
/// test
/// ```
/// Will be represented (simplified) by:
///
/// ```text
/// Directory(root):
/// - File("test")
/// - Directory("foo"):
/// - File("bar")
/// - File("baz")
/// ```
///
/// Moreover, it is special-cased for storing the dirstate and as such handles
/// cases that a simple `HashMap` would handle, but while preserving the
/// hierarchy.
/// For example:
///
/// ```shell
/// $ touch foo
/// $ hg add foo
/// $ hg commit -m "foo"
/// $ hg remove foo
/// $ rm foo
/// $ mkdir foo
/// $ touch foo/a
/// $ hg add foo/a
/// $ hg status
/// R foo
/// A foo/a
/// ```
/// To represent this in a tree, one needs to keep track of whether any given
/// file was a directory and whether any given directory was a file at the last
/// dirstate update. This tree stores that information, but only in the right
/// circumstances by respecting the high-level rules that prevent nonsensical
/// structures to exist:
/// - a file can only be added as a child of another file if the latter is
/// marked as `Removed`
/// - a file cannot replace a folder unless all its descendents are removed
///
/// This second rule is not checked by the tree for performance reasons, and
/// because high-level logic already prevents that state from happening.
///
/// # Ordering
///
/// It makes no guarantee of ordering for now.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct Tree {
pub root: Node,
files_count: usize,
}
impl Tree {
pub fn new() -> Self {
Self {
root: Node {
kind: NodeKind::Directory(Directory {
was_file: None,
children: Default::default(),
}),
},
files_count: 0,
}
}
/// How many files (not directories) are stored in the tree, including ones
/// marked as `Removed`.
pub fn len(&self) -> usize {
self.files_count
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Inserts a file in the tree and returns the previous entry if any.
pub fn insert(
&mut self,
path: impl AsRef<HgPath>,
kind: DirstateEntry,
) -> Option<DirstateEntry> {
let old = self.insert_node(path, kind);
match old?.kind {
NodeKind::Directory(_) => None,
NodeKind::File(f) => Some(f.entry),
}
}
/// Low-level insertion method that returns the previous node (directories
/// included).
fn insert_node(&mut self, path: impl AsRef<HgPath>, kind: DirstateEntry) -> Option<Node> {
let InsertResult {
did_insert,
old_entry,
} = self.root.insert(path.as_ref().as_bytes(), kind);
self.files_count += if did_insert { 1 } else { 0 };
old_entry
}
/// Returns a reference to a node if it exists.
pub fn get_node(&self, path: impl AsRef<HgPath>) -> Option<&Node> {
self.root.get(path.as_ref().as_bytes())
}
/// Returns a reference to the entry corresponding to `path` if it exists.
pub fn get(&self, path: impl AsRef<HgPath>) -> Option<&DirstateEntry> {
if let Some(node) = self.get_node(&path) {
return match &node.kind {
NodeKind::Directory(d) => d.was_file.as_ref().map(|f| &f.entry),
NodeKind::File(f) => Some(&f.entry),
};
}
None
}
/// Returns `true` if an entry is found for the given `path`.
pub fn contains_key(&self, path: impl AsRef<HgPath>) -> bool {
self.get(path).is_some()
}
/// Returns a mutable reference to the entry corresponding to `path` if it
/// exists.
pub fn get_mut(&mut self, path: impl AsRef<HgPath>) -> Option<&mut DirstateEntry> {
if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) {
return match kind {
NodeKind::Directory(d) => d.was_file.as_mut().map(|f| &mut f.entry),
NodeKind::File(f) => Some(&mut f.entry),
};
}
None
}
/// Returns an iterator over the paths and corresponding entries in the
/// tree.
pub fn iter(&self) -> Iter {
Iter::new(&self.root)
}
/// Returns an iterator of all entries in the tree, with a special
/// filesystem handling for the directories containing said entries. See
/// the documentation of `FsIter` for more.
pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter {
FsIter::new(&self.root, root_dir)
}
/// Remove the entry at `path` and returns it, if it exists.
pub fn remove(&mut self, path: impl AsRef<HgPath>) -> Option<DirstateEntry> {
let RemoveResult { old_entry, .. } = self.root.remove(path.as_ref().as_bytes());
self.files_count = self
.files_count
.checked_sub(if old_entry.is_some() { 1 } else { 0 })
.expect("removed too many files");
old_entry
}
}
impl<P: AsRef<HgPath>> Extend<(P, DirstateEntry)> for Tree {
fn extend<T: IntoIterator<Item = (P, DirstateEntry)>>(&mut self, iter: T) {
for (path, entry) in iter {
self.insert(path, entry);
}
}
}
impl<'a> IntoIterator for &'a Tree {
type Item = (HgPathBuf, DirstateEntry);
type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::dirstate::dirstate_tree::node::File;
use crate::{EntryState, FastHashMap};
use pretty_assertions::assert_eq;
impl Node {
/// Shortcut for getting children of a node in tests.
fn children(&self) -> Option<&FastHashMap<Vec<u8>, Node>> {
match &self.kind {
NodeKind::Directory(d) => Some(&d.children),
NodeKind::File(_) => None,
}
}
}
#[test]
fn test_dirstate_tree() {
let mut tree = Tree::new();
assert_eq!(
tree.insert_node(
HgPath::new(b"we/p"),
DirstateEntry {
state: EntryState::Normal,
mode: 0,
mtime: 0,
size: 0
}
),
None
);
dbg!(&tree);
assert!(tree.get_node(HgPath::new(b"we")).is_some());
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 41,
mtime: 42,
size: 43,
};
assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None);
assert_eq!(
tree.get_node(HgPath::new(b"foo/bar")),
Some(&Node {
kind: NodeKind::File(File {
was_directory: None,
entry
})
})
);
// We didn't override the first entry we made
assert!(tree.get_node(HgPath::new(b"we")).is_some(),);
// Inserting the same key again
assert_eq!(
tree.insert_node(HgPath::new(b"foo/bar"), entry),
Some(Node {
kind: NodeKind::File(File {
was_directory: None,
entry
}),
})
);
// Inserting the two levels deep
assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None);
// Getting a file "inside a file" should return `None`
assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None);
assert_eq!(
tree.insert_node(HgPath::new(b"wasdir/subfile"), entry),
None,
);
let removed_entry = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 0,
size: 0,
};
assert!(tree
.insert_node(HgPath::new(b"wasdir"), removed_entry)
.is_some());
assert_eq!(
tree.get_node(HgPath::new(b"wasdir")),
Some(&Node {
kind: NodeKind::File(File {
was_directory: Some(Box::new(Directory {
was_file: None,
children: [(
b"subfile".to_vec(),
Node {
kind: NodeKind::File(File {
was_directory: None,
entry,
})
}
)]
.to_vec()
.into_iter()
.collect()
})),
entry: removed_entry
})
})
);
assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some())
}
#[test]
fn test_insert_removed() {
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
let removed_entry = DirstateEntry {
state: EntryState::Removed,
mode: 10,
mtime: 20,
size: 30,
};
assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), removed_entry), None);
// The insert should not turn `foo` into a directory as `foo` is not
// `Removed`.
match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
NodeKind::Directory(_) => panic!("should be a file"),
NodeKind::File(_) => {}
}
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
let removed_entry = DirstateEntry {
state: EntryState::Removed,
mode: 10,
mtime: 20,
size: 30,
};
// The insert *should* turn `foo` into a directory as it is `Removed`.
assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None);
match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
NodeKind::Directory(_) => {}
NodeKind::File(_) => panic!("should be a directory"),
}
}
#[test]
fn test_get() {
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
assert_eq!(tree.get(HgPath::new(b"a/b")), None);
assert_eq!(tree.get(HgPath::new(b"a")), None);
assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None);
let entry2 = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 5,
size: 1,
};
// was_directory
assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
let mut tree = Tree::new();
// was_file
assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
}
#[test]
fn test_get_mut() {
let mut tree = Tree::new();
let mut entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None);
assert_eq!(tree.get_mut(HgPath::new(b"a")), None);
assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None);
let mut entry2 = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 5,
size: 1,
};
// was_directory
assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
let mut tree = Tree::new();
// was_file
assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
}
#[test]
fn test_remove() {
let mut tree = Tree::new();
assert_eq!(tree.files_count, 0);
assert_eq!(tree.remove(HgPath::new(b"foo")), None);
assert_eq!(tree.files_count, 0);
let entry = DirstateEntry {
state: EntryState::Normal,
mode: 0,
mtime: 0,
size: 0,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
assert_eq!(tree.files_count, 0);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None);
assert_eq!(tree.files_count, 5);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
assert_eq!(tree.files_count, 4);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None);
assert_eq!(tree.files_count, 4);
assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry));
assert_eq!(tree.files_count, 3);
assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry));
assert_eq!(tree.files_count, 2);
assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry));
assert_eq!(tree.files_count, 1);
assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry));
assert_eq!(tree.files_count, 0);
// `a` should have been cleaned up, no more files anywhere in its
// descendents
assert_eq!(tree.get_node(HgPath::new(b"a")), None);
assert_eq!(tree.root.children().unwrap().len(), 0);
let removed_entry = DirstateEntry {
state: EntryState::Removed,
..entry
};
assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
assert_eq!(tree.files_count, 0);
// The entire tree should have been cleaned up, no more files anywhere
// in its descendents
assert_eq!(tree.root.children().unwrap().len(), 0);
let removed_entry = DirstateEntry {
state: EntryState::Removed,
..entry
};
assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), removed_entry), None);
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry));
assert_eq!(tree.files_count, 0);
dbg!(&tree);
// The entire tree should have been cleaned up, no more files anywhere
// in its descendents
assert_eq!(tree.root.children().unwrap().len(), 0);
assert_eq!(tree.insert(HgPath::new(b"d"), entry), None);
assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None);
assert_eq!(tree.files_count, 2);
// Deleting the nested file should not delete the top directory as it
// used to be a file
assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry));
assert_eq!(tree.files_count, 1);
assert!(tree.get_node(HgPath::new(b"d")).is_some());
assert!(tree.remove(HgPath::new(b"d")).is_some());
assert_eq!(tree.files_count, 0);
// Deleting the nested file should not delete the top file (other way
// around from the last case)
assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
assert!(tree.get_node(HgPath::new(b"a")).is_some());
assert!(tree.get_node(HgPath::new(b"a/a")).is_none());
}
#[test]
fn test_was_directory() {
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 0,
size: 0,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some());
let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap();
match &new_a.kind {
NodeKind::Directory(_) => panic!(),
NodeKind::File(f) => {
let dir = f.was_directory.clone().unwrap();
let c = dir
.children
.get(&b"b".to_vec())
.unwrap()
.children()
.unwrap()
.get(&b"c".to_vec())
.unwrap();
assert_eq!(
match &c.kind {
NodeKind::Directory(_) => panic!(),
NodeKind::File(f) => f.entry,
},
entry
);
}
}
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
let a = tree.get_node(HgPath::new(b"a")).unwrap();
match &a.kind {
NodeKind::Directory(_) => panic!(),
NodeKind::File(f) => {
// Directory in `was_directory` was emptied, should be removed
assert_eq!(f.was_directory, None);
}
}
}
#[test]
fn test_extend() {
let insertions = [
(
HgPathBuf::from_bytes(b"d"),
DirstateEntry {
state: EntryState::Added,
mode: 0,
mtime: -1,
size: -1,
},
),
(
HgPathBuf::from_bytes(b"b"),
DirstateEntry {
state: EntryState::Normal,
mode: 33188,
mtime: 1599647984,
size: 2,
},
),
(
HgPathBuf::from_bytes(b"a/a"),
DirstateEntry {
state: EntryState::Normal,
mode: 33188,
mtime: 1599647984,
size: 2,
},
),
(
HgPathBuf::from_bytes(b"d/d/d"),
DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 0,
size: 0,
},
),
]
.to_vec();
let mut tree = Tree::new();
tree.extend(insertions.clone().into_iter());
for (path, _) in &insertions {
assert!(tree.contains_key(path), true);
}
assert_eq!(tree.files_count, 4);
}
}