##// END OF EJS Templates
upgrade: implement partial upgrade for upgrading persistent-nodemap...
upgrade: implement partial upgrade for upgrading persistent-nodemap Upgrading repositories to use persistent nodemap should be fast and easy as it requires only two things: 1) Updating the requirements 2) Writing a persistent-nodemap on disk For both of the steps above, we don't need to edit existing revlogs. This patch makes upgrade only do the above mentioned two steps if we are only upgarding to use persistent-nodemap feature. Since `nodemap.persist_nodemap()` assumes that there exists a nodemap file for the given revlog if we are trying to call it, this patch adds `force` argument to create a file if does not exist which is true in our upgrade case. The test changes demonstrate that we no longer write nodemap files for manifest after upgrade which I think is desirable. Differential Revision: https://phab.mercurial-scm.org/D9936

File last commit:

r46301:91fafafd default
r47199:98e39f04 default
Show More
tree.rs
682 lines | 22.4 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 // tree.rs
//
// Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
use super::iter::Iter;
use super::node::{Directory, Node, NodeKind};
use crate::dirstate::dirstate_tree::iter::FsIter;
use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult};
use crate::utils::hg_path::{HgPath, HgPathBuf};
use crate::DirstateEntry;
use std::path::PathBuf;
/// A specialized tree to represent the Mercurial dirstate.
///
/// # Advantages over a flat structure
///
/// The dirstate is inherently hierarchical, since it's a representation of the
/// file structure of the project. The current dirstate format is flat, and
/// while that affords us potentially great (unordered) iteration speeds, the
/// need to retrieve a given path is great enough that you need some kind of
/// hashmap or tree in a lot of cases anyway.
///
/// Going with a tree allows us to be smarter:
/// - Skipping an ignored directory means we don't visit its entire subtree
/// - Security auditing does not need to reconstruct paths backwards to check
/// for symlinked directories, this can be done during the iteration in a
/// very efficient fashion
/// - We don't need to build the directory information in another struct,
/// simplifying the code a lot, reducing the memory footprint and
/// potentially going faster depending on the implementation.
/// - We can use it to store a (platform-dependent) caching mechanism [1]
/// - And probably other types of optimizations.
///
/// Only the first two items in this list are implemented as of this commit.
///
/// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan
rust: cleanup some white space in a dock...
r46301 ///
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 ///
/// # Structure
///
/// It's a prefix (radix) tree with no fixed arity, with a granularity of a
/// folder, allowing it to mimic a filesystem hierarchy:
///
/// ```text
/// foo/bar
/// foo/baz
/// test
/// ```
/// Will be represented (simplified) by:
///
/// ```text
/// Directory(root):
/// - File("test")
/// - Directory("foo"):
/// - File("bar")
/// - File("baz")
/// ```
///
/// Moreover, it is special-cased for storing the dirstate and as such handles
/// cases that a simple `HashMap` would handle, but while preserving the
/// hierarchy.
/// For example:
///
/// ```shell
/// $ touch foo
/// $ hg add foo
/// $ hg commit -m "foo"
/// $ hg remove foo
/// $ rm foo
/// $ mkdir foo
/// $ touch foo/a
/// $ hg add foo/a
/// $ hg status
/// R foo
/// A foo/a
/// ```
/// To represent this in a tree, one needs to keep track of whether any given
/// file was a directory and whether any given directory was a file at the last
/// dirstate update. This tree stores that information, but only in the right
/// circumstances by respecting the high-level rules that prevent nonsensical
/// structures to exist:
/// - a file can only be added as a child of another file if the latter is
/// marked as `Removed`
/// - a file cannot replace a folder unless all its descendents are removed
///
/// This second rule is not checked by the tree for performance reasons, and
/// because high-level logic already prevents that state from happening.
///
/// # Ordering
///
/// It makes no guarantee of ordering for now.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct Tree {
pub root: Node,
files_count: usize,
}
impl Tree {
pub fn new() -> Self {
Self {
root: Node {
kind: NodeKind::Directory(Directory {
was_file: None,
children: Default::default(),
}),
},
files_count: 0,
}
}
/// How many files (not directories) are stored in the tree, including ones
/// marked as `Removed`.
pub fn len(&self) -> usize {
self.files_count
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Inserts a file in the tree and returns the previous entry if any.
pub fn insert(
&mut self,
path: impl AsRef<HgPath>,
kind: DirstateEntry,
) -> Option<DirstateEntry> {
let old = self.insert_node(path, kind);
match old?.kind {
NodeKind::Directory(_) => None,
NodeKind::File(f) => Some(f.entry),
}
}
/// Low-level insertion method that returns the previous node (directories
/// included).
Raphaël Gomès
rust: format with rustfmt...
r46162 fn insert_node(
&mut self,
path: impl AsRef<HgPath>,
kind: DirstateEntry,
) -> Option<Node> {
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 let InsertResult {
did_insert,
old_entry,
} = self.root.insert(path.as_ref().as_bytes(), kind);
self.files_count += if did_insert { 1 } else { 0 };
old_entry
}
/// Returns a reference to a node if it exists.
pub fn get_node(&self, path: impl AsRef<HgPath>) -> Option<&Node> {
self.root.get(path.as_ref().as_bytes())
}
/// Returns a reference to the entry corresponding to `path` if it exists.
pub fn get(&self, path: impl AsRef<HgPath>) -> Option<&DirstateEntry> {
if let Some(node) = self.get_node(&path) {
return match &node.kind {
Raphaël Gomès
rust: format with rustfmt...
r46162 NodeKind::Directory(d) => {
d.was_file.as_ref().map(|f| &f.entry)
}
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 NodeKind::File(f) => Some(&f.entry),
};
}
None
}
/// Returns `true` if an entry is found for the given `path`.
pub fn contains_key(&self, path: impl AsRef<HgPath>) -> bool {
self.get(path).is_some()
}
/// Returns a mutable reference to the entry corresponding to `path` if it
/// exists.
Raphaël Gomès
rust: format with rustfmt...
r46162 pub fn get_mut(
&mut self,
path: impl AsRef<HgPath>,
) -> Option<&mut DirstateEntry> {
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) {
return match kind {
Raphaël Gomès
rust: format with rustfmt...
r46162 NodeKind::Directory(d) => {
d.was_file.as_mut().map(|f| &mut f.entry)
}
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 NodeKind::File(f) => Some(&mut f.entry),
};
}
None
}
/// Returns an iterator over the paths and corresponding entries in the
/// tree.
pub fn iter(&self) -> Iter {
Iter::new(&self.root)
}
/// Returns an iterator of all entries in the tree, with a special
/// filesystem handling for the directories containing said entries. See
/// the documentation of `FsIter` for more.
pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter {
FsIter::new(&self.root, root_dir)
}
/// Remove the entry at `path` and returns it, if it exists.
Raphaël Gomès
rust: format with rustfmt...
r46162 pub fn remove(
&mut self,
path: impl AsRef<HgPath>,
) -> Option<DirstateEntry> {
let RemoveResult { old_entry, .. } =
self.root.remove(path.as_ref().as_bytes());
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 self.files_count = self
.files_count
.checked_sub(if old_entry.is_some() { 1 } else { 0 })
.expect("removed too many files");
old_entry
}
}
impl<P: AsRef<HgPath>> Extend<(P, DirstateEntry)> for Tree {
fn extend<T: IntoIterator<Item = (P, DirstateEntry)>>(&mut self, iter: T) {
for (path, entry) in iter {
self.insert(path, entry);
}
}
}
impl<'a> IntoIterator for &'a Tree {
type Item = (HgPathBuf, DirstateEntry);
type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::dirstate::dirstate_tree::node::File;
use crate::{EntryState, FastHashMap};
use pretty_assertions::assert_eq;
impl Node {
/// Shortcut for getting children of a node in tests.
fn children(&self) -> Option<&FastHashMap<Vec<u8>, Node>> {
match &self.kind {
NodeKind::Directory(d) => Some(&d.children),
NodeKind::File(_) => None,
}
}
}
#[test]
fn test_dirstate_tree() {
let mut tree = Tree::new();
assert_eq!(
tree.insert_node(
HgPath::new(b"we/p"),
DirstateEntry {
state: EntryState::Normal,
mode: 0,
mtime: 0,
size: 0
}
),
None
);
dbg!(&tree);
assert!(tree.get_node(HgPath::new(b"we")).is_some());
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 41,
mtime: 42,
size: 43,
};
assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None);
assert_eq!(
tree.get_node(HgPath::new(b"foo/bar")),
Some(&Node {
kind: NodeKind::File(File {
was_directory: None,
entry
})
})
);
// We didn't override the first entry we made
assert!(tree.get_node(HgPath::new(b"we")).is_some(),);
// Inserting the same key again
assert_eq!(
tree.insert_node(HgPath::new(b"foo/bar"), entry),
Some(Node {
kind: NodeKind::File(File {
was_directory: None,
entry
}),
})
);
// Inserting the two levels deep
assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None);
// Getting a file "inside a file" should return `None`
assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None);
assert_eq!(
tree.insert_node(HgPath::new(b"wasdir/subfile"), entry),
None,
);
let removed_entry = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 0,
size: 0,
};
assert!(tree
.insert_node(HgPath::new(b"wasdir"), removed_entry)
.is_some());
assert_eq!(
tree.get_node(HgPath::new(b"wasdir")),
Some(&Node {
kind: NodeKind::File(File {
was_directory: Some(Box::new(Directory {
was_file: None,
children: [(
b"subfile".to_vec(),
Node {
kind: NodeKind::File(File {
was_directory: None,
entry,
})
}
)]
.to_vec()
.into_iter()
.collect()
})),
entry: removed_entry
})
})
);
assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some())
}
#[test]
fn test_insert_removed() {
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
let removed_entry = DirstateEntry {
state: EntryState::Removed,
mode: 10,
mtime: 20,
size: 30,
};
assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None);
Raphaël Gomès
rust: format with rustfmt...
r46162 assert_eq!(
tree.insert_node(HgPath::new(b"foo/a"), removed_entry),
None
);
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 // The insert should not turn `foo` into a directory as `foo` is not
// `Removed`.
match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
NodeKind::Directory(_) => panic!("should be a file"),
NodeKind::File(_) => {}
}
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
let removed_entry = DirstateEntry {
state: EntryState::Removed,
mode: 10,
mtime: 20,
size: 30,
};
// The insert *should* turn `foo` into a directory as it is `Removed`.
assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None);
match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
NodeKind::Directory(_) => {}
NodeKind::File(_) => panic!("should be a directory"),
}
}
#[test]
fn test_get() {
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
assert_eq!(tree.get(HgPath::new(b"a/b")), None);
assert_eq!(tree.get(HgPath::new(b"a")), None);
assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None);
let entry2 = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 5,
size: 1,
};
// was_directory
assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
let mut tree = Tree::new();
// was_file
assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
}
#[test]
fn test_get_mut() {
let mut tree = Tree::new();
let mut entry = DirstateEntry {
state: EntryState::Merged,
mode: 1,
mtime: 2,
size: 3,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None);
assert_eq!(tree.get_mut(HgPath::new(b"a")), None);
assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None);
let mut entry2 = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 5,
size: 1,
};
// was_directory
assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
let mut tree = Tree::new();
// was_file
assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
assert_eq!(tree.files_count, 2);
assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
}
#[test]
fn test_remove() {
let mut tree = Tree::new();
assert_eq!(tree.files_count, 0);
assert_eq!(tree.remove(HgPath::new(b"foo")), None);
assert_eq!(tree.files_count, 0);
let entry = DirstateEntry {
state: EntryState::Normal,
mode: 0,
mtime: 0,
size: 0,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
assert_eq!(tree.files_count, 0);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None);
assert_eq!(tree.files_count, 5);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
assert_eq!(tree.files_count, 4);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None);
assert_eq!(tree.files_count, 4);
assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry));
assert_eq!(tree.files_count, 3);
assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry));
assert_eq!(tree.files_count, 2);
assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry));
assert_eq!(tree.files_count, 1);
assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry));
assert_eq!(tree.files_count, 0);
// `a` should have been cleaned up, no more files anywhere in its
// descendents
assert_eq!(tree.get_node(HgPath::new(b"a")), None);
assert_eq!(tree.root.children().unwrap().len(), 0);
let removed_entry = DirstateEntry {
state: EntryState::Removed,
..entry
};
assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None);
assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
assert_eq!(tree.files_count, 0);
// The entire tree should have been cleaned up, no more files anywhere
// in its descendents
assert_eq!(tree.root.children().unwrap().len(), 0);
let removed_entry = DirstateEntry {
state: EntryState::Removed,
..entry
};
assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
Raphaël Gomès
rust: format with rustfmt...
r46162 assert_eq!(
tree.insert_node(HgPath::new(b"a/b/x"), removed_entry),
None
);
Raphaël Gomès
rust: add `dirstate_tree` module...
r46136 assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry));
assert_eq!(tree.files_count, 0);
dbg!(&tree);
// The entire tree should have been cleaned up, no more files anywhere
// in its descendents
assert_eq!(tree.root.children().unwrap().len(), 0);
assert_eq!(tree.insert(HgPath::new(b"d"), entry), None);
assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None);
assert_eq!(tree.files_count, 2);
// Deleting the nested file should not delete the top directory as it
// used to be a file
assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry));
assert_eq!(tree.files_count, 1);
assert!(tree.get_node(HgPath::new(b"d")).is_some());
assert!(tree.remove(HgPath::new(b"d")).is_some());
assert_eq!(tree.files_count, 0);
// Deleting the nested file should not delete the top file (other way
// around from the last case)
assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None);
assert_eq!(tree.files_count, 1);
assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
assert!(tree.get_node(HgPath::new(b"a")).is_some());
assert!(tree.get_node(HgPath::new(b"a/a")).is_none());
}
#[test]
fn test_was_directory() {
let mut tree = Tree::new();
let entry = DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 0,
size: 0,
};
assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
assert_eq!(tree.files_count, 1);
assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some());
let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap();
match &new_a.kind {
NodeKind::Directory(_) => panic!(),
NodeKind::File(f) => {
let dir = f.was_directory.clone().unwrap();
let c = dir
.children
.get(&b"b".to_vec())
.unwrap()
.children()
.unwrap()
.get(&b"c".to_vec())
.unwrap();
assert_eq!(
match &c.kind {
NodeKind::Directory(_) => panic!(),
NodeKind::File(f) => f.entry,
},
entry
);
}
}
assert_eq!(tree.files_count, 2);
dbg!(&tree);
assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
assert_eq!(tree.files_count, 1);
dbg!(&tree);
let a = tree.get_node(HgPath::new(b"a")).unwrap();
match &a.kind {
NodeKind::Directory(_) => panic!(),
NodeKind::File(f) => {
// Directory in `was_directory` was emptied, should be removed
assert_eq!(f.was_directory, None);
}
}
}
#[test]
fn test_extend() {
let insertions = [
(
HgPathBuf::from_bytes(b"d"),
DirstateEntry {
state: EntryState::Added,
mode: 0,
mtime: -1,
size: -1,
},
),
(
HgPathBuf::from_bytes(b"b"),
DirstateEntry {
state: EntryState::Normal,
mode: 33188,
mtime: 1599647984,
size: 2,
},
),
(
HgPathBuf::from_bytes(b"a/a"),
DirstateEntry {
state: EntryState::Normal,
mode: 33188,
mtime: 1599647984,
size: 2,
},
),
(
HgPathBuf::from_bytes(b"d/d/d"),
DirstateEntry {
state: EntryState::Removed,
mode: 0,
mtime: 0,
size: 0,
},
),
]
.to_vec();
let mut tree = Tree::new();
tree.extend(insertions.clone().into_iter());
for (path, _) in &insertions {
assert!(tree.contains_key(path), true);
}
assert_eq!(tree.files_count, 4);
}
}