##// END OF EJS Templates
rust: apply clippy lints...
Raphaël Gomès -
r52600:ec717174 default
parent child Browse files
Show More
@@ -1,1937 +1,1932
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::path::PathBuf;
3 use std::path::PathBuf;
4
4
5 use super::on_disk;
5 use super::on_disk;
6 use super::on_disk::DirstateV2ParseError;
6 use super::on_disk::DirstateV2ParseError;
7 use super::owning::OwningDirstateMap;
7 use super::owning::OwningDirstateMap;
8 use super::path_with_basename::WithBasename;
8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::pack_entry;
9 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::packed_entry_size;
10 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::parse_dirstate_entries;
11 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::CopyMapIter;
12 use crate::dirstate::CopyMapIter;
13 use crate::dirstate::DirstateV2Data;
13 use crate::dirstate::DirstateV2Data;
14 use crate::dirstate::ParentFileData;
14 use crate::dirstate::ParentFileData;
15 use crate::dirstate::StateMapIter;
15 use crate::dirstate::StateMapIter;
16 use crate::dirstate::TruncatedTimestamp;
16 use crate::dirstate::TruncatedTimestamp;
17 use crate::matchers::Matcher;
17 use crate::matchers::Matcher;
18 use crate::utils::filter_map_results;
18 use crate::utils::filter_map_results;
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 use crate::DirstateEntry;
20 use crate::DirstateEntry;
21 use crate::DirstateError;
21 use crate::DirstateError;
22 use crate::DirstateMapError;
22 use crate::DirstateMapError;
23 use crate::DirstateParents;
23 use crate::DirstateParents;
24 use crate::DirstateStatus;
24 use crate::DirstateStatus;
25 use crate::FastHashbrownMap as FastHashMap;
25 use crate::FastHashbrownMap as FastHashMap;
26 use crate::PatternFileWarning;
26 use crate::PatternFileWarning;
27 use crate::StatusError;
27 use crate::StatusError;
28 use crate::StatusOptions;
28 use crate::StatusOptions;
29
29
30 /// Append to an existing data file if the amount of unreachable data (not used
30 /// Append to an existing data file if the amount of unreachable data (not used
31 /// anymore) is less than this fraction of the total amount of existing data.
31 /// anymore) is less than this fraction of the total amount of existing data.
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33
33
34 #[derive(Debug, PartialEq, Eq)]
34 #[derive(Debug, PartialEq, Eq)]
35 /// Version of the on-disk format
35 /// Version of the on-disk format
36 pub enum DirstateVersion {
36 pub enum DirstateVersion {
37 V1,
37 V1,
38 V2,
38 V2,
39 }
39 }
40
40
41 #[derive(Debug, PartialEq, Eq)]
41 #[derive(Debug, PartialEq, Eq)]
42 pub enum DirstateMapWriteMode {
42 pub enum DirstateMapWriteMode {
43 Auto,
43 Auto,
44 ForceNewDataFile,
44 ForceNewDataFile,
45 ForceAppend,
45 ForceAppend,
46 }
46 }
47
47
48 #[derive(Debug)]
48 #[derive(Debug)]
49 pub struct DirstateMap<'on_disk> {
49 pub struct DirstateMap<'on_disk> {
50 /// Contents of the `.hg/dirstate` file
50 /// Contents of the `.hg/dirstate` file
51 pub(super) on_disk: &'on_disk [u8],
51 pub(super) on_disk: &'on_disk [u8],
52
52
53 pub(super) root: ChildNodes<'on_disk>,
53 pub(super) root: ChildNodes<'on_disk>,
54
54
55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
56 pub(super) nodes_with_entry_count: u32,
56 pub(super) nodes_with_entry_count: u32,
57
57
58 /// Number of nodes anywhere in the tree that have
58 /// Number of nodes anywhere in the tree that have
59 /// `.copy_source.is_some()`.
59 /// `.copy_source.is_some()`.
60 pub(super) nodes_with_copy_source_count: u32,
60 pub(super) nodes_with_copy_source_count: u32,
61
61
62 /// See on_disk::Header
62 /// See on_disk::Header
63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
64
64
65 /// How many bytes of `on_disk` are not used anymore
65 /// How many bytes of `on_disk` are not used anymore
66 pub(super) unreachable_bytes: u32,
66 pub(super) unreachable_bytes: u32,
67
67
68 /// Size of the data used to first load this `DirstateMap`. Used in case
68 /// Size of the data used to first load this `DirstateMap`. Used in case
69 /// we need to write some new metadata, but no new data on disk,
69 /// we need to write some new metadata, but no new data on disk,
70 /// as well as to detect writes that have happened in another process
70 /// as well as to detect writes that have happened in another process
71 /// since first read.
71 /// since first read.
72 pub(super) old_data_size: usize,
72 pub(super) old_data_size: usize,
73
73
74 /// UUID used when first loading this `DirstateMap`. Used to check if
74 /// UUID used when first loading this `DirstateMap`. Used to check if
75 /// the UUID has been changed by another process since first read.
75 /// the UUID has been changed by another process since first read.
76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
77 pub(super) old_uuid: Option<Vec<u8>>,
77 pub(super) old_uuid: Option<Vec<u8>>,
78
78
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
80 /// (v2). Used to detect if the file has changed from another process.
80 /// (v2). Used to detect if the file has changed from another process.
81 /// Since it's always written atomically, we can compare the inode to
81 /// Since it's always written atomically, we can compare the inode to
82 /// check the file identity.
82 /// check the file identity.
83 ///
83 ///
84 /// TODO On non-Unix systems, something like hashing is a possibility?
84 /// TODO On non-Unix systems, something like hashing is a possibility?
85 pub(super) identity: Option<u64>,
85 pub(super) identity: Option<u64>,
86
86
87 pub(super) dirstate_version: DirstateVersion,
87 pub(super) dirstate_version: DirstateVersion,
88
88
89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
90 pub(super) write_mode: DirstateMapWriteMode,
90 pub(super) write_mode: DirstateMapWriteMode,
91 }
91 }
92
92
93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
94 /// map key would also work: all paths in a given map have the same parent
94 /// map key would also work: all paths in a given map have the same parent
95 /// path, so comparing full paths gives the same result as comparing base
95 /// path, so comparing full paths gives the same result as comparing base
96 /// names. However `HashMap` would waste time always re-hashing the same
96 /// names. However `HashMap` would waste time always re-hashing the same
97 /// string prefix.
97 /// string prefix.
98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
99
99
100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
102 #[derive(Debug)]
102 #[derive(Debug)]
103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
104 InMemory(&'tree HgPathBuf),
104 InMemory(&'tree HgPathBuf),
105 OnDisk(&'on_disk HgPath),
105 OnDisk(&'on_disk HgPath),
106 }
106 }
107
107
108 #[derive(Debug)]
108 #[derive(Debug)]
109 pub(super) enum ChildNodes<'on_disk> {
109 pub(super) enum ChildNodes<'on_disk> {
110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
111 OnDisk(&'on_disk [on_disk::Node]),
111 OnDisk(&'on_disk [on_disk::Node]),
112 }
112 }
113
113
114 #[derive(Debug)]
114 #[derive(Debug)]
115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
117 OnDisk(&'on_disk [on_disk::Node]),
117 OnDisk(&'on_disk [on_disk::Node]),
118 }
118 }
119
119
120 #[derive(Debug)]
120 #[derive(Debug)]
121 pub(super) enum NodeRef<'tree, 'on_disk> {
121 pub(super) enum NodeRef<'tree, 'on_disk> {
122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
123 OnDisk(&'on_disk on_disk::Node),
123 OnDisk(&'on_disk on_disk::Node),
124 }
124 }
125
125
126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
128 match *self {
128 match *self {
129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
131 }
131 }
132 }
132 }
133 }
133 }
134
134
135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
136 type Target = HgPath;
136 type Target = HgPath;
137
137
138 fn deref(&self) -> &HgPath {
138 fn deref(&self) -> &HgPath {
139 match *self {
139 match *self {
140 BorrowedPath::InMemory(in_memory) => in_memory,
140 BorrowedPath::InMemory(in_memory) => in_memory,
141 BorrowedPath::OnDisk(on_disk) => on_disk,
141 BorrowedPath::OnDisk(on_disk) => on_disk,
142 }
142 }
143 }
143 }
144 }
144 }
145
145
146 impl Default for ChildNodes<'_> {
146 impl Default for ChildNodes<'_> {
147 fn default() -> Self {
147 fn default() -> Self {
148 ChildNodes::InMemory(Default::default())
148 ChildNodes::InMemory(Default::default())
149 }
149 }
150 }
150 }
151
151
152 impl<'on_disk> ChildNodes<'on_disk> {
152 impl<'on_disk> ChildNodes<'on_disk> {
153 pub(super) fn as_ref<'tree>(
153 pub(super) fn as_ref<'tree>(
154 &'tree self,
154 &'tree self,
155 ) -> ChildNodesRef<'tree, 'on_disk> {
155 ) -> ChildNodesRef<'tree, 'on_disk> {
156 match self {
156 match self {
157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
159 }
159 }
160 }
160 }
161
161
162 pub(super) fn is_empty(&self) -> bool {
162 pub(super) fn is_empty(&self) -> bool {
163 match self {
163 match self {
164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
166 }
166 }
167 }
167 }
168
168
169 fn make_mut(
169 fn make_mut(
170 &mut self,
170 &mut self,
171 on_disk: &'on_disk [u8],
171 on_disk: &'on_disk [u8],
172 unreachable_bytes: &mut u32,
172 unreachable_bytes: &mut u32,
173 ) -> Result<
173 ) -> Result<
174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
175 DirstateV2ParseError,
175 DirstateV2ParseError,
176 > {
176 > {
177 match self {
177 match self {
178 ChildNodes::InMemory(nodes) => Ok(nodes),
178 ChildNodes::InMemory(nodes) => Ok(nodes),
179 ChildNodes::OnDisk(nodes) => {
179 ChildNodes::OnDisk(nodes) => {
180 *unreachable_bytes +=
180 *unreachable_bytes +=
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
182 let nodes = nodes
182 let nodes = nodes
183 .iter()
183 .iter()
184 .map(|node| {
184 .map(|node| {
185 Ok((
185 Ok((
186 node.path(on_disk)?,
186 node.path(on_disk)?,
187 node.to_in_memory_node(on_disk)?,
187 node.to_in_memory_node(on_disk)?,
188 ))
188 ))
189 })
189 })
190 .collect::<Result<_, _>>()?;
190 .collect::<Result<_, _>>()?;
191 *self = ChildNodes::InMemory(nodes);
191 *self = ChildNodes::InMemory(nodes);
192 match self {
192 match self {
193 ChildNodes::InMemory(nodes) => Ok(nodes),
193 ChildNodes::InMemory(nodes) => Ok(nodes),
194 ChildNodes::OnDisk(_) => unreachable!(),
194 ChildNodes::OnDisk(_) => unreachable!(),
195 }
195 }
196 }
196 }
197 }
197 }
198 }
198 }
199 }
199 }
200
200
201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
202 pub(super) fn get(
202 pub(super) fn get(
203 &self,
203 &self,
204 base_name: &HgPath,
204 base_name: &HgPath,
205 on_disk: &'on_disk [u8],
205 on_disk: &'on_disk [u8],
206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
207 match self {
207 match self {
208 ChildNodesRef::InMemory(nodes) => Ok(nodes
208 ChildNodesRef::InMemory(nodes) => Ok(nodes
209 .get_key_value(base_name)
209 .get_key_value(base_name)
210 .map(|(k, v)| NodeRef::InMemory(k, v))),
210 .map(|(k, v)| NodeRef::InMemory(k, v))),
211 ChildNodesRef::OnDisk(nodes) => {
211 ChildNodesRef::OnDisk(nodes) => {
212 let mut parse_result = Ok(());
212 let mut parse_result = Ok(());
213 let search_result = nodes.binary_search_by(|node| {
213 let search_result = nodes.binary_search_by(|node| {
214 match node.base_name(on_disk) {
214 match node.base_name(on_disk) {
215 Ok(node_base_name) => node_base_name.cmp(base_name),
215 Ok(node_base_name) => node_base_name.cmp(base_name),
216 Err(e) => {
216 Err(e) => {
217 parse_result = Err(e);
217 parse_result = Err(e);
218 // Dummy comparison result, `search_result` won’t
218 // Dummy comparison result, `search_result` won’t
219 // be used since `parse_result` is an error
219 // be used since `parse_result` is an error
220 std::cmp::Ordering::Equal
220 std::cmp::Ordering::Equal
221 }
221 }
222 }
222 }
223 });
223 });
224 parse_result.map(|()| {
224 parse_result.map(|()| {
225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
226 })
226 })
227 }
227 }
228 }
228 }
229 }
229 }
230
230
231 /// Iterate in undefined order
231 /// Iterate in undefined order
232 pub(super) fn iter(
232 pub(super) fn iter(
233 &self,
233 &self,
234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
235 match self {
235 match self {
236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
238 ),
238 ),
239 ChildNodesRef::OnDisk(nodes) => {
239 ChildNodesRef::OnDisk(nodes) => {
240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
241 }
241 }
242 }
242 }
243 }
243 }
244
244
245 /// Iterate in parallel in undefined order
245 /// Iterate in parallel in undefined order
246 pub(super) fn par_iter(
246 pub(super) fn par_iter(
247 &self,
247 &self,
248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
249 {
249 {
250 use rayon::prelude::*;
250 use rayon::prelude::*;
251 match self {
251 match self {
252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
254 ),
254 ),
255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
256 nodes.par_iter().map(NodeRef::OnDisk),
256 nodes.par_iter().map(NodeRef::OnDisk),
257 ),
257 ),
258 }
258 }
259 }
259 }
260
260
261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
262 match self {
262 match self {
263 ChildNodesRef::InMemory(nodes) => {
263 ChildNodesRef::InMemory(nodes) => {
264 let mut vec: Vec<_> = nodes
264 let mut vec: Vec<_> = nodes
265 .iter()
265 .iter()
266 .map(|(k, v)| NodeRef::InMemory(k, v))
266 .map(|(k, v)| NodeRef::InMemory(k, v))
267 .collect();
267 .collect();
268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
269 match node {
269 match node {
270 NodeRef::InMemory(path, _node) => path.base_name(),
270 NodeRef::InMemory(path, _node) => path.base_name(),
271 NodeRef::OnDisk(_) => unreachable!(),
271 NodeRef::OnDisk(_) => unreachable!(),
272 }
272 }
273 }
273 }
274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
275 // value: https://github.com/rust-lang/rust/issues/34162
275 // value: https://github.com/rust-lang/rust/issues/34162
276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
277 vec
277 vec
278 }
278 }
279 ChildNodesRef::OnDisk(nodes) => {
279 ChildNodesRef::OnDisk(nodes) => {
280 // Nodes on disk are already sorted
280 // Nodes on disk are already sorted
281 nodes.iter().map(NodeRef::OnDisk).collect()
281 nodes.iter().map(NodeRef::OnDisk).collect()
282 }
282 }
283 }
283 }
284 }
284 }
285 }
285 }
286
286
287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
288 pub(super) fn full_path(
288 pub(super) fn full_path(
289 &self,
289 &self,
290 on_disk: &'on_disk [u8],
290 on_disk: &'on_disk [u8],
291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
292 match self {
292 match self {
293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
294 NodeRef::OnDisk(node) => node.full_path(on_disk),
294 NodeRef::OnDisk(node) => node.full_path(on_disk),
295 }
295 }
296 }
296 }
297
297
298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
299 /// HgPath>` detached from `'tree`
299 /// HgPath>` detached from `'tree`
300 pub(super) fn full_path_borrowed(
300 pub(super) fn full_path_borrowed(
301 &self,
301 &self,
302 on_disk: &'on_disk [u8],
302 on_disk: &'on_disk [u8],
303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
304 match self {
304 match self {
305 NodeRef::InMemory(path, _node) => match path.full_path() {
305 NodeRef::InMemory(path, _node) => match path.full_path() {
306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
308 },
308 },
309 NodeRef::OnDisk(node) => {
309 NodeRef::OnDisk(node) => {
310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
311 }
311 }
312 }
312 }
313 }
313 }
314
314
315 pub(super) fn base_name(
315 pub(super) fn base_name(
316 &self,
316 &self,
317 on_disk: &'on_disk [u8],
317 on_disk: &'on_disk [u8],
318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
319 match self {
319 match self {
320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
321 NodeRef::OnDisk(node) => node.base_name(on_disk),
321 NodeRef::OnDisk(node) => node.base_name(on_disk),
322 }
322 }
323 }
323 }
324
324
325 pub(super) fn children(
325 pub(super) fn children(
326 &self,
326 &self,
327 on_disk: &'on_disk [u8],
327 on_disk: &'on_disk [u8],
328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
329 match self {
329 match self {
330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
331 NodeRef::OnDisk(node) => {
331 NodeRef::OnDisk(node) => {
332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
333 }
333 }
334 }
334 }
335 }
335 }
336
336
337 pub(super) fn has_copy_source(&self) -> bool {
337 pub(super) fn has_copy_source(&self) -> bool {
338 match self {
338 match self {
339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
340 NodeRef::OnDisk(node) => node.has_copy_source(),
340 NodeRef::OnDisk(node) => node.has_copy_source(),
341 }
341 }
342 }
342 }
343
343
344 pub(super) fn copy_source(
344 pub(super) fn copy_source(
345 &self,
345 &self,
346 on_disk: &'on_disk [u8],
346 on_disk: &'on_disk [u8],
347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
348 match self {
348 match self {
349 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
349 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
350 NodeRef::OnDisk(node) => node.copy_source(on_disk),
350 NodeRef::OnDisk(node) => node.copy_source(on_disk),
351 }
351 }
352 }
352 }
353 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
353 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
354 /// HgPath>` detached from `'tree`
354 /// HgPath>` detached from `'tree`
355 pub(super) fn copy_source_borrowed(
355 pub(super) fn copy_source_borrowed(
356 &self,
356 &self,
357 on_disk: &'on_disk [u8],
357 on_disk: &'on_disk [u8],
358 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
358 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
359 {
359 {
360 Ok(match self {
360 Ok(match self {
361 NodeRef::InMemory(_path, node) => {
361 NodeRef::InMemory(_path, node) => {
362 node.copy_source.as_ref().map(|source| match source {
362 node.copy_source.as_ref().map(|source| match source {
363 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
363 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
364 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
364 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
365 })
365 })
366 }
366 }
367 NodeRef::OnDisk(node) => {
367 NodeRef::OnDisk(node) => {
368 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
368 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
369 }
369 }
370 })
370 })
371 }
371 }
372
372
373 pub(super) fn entry(
373 pub(super) fn entry(
374 &self,
374 &self,
375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
376 match self {
376 match self {
377 NodeRef::InMemory(_path, node) => {
377 NodeRef::InMemory(_path, node) => {
378 Ok(node.data.as_entry().copied())
378 Ok(node.data.as_entry().copied())
379 }
379 }
380 NodeRef::OnDisk(node) => node.entry(),
380 NodeRef::OnDisk(node) => node.entry(),
381 }
381 }
382 }
382 }
383
383
384 pub(super) fn cached_directory_mtime(
384 pub(super) fn cached_directory_mtime(
385 &self,
385 &self,
386 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
386 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
387 match self {
387 match self {
388 NodeRef::InMemory(_path, node) => Ok(match node.data {
388 NodeRef::InMemory(_path, node) => Ok(match node.data {
389 NodeData::CachedDirectory { mtime } => Some(mtime),
389 NodeData::CachedDirectory { mtime } => Some(mtime),
390 _ => None,
390 _ => None,
391 }),
391 }),
392 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
392 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
393 }
393 }
394 }
394 }
395
395
396 pub(super) fn descendants_with_entry_count(&self) -> u32 {
396 pub(super) fn descendants_with_entry_count(&self) -> u32 {
397 match self {
397 match self {
398 NodeRef::InMemory(_path, node) => {
398 NodeRef::InMemory(_path, node) => {
399 node.descendants_with_entry_count
399 node.descendants_with_entry_count
400 }
400 }
401 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
401 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
402 }
402 }
403 }
403 }
404
404
405 pub(super) fn tracked_descendants_count(&self) -> u32 {
405 pub(super) fn tracked_descendants_count(&self) -> u32 {
406 match self {
406 match self {
407 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
407 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
408 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
408 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
409 }
409 }
410 }
410 }
411 }
411 }
412
412
413 /// Represents a file or a directory
413 /// Represents a file or a directory
414 #[derive(Default, Debug)]
414 #[derive(Default, Debug)]
415 pub(super) struct Node<'on_disk> {
415 pub(super) struct Node<'on_disk> {
416 pub(super) data: NodeData,
416 pub(super) data: NodeData,
417
417
418 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
418 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
419
419
420 pub(super) children: ChildNodes<'on_disk>,
420 pub(super) children: ChildNodes<'on_disk>,
421
421
422 /// How many (non-inclusive) descendants of this node have an entry.
422 /// How many (non-inclusive) descendants of this node have an entry.
423 pub(super) descendants_with_entry_count: u32,
423 pub(super) descendants_with_entry_count: u32,
424
424
425 /// How many (non-inclusive) descendants of this node have an entry whose
425 /// How many (non-inclusive) descendants of this node have an entry whose
426 /// state is "tracked".
426 /// state is "tracked".
427 pub(super) tracked_descendants_count: u32,
427 pub(super) tracked_descendants_count: u32,
428 }
428 }
429
429
430 #[derive(Debug)]
430 #[derive(Debug, Default)]
431 pub(super) enum NodeData {
431 pub(super) enum NodeData {
432 Entry(DirstateEntry),
432 Entry(DirstateEntry),
433 CachedDirectory { mtime: TruncatedTimestamp },
433 CachedDirectory { mtime: TruncatedTimestamp },
434 #[default]
434 None,
435 None,
435 }
436 }
436
437
437 impl Default for NodeData {
438 fn default() -> Self {
439 NodeData::None
440 }
441 }
442
443 impl NodeData {
438 impl NodeData {
444 fn has_entry(&self) -> bool {
439 fn has_entry(&self) -> bool {
445 matches!(self, NodeData::Entry(_))
440 matches!(self, NodeData::Entry(_))
446 }
441 }
447
442
448 fn as_entry(&self) -> Option<&DirstateEntry> {
443 fn as_entry(&self) -> Option<&DirstateEntry> {
449 match self {
444 match self {
450 NodeData::Entry(entry) => Some(entry),
445 NodeData::Entry(entry) => Some(entry),
451 _ => None,
446 _ => None,
452 }
447 }
453 }
448 }
454
449
455 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
450 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
456 match self {
451 match self {
457 NodeData::Entry(entry) => Some(entry),
452 NodeData::Entry(entry) => Some(entry),
458 _ => None,
453 _ => None,
459 }
454 }
460 }
455 }
461 }
456 }
462
457
463 impl<'on_disk> DirstateMap<'on_disk> {
458 impl<'on_disk> DirstateMap<'on_disk> {
464 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
459 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
465 Self {
460 Self {
466 on_disk,
461 on_disk,
467 root: ChildNodes::default(),
462 root: ChildNodes::default(),
468 nodes_with_entry_count: 0,
463 nodes_with_entry_count: 0,
469 nodes_with_copy_source_count: 0,
464 nodes_with_copy_source_count: 0,
470 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
465 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
471 unreachable_bytes: 0,
466 unreachable_bytes: 0,
472 old_data_size: 0,
467 old_data_size: 0,
473 old_uuid: None,
468 old_uuid: None,
474 identity: None,
469 identity: None,
475 dirstate_version: DirstateVersion::V1,
470 dirstate_version: DirstateVersion::V1,
476 write_mode: DirstateMapWriteMode::Auto,
471 write_mode: DirstateMapWriteMode::Auto,
477 }
472 }
478 }
473 }
479
474
480 #[logging_timer::time("trace")]
475 #[logging_timer::time("trace")]
481 pub fn new_v2(
476 pub fn new_v2(
482 on_disk: &'on_disk [u8],
477 on_disk: &'on_disk [u8],
483 data_size: usize,
478 data_size: usize,
484 metadata: &[u8],
479 metadata: &[u8],
485 uuid: Vec<u8>,
480 uuid: Vec<u8>,
486 identity: Option<u64>,
481 identity: Option<u64>,
487 ) -> Result<Self, DirstateError> {
482 ) -> Result<Self, DirstateError> {
488 if let Some(data) = on_disk.get(..data_size) {
483 if let Some(data) = on_disk.get(..data_size) {
489 Ok(on_disk::read(data, metadata, uuid, identity)?)
484 Ok(on_disk::read(data, metadata, uuid, identity)?)
490 } else {
485 } else {
491 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
486 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
492 }
487 }
493 }
488 }
494
489
495 #[logging_timer::time("trace")]
490 #[logging_timer::time("trace")]
496 pub fn new_v1(
491 pub fn new_v1(
497 on_disk: &'on_disk [u8],
492 on_disk: &'on_disk [u8],
498 identity: Option<u64>,
493 identity: Option<u64>,
499 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
494 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
500 let mut map = Self::empty(on_disk);
495 let mut map = Self::empty(on_disk);
501 if map.on_disk.is_empty() {
496 if map.on_disk.is_empty() {
502 return Ok((map, None));
497 return Ok((map, None));
503 }
498 }
504
499
505 let parents = parse_dirstate_entries(
500 let parents = parse_dirstate_entries(
506 map.on_disk,
501 map.on_disk,
507 |path, entry, copy_source| {
502 |path, entry, copy_source| {
508 let tracked = entry.tracked();
503 let tracked = entry.tracked();
509 let node = Self::get_or_insert_node_inner(
504 let node = Self::get_or_insert_node_inner(
510 map.on_disk,
505 map.on_disk,
511 &mut map.unreachable_bytes,
506 &mut map.unreachable_bytes,
512 &mut map.root,
507 &mut map.root,
513 path,
508 path,
514 WithBasename::to_cow_borrowed,
509 WithBasename::to_cow_borrowed,
515 |ancestor| {
510 |ancestor| {
516 if tracked {
511 if tracked {
517 ancestor.tracked_descendants_count += 1
512 ancestor.tracked_descendants_count += 1
518 }
513 }
519 ancestor.descendants_with_entry_count += 1
514 ancestor.descendants_with_entry_count += 1
520 },
515 },
521 )?;
516 )?;
522 assert!(
517 assert!(
523 !node.data.has_entry(),
518 !node.data.has_entry(),
524 "duplicate dirstate entry in read"
519 "duplicate dirstate entry in read"
525 );
520 );
526 assert!(
521 assert!(
527 node.copy_source.is_none(),
522 node.copy_source.is_none(),
528 "duplicate dirstate entry in read"
523 "duplicate dirstate entry in read"
529 );
524 );
530 node.data = NodeData::Entry(*entry);
525 node.data = NodeData::Entry(*entry);
531 node.copy_source = copy_source.map(Cow::Borrowed);
526 node.copy_source = copy_source.map(Cow::Borrowed);
532 map.nodes_with_entry_count += 1;
527 map.nodes_with_entry_count += 1;
533 if copy_source.is_some() {
528 if copy_source.is_some() {
534 map.nodes_with_copy_source_count += 1
529 map.nodes_with_copy_source_count += 1
535 }
530 }
536 Ok(())
531 Ok(())
537 },
532 },
538 )?;
533 )?;
539 let parents = Some(*parents);
534 let parents = Some(*parents);
540 map.identity = identity;
535 map.identity = identity;
541
536
542 Ok((map, parents))
537 Ok((map, parents))
543 }
538 }
544
539
545 /// Assuming dirstate-v2 format, returns whether the next write should
540 /// Assuming dirstate-v2 format, returns whether the next write should
546 /// append to the existing data file that contains `self.on_disk` (true),
541 /// append to the existing data file that contains `self.on_disk` (true),
547 /// or create a new data file from scratch (false).
542 /// or create a new data file from scratch (false).
548 pub(super) fn write_should_append(&self) -> bool {
543 pub(super) fn write_should_append(&self) -> bool {
549 match self.write_mode {
544 match self.write_mode {
550 DirstateMapWriteMode::ForceAppend => true,
545 DirstateMapWriteMode::ForceAppend => true,
551 DirstateMapWriteMode::ForceNewDataFile => false,
546 DirstateMapWriteMode::ForceNewDataFile => false,
552 DirstateMapWriteMode::Auto => {
547 DirstateMapWriteMode::Auto => {
553 let ratio =
548 let ratio =
554 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
549 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
555 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
550 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
556 }
551 }
557 }
552 }
558 }
553 }
559
554
560 fn get_node<'tree>(
555 fn get_node<'tree>(
561 &'tree self,
556 &'tree self,
562 path: &HgPath,
557 path: &HgPath,
563 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
558 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
564 let mut children = self.root.as_ref();
559 let mut children = self.root.as_ref();
565 let mut components = path.components();
560 let mut components = path.components();
566 let mut component =
561 let mut component =
567 components.next().expect("expected at least one components");
562 components.next().expect("expected at least one components");
568 loop {
563 loop {
569 if let Some(child) = children.get(component, self.on_disk)? {
564 if let Some(child) = children.get(component, self.on_disk)? {
570 if let Some(next_component) = components.next() {
565 if let Some(next_component) = components.next() {
571 component = next_component;
566 component = next_component;
572 children = child.children(self.on_disk)?;
567 children = child.children(self.on_disk)?;
573 } else {
568 } else {
574 return Ok(Some(child));
569 return Ok(Some(child));
575 }
570 }
576 } else {
571 } else {
577 return Ok(None);
572 return Ok(None);
578 }
573 }
579 }
574 }
580 }
575 }
581
576
582 pub fn has_node(
577 pub fn has_node(
583 &self,
578 &self,
584 path: &HgPath,
579 path: &HgPath,
585 ) -> Result<bool, DirstateV2ParseError> {
580 ) -> Result<bool, DirstateV2ParseError> {
586 let node = self.get_node(path)?;
581 let node = self.get_node(path)?;
587 Ok(node.is_some())
582 Ok(node.is_some())
588 }
583 }
589
584
590 /// Returns a mutable reference to the node at `path` if it exists
585 /// Returns a mutable reference to the node at `path` if it exists
591 ///
586 ///
592 /// `each_ancestor` is a callback that is called for each ancestor node
587 /// `each_ancestor` is a callback that is called for each ancestor node
593 /// when descending the tree. It is used to keep the different counters
588 /// when descending the tree. It is used to keep the different counters
594 /// of the `DirstateMap` up-to-date.
589 /// of the `DirstateMap` up-to-date.
595 fn get_node_mut<'tree>(
590 fn get_node_mut<'tree>(
596 &'tree mut self,
591 &'tree mut self,
597 path: &HgPath,
592 path: &HgPath,
598 each_ancestor: impl FnMut(&mut Node),
593 each_ancestor: impl FnMut(&mut Node),
599 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
594 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
600 Self::get_node_mut_inner(
595 Self::get_node_mut_inner(
601 self.on_disk,
596 self.on_disk,
602 &mut self.unreachable_bytes,
597 &mut self.unreachable_bytes,
603 &mut self.root,
598 &mut self.root,
604 path,
599 path,
605 each_ancestor,
600 each_ancestor,
606 )
601 )
607 }
602 }
608
603
609 /// Lower-level version of `get_node_mut`.
604 /// Lower-level version of `get_node_mut`.
610 ///
605 ///
611 /// This takes `root` instead of `&mut self` so that callers can mutate
606 /// This takes `root` instead of `&mut self` so that callers can mutate
612 /// other fields while the returned borrow is still valid.
607 /// other fields while the returned borrow is still valid.
613 ///
608 ///
614 /// `each_ancestor` is a callback that is called for each ancestor node
609 /// `each_ancestor` is a callback that is called for each ancestor node
615 /// when descending the tree. It is used to keep the different counters
610 /// when descending the tree. It is used to keep the different counters
616 /// of the `DirstateMap` up-to-date.
611 /// of the `DirstateMap` up-to-date.
617 fn get_node_mut_inner<'tree>(
612 fn get_node_mut_inner<'tree>(
618 on_disk: &'on_disk [u8],
613 on_disk: &'on_disk [u8],
619 unreachable_bytes: &mut u32,
614 unreachable_bytes: &mut u32,
620 root: &'tree mut ChildNodes<'on_disk>,
615 root: &'tree mut ChildNodes<'on_disk>,
621 path: &HgPath,
616 path: &HgPath,
622 mut each_ancestor: impl FnMut(&mut Node),
617 mut each_ancestor: impl FnMut(&mut Node),
623 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
618 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
624 let mut children = root;
619 let mut children = root;
625 let mut components = path.components();
620 let mut components = path.components();
626 let mut component =
621 let mut component =
627 components.next().expect("expected at least one components");
622 components.next().expect("expected at least one components");
628 loop {
623 loop {
629 if let Some(child) = children
624 if let Some(child) = children
630 .make_mut(on_disk, unreachable_bytes)?
625 .make_mut(on_disk, unreachable_bytes)?
631 .get_mut(component)
626 .get_mut(component)
632 {
627 {
633 if let Some(next_component) = components.next() {
628 if let Some(next_component) = components.next() {
634 each_ancestor(child);
629 each_ancestor(child);
635 component = next_component;
630 component = next_component;
636 children = &mut child.children;
631 children = &mut child.children;
637 } else {
632 } else {
638 return Ok(Some(child));
633 return Ok(Some(child));
639 }
634 }
640 } else {
635 } else {
641 return Ok(None);
636 return Ok(None);
642 }
637 }
643 }
638 }
644 }
639 }
645
640
646 /// Get a mutable reference to the node at `path`, creating it if it does
641 /// Get a mutable reference to the node at `path`, creating it if it does
647 /// not exist.
642 /// not exist.
648 ///
643 ///
649 /// `each_ancestor` is a callback that is called for each ancestor node
644 /// `each_ancestor` is a callback that is called for each ancestor node
650 /// when descending the tree. It is used to keep the different counters
645 /// when descending the tree. It is used to keep the different counters
651 /// of the `DirstateMap` up-to-date.
646 /// of the `DirstateMap` up-to-date.
652 fn get_or_insert_node<'tree, 'path>(
647 fn get_or_insert_node<'tree, 'path>(
653 &'tree mut self,
648 &'tree mut self,
654 path: &'path HgPath,
649 path: &'path HgPath,
655 each_ancestor: impl FnMut(&mut Node),
650 each_ancestor: impl FnMut(&mut Node),
656 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
651 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
657 Self::get_or_insert_node_inner(
652 Self::get_or_insert_node_inner(
658 self.on_disk,
653 self.on_disk,
659 &mut self.unreachable_bytes,
654 &mut self.unreachable_bytes,
660 &mut self.root,
655 &mut self.root,
661 path,
656 path,
662 WithBasename::to_cow_owned,
657 WithBasename::to_cow_owned,
663 each_ancestor,
658 each_ancestor,
664 )
659 )
665 }
660 }
666
661
667 /// Lower-level version of `get_or_insert_node_inner`, which is used when
662 /// Lower-level version of `get_or_insert_node_inner`, which is used when
668 /// parsing disk data to remove allocations for new nodes.
663 /// parsing disk data to remove allocations for new nodes.
669 fn get_or_insert_node_inner<'tree, 'path>(
664 fn get_or_insert_node_inner<'tree, 'path>(
670 on_disk: &'on_disk [u8],
665 on_disk: &'on_disk [u8],
671 unreachable_bytes: &mut u32,
666 unreachable_bytes: &mut u32,
672 root: &'tree mut ChildNodes<'on_disk>,
667 root: &'tree mut ChildNodes<'on_disk>,
673 path: &'path HgPath,
668 path: &'path HgPath,
674 to_cow: impl Fn(
669 to_cow: impl Fn(
675 WithBasename<&'path HgPath>,
670 WithBasename<&'path HgPath>,
676 ) -> WithBasename<Cow<'on_disk, HgPath>>,
671 ) -> WithBasename<Cow<'on_disk, HgPath>>,
677 mut each_ancestor: impl FnMut(&mut Node),
672 mut each_ancestor: impl FnMut(&mut Node),
678 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
673 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
679 let mut child_nodes = root;
674 let mut child_nodes = root;
680 let mut inclusive_ancestor_paths =
675 let mut inclusive_ancestor_paths =
681 WithBasename::inclusive_ancestors_of(path);
676 WithBasename::inclusive_ancestors_of(path);
682 let mut ancestor_path = inclusive_ancestor_paths
677 let mut ancestor_path = inclusive_ancestor_paths
683 .next()
678 .next()
684 .expect("expected at least one inclusive ancestor");
679 .expect("expected at least one inclusive ancestor");
685 loop {
680 loop {
686 let (_, child_node) = child_nodes
681 let (_, child_node) = child_nodes
687 .make_mut(on_disk, unreachable_bytes)?
682 .make_mut(on_disk, unreachable_bytes)?
688 .raw_entry_mut()
683 .raw_entry_mut()
689 .from_key(ancestor_path.base_name())
684 .from_key(ancestor_path.base_name())
690 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
685 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
691 if let Some(next) = inclusive_ancestor_paths.next() {
686 if let Some(next) = inclusive_ancestor_paths.next() {
692 each_ancestor(child_node);
687 each_ancestor(child_node);
693 ancestor_path = next;
688 ancestor_path = next;
694 child_nodes = &mut child_node.children;
689 child_nodes = &mut child_node.children;
695 } else {
690 } else {
696 return Ok(child_node);
691 return Ok(child_node);
697 }
692 }
698 }
693 }
699 }
694 }
700
695
701 #[allow(clippy::too_many_arguments)]
696 #[allow(clippy::too_many_arguments)]
702 fn reset_state(
697 fn reset_state(
703 &mut self,
698 &mut self,
704 filename: &HgPath,
699 filename: &HgPath,
705 old_entry_opt: Option<DirstateEntry>,
700 old_entry_opt: Option<DirstateEntry>,
706 wc_tracked: bool,
701 wc_tracked: bool,
707 p1_tracked: bool,
702 p1_tracked: bool,
708 p2_info: bool,
703 p2_info: bool,
709 has_meaningful_mtime: bool,
704 has_meaningful_mtime: bool,
710 parent_file_data_opt: Option<ParentFileData>,
705 parent_file_data_opt: Option<ParentFileData>,
711 ) -> Result<(), DirstateError> {
706 ) -> Result<(), DirstateError> {
712 let (had_entry, was_tracked) = match old_entry_opt {
707 let (had_entry, was_tracked) = match old_entry_opt {
713 Some(old_entry) => (true, old_entry.tracked()),
708 Some(old_entry) => (true, old_entry.tracked()),
714 None => (false, false),
709 None => (false, false),
715 };
710 };
716 let node = self.get_or_insert_node(filename, |ancestor| {
711 let node = self.get_or_insert_node(filename, |ancestor| {
717 if !had_entry {
712 if !had_entry {
718 ancestor.descendants_with_entry_count += 1;
713 ancestor.descendants_with_entry_count += 1;
719 }
714 }
720 if was_tracked {
715 if was_tracked {
721 if !wc_tracked {
716 if !wc_tracked {
722 ancestor.tracked_descendants_count = ancestor
717 ancestor.tracked_descendants_count = ancestor
723 .tracked_descendants_count
718 .tracked_descendants_count
724 .checked_sub(1)
719 .checked_sub(1)
725 .expect("tracked count to be >= 0");
720 .expect("tracked count to be >= 0");
726 }
721 }
727 } else if wc_tracked {
722 } else if wc_tracked {
728 ancestor.tracked_descendants_count += 1;
723 ancestor.tracked_descendants_count += 1;
729 }
724 }
730 })?;
725 })?;
731
726
732 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
727 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
733 DirstateV2Data {
728 DirstateV2Data {
734 wc_tracked,
729 wc_tracked,
735 p1_tracked,
730 p1_tracked,
736 p2_info,
731 p2_info,
737 mode_size: parent_file_data.mode_size,
732 mode_size: parent_file_data.mode_size,
738 mtime: if has_meaningful_mtime {
733 mtime: if has_meaningful_mtime {
739 parent_file_data.mtime
734 parent_file_data.mtime
740 } else {
735 } else {
741 None
736 None
742 },
737 },
743 ..Default::default()
738 ..Default::default()
744 }
739 }
745 } else {
740 } else {
746 DirstateV2Data {
741 DirstateV2Data {
747 wc_tracked,
742 wc_tracked,
748 p1_tracked,
743 p1_tracked,
749 p2_info,
744 p2_info,
750 ..Default::default()
745 ..Default::default()
751 }
746 }
752 };
747 };
753 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
748 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
754 if !had_entry {
749 if !had_entry {
755 self.nodes_with_entry_count += 1;
750 self.nodes_with_entry_count += 1;
756 }
751 }
757 Ok(())
752 Ok(())
758 }
753 }
759
754
760 fn set_tracked(
755 fn set_tracked(
761 &mut self,
756 &mut self,
762 filename: &HgPath,
757 filename: &HgPath,
763 old_entry_opt: Option<DirstateEntry>,
758 old_entry_opt: Option<DirstateEntry>,
764 ) -> Result<bool, DirstateV2ParseError> {
759 ) -> Result<bool, DirstateV2ParseError> {
765 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
760 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
766 let had_entry = old_entry_opt.is_some();
761 let had_entry = old_entry_opt.is_some();
767 let tracked_count_increment = u32::from(!was_tracked);
762 let tracked_count_increment = u32::from(!was_tracked);
768 let mut new = false;
763 let mut new = false;
769
764
770 let node = self.get_or_insert_node(filename, |ancestor| {
765 let node = self.get_or_insert_node(filename, |ancestor| {
771 if !had_entry {
766 if !had_entry {
772 ancestor.descendants_with_entry_count += 1;
767 ancestor.descendants_with_entry_count += 1;
773 }
768 }
774
769
775 ancestor.tracked_descendants_count += tracked_count_increment;
770 ancestor.tracked_descendants_count += tracked_count_increment;
776 })?;
771 })?;
777 if let Some(old_entry) = old_entry_opt {
772 if let Some(old_entry) = old_entry_opt {
778 let mut e = old_entry;
773 let mut e = old_entry;
779 if e.tracked() {
774 if e.tracked() {
780 // XXX
775 // XXX
781 // This is probably overkill for more case, but we need this to
776 // This is probably overkill for more case, but we need this to
782 // fully replace the `normallookup` call with `set_tracked`
777 // fully replace the `normallookup` call with `set_tracked`
783 // one. Consider smoothing this in the future.
778 // one. Consider smoothing this in the future.
784 e.set_possibly_dirty();
779 e.set_possibly_dirty();
785 } else {
780 } else {
786 new = true;
781 new = true;
787 e.set_tracked();
782 e.set_tracked();
788 }
783 }
789 node.data = NodeData::Entry(e)
784 node.data = NodeData::Entry(e)
790 } else {
785 } else {
791 node.data = NodeData::Entry(DirstateEntry::new_tracked());
786 node.data = NodeData::Entry(DirstateEntry::new_tracked());
792 self.nodes_with_entry_count += 1;
787 self.nodes_with_entry_count += 1;
793 new = true;
788 new = true;
794 };
789 };
795 Ok(new)
790 Ok(new)
796 }
791 }
797
792
798 /// Set a node as untracked in the dirstate.
793 /// Set a node as untracked in the dirstate.
799 ///
794 ///
800 /// It is the responsibility of the caller to remove the copy source and/or
795 /// It is the responsibility of the caller to remove the copy source and/or
801 /// the entry itself if appropriate.
796 /// the entry itself if appropriate.
802 ///
797 ///
803 /// # Panics
798 /// # Panics
804 ///
799 ///
805 /// Panics if the node does not exist.
800 /// Panics if the node does not exist.
806 fn set_untracked(
801 fn set_untracked(
807 &mut self,
802 &mut self,
808 filename: &HgPath,
803 filename: &HgPath,
809 old_entry: DirstateEntry,
804 old_entry: DirstateEntry,
810 ) -> Result<(), DirstateV2ParseError> {
805 ) -> Result<(), DirstateV2ParseError> {
811 let node = self
806 let node = self
812 .get_node_mut(filename, |ancestor| {
807 .get_node_mut(filename, |ancestor| {
813 ancestor.tracked_descendants_count = ancestor
808 ancestor.tracked_descendants_count = ancestor
814 .tracked_descendants_count
809 .tracked_descendants_count
815 .checked_sub(1)
810 .checked_sub(1)
816 .expect("tracked_descendants_count should be >= 0");
811 .expect("tracked_descendants_count should be >= 0");
817 })?
812 })?
818 .expect("node should exist");
813 .expect("node should exist");
819 let mut new_entry = old_entry;
814 let mut new_entry = old_entry;
820 new_entry.set_untracked();
815 new_entry.set_untracked();
821 node.data = NodeData::Entry(new_entry);
816 node.data = NodeData::Entry(new_entry);
822 Ok(())
817 Ok(())
823 }
818 }
824
819
825 /// Set a node as clean in the dirstate.
820 /// Set a node as clean in the dirstate.
826 ///
821 ///
827 /// It is the responsibility of the caller to remove the copy source.
822 /// It is the responsibility of the caller to remove the copy source.
828 ///
823 ///
829 /// # Panics
824 /// # Panics
830 ///
825 ///
831 /// Panics if the node does not exist.
826 /// Panics if the node does not exist.
832 fn set_clean(
827 fn set_clean(
833 &mut self,
828 &mut self,
834 filename: &HgPath,
829 filename: &HgPath,
835 old_entry: DirstateEntry,
830 old_entry: DirstateEntry,
836 mode: u32,
831 mode: u32,
837 size: u32,
832 size: u32,
838 mtime: TruncatedTimestamp,
833 mtime: TruncatedTimestamp,
839 ) -> Result<(), DirstateError> {
834 ) -> Result<(), DirstateError> {
840 let node = self
835 let node = self
841 .get_node_mut(filename, |ancestor| {
836 .get_node_mut(filename, |ancestor| {
842 if !old_entry.tracked() {
837 if !old_entry.tracked() {
843 ancestor.tracked_descendants_count += 1;
838 ancestor.tracked_descendants_count += 1;
844 }
839 }
845 })?
840 })?
846 .expect("node should exist");
841 .expect("node should exist");
847 let mut new_entry = old_entry;
842 let mut new_entry = old_entry;
848 new_entry.set_clean(mode, size, mtime);
843 new_entry.set_clean(mode, size, mtime);
849 node.data = NodeData::Entry(new_entry);
844 node.data = NodeData::Entry(new_entry);
850 Ok(())
845 Ok(())
851 }
846 }
852
847
853 /// Set a node as possibly dirty in the dirstate.
848 /// Set a node as possibly dirty in the dirstate.
854 ///
849 ///
855 /// # Panics
850 /// # Panics
856 ///
851 ///
857 /// Panics if the node does not exist.
852 /// Panics if the node does not exist.
858 fn set_possibly_dirty(
853 fn set_possibly_dirty(
859 &mut self,
854 &mut self,
860 filename: &HgPath,
855 filename: &HgPath,
861 ) -> Result<(), DirstateError> {
856 ) -> Result<(), DirstateError> {
862 let node = self
857 let node = self
863 .get_node_mut(filename, |_ancestor| {})?
858 .get_node_mut(filename, |_ancestor| {})?
864 .expect("node should exist");
859 .expect("node should exist");
865 let entry = node.data.as_entry_mut().expect("entry should exist");
860 let entry = node.data.as_entry_mut().expect("entry should exist");
866 entry.set_possibly_dirty();
861 entry.set_possibly_dirty();
867 node.data = NodeData::Entry(*entry);
862 node.data = NodeData::Entry(*entry);
868 Ok(())
863 Ok(())
869 }
864 }
870
865
871 /// Clears the cached mtime for the (potential) folder at `path`.
866 /// Clears the cached mtime for the (potential) folder at `path`.
872 pub(super) fn clear_cached_mtime(
867 pub(super) fn clear_cached_mtime(
873 &mut self,
868 &mut self,
874 path: &HgPath,
869 path: &HgPath,
875 ) -> Result<(), DirstateV2ParseError> {
870 ) -> Result<(), DirstateV2ParseError> {
876 let node = match self.get_node_mut(path, |_ancestor| {})? {
871 let node = match self.get_node_mut(path, |_ancestor| {})? {
877 Some(node) => node,
872 Some(node) => node,
878 None => return Ok(()),
873 None => return Ok(()),
879 };
874 };
880 if let NodeData::CachedDirectory { .. } = &node.data {
875 if let NodeData::CachedDirectory { .. } = &node.data {
881 node.data = NodeData::None
876 node.data = NodeData::None
882 }
877 }
883 Ok(())
878 Ok(())
884 }
879 }
885
880
886 /// Sets the cached mtime for the (potential) folder at `path`.
881 /// Sets the cached mtime for the (potential) folder at `path`.
887 pub(super) fn set_cached_mtime(
882 pub(super) fn set_cached_mtime(
888 &mut self,
883 &mut self,
889 path: &HgPath,
884 path: &HgPath,
890 mtime: TruncatedTimestamp,
885 mtime: TruncatedTimestamp,
891 ) -> Result<(), DirstateV2ParseError> {
886 ) -> Result<(), DirstateV2ParseError> {
892 let node = match self.get_node_mut(path, |_ancestor| {})? {
887 let node = match self.get_node_mut(path, |_ancestor| {})? {
893 Some(node) => node,
888 Some(node) => node,
894 None => return Ok(()),
889 None => return Ok(()),
895 };
890 };
896 match &node.data {
891 match &node.data {
897 NodeData::Entry(_) => {} // Don’t overwrite an entry
892 NodeData::Entry(_) => {} // Don’t overwrite an entry
898 NodeData::CachedDirectory { .. } | NodeData::None => {
893 NodeData::CachedDirectory { .. } | NodeData::None => {
899 node.data = NodeData::CachedDirectory { mtime }
894 node.data = NodeData::CachedDirectory { mtime }
900 }
895 }
901 }
896 }
902 Ok(())
897 Ok(())
903 }
898 }
904
899
905 fn iter_nodes<'tree>(
900 fn iter_nodes<'tree>(
906 &'tree self,
901 &'tree self,
907 ) -> impl Iterator<
902 ) -> impl Iterator<
908 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
903 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
909 > + 'tree {
904 > + 'tree {
910 // Depth first tree traversal.
905 // Depth first tree traversal.
911 //
906 //
912 // If we could afford internal iteration and recursion,
907 // If we could afford internal iteration and recursion,
913 // this would look like:
908 // this would look like:
914 //
909 //
915 // ```
910 // ```
916 // fn traverse_children(
911 // fn traverse_children(
917 // children: &ChildNodes,
912 // children: &ChildNodes,
918 // each: &mut impl FnMut(&Node),
913 // each: &mut impl FnMut(&Node),
919 // ) {
914 // ) {
920 // for child in children.values() {
915 // for child in children.values() {
921 // traverse_children(&child.children, each);
916 // traverse_children(&child.children, each);
922 // each(child);
917 // each(child);
923 // }
918 // }
924 // }
919 // }
925 // ```
920 // ```
926 //
921 //
927 // However we want an external iterator and therefore can’t use the
922 // However we want an external iterator and therefore can’t use the
928 // call stack. Use an explicit stack instead:
923 // call stack. Use an explicit stack instead:
929 let mut stack = Vec::new();
924 let mut stack = Vec::new();
930 let mut iter = self.root.as_ref().iter();
925 let mut iter = self.root.as_ref().iter();
931 std::iter::from_fn(move || {
926 std::iter::from_fn(move || {
932 while let Some(child_node) = iter.next() {
927 while let Some(child_node) = iter.next() {
933 let children = match child_node.children(self.on_disk) {
928 let children = match child_node.children(self.on_disk) {
934 Ok(children) => children,
929 Ok(children) => children,
935 Err(error) => return Some(Err(error)),
930 Err(error) => return Some(Err(error)),
936 };
931 };
937 // Pseudo-recursion
932 // Pseudo-recursion
938 let new_iter = children.iter();
933 let new_iter = children.iter();
939 let old_iter = std::mem::replace(&mut iter, new_iter);
934 let old_iter = std::mem::replace(&mut iter, new_iter);
940 stack.push((child_node, old_iter));
935 stack.push((child_node, old_iter));
941 }
936 }
942 // Found the end of a `children.iter()` iterator.
937 // Found the end of a `children.iter()` iterator.
943 if let Some((child_node, next_iter)) = stack.pop() {
938 if let Some((child_node, next_iter)) = stack.pop() {
944 // "Return" from pseudo-recursion by restoring state from the
939 // "Return" from pseudo-recursion by restoring state from the
945 // explicit stack
940 // explicit stack
946 iter = next_iter;
941 iter = next_iter;
947
942
948 Some(Ok(child_node))
943 Some(Ok(child_node))
949 } else {
944 } else {
950 // Reached the bottom of the stack, we’re done
945 // Reached the bottom of the stack, we’re done
951 None
946 None
952 }
947 }
953 })
948 })
954 }
949 }
955
950
956 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
951 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
957 if let Cow::Borrowed(path) = path {
952 if let Cow::Borrowed(path) = path {
958 *unreachable_bytes += path.len() as u32
953 *unreachable_bytes += path.len() as u32
959 }
954 }
960 }
955 }
961
956
962 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
957 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
963 self.write_mode = write_mode;
958 self.write_mode = write_mode;
964 }
959 }
965 }
960 }
966
961
967 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
962 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
968
963
969 impl OwningDirstateMap {
964 impl OwningDirstateMap {
970 pub fn clear(&mut self) {
965 pub fn clear(&mut self) {
971 self.with_dmap_mut(|map| {
966 self.with_dmap_mut(|map| {
972 map.root = Default::default();
967 map.root = Default::default();
973 map.nodes_with_entry_count = 0;
968 map.nodes_with_entry_count = 0;
974 map.nodes_with_copy_source_count = 0;
969 map.nodes_with_copy_source_count = 0;
975 map.unreachable_bytes = map.on_disk.len() as u32;
970 map.unreachable_bytes = map.on_disk.len() as u32;
976 });
971 });
977 }
972 }
978
973
979 pub fn set_tracked(
974 pub fn set_tracked(
980 &mut self,
975 &mut self,
981 filename: &HgPath,
976 filename: &HgPath,
982 ) -> Result<bool, DirstateV2ParseError> {
977 ) -> Result<bool, DirstateV2ParseError> {
983 let old_entry_opt = self.get(filename)?;
978 let old_entry_opt = self.get(filename)?;
984 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
979 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
985 }
980 }
986
981
987 pub fn set_untracked(
982 pub fn set_untracked(
988 &mut self,
983 &mut self,
989 filename: &HgPath,
984 filename: &HgPath,
990 ) -> Result<bool, DirstateError> {
985 ) -> Result<bool, DirstateError> {
991 let old_entry_opt = self.get(filename)?;
986 let old_entry_opt = self.get(filename)?;
992 match old_entry_opt {
987 match old_entry_opt {
993 None => Ok(false),
988 None => Ok(false),
994 Some(old_entry) => {
989 Some(old_entry) => {
995 if !old_entry.tracked() {
990 if !old_entry.tracked() {
996 // `DirstateMap::set_untracked` is not a noop if
991 // `DirstateMap::set_untracked` is not a noop if
997 // already not tracked as it will decrement the
992 // already not tracked as it will decrement the
998 // tracked counters while going down.
993 // tracked counters while going down.
999 return Ok(true);
994 return Ok(true);
1000 }
995 }
1001 if old_entry.added() {
996 if old_entry.added() {
1002 // Untracking an "added" entry will just result in a
997 // Untracking an "added" entry will just result in a
1003 // worthless entry (and other parts of the code will
998 // worthless entry (and other parts of the code will
1004 // complain about it), just drop it entirely.
999 // complain about it), just drop it entirely.
1005 self.drop_entry_and_copy_source(filename)?;
1000 self.drop_entry_and_copy_source(filename)?;
1006 return Ok(true);
1001 return Ok(true);
1007 }
1002 }
1008 if !old_entry.p2_info() {
1003 if !old_entry.p2_info() {
1009 self.copy_map_remove(filename)?;
1004 self.copy_map_remove(filename)?;
1010 }
1005 }
1011
1006
1012 self.with_dmap_mut(|map| {
1007 self.with_dmap_mut(|map| {
1013 map.set_untracked(filename, old_entry)?;
1008 map.set_untracked(filename, old_entry)?;
1014 Ok(true)
1009 Ok(true)
1015 })
1010 })
1016 }
1011 }
1017 }
1012 }
1018 }
1013 }
1019
1014
1020 pub fn set_clean(
1015 pub fn set_clean(
1021 &mut self,
1016 &mut self,
1022 filename: &HgPath,
1017 filename: &HgPath,
1023 mode: u32,
1018 mode: u32,
1024 size: u32,
1019 size: u32,
1025 mtime: TruncatedTimestamp,
1020 mtime: TruncatedTimestamp,
1026 ) -> Result<(), DirstateError> {
1021 ) -> Result<(), DirstateError> {
1027 let old_entry = match self.get(filename)? {
1022 let old_entry = match self.get(filename)? {
1028 None => {
1023 None => {
1029 return Err(
1024 return Err(
1030 DirstateMapError::PathNotFound(filename.into()).into()
1025 DirstateMapError::PathNotFound(filename.into()).into()
1031 )
1026 )
1032 }
1027 }
1033 Some(e) => e,
1028 Some(e) => e,
1034 };
1029 };
1035 self.copy_map_remove(filename)?;
1030 self.copy_map_remove(filename)?;
1036 self.with_dmap_mut(|map| {
1031 self.with_dmap_mut(|map| {
1037 map.set_clean(filename, old_entry, mode, size, mtime)
1032 map.set_clean(filename, old_entry, mode, size, mtime)
1038 })
1033 })
1039 }
1034 }
1040
1035
1041 pub fn set_possibly_dirty(
1036 pub fn set_possibly_dirty(
1042 &mut self,
1037 &mut self,
1043 filename: &HgPath,
1038 filename: &HgPath,
1044 ) -> Result<(), DirstateError> {
1039 ) -> Result<(), DirstateError> {
1045 if self.get(filename)?.is_none() {
1040 if self.get(filename)?.is_none() {
1046 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1041 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1047 }
1042 }
1048 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1043 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1049 }
1044 }
1050
1045
1051 pub fn reset_state(
1046 pub fn reset_state(
1052 &mut self,
1047 &mut self,
1053 filename: &HgPath,
1048 filename: &HgPath,
1054 wc_tracked: bool,
1049 wc_tracked: bool,
1055 p1_tracked: bool,
1050 p1_tracked: bool,
1056 p2_info: bool,
1051 p2_info: bool,
1057 has_meaningful_mtime: bool,
1052 has_meaningful_mtime: bool,
1058 parent_file_data_opt: Option<ParentFileData>,
1053 parent_file_data_opt: Option<ParentFileData>,
1059 ) -> Result<(), DirstateError> {
1054 ) -> Result<(), DirstateError> {
1060 if !(p1_tracked || p2_info || wc_tracked) {
1055 if !(p1_tracked || p2_info || wc_tracked) {
1061 self.drop_entry_and_copy_source(filename)?;
1056 self.drop_entry_and_copy_source(filename)?;
1062 return Ok(());
1057 return Ok(());
1063 }
1058 }
1064 self.copy_map_remove(filename)?;
1059 self.copy_map_remove(filename)?;
1065 let old_entry_opt = self.get(filename)?;
1060 let old_entry_opt = self.get(filename)?;
1066 self.with_dmap_mut(|map| {
1061 self.with_dmap_mut(|map| {
1067 map.reset_state(
1062 map.reset_state(
1068 filename,
1063 filename,
1069 old_entry_opt,
1064 old_entry_opt,
1070 wc_tracked,
1065 wc_tracked,
1071 p1_tracked,
1066 p1_tracked,
1072 p2_info,
1067 p2_info,
1073 has_meaningful_mtime,
1068 has_meaningful_mtime,
1074 parent_file_data_opt,
1069 parent_file_data_opt,
1075 )
1070 )
1076 })
1071 })
1077 }
1072 }
1078
1073
1079 pub fn drop_entry_and_copy_source(
1074 pub fn drop_entry_and_copy_source(
1080 &mut self,
1075 &mut self,
1081 filename: &HgPath,
1076 filename: &HgPath,
1082 ) -> Result<(), DirstateError> {
1077 ) -> Result<(), DirstateError> {
1083 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1078 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1084 struct Dropped {
1079 struct Dropped {
1085 was_tracked: bool,
1080 was_tracked: bool,
1086 had_entry: bool,
1081 had_entry: bool,
1087 had_copy_source: bool,
1082 had_copy_source: bool,
1088 }
1083 }
1089
1084
1090 /// If this returns `Ok(Some((dropped, removed)))`, then
1085 /// If this returns `Ok(Some((dropped, removed)))`, then
1091 ///
1086 ///
1092 /// * `dropped` is about the leaf node that was at `filename`
1087 /// * `dropped` is about the leaf node that was at `filename`
1093 /// * `removed` is whether this particular level of recursion just
1088 /// * `removed` is whether this particular level of recursion just
1094 /// removed a node in `nodes`.
1089 /// removed a node in `nodes`.
1095 fn recur<'on_disk>(
1090 fn recur<'on_disk>(
1096 on_disk: &'on_disk [u8],
1091 on_disk: &'on_disk [u8],
1097 unreachable_bytes: &mut u32,
1092 unreachable_bytes: &mut u32,
1098 nodes: &mut ChildNodes<'on_disk>,
1093 nodes: &mut ChildNodes<'on_disk>,
1099 path: &HgPath,
1094 path: &HgPath,
1100 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1095 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1101 let (first_path_component, rest_of_path) =
1096 let (first_path_component, rest_of_path) =
1102 path.split_first_component();
1097 path.split_first_component();
1103 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1098 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1104 let node = if let Some(node) = nodes.get_mut(first_path_component)
1099 let node = if let Some(node) = nodes.get_mut(first_path_component)
1105 {
1100 {
1106 node
1101 node
1107 } else {
1102 } else {
1108 return Ok(None);
1103 return Ok(None);
1109 };
1104 };
1110 let dropped;
1105 let dropped;
1111 if let Some(rest) = rest_of_path {
1106 if let Some(rest) = rest_of_path {
1112 if let Some((d, removed)) = recur(
1107 if let Some((d, removed)) = recur(
1113 on_disk,
1108 on_disk,
1114 unreachable_bytes,
1109 unreachable_bytes,
1115 &mut node.children,
1110 &mut node.children,
1116 rest,
1111 rest,
1117 )? {
1112 )? {
1118 dropped = d;
1113 dropped = d;
1119 if dropped.had_entry {
1114 if dropped.had_entry {
1120 node.descendants_with_entry_count = node
1115 node.descendants_with_entry_count = node
1121 .descendants_with_entry_count
1116 .descendants_with_entry_count
1122 .checked_sub(1)
1117 .checked_sub(1)
1123 .expect(
1118 .expect(
1124 "descendants_with_entry_count should be >= 0",
1119 "descendants_with_entry_count should be >= 0",
1125 );
1120 );
1126 }
1121 }
1127 if dropped.was_tracked {
1122 if dropped.was_tracked {
1128 node.tracked_descendants_count = node
1123 node.tracked_descendants_count = node
1129 .tracked_descendants_count
1124 .tracked_descendants_count
1130 .checked_sub(1)
1125 .checked_sub(1)
1131 .expect(
1126 .expect(
1132 "tracked_descendants_count should be >= 0",
1127 "tracked_descendants_count should be >= 0",
1133 );
1128 );
1134 }
1129 }
1135
1130
1136 // Directory caches must be invalidated when removing a
1131 // Directory caches must be invalidated when removing a
1137 // child node
1132 // child node
1138 if removed {
1133 if removed {
1139 if let NodeData::CachedDirectory { .. } = &node.data {
1134 if let NodeData::CachedDirectory { .. } = &node.data {
1140 node.data = NodeData::None
1135 node.data = NodeData::None
1141 }
1136 }
1142 }
1137 }
1143 } else {
1138 } else {
1144 return Ok(None);
1139 return Ok(None);
1145 }
1140 }
1146 } else {
1141 } else {
1147 let entry = node.data.as_entry();
1142 let entry = node.data.as_entry();
1148 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1143 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1149 let had_entry = entry.is_some();
1144 let had_entry = entry.is_some();
1150 if had_entry {
1145 if had_entry {
1151 node.data = NodeData::None
1146 node.data = NodeData::None
1152 }
1147 }
1153 let mut had_copy_source = false;
1148 let mut had_copy_source = false;
1154 if let Some(source) = &node.copy_source {
1149 if let Some(source) = &node.copy_source {
1155 DirstateMap::count_dropped_path(
1150 DirstateMap::count_dropped_path(
1156 unreachable_bytes,
1151 unreachable_bytes,
1157 Cow::Borrowed(source),
1152 Cow::Borrowed(source),
1158 );
1153 );
1159 had_copy_source = true;
1154 had_copy_source = true;
1160 node.copy_source = None
1155 node.copy_source = None
1161 }
1156 }
1162 dropped = Dropped {
1157 dropped = Dropped {
1163 was_tracked,
1158 was_tracked,
1164 had_entry,
1159 had_entry,
1165 had_copy_source,
1160 had_copy_source,
1166 };
1161 };
1167 }
1162 }
1168 // After recursion, for both leaf (rest_of_path is None) nodes and
1163 // After recursion, for both leaf (rest_of_path is None) nodes and
1169 // parent nodes, remove a node if it just became empty.
1164 // parent nodes, remove a node if it just became empty.
1170 let remove = !node.data.has_entry()
1165 let remove = !node.data.has_entry()
1171 && node.copy_source.is_none()
1166 && node.copy_source.is_none()
1172 && node.children.is_empty();
1167 && node.children.is_empty();
1173 if remove {
1168 if remove {
1174 let (key, _) =
1169 let (key, _) =
1175 nodes.remove_entry(first_path_component).unwrap();
1170 nodes.remove_entry(first_path_component).unwrap();
1176 DirstateMap::count_dropped_path(
1171 DirstateMap::count_dropped_path(
1177 unreachable_bytes,
1172 unreachable_bytes,
1178 Cow::Borrowed(key.full_path()),
1173 Cow::Borrowed(key.full_path()),
1179 )
1174 )
1180 }
1175 }
1181 Ok(Some((dropped, remove)))
1176 Ok(Some((dropped, remove)))
1182 }
1177 }
1183
1178
1184 self.with_dmap_mut(|map| {
1179 self.with_dmap_mut(|map| {
1185 if let Some((dropped, _removed)) = recur(
1180 if let Some((dropped, _removed)) = recur(
1186 map.on_disk,
1181 map.on_disk,
1187 &mut map.unreachable_bytes,
1182 &mut map.unreachable_bytes,
1188 &mut map.root,
1183 &mut map.root,
1189 filename,
1184 filename,
1190 )? {
1185 )? {
1191 if dropped.had_entry {
1186 if dropped.had_entry {
1192 map.nodes_with_entry_count = map
1187 map.nodes_with_entry_count = map
1193 .nodes_with_entry_count
1188 .nodes_with_entry_count
1194 .checked_sub(1)
1189 .checked_sub(1)
1195 .expect("nodes_with_entry_count should be >= 0");
1190 .expect("nodes_with_entry_count should be >= 0");
1196 }
1191 }
1197 if dropped.had_copy_source {
1192 if dropped.had_copy_source {
1198 map.nodes_with_copy_source_count = map
1193 map.nodes_with_copy_source_count = map
1199 .nodes_with_copy_source_count
1194 .nodes_with_copy_source_count
1200 .checked_sub(1)
1195 .checked_sub(1)
1201 .expect("nodes_with_copy_source_count should be >= 0");
1196 .expect("nodes_with_copy_source_count should be >= 0");
1202 }
1197 }
1203 } else {
1198 } else {
1204 debug_assert!(!was_tracked);
1199 debug_assert!(!was_tracked);
1205 }
1200 }
1206 Ok(())
1201 Ok(())
1207 })
1202 })
1208 }
1203 }
1209
1204
1210 pub fn has_tracked_dir(
1205 pub fn has_tracked_dir(
1211 &mut self,
1206 &mut self,
1212 directory: &HgPath,
1207 directory: &HgPath,
1213 ) -> Result<bool, DirstateError> {
1208 ) -> Result<bool, DirstateError> {
1214 self.with_dmap_mut(|map| {
1209 self.with_dmap_mut(|map| {
1215 if let Some(node) = map.get_node(directory)? {
1210 if let Some(node) = map.get_node(directory)? {
1216 // A node without a `DirstateEntry` was created to hold child
1211 // A node without a `DirstateEntry` was created to hold child
1217 // nodes, and is therefore a directory.
1212 // nodes, and is therefore a directory.
1218 let is_dir = node.entry()?.is_none();
1213 let is_dir = node.entry()?.is_none();
1219 Ok(is_dir && node.tracked_descendants_count() > 0)
1214 Ok(is_dir && node.tracked_descendants_count() > 0)
1220 } else {
1215 } else {
1221 Ok(false)
1216 Ok(false)
1222 }
1217 }
1223 })
1218 })
1224 }
1219 }
1225
1220
1226 pub fn has_dir(
1221 pub fn has_dir(
1227 &mut self,
1222 &mut self,
1228 directory: &HgPath,
1223 directory: &HgPath,
1229 ) -> Result<bool, DirstateError> {
1224 ) -> Result<bool, DirstateError> {
1230 self.with_dmap_mut(|map| {
1225 self.with_dmap_mut(|map| {
1231 if let Some(node) = map.get_node(directory)? {
1226 if let Some(node) = map.get_node(directory)? {
1232 // A node without a `DirstateEntry` was created to hold child
1227 // A node without a `DirstateEntry` was created to hold child
1233 // nodes, and is therefore a directory.
1228 // nodes, and is therefore a directory.
1234 let is_dir = node.entry()?.is_none();
1229 let is_dir = node.entry()?.is_none();
1235 Ok(is_dir && node.descendants_with_entry_count() > 0)
1230 Ok(is_dir && node.descendants_with_entry_count() > 0)
1236 } else {
1231 } else {
1237 Ok(false)
1232 Ok(false)
1238 }
1233 }
1239 })
1234 })
1240 }
1235 }
1241
1236
1242 #[logging_timer::time("trace")]
1237 #[logging_timer::time("trace")]
1243 pub fn pack_v1(
1238 pub fn pack_v1(
1244 &self,
1239 &self,
1245 parents: DirstateParents,
1240 parents: DirstateParents,
1246 ) -> Result<Vec<u8>, DirstateError> {
1241 ) -> Result<Vec<u8>, DirstateError> {
1247 let map = self.get_map();
1242 let map = self.get_map();
1248 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1243 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1249 // reallocations
1244 // reallocations
1250 let mut size = parents.as_bytes().len();
1245 let mut size = parents.as_bytes().len();
1251 for node in map.iter_nodes() {
1246 for node in map.iter_nodes() {
1252 let node = node?;
1247 let node = node?;
1253 if node.entry()?.is_some() {
1248 if node.entry()?.is_some() {
1254 size += packed_entry_size(
1249 size += packed_entry_size(
1255 node.full_path(map.on_disk)?,
1250 node.full_path(map.on_disk)?,
1256 node.copy_source(map.on_disk)?,
1251 node.copy_source(map.on_disk)?,
1257 );
1252 );
1258 }
1253 }
1259 }
1254 }
1260
1255
1261 let mut packed = Vec::with_capacity(size);
1256 let mut packed = Vec::with_capacity(size);
1262 packed.extend(parents.as_bytes());
1257 packed.extend(parents.as_bytes());
1263
1258
1264 for node in map.iter_nodes() {
1259 for node in map.iter_nodes() {
1265 let node = node?;
1260 let node = node?;
1266 if let Some(entry) = node.entry()? {
1261 if let Some(entry) = node.entry()? {
1267 pack_entry(
1262 pack_entry(
1268 node.full_path(map.on_disk)?,
1263 node.full_path(map.on_disk)?,
1269 &entry,
1264 &entry,
1270 node.copy_source(map.on_disk)?,
1265 node.copy_source(map.on_disk)?,
1271 &mut packed,
1266 &mut packed,
1272 );
1267 );
1273 }
1268 }
1274 }
1269 }
1275 Ok(packed)
1270 Ok(packed)
1276 }
1271 }
1277
1272
1278 /// Returns new data and metadata together with whether that data should be
1273 /// Returns new data and metadata together with whether that data should be
1279 /// appended to the existing data file whose content is at
1274 /// appended to the existing data file whose content is at
1280 /// `map.on_disk` (true), instead of written to a new data file
1275 /// `map.on_disk` (true), instead of written to a new data file
1281 /// (false), and the previous size of data on disk.
1276 /// (false), and the previous size of data on disk.
1282 #[logging_timer::time("trace")]
1277 #[logging_timer::time("trace")]
1283 pub fn pack_v2(
1278 pub fn pack_v2(
1284 &self,
1279 &self,
1285 write_mode: DirstateMapWriteMode,
1280 write_mode: DirstateMapWriteMode,
1286 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1281 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1287 {
1282 {
1288 let map = self.get_map();
1283 let map = self.get_map();
1289 on_disk::write(map, write_mode)
1284 on_disk::write(map, write_mode)
1290 }
1285 }
1291
1286
1292 /// `callback` allows the caller to process and do something with the
1287 /// `callback` allows the caller to process and do something with the
1293 /// results of the status. This is needed to do so efficiently (i.e.
1288 /// results of the status. This is needed to do so efficiently (i.e.
1294 /// without cloning the `DirstateStatus` object with its paths) because
1289 /// without cloning the `DirstateStatus` object with its paths) because
1295 /// we need to borrow from `Self`.
1290 /// we need to borrow from `Self`.
1296 pub fn with_status<R>(
1291 pub fn with_status<R>(
1297 &mut self,
1292 &mut self,
1298 matcher: &(dyn Matcher + Sync),
1293 matcher: &(dyn Matcher + Sync),
1299 root_dir: PathBuf,
1294 root_dir: PathBuf,
1300 ignore_files: Vec<PathBuf>,
1295 ignore_files: Vec<PathBuf>,
1301 options: StatusOptions,
1296 options: StatusOptions,
1302 callback: impl for<'r> FnOnce(
1297 callback: impl for<'r> FnOnce(
1303 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1298 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1304 ) -> R,
1299 ) -> R,
1305 ) -> R {
1300 ) -> R {
1306 self.with_dmap_mut(|map| {
1301 self.with_dmap_mut(|map| {
1307 callback(super::status::status(
1302 callback(super::status::status(
1308 map,
1303 map,
1309 matcher,
1304 matcher,
1310 root_dir,
1305 root_dir,
1311 ignore_files,
1306 ignore_files,
1312 options,
1307 options,
1313 ))
1308 ))
1314 })
1309 })
1315 }
1310 }
1316
1311
1317 pub fn copy_map_len(&self) -> usize {
1312 pub fn copy_map_len(&self) -> usize {
1318 let map = self.get_map();
1313 let map = self.get_map();
1319 map.nodes_with_copy_source_count as usize
1314 map.nodes_with_copy_source_count as usize
1320 }
1315 }
1321
1316
1322 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1317 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1323 let map = self.get_map();
1318 let map = self.get_map();
1324 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1319 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1325 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1320 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1326 Some((node.full_path(map.on_disk)?, source))
1321 Some((node.full_path(map.on_disk)?, source))
1327 } else {
1322 } else {
1328 None
1323 None
1329 })
1324 })
1330 }))
1325 }))
1331 }
1326 }
1332
1327
1333 pub fn copy_map_contains_key(
1328 pub fn copy_map_contains_key(
1334 &self,
1329 &self,
1335 key: &HgPath,
1330 key: &HgPath,
1336 ) -> Result<bool, DirstateV2ParseError> {
1331 ) -> Result<bool, DirstateV2ParseError> {
1337 let map = self.get_map();
1332 let map = self.get_map();
1338 Ok(if let Some(node) = map.get_node(key)? {
1333 Ok(if let Some(node) = map.get_node(key)? {
1339 node.has_copy_source()
1334 node.has_copy_source()
1340 } else {
1335 } else {
1341 false
1336 false
1342 })
1337 })
1343 }
1338 }
1344
1339
1345 pub fn copy_map_get(
1340 pub fn copy_map_get(
1346 &self,
1341 &self,
1347 key: &HgPath,
1342 key: &HgPath,
1348 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1343 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1349 let map = self.get_map();
1344 let map = self.get_map();
1350 if let Some(node) = map.get_node(key)? {
1345 if let Some(node) = map.get_node(key)? {
1351 if let Some(source) = node.copy_source(map.on_disk)? {
1346 if let Some(source) = node.copy_source(map.on_disk)? {
1352 return Ok(Some(source));
1347 return Ok(Some(source));
1353 }
1348 }
1354 }
1349 }
1355 Ok(None)
1350 Ok(None)
1356 }
1351 }
1357
1352
1358 pub fn copy_map_remove(
1353 pub fn copy_map_remove(
1359 &mut self,
1354 &mut self,
1360 key: &HgPath,
1355 key: &HgPath,
1361 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1356 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1362 self.with_dmap_mut(|map| {
1357 self.with_dmap_mut(|map| {
1363 let count = &mut map.nodes_with_copy_source_count;
1358 let count = &mut map.nodes_with_copy_source_count;
1364 let unreachable_bytes = &mut map.unreachable_bytes;
1359 let unreachable_bytes = &mut map.unreachable_bytes;
1365 Ok(DirstateMap::get_node_mut_inner(
1360 Ok(DirstateMap::get_node_mut_inner(
1366 map.on_disk,
1361 map.on_disk,
1367 unreachable_bytes,
1362 unreachable_bytes,
1368 &mut map.root,
1363 &mut map.root,
1369 key,
1364 key,
1370 |_ancestor| {},
1365 |_ancestor| {},
1371 )?
1366 )?
1372 .and_then(|node| {
1367 .and_then(|node| {
1373 if let Some(source) = &node.copy_source {
1368 if let Some(source) = &node.copy_source {
1374 *count = count
1369 *count = count
1375 .checked_sub(1)
1370 .checked_sub(1)
1376 .expect("nodes_with_copy_source_count should be >= 0");
1371 .expect("nodes_with_copy_source_count should be >= 0");
1377 DirstateMap::count_dropped_path(
1372 DirstateMap::count_dropped_path(
1378 unreachable_bytes,
1373 unreachable_bytes,
1379 Cow::Borrowed(source),
1374 Cow::Borrowed(source),
1380 );
1375 );
1381 }
1376 }
1382 node.copy_source.take().map(Cow::into_owned)
1377 node.copy_source.take().map(Cow::into_owned)
1383 }))
1378 }))
1384 })
1379 })
1385 }
1380 }
1386
1381
1387 pub fn copy_map_insert(
1382 pub fn copy_map_insert(
1388 &mut self,
1383 &mut self,
1389 key: &HgPath,
1384 key: &HgPath,
1390 value: &HgPath,
1385 value: &HgPath,
1391 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1386 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1392 self.with_dmap_mut(|map| {
1387 self.with_dmap_mut(|map| {
1393 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1388 let node = map.get_or_insert_node(key, |_ancestor| {})?;
1394 let had_copy_source = node.copy_source.is_none();
1389 let had_copy_source = node.copy_source.is_none();
1395 let old = node
1390 let old = node
1396 .copy_source
1391 .copy_source
1397 .replace(value.to_owned().into())
1392 .replace(value.to_owned().into())
1398 .map(Cow::into_owned);
1393 .map(Cow::into_owned);
1399 if had_copy_source {
1394 if had_copy_source {
1400 map.nodes_with_copy_source_count += 1
1395 map.nodes_with_copy_source_count += 1
1401 }
1396 }
1402 Ok(old)
1397 Ok(old)
1403 })
1398 })
1404 }
1399 }
1405
1400
1406 pub fn len(&self) -> usize {
1401 pub fn len(&self) -> usize {
1407 let map = self.get_map();
1402 let map = self.get_map();
1408 map.nodes_with_entry_count as usize
1403 map.nodes_with_entry_count as usize
1409 }
1404 }
1410
1405
1411 pub fn is_empty(&self) -> bool {
1406 pub fn is_empty(&self) -> bool {
1412 self.len() == 0
1407 self.len() == 0
1413 }
1408 }
1414
1409
1415 pub fn contains_key(
1410 pub fn contains_key(
1416 &self,
1411 &self,
1417 key: &HgPath,
1412 key: &HgPath,
1418 ) -> Result<bool, DirstateV2ParseError> {
1413 ) -> Result<bool, DirstateV2ParseError> {
1419 Ok(self.get(key)?.is_some())
1414 Ok(self.get(key)?.is_some())
1420 }
1415 }
1421
1416
1422 pub fn get(
1417 pub fn get(
1423 &self,
1418 &self,
1424 key: &HgPath,
1419 key: &HgPath,
1425 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1420 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1426 let map = self.get_map();
1421 let map = self.get_map();
1427 Ok(if let Some(node) = map.get_node(key)? {
1422 Ok(if let Some(node) = map.get_node(key)? {
1428 node.entry()?
1423 node.entry()?
1429 } else {
1424 } else {
1430 None
1425 None
1431 })
1426 })
1432 }
1427 }
1433
1428
1434 pub fn iter(&self) -> StateMapIter<'_> {
1429 pub fn iter(&self) -> StateMapIter<'_> {
1435 let map = self.get_map();
1430 let map = self.get_map();
1436 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1431 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1437 Ok(if let Some(entry) = node.entry()? {
1432 Ok(if let Some(entry) = node.entry()? {
1438 Some((node.full_path(map.on_disk)?, entry))
1433 Some((node.full_path(map.on_disk)?, entry))
1439 } else {
1434 } else {
1440 None
1435 None
1441 })
1436 })
1442 }))
1437 }))
1443 }
1438 }
1444
1439
1445 pub fn iter_tracked_dirs(
1440 pub fn iter_tracked_dirs(
1446 &mut self,
1441 &mut self,
1447 ) -> Result<
1442 ) -> Result<
1448 Box<
1443 Box<
1449 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1444 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1450 + Send
1445 + Send
1451 + '_,
1446 + '_,
1452 >,
1447 >,
1453 DirstateError,
1448 DirstateError,
1454 > {
1449 > {
1455 let map = self.get_map();
1450 let map = self.get_map();
1456 let on_disk = map.on_disk;
1451 let on_disk = map.on_disk;
1457 Ok(Box::new(filter_map_results(
1452 Ok(Box::new(filter_map_results(
1458 map.iter_nodes(),
1453 map.iter_nodes(),
1459 move |node| {
1454 move |node| {
1460 Ok(if node.tracked_descendants_count() > 0 {
1455 Ok(if node.tracked_descendants_count() > 0 {
1461 Some(node.full_path(on_disk)?)
1456 Some(node.full_path(on_disk)?)
1462 } else {
1457 } else {
1463 None
1458 None
1464 })
1459 })
1465 },
1460 },
1466 )))
1461 )))
1467 }
1462 }
1468
1463
1469 /// Only public because it needs to be exposed to the Python layer.
1464 /// Only public because it needs to be exposed to the Python layer.
1470 /// It is not the full `setparents` logic, only the parts that mutate the
1465 /// It is not the full `setparents` logic, only the parts that mutate the
1471 /// entries.
1466 /// entries.
1472 pub fn setparents_fixup(
1467 pub fn setparents_fixup(
1473 &mut self,
1468 &mut self,
1474 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1469 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1475 // XXX
1470 // XXX
1476 // All the copying and re-querying is quite inefficient, but this is
1471 // All the copying and re-querying is quite inefficient, but this is
1477 // still a lot better than doing it from Python.
1472 // still a lot better than doing it from Python.
1478 //
1473 //
1479 // The better solution is to develop a mechanism for `iter_mut`,
1474 // The better solution is to develop a mechanism for `iter_mut`,
1480 // which will be a lot more involved: we're dealing with a lazy,
1475 // which will be a lot more involved: we're dealing with a lazy,
1481 // append-mostly, tree-like data structure. This will do for now.
1476 // append-mostly, tree-like data structure. This will do for now.
1482 let mut copies = vec![];
1477 let mut copies = vec![];
1483 let mut files_with_p2_info = vec![];
1478 let mut files_with_p2_info = vec![];
1484 for res in self.iter() {
1479 for res in self.iter() {
1485 let (path, entry) = res?;
1480 let (path, entry) = res?;
1486 if entry.p2_info() {
1481 if entry.p2_info() {
1487 files_with_p2_info.push(path.to_owned())
1482 files_with_p2_info.push(path.to_owned())
1488 }
1483 }
1489 }
1484 }
1490 self.with_dmap_mut(|map| {
1485 self.with_dmap_mut(|map| {
1491 for path in files_with_p2_info.iter() {
1486 for path in files_with_p2_info.iter() {
1492 let node = map.get_or_insert_node(path, |_| {})?;
1487 let node = map.get_or_insert_node(path, |_| {})?;
1493 let entry =
1488 let entry =
1494 node.data.as_entry_mut().expect("entry should exist");
1489 node.data.as_entry_mut().expect("entry should exist");
1495 entry.drop_merge_data();
1490 entry.drop_merge_data();
1496 if let Some(source) = node.copy_source.take().as_deref() {
1491 if let Some(source) = node.copy_source.take().as_deref() {
1497 copies.push((path.to_owned(), source.to_owned()));
1492 copies.push((path.to_owned(), source.to_owned()));
1498 }
1493 }
1499 }
1494 }
1500 Ok(copies)
1495 Ok(copies)
1501 })
1496 })
1502 }
1497 }
1503
1498
1504 pub fn debug_iter(
1499 pub fn debug_iter(
1505 &self,
1500 &self,
1506 all: bool,
1501 all: bool,
1507 ) -> Box<
1502 ) -> Box<
1508 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1503 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1509 + Send
1504 + Send
1510 + '_,
1505 + '_,
1511 > {
1506 > {
1512 let map = self.get_map();
1507 let map = self.get_map();
1513 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1508 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1514 let debug_tuple = if let Some(entry) = node.entry()? {
1509 let debug_tuple = if let Some(entry) = node.entry()? {
1515 entry.debug_tuple()
1510 entry.debug_tuple()
1516 } else if !all {
1511 } else if !all {
1517 return Ok(None);
1512 return Ok(None);
1518 } else if let Some(mtime) = node.cached_directory_mtime()? {
1513 } else if let Some(mtime) = node.cached_directory_mtime()? {
1519 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1514 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1520 } else {
1515 } else {
1521 (b' ', 0, -1, -1)
1516 (b' ', 0, -1, -1)
1522 };
1517 };
1523 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1518 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1524 }))
1519 }))
1525 }
1520 }
1526 }
1521 }
1527 #[cfg(test)]
1522 #[cfg(test)]
1528 mod tests {
1523 mod tests {
1529 use super::*;
1524 use super::*;
1530
1525
1531 /// Shortcut to return tracked descendants of a path.
1526 /// Shortcut to return tracked descendants of a path.
1532 /// Panics if the path does not exist.
1527 /// Panics if the path does not exist.
1533 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1528 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1534 let path = dbg!(HgPath::new(path));
1529 let path = dbg!(HgPath::new(path));
1535 let node = map.get_map().get_node(path);
1530 let node = map.get_map().get_node(path);
1536 node.unwrap().unwrap().tracked_descendants_count()
1531 node.unwrap().unwrap().tracked_descendants_count()
1537 }
1532 }
1538
1533
1539 /// Shortcut to return descendants with an entry.
1534 /// Shortcut to return descendants with an entry.
1540 /// Panics if the path does not exist.
1535 /// Panics if the path does not exist.
1541 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1536 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1542 let path = dbg!(HgPath::new(path));
1537 let path = dbg!(HgPath::new(path));
1543 let node = map.get_map().get_node(path);
1538 let node = map.get_map().get_node(path);
1544 node.unwrap().unwrap().descendants_with_entry_count()
1539 node.unwrap().unwrap().descendants_with_entry_count()
1545 }
1540 }
1546
1541
1547 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1542 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1548 let path = dbg!(HgPath::new(path));
1543 let path = dbg!(HgPath::new(path));
1549 let node = map.get_map().get_node(path);
1544 let node = map.get_map().get_node(path);
1550 assert!(node.unwrap().is_none());
1545 assert!(node.unwrap().is_none());
1551 }
1546 }
1552
1547
1553 /// Shortcut for path creation in tests
1548 /// Shortcut for path creation in tests
1554 fn p(b: &[u8]) -> &HgPath {
1549 fn p(b: &[u8]) -> &HgPath {
1555 HgPath::new(b)
1550 HgPath::new(b)
1556 }
1551 }
1557
1552
1558 /// Test the very simple case a single tracked file
1553 /// Test the very simple case a single tracked file
1559 #[test]
1554 #[test]
1560 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1555 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1561 let mut map = OwningDirstateMap::new_empty(vec![]);
1556 let mut map = OwningDirstateMap::new_empty(vec![]);
1562 assert_eq!(map.len(), 0);
1557 assert_eq!(map.len(), 0);
1563
1558
1564 map.set_tracked(p(b"some/nested/path"))?;
1559 map.set_tracked(p(b"some/nested/path"))?;
1565
1560
1566 assert_eq!(map.len(), 1);
1561 assert_eq!(map.len(), 1);
1567 assert_eq!(tracked_descendants(&map, b"some"), 1);
1562 assert_eq!(tracked_descendants(&map, b"some"), 1);
1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1563 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1564 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1570
1565
1571 map.set_untracked(p(b"some/nested/path"))?;
1566 map.set_untracked(p(b"some/nested/path"))?;
1572 assert_eq!(map.len(), 0);
1567 assert_eq!(map.len(), 0);
1573 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1568 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1574
1569
1575 Ok(())
1570 Ok(())
1576 }
1571 }
1577
1572
1578 /// Test the simple case of all tracked, but multiple files
1573 /// Test the simple case of all tracked, but multiple files
1579 #[test]
1574 #[test]
1580 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1575 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1581 let mut map = OwningDirstateMap::new_empty(vec![]);
1576 let mut map = OwningDirstateMap::new_empty(vec![]);
1582
1577
1583 map.set_tracked(p(b"some/nested/path"))?;
1578 map.set_tracked(p(b"some/nested/path"))?;
1584 map.set_tracked(p(b"some/nested/file"))?;
1579 map.set_tracked(p(b"some/nested/file"))?;
1585 // one layer without any files to test deletion cascade
1580 // one layer without any files to test deletion cascade
1586 map.set_tracked(p(b"some/other/nested/path"))?;
1581 map.set_tracked(p(b"some/other/nested/path"))?;
1587 map.set_tracked(p(b"root_file"))?;
1582 map.set_tracked(p(b"root_file"))?;
1588 map.set_tracked(p(b"some/file"))?;
1583 map.set_tracked(p(b"some/file"))?;
1589 map.set_tracked(p(b"some/file2"))?;
1584 map.set_tracked(p(b"some/file2"))?;
1590 map.set_tracked(p(b"some/file3"))?;
1585 map.set_tracked(p(b"some/file3"))?;
1591
1586
1592 assert_eq!(map.len(), 7);
1587 assert_eq!(map.len(), 7);
1593 assert_eq!(tracked_descendants(&map, b"some"), 6);
1588 assert_eq!(tracked_descendants(&map, b"some"), 6);
1594 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1589 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1595 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1590 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1596 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1591 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1597 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1592 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1598
1593
1599 map.set_untracked(p(b"some/nested/path"))?;
1594 map.set_untracked(p(b"some/nested/path"))?;
1600 assert_eq!(map.len(), 6);
1595 assert_eq!(map.len(), 6);
1601 assert_eq!(tracked_descendants(&map, b"some"), 5);
1596 assert_eq!(tracked_descendants(&map, b"some"), 5);
1602 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1597 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1603 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1598 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1599 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605
1600
1606 map.set_untracked(p(b"some/nested/file"))?;
1601 map.set_untracked(p(b"some/nested/file"))?;
1607 assert_eq!(map.len(), 5);
1602 assert_eq!(map.len(), 5);
1608 assert_eq!(tracked_descendants(&map, b"some"), 4);
1603 assert_eq!(tracked_descendants(&map, b"some"), 4);
1609 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1610 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1611 assert_does_not_exist(&map, b"some_nested");
1606 assert_does_not_exist(&map, b"some_nested");
1612
1607
1613 map.set_untracked(p(b"some/other/nested/path"))?;
1608 map.set_untracked(p(b"some/other/nested/path"))?;
1614 assert_eq!(map.len(), 4);
1609 assert_eq!(map.len(), 4);
1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1610 assert_eq!(tracked_descendants(&map, b"some"), 3);
1616 assert_does_not_exist(&map, b"some/other");
1611 assert_does_not_exist(&map, b"some/other");
1617
1612
1618 map.set_untracked(p(b"root_file"))?;
1613 map.set_untracked(p(b"root_file"))?;
1619 assert_eq!(map.len(), 3);
1614 assert_eq!(map.len(), 3);
1620 assert_eq!(tracked_descendants(&map, b"some"), 3);
1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1621 assert_does_not_exist(&map, b"root_file");
1616 assert_does_not_exist(&map, b"root_file");
1622
1617
1623 map.set_untracked(p(b"some/file"))?;
1618 map.set_untracked(p(b"some/file"))?;
1624 assert_eq!(map.len(), 2);
1619 assert_eq!(map.len(), 2);
1625 assert_eq!(tracked_descendants(&map, b"some"), 2);
1620 assert_eq!(tracked_descendants(&map, b"some"), 2);
1626 assert_does_not_exist(&map, b"some/file");
1621 assert_does_not_exist(&map, b"some/file");
1627
1622
1628 map.set_untracked(p(b"some/file2"))?;
1623 map.set_untracked(p(b"some/file2"))?;
1629 assert_eq!(map.len(), 1);
1624 assert_eq!(map.len(), 1);
1630 assert_eq!(tracked_descendants(&map, b"some"), 1);
1625 assert_eq!(tracked_descendants(&map, b"some"), 1);
1631 assert_does_not_exist(&map, b"some/file2");
1626 assert_does_not_exist(&map, b"some/file2");
1632
1627
1633 map.set_untracked(p(b"some/file3"))?;
1628 map.set_untracked(p(b"some/file3"))?;
1634 assert_eq!(map.len(), 0);
1629 assert_eq!(map.len(), 0);
1635 assert_does_not_exist(&map, b"some/file3");
1630 assert_does_not_exist(&map, b"some/file3");
1636
1631
1637 Ok(())
1632 Ok(())
1638 }
1633 }
1639
1634
1640 /// Check with a mix of tracked and non-tracked items
1635 /// Check with a mix of tracked and non-tracked items
1641 #[test]
1636 #[test]
1642 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1637 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1643 let mut map = OwningDirstateMap::new_empty(vec![]);
1638 let mut map = OwningDirstateMap::new_empty(vec![]);
1644
1639
1645 // A file that was just added
1640 // A file that was just added
1646 map.set_tracked(p(b"some/nested/path"))?;
1641 map.set_tracked(p(b"some/nested/path"))?;
1647 // This has no information, the dirstate should ignore it
1642 // This has no information, the dirstate should ignore it
1648 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1643 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1649 assert_does_not_exist(&map, b"some/file");
1644 assert_does_not_exist(&map, b"some/file");
1650
1645
1651 // A file that was removed
1646 // A file that was removed
1652 map.reset_state(
1647 map.reset_state(
1653 p(b"some/nested/file"),
1648 p(b"some/nested/file"),
1654 false,
1649 false,
1655 true,
1650 true,
1656 false,
1651 false,
1657 false,
1652 false,
1658 None,
1653 None,
1659 )?;
1654 )?;
1660 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1655 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1661 // Only present in p2
1656 // Only present in p2
1662 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1657 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1663 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1658 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1664 // A file that was merged
1659 // A file that was merged
1665 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1660 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1666 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1661 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1667 // A file that is added, with info from p2
1662 // A file that is added, with info from p2
1668 // XXX is that actually possible?
1663 // XXX is that actually possible?
1669 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1664 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1670 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1665 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1671 // A clean file
1666 // A clean file
1672 // One layer without any files to test deletion cascade
1667 // One layer without any files to test deletion cascade
1673 map.reset_state(
1668 map.reset_state(
1674 p(b"some/other/nested/path"),
1669 p(b"some/other/nested/path"),
1675 true,
1670 true,
1676 true,
1671 true,
1677 false,
1672 false,
1678 false,
1673 false,
1679 None,
1674 None,
1680 )?;
1675 )?;
1681 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1676 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1682
1677
1683 assert_eq!(map.len(), 6);
1678 assert_eq!(map.len(), 6);
1684 assert_eq!(tracked_descendants(&map, b"some"), 3);
1679 assert_eq!(tracked_descendants(&map, b"some"), 3);
1685 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1680 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1686 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1681 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1687 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1682 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1688 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1683 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1689 assert_eq!(
1684 assert_eq!(
1690 descendants_with_an_entry(&map, b"some/other/nested/path"),
1685 descendants_with_an_entry(&map, b"some/other/nested/path"),
1691 0
1686 0
1692 );
1687 );
1693 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1688 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1694 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1689 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1695
1690
1696 // might as well check this
1691 // might as well check this
1697 map.set_untracked(p(b"path/does/not/exist"))?;
1692 map.set_untracked(p(b"path/does/not/exist"))?;
1698 assert_eq!(map.len(), 6);
1693 assert_eq!(map.len(), 6);
1699
1694
1700 map.set_untracked(p(b"some/other/nested/path"))?;
1695 map.set_untracked(p(b"some/other/nested/path"))?;
1701 // It is set untracked but not deleted since it held other information
1696 // It is set untracked but not deleted since it held other information
1702 assert_eq!(map.len(), 6);
1697 assert_eq!(map.len(), 6);
1703 assert_eq!(tracked_descendants(&map, b"some"), 2);
1698 assert_eq!(tracked_descendants(&map, b"some"), 2);
1704 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1699 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1705 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1700 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1706 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1701 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1707 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1702 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1708 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1703 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1709
1704
1710 map.set_untracked(p(b"some/nested/path"))?;
1705 map.set_untracked(p(b"some/nested/path"))?;
1711 // It is set untracked *and* deleted since it was only added
1706 // It is set untracked *and* deleted since it was only added
1712 assert_eq!(map.len(), 5);
1707 assert_eq!(map.len(), 5);
1713 assert_eq!(tracked_descendants(&map, b"some"), 1);
1708 assert_eq!(tracked_descendants(&map, b"some"), 1);
1714 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1709 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1715 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1710 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1716 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1711 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1717 assert_does_not_exist(&map, b"some/nested/path");
1712 assert_does_not_exist(&map, b"some/nested/path");
1718
1713
1719 map.set_untracked(p(b"root_file"))?;
1714 map.set_untracked(p(b"root_file"))?;
1720 // Untracked but not deleted
1715 // Untracked but not deleted
1721 assert_eq!(map.len(), 5);
1716 assert_eq!(map.len(), 5);
1722 assert!(map.get(p(b"root_file"))?.is_some());
1717 assert!(map.get(p(b"root_file"))?.is_some());
1723
1718
1724 map.set_untracked(p(b"some/file2"))?;
1719 map.set_untracked(p(b"some/file2"))?;
1725 assert_eq!(map.len(), 5);
1720 assert_eq!(map.len(), 5);
1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1721 assert_eq!(tracked_descendants(&map, b"some"), 0);
1727 assert!(map.get(p(b"some/file2"))?.is_some());
1722 assert!(map.get(p(b"some/file2"))?.is_some());
1728
1723
1729 map.set_untracked(p(b"some/file3"))?;
1724 map.set_untracked(p(b"some/file3"))?;
1730 assert_eq!(map.len(), 5);
1725 assert_eq!(map.len(), 5);
1731 assert_eq!(tracked_descendants(&map, b"some"), 0);
1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1732 assert!(map.get(p(b"some/file3"))?.is_some());
1727 assert!(map.get(p(b"some/file3"))?.is_some());
1733
1728
1734 Ok(())
1729 Ok(())
1735 }
1730 }
1736
1731
1737 /// Check that copies counter is correctly updated
1732 /// Check that copies counter is correctly updated
1738 #[test]
1733 #[test]
1739 fn test_copy_source() -> Result<(), DirstateError> {
1734 fn test_copy_source() -> Result<(), DirstateError> {
1740 let mut map = OwningDirstateMap::new_empty(vec![]);
1735 let mut map = OwningDirstateMap::new_empty(vec![]);
1741
1736
1742 // Clean file
1737 // Clean file
1743 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1738 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1744 // Merged file
1739 // Merged file
1745 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1740 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1746 // Removed file
1741 // Removed file
1747 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1742 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1748 // Added file
1743 // Added file
1749 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1744 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1750 // Add copy
1745 // Add copy
1751 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1746 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1752 assert_eq!(map.copy_map_len(), 1);
1747 assert_eq!(map.copy_map_len(), 1);
1753
1748
1754 // Copy override
1749 // Copy override
1755 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1750 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1756 assert_eq!(map.copy_map_len(), 1);
1751 assert_eq!(map.copy_map_len(), 1);
1757
1752
1758 // Multiple copies
1753 // Multiple copies
1759 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1754 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1760 assert_eq!(map.copy_map_len(), 2);
1755 assert_eq!(map.copy_map_len(), 2);
1761
1756
1762 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1757 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1763 assert_eq!(map.copy_map_len(), 3);
1758 assert_eq!(map.copy_map_len(), 3);
1764
1759
1765 // Added, so the entry is completely removed
1760 // Added, so the entry is completely removed
1766 map.set_untracked(p(b"files/added"))?;
1761 map.set_untracked(p(b"files/added"))?;
1767 assert_does_not_exist(&map, b"files/added");
1762 assert_does_not_exist(&map, b"files/added");
1768 assert_eq!(map.copy_map_len(), 2);
1763 assert_eq!(map.copy_map_len(), 2);
1769
1764
1770 // Removed, so the entry is kept around, so is its copy
1765 // Removed, so the entry is kept around, so is its copy
1771 map.set_untracked(p(b"removed"))?;
1766 map.set_untracked(p(b"removed"))?;
1772 assert!(map.get(p(b"removed"))?.is_some());
1767 assert!(map.get(p(b"removed"))?.is_some());
1773 assert_eq!(map.copy_map_len(), 2);
1768 assert_eq!(map.copy_map_len(), 2);
1774
1769
1775 // Clean, so the entry is kept around, but not its copy
1770 // Clean, so the entry is kept around, but not its copy
1776 map.set_untracked(p(b"files/clean"))?;
1771 map.set_untracked(p(b"files/clean"))?;
1777 assert!(map.get(p(b"files/clean"))?.is_some());
1772 assert!(map.get(p(b"files/clean"))?.is_some());
1778 assert_eq!(map.copy_map_len(), 1);
1773 assert_eq!(map.copy_map_len(), 1);
1779
1774
1780 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1775 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1781 assert_eq!(map.copy_map_len(), 2);
1776 assert_eq!(map.copy_map_len(), 2);
1782
1777
1783 // Info from p2, so its copy source info is kept around
1778 // Info from p2, so its copy source info is kept around
1784 map.set_untracked(p(b"files/from_p2"))?;
1779 map.set_untracked(p(b"files/from_p2"))?;
1785 assert!(map.get(p(b"files/from_p2"))?.is_some());
1780 assert!(map.get(p(b"files/from_p2"))?.is_some());
1786 assert_eq!(map.copy_map_len(), 2);
1781 assert_eq!(map.copy_map_len(), 2);
1787
1782
1788 Ok(())
1783 Ok(())
1789 }
1784 }
1790
1785
1791 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1786 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1792 /// does not actually come from the disk, but it's opaque to the code being
1787 /// does not actually come from the disk, but it's opaque to the code being
1793 /// tested.
1788 /// tested.
1794 #[test]
1789 #[test]
1795 fn test_on_disk() -> Result<(), DirstateError> {
1790 fn test_on_disk() -> Result<(), DirstateError> {
1796 // First let's create some data to put "on disk"
1791 // First let's create some data to put "on disk"
1797 let mut map = OwningDirstateMap::new_empty(vec![]);
1792 let mut map = OwningDirstateMap::new_empty(vec![]);
1798
1793
1799 // A file that was just added
1794 // A file that was just added
1800 map.set_tracked(p(b"some/nested/added"))?;
1795 map.set_tracked(p(b"some/nested/added"))?;
1801 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1796 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1802
1797
1803 // A file that was removed
1798 // A file that was removed
1804 map.reset_state(
1799 map.reset_state(
1805 p(b"some/nested/removed"),
1800 p(b"some/nested/removed"),
1806 false,
1801 false,
1807 true,
1802 true,
1808 false,
1803 false,
1809 false,
1804 false,
1810 None,
1805 None,
1811 )?;
1806 )?;
1812 // Only present in p2
1807 // Only present in p2
1813 map.reset_state(
1808 map.reset_state(
1814 p(b"other/p2_info_only"),
1809 p(b"other/p2_info_only"),
1815 false,
1810 false,
1816 false,
1811 false,
1817 true,
1812 true,
1818 false,
1813 false,
1819 None,
1814 None,
1820 )?;
1815 )?;
1821 map.copy_map_insert(
1816 map.copy_map_insert(
1822 p(b"other/p2_info_only"),
1817 p(b"other/p2_info_only"),
1823 p(b"other/p2_info_copy_source"),
1818 p(b"other/p2_info_copy_source"),
1824 )?;
1819 )?;
1825 // A file that was merged
1820 // A file that was merged
1826 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1821 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1827 // A file that is added, with info from p2
1822 // A file that is added, with info from p2
1828 // XXX is that actually possible?
1823 // XXX is that actually possible?
1829 map.reset_state(
1824 map.reset_state(
1830 p(b"other/added_with_p2"),
1825 p(b"other/added_with_p2"),
1831 true,
1826 true,
1832 false,
1827 false,
1833 true,
1828 true,
1834 false,
1829 false,
1835 None,
1830 None,
1836 )?;
1831 )?;
1837 // One layer without any files to test deletion cascade
1832 // One layer without any files to test deletion cascade
1838 // A clean file
1833 // A clean file
1839 map.reset_state(
1834 map.reset_state(
1840 p(b"some/other/nested/clean"),
1835 p(b"some/other/nested/clean"),
1841 true,
1836 true,
1842 true,
1837 true,
1843 false,
1838 false,
1844 false,
1839 false,
1845 None,
1840 None,
1846 )?;
1841 )?;
1847
1842
1848 let (packed, metadata, _should_append, _old_data_size) =
1843 let (packed, metadata, _should_append, _old_data_size) =
1849 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1844 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1850 let packed_len = packed.len();
1845 let packed_len = packed.len();
1851 assert!(packed_len > 0);
1846 assert!(packed_len > 0);
1852
1847
1853 // Recreate "from disk"
1848 // Recreate "from disk"
1854 let mut map = OwningDirstateMap::new_v2(
1849 let mut map = OwningDirstateMap::new_v2(
1855 packed,
1850 packed,
1856 packed_len,
1851 packed_len,
1857 metadata.as_bytes(),
1852 metadata.as_bytes(),
1858 vec![],
1853 vec![],
1859 None,
1854 None,
1860 )?;
1855 )?;
1861
1856
1862 // Check that everything is accounted for
1857 // Check that everything is accounted for
1863 assert!(map.contains_key(p(b"some/nested/added"))?);
1858 assert!(map.contains_key(p(b"some/nested/added"))?);
1864 assert!(map.contains_key(p(b"some/nested/removed"))?);
1859 assert!(map.contains_key(p(b"some/nested/removed"))?);
1865 assert!(map.contains_key(p(b"merged"))?);
1860 assert!(map.contains_key(p(b"merged"))?);
1866 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1861 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1867 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1862 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1868 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1863 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1869 assert_eq!(
1864 assert_eq!(
1870 map.copy_map_get(p(b"some/nested/added"))?,
1865 map.copy_map_get(p(b"some/nested/added"))?,
1871 Some(p(b"added_copy_source"))
1866 Some(p(b"added_copy_source"))
1872 );
1867 );
1873 assert_eq!(
1868 assert_eq!(
1874 map.copy_map_get(p(b"other/p2_info_only"))?,
1869 map.copy_map_get(p(b"other/p2_info_only"))?,
1875 Some(p(b"other/p2_info_copy_source"))
1870 Some(p(b"other/p2_info_copy_source"))
1876 );
1871 );
1877 assert_eq!(tracked_descendants(&map, b"some"), 2);
1872 assert_eq!(tracked_descendants(&map, b"some"), 2);
1878 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1873 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1879 assert_eq!(tracked_descendants(&map, b"other"), 1);
1874 assert_eq!(tracked_descendants(&map, b"other"), 1);
1880 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1875 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1881 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1876 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1882 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1877 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1883 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1878 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1884 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1879 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1885 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1880 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1886 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1881 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1887 assert_eq!(map.len(), 6);
1882 assert_eq!(map.len(), 6);
1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1883 assert_eq!(map.get_map().unreachable_bytes, 0);
1889 assert_eq!(map.copy_map_len(), 2);
1884 assert_eq!(map.copy_map_len(), 2);
1890
1885
1891 // Shouldn't change anything since it's already not tracked
1886 // Shouldn't change anything since it's already not tracked
1892 map.set_untracked(p(b"some/nested/removed"))?;
1887 map.set_untracked(p(b"some/nested/removed"))?;
1893 assert_eq!(map.get_map().unreachable_bytes, 0);
1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1894
1889
1895 if let ChildNodes::InMemory(_) = map.get_map().root {
1890 if let ChildNodes::InMemory(_) = map.get_map().root {
1896 panic!("root should not have been mutated")
1891 panic!("root should not have been mutated")
1897 }
1892 }
1898 // We haven't mutated enough (nothing, actually), we should still be in
1893 // We haven't mutated enough (nothing, actually), we should still be in
1899 // the append strategy
1894 // the append strategy
1900 assert!(map.get_map().write_should_append());
1895 assert!(map.get_map().write_should_append());
1901
1896
1902 // But this mutates the structure, so there should be unreachable_bytes
1897 // But this mutates the structure, so there should be unreachable_bytes
1903 assert!(map.set_untracked(p(b"some/nested/added"))?);
1898 assert!(map.set_untracked(p(b"some/nested/added"))?);
1904 let unreachable_bytes = map.get_map().unreachable_bytes;
1899 let unreachable_bytes = map.get_map().unreachable_bytes;
1905 assert!(unreachable_bytes > 0);
1900 assert!(unreachable_bytes > 0);
1906
1901
1907 if let ChildNodes::OnDisk(_) = map.get_map().root {
1902 if let ChildNodes::OnDisk(_) = map.get_map().root {
1908 panic!("root should have been mutated")
1903 panic!("root should have been mutated")
1909 }
1904 }
1910
1905
1911 // This should not mutate the structure either, since `root` has
1906 // This should not mutate the structure either, since `root` has
1912 // already been mutated along with its direct children.
1907 // already been mutated along with its direct children.
1913 map.set_untracked(p(b"merged"))?;
1908 map.set_untracked(p(b"merged"))?;
1914 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1909 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1915
1910
1916 if let NodeRef::InMemory(_, _) =
1911 if let NodeRef::InMemory(_, _) =
1917 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1912 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1918 {
1913 {
1919 panic!("'other/added_with_p2' should not have been mutated")
1914 panic!("'other/added_with_p2' should not have been mutated")
1920 }
1915 }
1921 // But this should, since it's in a different path
1916 // But this should, since it's in a different path
1922 // than `<root>some/nested/add`
1917 // than `<root>some/nested/add`
1923 map.set_untracked(p(b"other/added_with_p2"))?;
1918 map.set_untracked(p(b"other/added_with_p2"))?;
1924 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1919 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1925
1920
1926 if let NodeRef::OnDisk(_) =
1921 if let NodeRef::OnDisk(_) =
1927 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1922 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1928 {
1923 {
1929 panic!("'other/added_with_p2' should have been mutated")
1924 panic!("'other/added_with_p2' should have been mutated")
1930 }
1925 }
1931
1926
1932 // We have rewritten most of the tree, we should create a new file
1927 // We have rewritten most of the tree, we should create a new file
1933 assert!(!map.get_map().write_should_append());
1928 assert!(!map.get_map().write_should_append());
1934
1929
1935 Ok(())
1930 Ok(())
1936 }
1931 }
1937 }
1932 }
@@ -1,915 +1,913
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::{
7 use crate::dirstate_tree::dirstate_map::{
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 };
9 };
10 use crate::dirstate_tree::path_with_basename::WithBasename;
10 use crate::dirstate_tree::path_with_basename::WithBasename;
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use crate::utils::hg_path::HgPath;
12 use crate::utils::hg_path::HgPath;
13 use crate::DirstateEntry;
13 use crate::DirstateEntry;
14 use crate::DirstateError;
14 use crate::DirstateError;
15 use crate::DirstateParents;
15 use crate::DirstateParents;
16 use bitflags::bitflags;
16 use bitflags::bitflags;
17 use bytes_cast::unaligned::{U16Be, U32Be};
17 use bytes_cast::unaligned::{U16Be, U32Be};
18 use bytes_cast::BytesCast;
18 use bytes_cast::BytesCast;
19 use format_bytes::format_bytes;
19 use format_bytes::format_bytes;
20 use rand::Rng;
20 use rand::Rng;
21 use std::borrow::Cow;
21 use std::borrow::Cow;
22 use std::fmt::Write;
22 use std::fmt::Write;
23
23
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 /// This a redundant sanity check more than an actual "magic number" since
25 /// This a redundant sanity check more than an actual "magic number" since
26 /// `.hg/requires` already governs which format should be used.
26 /// `.hg/requires` already governs which format should be used.
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28
28
29 /// Keep space for 256-bit hashes
29 /// Keep space for 256-bit hashes
30 const STORED_NODE_ID_BYTES: usize = 32;
30 const STORED_NODE_ID_BYTES: usize = 32;
31
31
32 /// … even though only 160 bits are used for now, with SHA-1
32 /// … even though only 160 bits are used for now, with SHA-1
33 const USED_NODE_ID_BYTES: usize = 20;
33 const USED_NODE_ID_BYTES: usize = 20;
34
34
35 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
35 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
36 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
36 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
37
37
38 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
38 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
39 const TREE_METADATA_SIZE: usize = 44;
39 const TREE_METADATA_SIZE: usize = 44;
40 const NODE_SIZE: usize = 44;
40 const NODE_SIZE: usize = 44;
41
41
42 /// Make sure that size-affecting changes are made knowingly
42 /// Make sure that size-affecting changes are made knowingly
43 #[allow(unused)]
43 #[allow(unused)]
44 fn static_assert_size_of() {
44 fn static_assert_size_of() {
45 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
45 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
46 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
46 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
47 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
47 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
48 }
48 }
49
49
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 #[derive(BytesCast)]
51 #[derive(BytesCast)]
52 #[repr(C)]
52 #[repr(C)]
53 struct DocketHeader {
53 struct DocketHeader {
54 marker: [u8; V2_FORMAT_MARKER.len()],
54 marker: [u8; V2_FORMAT_MARKER.len()],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
57
58 metadata: TreeMetadata,
58 metadata: TreeMetadata,
59
59
60 /// Counted in bytes
60 /// Counted in bytes
61 data_size: Size,
61 data_size: Size,
62
62
63 uuid_size: u8,
63 uuid_size: u8,
64 }
64 }
65
65
66 pub struct Docket<'on_disk> {
66 pub struct Docket<'on_disk> {
67 header: &'on_disk DocketHeader,
67 header: &'on_disk DocketHeader,
68 pub uuid: &'on_disk [u8],
68 pub uuid: &'on_disk [u8],
69 }
69 }
70
70
71 /// Fields are documented in the *Tree metadata in the docket file*
71 /// Fields are documented in the *Tree metadata in the docket file*
72 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
72 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
73 #[derive(BytesCast)]
73 #[derive(BytesCast)]
74 #[repr(C)]
74 #[repr(C)]
75 pub struct TreeMetadata {
75 pub struct TreeMetadata {
76 root_nodes: ChildNodes,
76 root_nodes: ChildNodes,
77 nodes_with_entry_count: Size,
77 nodes_with_entry_count: Size,
78 nodes_with_copy_source_count: Size,
78 nodes_with_copy_source_count: Size,
79 unreachable_bytes: Size,
79 unreachable_bytes: Size,
80 unused: [u8; 4],
80 unused: [u8; 4],
81
81
82 /// See *Optional hash of ignore patterns* section of
82 /// See *Optional hash of ignore patterns* section of
83 /// `mercurial/helptext/internals/dirstate-v2.txt`
83 /// `mercurial/helptext/internals/dirstate-v2.txt`
84 ignore_patterns_hash: IgnorePatternsHash,
84 ignore_patterns_hash: IgnorePatternsHash,
85 }
85 }
86
86
87 /// Fields are documented in the *The data file format*
87 /// Fields are documented in the *The data file format*
88 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
88 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
89 #[derive(BytesCast, Debug)]
89 #[derive(BytesCast, Debug)]
90 #[repr(C)]
90 #[repr(C)]
91 pub(super) struct Node {
91 pub(super) struct Node {
92 full_path: PathSlice,
92 full_path: PathSlice,
93
93
94 /// In bytes from `self.full_path.start`
94 /// In bytes from `self.full_path.start`
95 base_name_start: PathSize,
95 base_name_start: PathSize,
96
96
97 copy_source: OptPathSlice,
97 copy_source: OptPathSlice,
98 children: ChildNodes,
98 children: ChildNodes,
99 pub(super) descendants_with_entry_count: Size,
99 pub(super) descendants_with_entry_count: Size,
100 pub(super) tracked_descendants_count: Size,
100 pub(super) tracked_descendants_count: Size,
101 flags: U16Be,
101 flags: U16Be,
102 size: U32Be,
102 size: U32Be,
103 mtime: PackedTruncatedTimestamp,
103 mtime: PackedTruncatedTimestamp,
104 }
104 }
105
105
106 bitflags! {
106 bitflags! {
107 #[repr(C)]
107 #[repr(C)]
108 struct Flags: u16 {
108 struct Flags: u16 {
109 const WDIR_TRACKED = 1 << 0;
109 const WDIR_TRACKED = 1 << 0;
110 const P1_TRACKED = 1 << 1;
110 const P1_TRACKED = 1 << 1;
111 const P2_INFO = 1 << 2;
111 const P2_INFO = 1 << 2;
112 const MODE_EXEC_PERM = 1 << 3;
112 const MODE_EXEC_PERM = 1 << 3;
113 const MODE_IS_SYMLINK = 1 << 4;
113 const MODE_IS_SYMLINK = 1 << 4;
114 const HAS_FALLBACK_EXEC = 1 << 5;
114 const HAS_FALLBACK_EXEC = 1 << 5;
115 const FALLBACK_EXEC = 1 << 6;
115 const FALLBACK_EXEC = 1 << 6;
116 const HAS_FALLBACK_SYMLINK = 1 << 7;
116 const HAS_FALLBACK_SYMLINK = 1 << 7;
117 const FALLBACK_SYMLINK = 1 << 8;
117 const FALLBACK_SYMLINK = 1 << 8;
118 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
118 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
119 const HAS_MODE_AND_SIZE = 1 <<10;
119 const HAS_MODE_AND_SIZE = 1 <<10;
120 const HAS_MTIME = 1 <<11;
120 const HAS_MTIME = 1 <<11;
121 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
121 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
122 const DIRECTORY = 1 <<13;
122 const DIRECTORY = 1 <<13;
123 const ALL_UNKNOWN_RECORDED = 1 <<14;
123 const ALL_UNKNOWN_RECORDED = 1 <<14;
124 const ALL_IGNORED_RECORDED = 1 <<15;
124 const ALL_IGNORED_RECORDED = 1 <<15;
125 }
125 }
126 }
126 }
127
127
128 /// Duration since the Unix epoch
128 /// Duration since the Unix epoch
129 #[derive(BytesCast, Copy, Clone, Debug)]
129 #[derive(BytesCast, Copy, Clone, Debug)]
130 #[repr(C)]
130 #[repr(C)]
131 struct PackedTruncatedTimestamp {
131 struct PackedTruncatedTimestamp {
132 truncated_seconds: U32Be,
132 truncated_seconds: U32Be,
133 nanoseconds: U32Be,
133 nanoseconds: U32Be,
134 }
134 }
135
135
136 /// Counted in bytes from the start of the file
136 /// Counted in bytes from the start of the file
137 ///
137 ///
138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
139 type Offset = U32Be;
139 type Offset = U32Be;
140
140
141 /// Counted in number of items
141 /// Counted in number of items
142 ///
142 ///
143 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
143 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
144 type Size = U32Be;
144 type Size = U32Be;
145
145
146 /// Counted in bytes
146 /// Counted in bytes
147 ///
147 ///
148 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
148 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
149 type PathSize = U16Be;
149 type PathSize = U16Be;
150
150
151 /// A contiguous sequence of `len` times `Node`, representing the child nodes
151 /// A contiguous sequence of `len` times `Node`, representing the child nodes
152 /// of either some other node or of the repository root.
152 /// of either some other node or of the repository root.
153 ///
153 ///
154 /// Always sorted by ascending `full_path`, to allow binary search.
154 /// Always sorted by ascending `full_path`, to allow binary search.
155 /// Since nodes with the same parent nodes also have the same parent path,
155 /// Since nodes with the same parent nodes also have the same parent path,
156 /// only the `base_name`s need to be compared during binary search.
156 /// only the `base_name`s need to be compared during binary search.
157 #[derive(BytesCast, Copy, Clone, Debug)]
157 #[derive(BytesCast, Copy, Clone, Debug)]
158 #[repr(C)]
158 #[repr(C)]
159 struct ChildNodes {
159 struct ChildNodes {
160 start: Offset,
160 start: Offset,
161 len: Size,
161 len: Size,
162 }
162 }
163
163
164 /// A `HgPath` of `len` bytes
164 /// A `HgPath` of `len` bytes
165 #[derive(BytesCast, Copy, Clone, Debug)]
165 #[derive(BytesCast, Copy, Clone, Debug)]
166 #[repr(C)]
166 #[repr(C)]
167 struct PathSlice {
167 struct PathSlice {
168 start: Offset,
168 start: Offset,
169 len: PathSize,
169 len: PathSize,
170 }
170 }
171
171
172 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
172 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
173 type OptPathSlice = PathSlice;
173 type OptPathSlice = PathSlice;
174
174
175 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
175 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
176 ///
176 ///
177 /// This should only happen if Mercurial is buggy or a repository is corrupted.
177 /// This should only happen if Mercurial is buggy or a repository is corrupted.
178 #[derive(Debug)]
178 #[derive(Debug)]
179 pub struct DirstateV2ParseError {
179 pub struct DirstateV2ParseError {
180 message: String,
180 message: String,
181 }
181 }
182
182
183 impl DirstateV2ParseError {
183 impl DirstateV2ParseError {
184 pub fn new<S: Into<String>>(message: S) -> Self {
184 pub fn new<S: Into<String>>(message: S) -> Self {
185 Self {
185 Self {
186 message: message.into(),
186 message: message.into(),
187 }
187 }
188 }
188 }
189 }
189 }
190
190
191 impl From<DirstateV2ParseError> for HgError {
191 impl From<DirstateV2ParseError> for HgError {
192 fn from(e: DirstateV2ParseError) -> Self {
192 fn from(e: DirstateV2ParseError) -> Self {
193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
194 }
194 }
195 }
195 }
196
196
197 impl From<DirstateV2ParseError> for crate::DirstateError {
197 impl From<DirstateV2ParseError> for crate::DirstateError {
198 fn from(error: DirstateV2ParseError) -> Self {
198 fn from(error: DirstateV2ParseError) -> Self {
199 HgError::from(error).into()
199 HgError::from(error).into()
200 }
200 }
201 }
201 }
202
202
203 impl TreeMetadata {
203 impl TreeMetadata {
204 pub fn as_bytes(&self) -> &[u8] {
204 pub fn as_bytes(&self) -> &[u8] {
205 BytesCast::as_bytes(self)
205 BytesCast::as_bytes(self)
206 }
206 }
207 }
207 }
208
208
209 impl<'on_disk> Docket<'on_disk> {
209 impl<'on_disk> Docket<'on_disk> {
210 /// Generate the identifier for a new data file
210 /// Generate the identifier for a new data file
211 ///
211 ///
212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
213 /// See `mercurial/revlogutils/docket.py`
213 /// See `mercurial/revlogutils/docket.py`
214 pub fn new_uid() -> String {
214 pub fn new_uid() -> String {
215 const ID_LENGTH: usize = 8;
215 const ID_LENGTH: usize = 8;
216 let mut id = String::with_capacity(ID_LENGTH);
216 let mut id = String::with_capacity(ID_LENGTH);
217 let mut rng = rand::thread_rng();
217 let mut rng = rand::thread_rng();
218 for _ in 0..ID_LENGTH {
218 for _ in 0..ID_LENGTH {
219 // One random hexadecimal digit.
219 // One random hexadecimal digit.
220 // `unwrap` never panics because `impl Write for String`
220 // `unwrap` never panics because `impl Write for String`
221 // never returns an error.
221 // never returns an error.
222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
223 }
223 }
224 id
224 id
225 }
225 }
226
226
227 pub fn serialize(
227 pub fn serialize(
228 parents: DirstateParents,
228 parents: DirstateParents,
229 tree_metadata: TreeMetadata,
229 tree_metadata: TreeMetadata,
230 data_size: u64,
230 data_size: u64,
231 uuid: &[u8],
231 uuid: &[u8],
232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
233 let header = DocketHeader {
233 let header = DocketHeader {
234 marker: *V2_FORMAT_MARKER,
234 marker: *V2_FORMAT_MARKER,
235 parent_1: parents.p1.pad_to_256_bits(),
235 parent_1: parents.p1.pad_to_256_bits(),
236 parent_2: parents.p2.pad_to_256_bits(),
236 parent_2: parents.p2.pad_to_256_bits(),
237 metadata: tree_metadata,
237 metadata: tree_metadata,
238 data_size: u32::try_from(data_size)?.into(),
238 data_size: u32::try_from(data_size)?.into(),
239 uuid_size: uuid.len().try_into()?,
239 uuid_size: uuid.len().try_into()?,
240 };
240 };
241 let header = header.as_bytes();
241 let header = header.as_bytes();
242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
243 docket.extend_from_slice(header);
243 docket.extend_from_slice(header);
244 docket.extend_from_slice(uuid);
244 docket.extend_from_slice(uuid);
245 Ok(docket)
245 Ok(docket)
246 }
246 }
247
247
248 pub fn parents(&self) -> DirstateParents {
248 pub fn parents(&self) -> DirstateParents {
249 use crate::Node;
249 use crate::Node;
250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
251 .unwrap();
251 .unwrap();
252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 .unwrap();
253 .unwrap();
254 DirstateParents { p1, p2 }
254 DirstateParents { p1, p2 }
255 }
255 }
256
256
257 pub fn tree_metadata(&self) -> &[u8] {
257 pub fn tree_metadata(&self) -> &[u8] {
258 self.header.metadata.as_bytes()
258 self.header.metadata.as_bytes()
259 }
259 }
260
260
261 pub fn data_size(&self) -> usize {
261 pub fn data_size(&self) -> usize {
262 // This `unwrap` could only panic on a 16-bit CPU
262 // This `unwrap` could only panic on a 16-bit CPU
263 self.header.data_size.get().try_into().unwrap()
263 self.header.data_size.get().try_into().unwrap()
264 }
264 }
265
265
266 pub fn data_filename(&self) -> String {
266 pub fn data_filename(&self) -> String {
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
268 }
268 }
269 }
269 }
270
270
271 pub fn read_docket(
271 pub fn read_docket(
272 on_disk: &[u8],
272 on_disk: &[u8],
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
276 })?;
276 })?;
277 let uuid_size = header.uuid_size as usize;
277 let uuid_size = header.uuid_size as usize;
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 Ok(Docket { header, uuid })
279 Ok(Docket { header, uuid })
280 } else {
280 } else {
281 Err(DirstateV2ParseError::new(
281 Err(DirstateV2ParseError::new(
282 "invalid format marker or uuid size",
282 "invalid format marker or uuid size",
283 ))
283 ))
284 }
284 }
285 }
285 }
286
286
287 pub(super) fn read<'on_disk>(
287 pub(super) fn read<'on_disk>(
288 on_disk: &'on_disk [u8],
288 on_disk: &'on_disk [u8],
289 metadata: &[u8],
289 metadata: &[u8],
290 uuid: Vec<u8>,
290 uuid: Vec<u8>,
291 identity: Option<u64>,
291 identity: Option<u64>,
292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
293 if on_disk.is_empty() {
293 if on_disk.is_empty() {
294 let mut map = DirstateMap::empty(on_disk);
294 let mut map = DirstateMap::empty(on_disk);
295 map.dirstate_version = DirstateVersion::V2;
295 map.dirstate_version = DirstateVersion::V2;
296 return Ok(map);
296 return Ok(map);
297 }
297 }
298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
300 })?;
300 })?;
301 let dirstate_map = DirstateMap {
301 let dirstate_map = DirstateMap {
302 on_disk,
302 on_disk,
303 root: dirstate_map::ChildNodes::OnDisk(
303 root: dirstate_map::ChildNodes::OnDisk(
304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
305 e.message = format!("{}, when reading root notes", e.message);
305 e.message = format!("{}, when reading root notes", e.message);
306 e
306 e
307 })?,
307 })?,
308 ),
308 ),
309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
311 ignore_patterns_hash: meta.ignore_patterns_hash,
311 ignore_patterns_hash: meta.ignore_patterns_hash,
312 unreachable_bytes: meta.unreachable_bytes.get(),
312 unreachable_bytes: meta.unreachable_bytes.get(),
313 old_data_size: on_disk.len(),
313 old_data_size: on_disk.len(),
314 old_uuid: Some(uuid),
314 old_uuid: Some(uuid),
315 identity,
315 identity,
316 dirstate_version: DirstateVersion::V2,
316 dirstate_version: DirstateVersion::V2,
317 write_mode: DirstateMapWriteMode::Auto,
317 write_mode: DirstateMapWriteMode::Auto,
318 };
318 };
319 Ok(dirstate_map)
319 Ok(dirstate_map)
320 }
320 }
321
321
322 impl Node {
322 impl Node {
323 pub(super) fn full_path<'on_disk>(
323 pub(super) fn full_path<'on_disk>(
324 &self,
324 &self,
325 on_disk: &'on_disk [u8],
325 on_disk: &'on_disk [u8],
326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
327 read_hg_path(on_disk, self.full_path)
327 read_hg_path(on_disk, self.full_path)
328 }
328 }
329
329
330 pub(super) fn base_name_start(
330 pub(super) fn base_name_start(
331 &self,
331 &self,
332 ) -> Result<usize, DirstateV2ParseError> {
332 ) -> Result<usize, DirstateV2ParseError> {
333 let start = self.base_name_start.get();
333 let start = self.base_name_start.get();
334 if start < self.full_path.len.get() {
334 if start < self.full_path.len.get() {
335 let start = usize::try_from(start)
335 let start = usize::from(start);
336 // u32 -> usize, could only panic on a 16-bit CPU
337 .expect("dirstate-v2 base_name_start out of bounds");
338 Ok(start)
336 Ok(start)
339 } else {
337 } else {
340 Err(DirstateV2ParseError::new("not enough bytes for base name"))
338 Err(DirstateV2ParseError::new("not enough bytes for base name"))
341 }
339 }
342 }
340 }
343
341
344 pub(super) fn base_name<'on_disk>(
342 pub(super) fn base_name<'on_disk>(
345 &self,
343 &self,
346 on_disk: &'on_disk [u8],
344 on_disk: &'on_disk [u8],
347 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
345 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
348 let full_path = self.full_path(on_disk)?;
346 let full_path = self.full_path(on_disk)?;
349 let base_name_start = self.base_name_start()?;
347 let base_name_start = self.base_name_start()?;
350 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
348 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
351 }
349 }
352
350
353 pub(super) fn path<'on_disk>(
351 pub(super) fn path<'on_disk>(
354 &self,
352 &self,
355 on_disk: &'on_disk [u8],
353 on_disk: &'on_disk [u8],
356 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
354 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
357 Ok(WithBasename::from_raw_parts(
355 Ok(WithBasename::from_raw_parts(
358 Cow::Borrowed(self.full_path(on_disk)?),
356 Cow::Borrowed(self.full_path(on_disk)?),
359 self.base_name_start()?,
357 self.base_name_start()?,
360 ))
358 ))
361 }
359 }
362
360
363 pub(super) fn has_copy_source(&self) -> bool {
361 pub(super) fn has_copy_source(&self) -> bool {
364 self.copy_source.start.get() != 0
362 self.copy_source.start.get() != 0
365 }
363 }
366
364
367 pub(super) fn copy_source<'on_disk>(
365 pub(super) fn copy_source<'on_disk>(
368 &self,
366 &self,
369 on_disk: &'on_disk [u8],
367 on_disk: &'on_disk [u8],
370 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
368 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
371 Ok(if self.has_copy_source() {
369 Ok(if self.has_copy_source() {
372 Some(read_hg_path(on_disk, self.copy_source)?)
370 Some(read_hg_path(on_disk, self.copy_source)?)
373 } else {
371 } else {
374 None
372 None
375 })
373 })
376 }
374 }
377
375
378 fn flags(&self) -> Flags {
376 fn flags(&self) -> Flags {
379 Flags::from_bits_truncate(self.flags.get())
377 Flags::from_bits_truncate(self.flags.get())
380 }
378 }
381
379
382 fn has_entry(&self) -> bool {
380 fn has_entry(&self) -> bool {
383 self.flags().intersects(
381 self.flags().intersects(
384 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
382 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
385 )
383 )
386 }
384 }
387
385
388 pub(super) fn node_data(
386 pub(super) fn node_data(
389 &self,
387 &self,
390 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
388 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
391 if self.has_entry() {
389 if self.has_entry() {
392 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
390 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
393 } else if let Some(mtime) = self.cached_directory_mtime()? {
391 } else if let Some(mtime) = self.cached_directory_mtime()? {
394 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
392 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
395 } else {
393 } else {
396 Ok(dirstate_map::NodeData::None)
394 Ok(dirstate_map::NodeData::None)
397 }
395 }
398 }
396 }
399
397
400 pub(super) fn cached_directory_mtime(
398 pub(super) fn cached_directory_mtime(
401 &self,
399 &self,
402 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
400 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
403 // For now we do not have code to handle the absence of
401 // For now we do not have code to handle the absence of
404 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
402 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
405 // unset.
403 // unset.
406 if self.flags().contains(Flags::DIRECTORY)
404 if self.flags().contains(Flags::DIRECTORY)
407 && self.flags().contains(Flags::HAS_MTIME)
405 && self.flags().contains(Flags::HAS_MTIME)
408 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
406 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
409 {
407 {
410 Ok(Some(self.mtime()?))
408 Ok(Some(self.mtime()?))
411 } else {
409 } else {
412 Ok(None)
410 Ok(None)
413 }
411 }
414 }
412 }
415
413
416 fn synthesize_unix_mode(&self) -> u32 {
414 fn synthesize_unix_mode(&self) -> u32 {
417 // Some platforms' libc don't have the same type (MacOS uses i32 here)
415 // Some platforms' libc don't have the same type (MacOS uses i32 here)
418 #[allow(clippy::unnecessary_cast)]
416 #[allow(clippy::unnecessary_cast)]
419 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
417 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
420 libc::S_IFLNK as u32
418 libc::S_IFLNK as u32
421 } else {
419 } else {
422 libc::S_IFREG as u32
420 libc::S_IFREG as u32
423 };
421 };
424 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
422 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
425 0o755
423 0o755
426 } else {
424 } else {
427 0o644
425 0o644
428 };
426 };
429 file_type | permissions
427 file_type | permissions
430 }
428 }
431
429
432 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
430 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
433 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
431 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
434 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
432 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
435 m.second_ambiguous = true;
433 m.second_ambiguous = true;
436 }
434 }
437 Ok(m)
435 Ok(m)
438 }
436 }
439
437
440 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
438 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
441 // TODO: convert through raw bits instead?
439 // TODO: convert through raw bits instead?
442 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
440 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
443 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
441 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
444 let p2_info = self.flags().contains(Flags::P2_INFO);
442 let p2_info = self.flags().contains(Flags::P2_INFO);
445 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
443 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
446 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
444 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
447 {
445 {
448 Some((self.synthesize_unix_mode(), self.size.into()))
446 Some((self.synthesize_unix_mode(), self.size.into()))
449 } else {
447 } else {
450 None
448 None
451 };
449 };
452 let mtime = if self.flags().contains(Flags::HAS_MTIME)
450 let mtime = if self.flags().contains(Flags::HAS_MTIME)
453 && !self.flags().contains(Flags::DIRECTORY)
451 && !self.flags().contains(Flags::DIRECTORY)
454 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
452 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
455 {
453 {
456 Some(self.mtime()?)
454 Some(self.mtime()?)
457 } else {
455 } else {
458 None
456 None
459 };
457 };
460 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
458 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
461 {
459 {
462 Some(self.flags().contains(Flags::FALLBACK_EXEC))
460 Some(self.flags().contains(Flags::FALLBACK_EXEC))
463 } else {
461 } else {
464 None
462 None
465 };
463 };
466 let fallback_symlink =
464 let fallback_symlink =
467 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
465 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
468 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
466 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
469 } else {
467 } else {
470 None
468 None
471 };
469 };
472 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
470 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
473 wc_tracked,
471 wc_tracked,
474 p1_tracked,
472 p1_tracked,
475 p2_info,
473 p2_info,
476 mode_size,
474 mode_size,
477 mtime,
475 mtime,
478 fallback_exec,
476 fallback_exec,
479 fallback_symlink,
477 fallback_symlink,
480 }))
478 }))
481 }
479 }
482
480
483 pub(super) fn entry(
481 pub(super) fn entry(
484 &self,
482 &self,
485 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
483 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
486 if self.has_entry() {
484 if self.has_entry() {
487 Ok(Some(self.assume_entry()?))
485 Ok(Some(self.assume_entry()?))
488 } else {
486 } else {
489 Ok(None)
487 Ok(None)
490 }
488 }
491 }
489 }
492
490
493 pub(super) fn children<'on_disk>(
491 pub(super) fn children<'on_disk>(
494 &self,
492 &self,
495 on_disk: &'on_disk [u8],
493 on_disk: &'on_disk [u8],
496 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
494 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
497 read_nodes(on_disk, self.children)
495 read_nodes(on_disk, self.children)
498 }
496 }
499
497
500 pub(super) fn to_in_memory_node<'on_disk>(
498 pub(super) fn to_in_memory_node<'on_disk>(
501 &self,
499 &self,
502 on_disk: &'on_disk [u8],
500 on_disk: &'on_disk [u8],
503 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
501 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
504 Ok(dirstate_map::Node {
502 Ok(dirstate_map::Node {
505 children: dirstate_map::ChildNodes::OnDisk(
503 children: dirstate_map::ChildNodes::OnDisk(
506 self.children(on_disk)?,
504 self.children(on_disk)?,
507 ),
505 ),
508 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
506 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
509 data: self.node_data()?,
507 data: self.node_data()?,
510 descendants_with_entry_count: self
508 descendants_with_entry_count: self
511 .descendants_with_entry_count
509 .descendants_with_entry_count
512 .get(),
510 .get(),
513 tracked_descendants_count: self.tracked_descendants_count.get(),
511 tracked_descendants_count: self.tracked_descendants_count.get(),
514 })
512 })
515 }
513 }
516
514
517 fn from_dirstate_entry(
515 fn from_dirstate_entry(
518 entry: &DirstateEntry,
516 entry: &DirstateEntry,
519 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
517 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
520 let DirstateV2Data {
518 let DirstateV2Data {
521 wc_tracked,
519 wc_tracked,
522 p1_tracked,
520 p1_tracked,
523 p2_info,
521 p2_info,
524 mode_size: mode_size_opt,
522 mode_size: mode_size_opt,
525 mtime: mtime_opt,
523 mtime: mtime_opt,
526 fallback_exec,
524 fallback_exec,
527 fallback_symlink,
525 fallback_symlink,
528 } = entry.v2_data();
526 } = entry.v2_data();
529 // TODO: convert through raw flag bits instead?
527 // TODO: convert through raw flag bits instead?
530 let mut flags = Flags::empty();
528 let mut flags = Flags::empty();
531 flags.set(Flags::WDIR_TRACKED, wc_tracked);
529 flags.set(Flags::WDIR_TRACKED, wc_tracked);
532 flags.set(Flags::P1_TRACKED, p1_tracked);
530 flags.set(Flags::P1_TRACKED, p1_tracked);
533 flags.set(Flags::P2_INFO, p2_info);
531 flags.set(Flags::P2_INFO, p2_info);
534 // Some platforms' libc don't have the same type (MacOS uses i32 here)
532 // Some platforms' libc don't have the same type (MacOS uses i32 here)
535 #[allow(clippy::unnecessary_cast)]
533 #[allow(clippy::unnecessary_cast)]
536 let size = if let Some((m, s)) = mode_size_opt {
534 let size = if let Some((m, s)) = mode_size_opt {
537 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
535 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
538 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
536 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
539 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
537 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
540 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
538 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
541 flags.insert(Flags::HAS_MODE_AND_SIZE);
539 flags.insert(Flags::HAS_MODE_AND_SIZE);
542 s.into()
540 s.into()
543 } else {
541 } else {
544 0.into()
542 0.into()
545 };
543 };
546 let mtime = if let Some(m) = mtime_opt {
544 let mtime = if let Some(m) = mtime_opt {
547 flags.insert(Flags::HAS_MTIME);
545 flags.insert(Flags::HAS_MTIME);
548 if m.second_ambiguous {
546 if m.second_ambiguous {
549 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
547 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
550 };
548 };
551 m.into()
549 m.into()
552 } else {
550 } else {
553 PackedTruncatedTimestamp::null()
551 PackedTruncatedTimestamp::null()
554 };
552 };
555 if let Some(f_exec) = fallback_exec {
553 if let Some(f_exec) = fallback_exec {
556 flags.insert(Flags::HAS_FALLBACK_EXEC);
554 flags.insert(Flags::HAS_FALLBACK_EXEC);
557 if f_exec {
555 if f_exec {
558 flags.insert(Flags::FALLBACK_EXEC);
556 flags.insert(Flags::FALLBACK_EXEC);
559 }
557 }
560 }
558 }
561 if let Some(f_symlink) = fallback_symlink {
559 if let Some(f_symlink) = fallback_symlink {
562 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
560 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
563 if f_symlink {
561 if f_symlink {
564 flags.insert(Flags::FALLBACK_SYMLINK);
562 flags.insert(Flags::FALLBACK_SYMLINK);
565 }
563 }
566 }
564 }
567 (flags, size, mtime)
565 (flags, size, mtime)
568 }
566 }
569 }
567 }
570
568
571 fn read_hg_path(
569 fn read_hg_path(
572 on_disk: &[u8],
570 on_disk: &[u8],
573 slice: PathSlice,
571 slice: PathSlice,
574 ) -> Result<&HgPath, DirstateV2ParseError> {
572 ) -> Result<&HgPath, DirstateV2ParseError> {
575 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
573 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
576 }
574 }
577
575
578 fn read_nodes(
576 fn read_nodes(
579 on_disk: &[u8],
577 on_disk: &[u8],
580 slice: ChildNodes,
578 slice: ChildNodes,
581 ) -> Result<&[Node], DirstateV2ParseError> {
579 ) -> Result<&[Node], DirstateV2ParseError> {
582 read_slice(on_disk, slice.start, slice.len.get())
580 read_slice(on_disk, slice.start, slice.len.get())
583 }
581 }
584
582
585 fn read_slice<T, Len>(
583 fn read_slice<T, Len>(
586 on_disk: &[u8],
584 on_disk: &[u8],
587 start: Offset,
585 start: Offset,
588 len: Len,
586 len: Len,
589 ) -> Result<&[T], DirstateV2ParseError>
587 ) -> Result<&[T], DirstateV2ParseError>
590 where
588 where
591 T: BytesCast,
589 T: BytesCast,
592 Len: TryInto<usize>,
590 Len: TryInto<usize>,
593 {
591 {
594 // Either `usize::MAX` would result in "out of bounds" error since a single
592 // Either `usize::MAX` would result in "out of bounds" error since a single
595 // `&[u8]` cannot occupy the entire addess space.
593 // `&[u8]` cannot occupy the entire addess space.
596 let start = start.get().try_into().unwrap_or(std::usize::MAX);
594 let start = start.get().try_into().unwrap_or(usize::MAX);
597 let len = len.try_into().unwrap_or(std::usize::MAX);
595 let len = len.try_into().unwrap_or(usize::MAX);
598 let bytes = match on_disk.get(start..) {
596 let bytes = match on_disk.get(start..) {
599 Some(bytes) => bytes,
597 Some(bytes) => bytes,
600 None => {
598 None => {
601 return Err(DirstateV2ParseError::new(
599 return Err(DirstateV2ParseError::new(
602 "not enough bytes from disk",
600 "not enough bytes from disk",
603 ))
601 ))
604 }
602 }
605 };
603 };
606 T::slice_from_bytes(bytes, len)
604 T::slice_from_bytes(bytes, len)
607 .map_err(|e| {
605 .map_err(|e| {
608 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
606 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
609 })
607 })
610 .map(|(slice, _rest)| slice)
608 .map(|(slice, _rest)| slice)
611 }
609 }
612
610
613 /// Returns new data and metadata, together with whether that data should be
611 /// Returns new data and metadata, together with whether that data should be
614 /// appended to the existing data file whose content is at
612 /// appended to the existing data file whose content is at
615 /// `dirstate_map.on_disk` (true), instead of written to a new data file
613 /// `dirstate_map.on_disk` (true), instead of written to a new data file
616 /// (false), and the previous size of data on disk.
614 /// (false), and the previous size of data on disk.
617 pub(super) fn write(
615 pub(super) fn write(
618 dirstate_map: &DirstateMap,
616 dirstate_map: &DirstateMap,
619 write_mode: DirstateMapWriteMode,
617 write_mode: DirstateMapWriteMode,
620 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
618 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
621 let append = match write_mode {
619 let append = match write_mode {
622 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
620 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
623 DirstateMapWriteMode::ForceNewDataFile => false,
621 DirstateMapWriteMode::ForceNewDataFile => false,
624 DirstateMapWriteMode::ForceAppend => true,
622 DirstateMapWriteMode::ForceAppend => true,
625 };
623 };
626 if append {
624 if append {
627 log::trace!("appending to the dirstate data file");
625 log::trace!("appending to the dirstate data file");
628 } else {
626 } else {
629 log::trace!("creating new dirstate data file");
627 log::trace!("creating new dirstate data file");
630 }
628 }
631
629
632 // This ignores the space for paths, and for nodes without an entry.
630 // This ignores the space for paths, and for nodes without an entry.
633 // TODO: better estimate? Skip the `Vec` and write to a file directly?
631 // TODO: better estimate? Skip the `Vec` and write to a file directly?
634 let size_guess = std::mem::size_of::<Node>()
632 let size_guess = std::mem::size_of::<Node>()
635 * dirstate_map.nodes_with_entry_count as usize;
633 * dirstate_map.nodes_with_entry_count as usize;
636
634
637 let mut writer = Writer {
635 let mut writer = Writer {
638 dirstate_map,
636 dirstate_map,
639 append,
637 append,
640 out: Vec::with_capacity(size_guess),
638 out: Vec::with_capacity(size_guess),
641 };
639 };
642
640
643 let root_nodes = dirstate_map.root.as_ref();
641 let root_nodes = dirstate_map.root.as_ref();
644 for node in root_nodes.iter() {
642 for node in root_nodes.iter() {
645 // Catch some corruptions before we write to disk
643 // Catch some corruptions before we write to disk
646 let full_path = node.full_path(dirstate_map.on_disk)?;
644 let full_path = node.full_path(dirstate_map.on_disk)?;
647 let base_name = node.base_name(dirstate_map.on_disk)?;
645 let base_name = node.base_name(dirstate_map.on_disk)?;
648 if full_path != base_name {
646 if full_path != base_name {
649 let explanation = format!(
647 let explanation = format!(
650 "Dirstate root node '{}' is not at the root",
648 "Dirstate root node '{}' is not at the root",
651 full_path
649 full_path
652 );
650 );
653 return Err(HgError::corrupted(explanation).into());
651 return Err(HgError::corrupted(explanation).into());
654 }
652 }
655 }
653 }
656 let root_nodes = writer.write_nodes(root_nodes)?;
654 let root_nodes = writer.write_nodes(root_nodes)?;
657
655
658 let unreachable_bytes = if append {
656 let unreachable_bytes = if append {
659 dirstate_map.unreachable_bytes
657 dirstate_map.unreachable_bytes
660 } else {
658 } else {
661 0
659 0
662 };
660 };
663 let meta = TreeMetadata {
661 let meta = TreeMetadata {
664 root_nodes,
662 root_nodes,
665 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
663 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
666 nodes_with_copy_source_count: dirstate_map
664 nodes_with_copy_source_count: dirstate_map
667 .nodes_with_copy_source_count
665 .nodes_with_copy_source_count
668 .into(),
666 .into(),
669 unreachable_bytes: unreachable_bytes.into(),
667 unreachable_bytes: unreachable_bytes.into(),
670 unused: [0; 4],
668 unused: [0; 4],
671 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
669 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
672 };
670 };
673 Ok((writer.out, meta, append, dirstate_map.old_data_size))
671 Ok((writer.out, meta, append, dirstate_map.old_data_size))
674 }
672 }
675
673
676 struct Writer<'dmap, 'on_disk> {
674 struct Writer<'dmap, 'on_disk> {
677 dirstate_map: &'dmap DirstateMap<'on_disk>,
675 dirstate_map: &'dmap DirstateMap<'on_disk>,
678 append: bool,
676 append: bool,
679 out: Vec<u8>,
677 out: Vec<u8>,
680 }
678 }
681
679
682 impl Writer<'_, '_> {
680 impl Writer<'_, '_> {
683 fn write_nodes(
681 fn write_nodes(
684 &mut self,
682 &mut self,
685 nodes: dirstate_map::ChildNodesRef,
683 nodes: dirstate_map::ChildNodesRef,
686 ) -> Result<ChildNodes, DirstateError> {
684 ) -> Result<ChildNodes, DirstateError> {
687 // Reuse already-written nodes if possible
685 // Reuse already-written nodes if possible
688 if self.append {
686 if self.append {
689 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
687 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
690 let start = self.on_disk_offset_of(nodes_slice).expect(
688 let start = self.on_disk_offset_of(nodes_slice).expect(
691 "dirstate-v2 OnDisk nodes not found within on_disk",
689 "dirstate-v2 OnDisk nodes not found within on_disk",
692 );
690 );
693 let len = child_nodes_len_from_usize(nodes_slice.len());
691 let len = child_nodes_len_from_usize(nodes_slice.len());
694 return Ok(ChildNodes { start, len });
692 return Ok(ChildNodes { start, len });
695 }
693 }
696 }
694 }
697
695
698 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
696 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
699 // undefined iteration order. Sort to enable binary search in the
697 // undefined iteration order. Sort to enable binary search in the
700 // written file.
698 // written file.
701 let nodes = nodes.sorted();
699 let nodes = nodes.sorted();
702 let nodes_len = nodes.len();
700 let nodes_len = nodes.len();
703
701
704 // First accumulate serialized nodes in a `Vec`
702 // First accumulate serialized nodes in a `Vec`
705 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
703 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
706 for node in nodes {
704 for node in nodes {
707 let children = node.children(self.dirstate_map.on_disk)?;
705 let children = node.children(self.dirstate_map.on_disk)?;
708 let full_path = node.full_path(self.dirstate_map.on_disk)?;
706 let full_path = node.full_path(self.dirstate_map.on_disk)?;
709 self.check_children(&children, full_path)?;
707 self.check_children(&children, full_path)?;
710
708
711 let children = self.write_nodes(children)?;
709 let children = self.write_nodes(children)?;
712 let full_path = self.write_path(full_path.as_bytes());
710 let full_path = self.write_path(full_path.as_bytes());
713 let copy_source = if let Some(source) =
711 let copy_source = if let Some(source) =
714 node.copy_source(self.dirstate_map.on_disk)?
712 node.copy_source(self.dirstate_map.on_disk)?
715 {
713 {
716 self.write_path(source.as_bytes())
714 self.write_path(source.as_bytes())
717 } else {
715 } else {
718 PathSlice {
716 PathSlice {
719 start: 0.into(),
717 start: 0.into(),
720 len: 0.into(),
718 len: 0.into(),
721 }
719 }
722 };
720 };
723 on_disk_nodes.push(match node {
721 on_disk_nodes.push(match node {
724 NodeRef::InMemory(path, node) => {
722 NodeRef::InMemory(path, node) => {
725 let (flags, size, mtime) = match &node.data {
723 let (flags, size, mtime) = match &node.data {
726 dirstate_map::NodeData::Entry(entry) => {
724 dirstate_map::NodeData::Entry(entry) => {
727 Node::from_dirstate_entry(entry)
725 Node::from_dirstate_entry(entry)
728 }
726 }
729 dirstate_map::NodeData::CachedDirectory { mtime } => {
727 dirstate_map::NodeData::CachedDirectory { mtime } => {
730 // we currently never set a mtime if unknown file
728 // we currently never set a mtime if unknown file
731 // are present.
729 // are present.
732 // So if we have a mtime for a directory, we know
730 // So if we have a mtime for a directory, we know
733 // they are no unknown
731 // they are no unknown
734 // files and we
732 // files and we
735 // blindly set ALL_UNKNOWN_RECORDED.
733 // blindly set ALL_UNKNOWN_RECORDED.
736 //
734 //
737 // We never set ALL_IGNORED_RECORDED since we
735 // We never set ALL_IGNORED_RECORDED since we
738 // don't track that case
736 // don't track that case
739 // currently.
737 // currently.
740 let mut flags = Flags::DIRECTORY
738 let mut flags = Flags::DIRECTORY
741 | Flags::HAS_MTIME
739 | Flags::HAS_MTIME
742 | Flags::ALL_UNKNOWN_RECORDED;
740 | Flags::ALL_UNKNOWN_RECORDED;
743 if mtime.second_ambiguous {
741 if mtime.second_ambiguous {
744 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
742 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
745 }
743 }
746 (flags, 0.into(), (*mtime).into())
744 (flags, 0.into(), (*mtime).into())
747 }
745 }
748 dirstate_map::NodeData::None => (
746 dirstate_map::NodeData::None => (
749 Flags::DIRECTORY,
747 Flags::DIRECTORY,
750 0.into(),
748 0.into(),
751 PackedTruncatedTimestamp::null(),
749 PackedTruncatedTimestamp::null(),
752 ),
750 ),
753 };
751 };
754 Node {
752 Node {
755 children,
753 children,
756 copy_source,
754 copy_source,
757 full_path,
755 full_path,
758 base_name_start: u16::try_from(path.base_name_start())
756 base_name_start: u16::try_from(path.base_name_start())
759 // Could only panic for paths over 64 KiB
757 // Could only panic for paths over 64 KiB
760 .expect("dirstate-v2 path length overflow")
758 .expect("dirstate-v2 path length overflow")
761 .into(),
759 .into(),
762 descendants_with_entry_count: node
760 descendants_with_entry_count: node
763 .descendants_with_entry_count
761 .descendants_with_entry_count
764 .into(),
762 .into(),
765 tracked_descendants_count: node
763 tracked_descendants_count: node
766 .tracked_descendants_count
764 .tracked_descendants_count
767 .into(),
765 .into(),
768 flags: flags.bits().into(),
766 flags: flags.bits().into(),
769 size,
767 size,
770 mtime,
768 mtime,
771 }
769 }
772 }
770 }
773 NodeRef::OnDisk(node) => Node {
771 NodeRef::OnDisk(node) => Node {
774 children,
772 children,
775 copy_source,
773 copy_source,
776 full_path,
774 full_path,
777 ..*node
775 ..*node
778 },
776 },
779 })
777 })
780 }
778 }
781 // … so we can write them contiguously, after writing everything else
779 // … so we can write them contiguously, after writing everything else
782 // they refer to.
780 // they refer to.
783 let start = self.current_offset();
781 let start = self.current_offset();
784 let len = child_nodes_len_from_usize(nodes_len);
782 let len = child_nodes_len_from_usize(nodes_len);
785 self.out.extend(on_disk_nodes.as_bytes());
783 self.out.extend(on_disk_nodes.as_bytes());
786 Ok(ChildNodes { start, len })
784 Ok(ChildNodes { start, len })
787 }
785 }
788
786
789 /// Catch some dirstate corruptions before writing them to disk
787 /// Catch some dirstate corruptions before writing them to disk
790 fn check_children(
788 fn check_children(
791 &mut self,
789 &mut self,
792 children: &dirstate_map::ChildNodesRef,
790 children: &dirstate_map::ChildNodesRef,
793 full_path: &HgPath,
791 full_path: &HgPath,
794 ) -> Result<(), DirstateError> {
792 ) -> Result<(), DirstateError> {
795 for child in children.iter() {
793 for child in children.iter() {
796 let child_full_path =
794 let child_full_path =
797 child.full_path(self.dirstate_map.on_disk)?;
795 child.full_path(self.dirstate_map.on_disk)?;
798
796
799 let prefix_length = child_full_path.len()
797 let prefix_length = child_full_path.len()
800 // remove the filename
798 // remove the filename
801 - child.base_name(self.dirstate_map.on_disk)?.len()
799 - child.base_name(self.dirstate_map.on_disk)?.len()
802 // remove the slash
800 // remove the slash
803 - 1;
801 - 1;
804
802
805 let child_prefix = &child_full_path.as_bytes()[..prefix_length];
803 let child_prefix = &child_full_path.as_bytes()[..prefix_length];
806
804
807 if child_prefix != full_path.as_bytes() {
805 if child_prefix != full_path.as_bytes() {
808 let explanation = format!(
806 let explanation = format!(
809 "dirstate child node's path '{}' \
807 "dirstate child node's path '{}' \
810 does not start with its parent's path '{}'",
808 does not start with its parent's path '{}'",
811 child_full_path, full_path,
809 child_full_path, full_path,
812 );
810 );
813
811
814 return Err(HgError::corrupted(explanation).into());
812 return Err(HgError::corrupted(explanation).into());
815 }
813 }
816 }
814 }
817 Ok(())
815 Ok(())
818 }
816 }
819
817
820 /// If the given slice of items is within `on_disk`, returns its offset
818 /// If the given slice of items is within `on_disk`, returns its offset
821 /// from the start of `on_disk`.
819 /// from the start of `on_disk`.
822 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
820 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
823 where
821 where
824 T: BytesCast,
822 T: BytesCast,
825 {
823 {
826 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
824 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
827 let start = slice.as_ptr() as usize;
825 let start = slice.as_ptr() as usize;
828 let end = start + slice.len();
826 let end = start + slice.len();
829 start..=end
827 start..=end
830 }
828 }
831 let slice_addresses = address_range(slice.as_bytes());
829 let slice_addresses = address_range(slice.as_bytes());
832 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
830 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
833 if on_disk_addresses.contains(slice_addresses.start())
831 if on_disk_addresses.contains(slice_addresses.start())
834 && on_disk_addresses.contains(slice_addresses.end())
832 && on_disk_addresses.contains(slice_addresses.end())
835 {
833 {
836 let offset = slice_addresses.start() - on_disk_addresses.start();
834 let offset = slice_addresses.start() - on_disk_addresses.start();
837 Some(offset_from_usize(offset))
835 Some(offset_from_usize(offset))
838 } else {
836 } else {
839 None
837 None
840 }
838 }
841 }
839 }
842
840
843 fn current_offset(&mut self) -> Offset {
841 fn current_offset(&mut self) -> Offset {
844 let mut offset = self.out.len();
842 let mut offset = self.out.len();
845 if self.append {
843 if self.append {
846 offset += self.dirstate_map.on_disk.len()
844 offset += self.dirstate_map.on_disk.len()
847 }
845 }
848 offset_from_usize(offset)
846 offset_from_usize(offset)
849 }
847 }
850
848
851 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
849 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
852 let len = path_len_from_usize(slice.len());
850 let len = path_len_from_usize(slice.len());
853 // Reuse an already-written path if possible
851 // Reuse an already-written path if possible
854 if self.append {
852 if self.append {
855 if let Some(start) = self.on_disk_offset_of(slice) {
853 if let Some(start) = self.on_disk_offset_of(slice) {
856 return PathSlice { start, len };
854 return PathSlice { start, len };
857 }
855 }
858 }
856 }
859 let start = self.current_offset();
857 let start = self.current_offset();
860 self.out.extend(slice.as_bytes());
858 self.out.extend(slice.as_bytes());
861 PathSlice { start, len }
859 PathSlice { start, len }
862 }
860 }
863 }
861 }
864
862
865 fn offset_from_usize(x: usize) -> Offset {
863 fn offset_from_usize(x: usize) -> Offset {
866 u32::try_from(x)
864 u32::try_from(x)
867 // Could only panic for a dirstate file larger than 4 GiB
865 // Could only panic for a dirstate file larger than 4 GiB
868 .expect("dirstate-v2 offset overflow")
866 .expect("dirstate-v2 offset overflow")
869 .into()
867 .into()
870 }
868 }
871
869
872 fn child_nodes_len_from_usize(x: usize) -> Size {
870 fn child_nodes_len_from_usize(x: usize) -> Size {
873 u32::try_from(x)
871 u32::try_from(x)
874 // Could only panic with over 4 billion nodes
872 // Could only panic with over 4 billion nodes
875 .expect("dirstate-v2 slice length overflow")
873 .expect("dirstate-v2 slice length overflow")
876 .into()
874 .into()
877 }
875 }
878
876
879 fn path_len_from_usize(x: usize) -> PathSize {
877 fn path_len_from_usize(x: usize) -> PathSize {
880 u16::try_from(x)
878 u16::try_from(x)
881 // Could only panic for paths over 64 KiB
879 // Could only panic for paths over 64 KiB
882 .expect("dirstate-v2 path length overflow")
880 .expect("dirstate-v2 path length overflow")
883 .into()
881 .into()
884 }
882 }
885
883
886 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
884 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
887 fn from(timestamp: TruncatedTimestamp) -> Self {
885 fn from(timestamp: TruncatedTimestamp) -> Self {
888 Self {
886 Self {
889 truncated_seconds: timestamp.truncated_seconds().into(),
887 truncated_seconds: timestamp.truncated_seconds().into(),
890 nanoseconds: timestamp.nanoseconds().into(),
888 nanoseconds: timestamp.nanoseconds().into(),
891 }
889 }
892 }
890 }
893 }
891 }
894
892
895 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
893 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
896 type Error = DirstateV2ParseError;
894 type Error = DirstateV2ParseError;
897
895
898 fn try_from(
896 fn try_from(
899 timestamp: PackedTruncatedTimestamp,
897 timestamp: PackedTruncatedTimestamp,
900 ) -> Result<Self, Self::Error> {
898 ) -> Result<Self, Self::Error> {
901 Self::from_already_truncated(
899 Self::from_already_truncated(
902 timestamp.truncated_seconds.get(),
900 timestamp.truncated_seconds.get(),
903 timestamp.nanoseconds.get(),
901 timestamp.nanoseconds.get(),
904 false,
902 false,
905 )
903 )
906 }
904 }
907 }
905 }
908 impl PackedTruncatedTimestamp {
906 impl PackedTruncatedTimestamp {
909 fn null() -> Self {
907 fn null() -> Self {
910 Self {
908 Self {
911 truncated_seconds: 0.into(),
909 truncated_seconds: 0.into(),
912 nanoseconds: 0.into(),
910 nanoseconds: 0.into(),
913 }
911 }
914 }
912 }
915 }
913 }
@@ -1,2450 +1,2454
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 use format_bytes::format_bytes;
10 use format_bytes::format_bytes;
11 use once_cell::sync::OnceCell;
11 use once_cell::sync::OnceCell;
12
12
13 use crate::{
13 use crate::{
14 dirstate::dirs_multiset::DirsChildrenMultiset,
14 dirstate::dirs_multiset::DirsChildrenMultiset,
15 filepatterns::{
15 filepatterns::{
16 build_single_regex, filter_subincludes, get_patterns_from_file,
16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 PatternFileWarning, PatternResult,
17 PatternFileWarning, PatternResult,
18 },
18 },
19 utils::{
19 utils::{
20 files::{dir_ancestors, find_dirs},
20 files::{dir_ancestors, find_dirs},
21 hg_path::{HgPath, HgPathBuf, HgPathError},
21 hg_path::{HgPath, HgPathBuf, HgPathError},
22 Escaped,
22 Escaped,
23 },
23 },
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
25 };
25 };
26
26
27 use crate::dirstate::status::IgnoreFnType;
27 use crate::dirstate::status::IgnoreFnType;
28 use crate::filepatterns::normalize_path_bytes;
28 use crate::filepatterns::normalize_path_bytes;
29 use std::collections::HashSet;
29 use std::collections::HashSet;
30 use std::fmt::{Display, Error, Formatter};
30 use std::fmt::{Display, Error, Formatter};
31 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
32 use std::{borrow::ToOwned, collections::BTreeSet};
32 use std::{borrow::ToOwned, collections::BTreeSet};
33
33
34 #[derive(Debug, PartialEq)]
34 #[derive(Debug, PartialEq)]
35 pub enum VisitChildrenSet {
35 pub enum VisitChildrenSet {
36 /// Don't visit anything
36 /// Don't visit anything
37 Empty,
37 Empty,
38 /// Visit this directory and probably its children
38 /// Visit this directory and probably its children
39 This,
39 This,
40 /// Only visit the children (both files and directories) if they
40 /// Only visit the children (both files and directories) if they
41 /// are mentioned in this set. (empty set corresponds to [Empty])
41 /// are mentioned in this set. (empty set corresponds to [Empty])
42 /// TODO Should we implement a `NonEmptyHashSet`?
42 /// TODO Should we implement a `NonEmptyHashSet`?
43 Set(HashSet<HgPathBuf>),
43 Set(HashSet<HgPathBuf>),
44 /// Visit this directory and all subdirectories
44 /// Visit this directory and all subdirectories
45 /// (you can stop asking about the children set)
45 /// (you can stop asking about the children set)
46 Recursive,
46 Recursive,
47 }
47 }
48
48
49 pub trait Matcher: core::fmt::Debug {
49 pub trait Matcher: core::fmt::Debug {
50 /// Explicitly listed files
50 /// Explicitly listed files
51 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
51 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
52 /// Returns whether `filename` is in `file_set`
52 /// Returns whether `filename` is in `file_set`
53 fn exact_match(&self, filename: &HgPath) -> bool;
53 fn exact_match(&self, filename: &HgPath) -> bool;
54 /// Returns whether `filename` is matched by this matcher
54 /// Returns whether `filename` is matched by this matcher
55 fn matches(&self, filename: &HgPath) -> bool;
55 fn matches(&self, filename: &HgPath) -> bool;
56 /// Decides whether a directory should be visited based on whether it
56 /// Decides whether a directory should be visited based on whether it
57 /// has potential matches in it or one of its subdirectories, and
57 /// has potential matches in it or one of its subdirectories, and
58 /// potentially lists which subdirectories of that directory should be
58 /// potentially lists which subdirectories of that directory should be
59 /// visited. This is based on the match's primary, included, and excluded
59 /// visited. This is based on the match's primary, included, and excluded
60 /// patterns.
60 /// patterns.
61 ///
61 ///
62 /// # Example
62 /// # Example
63 ///
63 ///
64 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
65 /// return the following values (assuming the implementation of
65 /// return the following values (assuming the implementation of
66 /// visit_children_set is capable of recognizing this; some implementations
66 /// visit_children_set is capable of recognizing this; some implementations
67 /// are not).
67 /// are not).
68 ///
68 ///
69 /// ```text
69 /// ```text
70 /// ```ignore
70 /// ```ignore
71 /// '' -> {'foo', 'qux'}
71 /// '' -> {'foo', 'qux'}
72 /// 'baz' -> set()
72 /// 'baz' -> set()
73 /// 'foo' -> {'bar'}
73 /// 'foo' -> {'bar'}
74 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 /// // Ideally this would be `Recursive`, but since the prefix nature of
75 /// // matchers is applied to the entire matcher, we have to downgrade this
75 /// // matchers is applied to the entire matcher, we have to downgrade this
76 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
77 /// // `RootFilesIn'-kind matcher being mixed in.
77 /// // `RootFilesIn'-kind matcher being mixed in.
78 /// 'foo/bar' -> 'this'
78 /// 'foo/bar' -> 'this'
79 /// 'qux' -> 'this'
79 /// 'qux' -> 'this'
80 /// ```
80 /// ```
81 /// # Important
81 /// # Important
82 ///
82 ///
83 /// Most matchers do not know if they're representing files or
83 /// Most matchers do not know if they're representing files or
84 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
85 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 /// file or a directory, so `visit_children_set('dir')` for most matchers
86 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
87 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
88 /// it may return `VisitChildrenSet::This`.
88 /// it may return `VisitChildrenSet::This`.
89 /// Do not rely on the return being a `HashSet` indicating that there are
89 /// Do not rely on the return being a `HashSet` indicating that there are
90 /// no files in this dir to investigate (or equivalently that if there are
90 /// no files in this dir to investigate (or equivalently that if there are
91 /// files to investigate in 'dir' that it will always return
91 /// files to investigate in 'dir' that it will always return
92 /// `VisitChildrenSet::This`).
92 /// `VisitChildrenSet::This`).
93 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
93 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
94 /// Matcher will match everything and `files_set()` will be empty:
94 /// Matcher will match everything and `files_set()` will be empty:
95 /// optimization might be possible.
95 /// optimization might be possible.
96 fn matches_everything(&self) -> bool;
96 fn matches_everything(&self) -> bool;
97 /// Matcher will match exactly the files in `files_set()`: optimization
97 /// Matcher will match exactly the files in `files_set()`: optimization
98 /// might be possible.
98 /// might be possible.
99 fn is_exact(&self) -> bool;
99 fn is_exact(&self) -> bool;
100 }
100 }
101
101
/// Matches everything.
///```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
115
115
116 impl Matcher for AlwaysMatcher {
116 impl Matcher for AlwaysMatcher {
117 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
117 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
118 None
118 None
119 }
119 }
120 fn exact_match(&self, _filename: &HgPath) -> bool {
120 fn exact_match(&self, _filename: &HgPath) -> bool {
121 false
121 false
122 }
122 }
123 fn matches(&self, _filename: &HgPath) -> bool {
123 fn matches(&self, _filename: &HgPath) -> bool {
124 true
124 true
125 }
125 }
126 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
126 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
127 VisitChildrenSet::Recursive
127 VisitChildrenSet::Recursive
128 }
128 }
129 fn matches_everything(&self) -> bool {
129 fn matches_everything(&self) -> bool {
130 true
130 true
131 }
131 }
132 fn is_exact(&self) -> bool {
132 fn is_exact(&self) -> bool {
133 false
133 false
134 }
134 }
135 }
135 }
136
136
/// Matches nothing. The logical opposite of [`AlwaysMatcher`].
#[derive(Debug)]
pub struct NeverMatcher;
140
140
141 impl Matcher for NeverMatcher {
141 impl Matcher for NeverMatcher {
142 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
142 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
143 None
143 None
144 }
144 }
145 fn exact_match(&self, _filename: &HgPath) -> bool {
145 fn exact_match(&self, _filename: &HgPath) -> bool {
146 false
146 false
147 }
147 }
148 fn matches(&self, _filename: &HgPath) -> bool {
148 fn matches(&self, _filename: &HgPath) -> bool {
149 false
149 false
150 }
150 }
151 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
151 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
152 VisitChildrenSet::Empty
152 VisitChildrenSet::Empty
153 }
153 }
154 fn matches_everything(&self) -> bool {
154 fn matches_everything(&self) -> bool {
155 false
155 false
156 }
156 }
157 fn is_exact(&self) -> bool {
157 fn is_exact(&self) -> bool {
158 true
158 true
159 }
159 }
160 }
160 }
161
161
162 /// Matches the input files exactly. They are interpreted as paths, not
162 /// Matches the input files exactly. They are interpreted as paths, not
163 /// patterns.
163 /// patterns.
164 ///
164 ///
165 ///```
165 ///```
166 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
166 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
167 ///
167 ///
168 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
168 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
169 /// let matcher = FileMatcher::new(files).unwrap();
169 /// let matcher = FileMatcher::new(files).unwrap();
170 ///
170 ///
171 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
171 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
172 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
172 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
173 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
173 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
174 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
174 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
175 /// ```
175 /// ```
176 #[derive(Debug)]
176 #[derive(Debug)]
177 pub struct FileMatcher {
177 pub struct FileMatcher {
178 files: HashSet<HgPathBuf>,
178 files: HashSet<HgPathBuf>,
179 dirs: DirsMultiset,
179 dirs: DirsMultiset,
180 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
180 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
181 }
181 }
182
182
183 impl FileMatcher {
183 impl FileMatcher {
184 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
184 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
185 let dirs = DirsMultiset::from_manifest(&files)?;
185 let dirs = DirsMultiset::from_manifest(&files)?;
186 Ok(Self {
186 Ok(Self {
187 files: HashSet::from_iter(files),
187 files: HashSet::from_iter(files),
188 dirs,
188 dirs,
189 sorted_visitchildrenset_candidates: OnceCell::new(),
189 sorted_visitchildrenset_candidates: OnceCell::new(),
190 })
190 })
191 }
191 }
192 fn inner_matches(&self, filename: &HgPath) -> bool {
192 fn inner_matches(&self, filename: &HgPath) -> bool {
193 self.files.contains(filename.as_ref())
193 self.files.contains(filename.as_ref())
194 }
194 }
195 }
195 }
196
196
197 impl Matcher for FileMatcher {
197 impl Matcher for FileMatcher {
198 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
198 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
199 Some(&self.files)
199 Some(&self.files)
200 }
200 }
201 fn exact_match(&self, filename: &HgPath) -> bool {
201 fn exact_match(&self, filename: &HgPath) -> bool {
202 self.inner_matches(filename)
202 self.inner_matches(filename)
203 }
203 }
204 fn matches(&self, filename: &HgPath) -> bool {
204 fn matches(&self, filename: &HgPath) -> bool {
205 self.inner_matches(filename)
205 self.inner_matches(filename)
206 }
206 }
207 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
207 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
208 if self.files.is_empty() || !self.dirs.contains(directory) {
208 if self.files.is_empty() || !self.dirs.contains(directory) {
209 return VisitChildrenSet::Empty;
209 return VisitChildrenSet::Empty;
210 }
210 }
211
211
212 let compute_candidates = || -> BTreeSet<HgPathBuf> {
212 let compute_candidates = || -> BTreeSet<HgPathBuf> {
213 let mut candidates: BTreeSet<HgPathBuf> =
213 let mut candidates: BTreeSet<HgPathBuf> =
214 self.dirs.iter().cloned().collect();
214 self.dirs.iter().cloned().collect();
215 candidates.extend(self.files.iter().cloned());
215 candidates.extend(self.files.iter().cloned());
216 candidates.remove(HgPath::new(b""));
216 candidates.remove(HgPath::new(b""));
217 candidates
217 candidates
218 };
218 };
219 let candidates =
219 let candidates =
220 if directory.as_ref().is_empty() {
220 if directory.as_ref().is_empty() {
221 compute_candidates()
221 compute_candidates()
222 } else {
222 } else {
223 let sorted_candidates = self
223 let sorted_candidates = self
224 .sorted_visitchildrenset_candidates
224 .sorted_visitchildrenset_candidates
225 .get_or_init(compute_candidates);
225 .get_or_init(compute_candidates);
226 let directory_bytes = directory.as_ref().as_bytes();
226 let directory_bytes = directory.as_ref().as_bytes();
227 let start: HgPathBuf =
227 let start: HgPathBuf =
228 format_bytes!(b"{}/", directory_bytes).into();
228 format_bytes!(b"{}/", directory_bytes).into();
229 let start_len = start.len();
229 let start_len = start.len();
230 // `0` sorts after `/`
230 // `0` sorts after `/`
231 let end = format_bytes!(b"{}0", directory_bytes).into();
231 let end = format_bytes!(b"{}0", directory_bytes).into();
232 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
232 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
233 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
233 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
234 ))
234 ))
235 };
235 };
236
236
237 // `self.dirs` includes all of the directories, recursively, so if
237 // `self.dirs` includes all of the directories, recursively, so if
238 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
238 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
239 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
239 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
240 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
240 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
241 // subdir will be in there without a slash.
241 // subdir will be in there without a slash.
242 VisitChildrenSet::Set(
242 VisitChildrenSet::Set(
243 candidates
243 candidates
244 .into_iter()
244 .into_iter()
245 .filter_map(|c| {
245 .filter_map(|c| {
246 if c.bytes().all(|b| *b != b'/') {
246 if c.bytes().all(|b| *b != b'/') {
247 Some(c)
247 Some(c)
248 } else {
248 } else {
249 None
249 None
250 }
250 }
251 })
251 })
252 .collect(),
252 .collect(),
253 )
253 )
254 }
254 }
255 fn matches_everything(&self) -> bool {
255 fn matches_everything(&self) -> bool {
256 false
256 false
257 }
257 }
258 fn is_exact(&self) -> bool {
258 fn is_exact(&self) -> bool {
259 true
259 true
260 }
260 }
261 }
261 }
262
262
263 /// Matches a set of (kind, pat, source) against a 'root' directory.
263 /// Matches a set of (kind, pat, source) against a 'root' directory.
264 /// (Currently the 'root' directory is effectively always empty)
264 /// (Currently the 'root' directory is effectively always empty)
265 /// ```
265 /// ```
266 /// use hg::{
266 /// use hg::{
267 /// matchers::{PatternMatcher, Matcher},
267 /// matchers::{PatternMatcher, Matcher},
268 /// IgnorePattern,
268 /// IgnorePattern,
269 /// PatternSyntax,
269 /// PatternSyntax,
270 /// utils::hg_path::{HgPath, HgPathBuf}
270 /// utils::hg_path::{HgPath, HgPathBuf}
271 /// };
271 /// };
272 /// use std::collections::HashSet;
272 /// use std::collections::HashSet;
273 /// use std::path::Path;
273 /// use std::path::Path;
274 /// ///
274 /// ///
275 /// let ignore_patterns : Vec<IgnorePattern> =
275 /// let ignore_patterns : Vec<IgnorePattern> =
276 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
276 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
277 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
277 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
278 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
278 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
279 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
279 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
280 /// ];
280 /// ];
281 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
281 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
282 /// ///
282 /// ///
283 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
283 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
284 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
284 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
285 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
285 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
286 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
286 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
287 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
287 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
288 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
288 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
289 /// assert_eq!(matcher.file_set().unwrap(),
289 /// assert_eq!(matcher.file_set().unwrap(),
290 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
290 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
291 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
291 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
292 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
292 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
293 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
293 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
294 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
294 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
295 /// ```
295 /// ```
296 pub struct PatternMatcher<'a> {
296 pub struct PatternMatcher<'a> {
297 patterns: Vec<u8>,
297 patterns: Vec<u8>,
298 match_fn: IgnoreFnType<'a>,
298 match_fn: IgnoreFnType<'a>,
299 /// Whether all the patterns match a prefix (i.e. recursively)
299 /// Whether all the patterns match a prefix (i.e. recursively)
300 prefix: bool,
300 prefix: bool,
301 files: HashSet<HgPathBuf>,
301 files: HashSet<HgPathBuf>,
302 dirs_explicit: HashSet<HgPathBuf>,
302 dirs_explicit: HashSet<HgPathBuf>,
303 dirs: DirsMultiset,
303 dirs: DirsMultiset,
304 }
304 }
305
305
306 impl core::fmt::Debug for PatternMatcher<'_> {
306 impl core::fmt::Debug for PatternMatcher<'_> {
307 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
307 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
308 f.debug_struct("PatternMatcher")
308 f.debug_struct("PatternMatcher")
309 .field("patterns", &String::from_utf8_lossy(&self.patterns))
309 .field("patterns", &String::from_utf8_lossy(&self.patterns))
310 .field("prefix", &self.prefix)
310 .field("prefix", &self.prefix)
311 .field("files", &self.files)
311 .field("files", &self.files)
312 .field("dirs", &self.dirs)
312 .field("dirs", &self.dirs)
313 .finish()
313 .finish()
314 }
314 }
315 }
315 }
316
316
317 impl<'a> PatternMatcher<'a> {
317 impl<'a> PatternMatcher<'a> {
318 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
318 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
319 let RootsDirsAndParents {
319 let RootsDirsAndParents {
320 roots,
320 roots,
321 dirs: dirs_explicit,
321 dirs: dirs_explicit,
322 parents,
322 parents,
323 } = roots_dirs_and_parents(&ignore_patterns)?;
323 } = roots_dirs_and_parents(&ignore_patterns)?;
324 let files = roots;
324 let files = roots;
325 let dirs = parents;
325 let dirs = parents;
326 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
326 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
327
327
328 let prefix = ignore_patterns.iter().all(|k| {
328 let prefix = ignore_patterns.iter().all(|k| {
329 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
329 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
330 });
330 });
331 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
331 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
332
332
333 Ok(Self {
333 Ok(Self {
334 patterns,
334 patterns,
335 match_fn,
335 match_fn,
336 prefix,
336 prefix,
337 files,
337 files,
338 dirs,
338 dirs,
339 dirs_explicit,
339 dirs_explicit,
340 })
340 })
341 }
341 }
342 }
342 }
343
343
344 impl<'a> Matcher for PatternMatcher<'a> {
344 impl<'a> Matcher for PatternMatcher<'a> {
345 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
345 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
346 Some(&self.files)
346 Some(&self.files)
347 }
347 }
348
348
349 fn exact_match(&self, filename: &HgPath) -> bool {
349 fn exact_match(&self, filename: &HgPath) -> bool {
350 self.files.contains(filename)
350 self.files.contains(filename)
351 }
351 }
352
352
353 fn matches(&self, filename: &HgPath) -> bool {
353 fn matches(&self, filename: &HgPath) -> bool {
354 if self.files.contains(filename) {
354 if self.files.contains(filename) {
355 return true;
355 return true;
356 }
356 }
357 (self.match_fn)(filename)
357 (self.match_fn)(filename)
358 }
358 }
359
359
360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
361 if self.prefix && self.files.contains(directory) {
361 if self.prefix && self.files.contains(directory) {
362 return VisitChildrenSet::Recursive;
362 return VisitChildrenSet::Recursive;
363 }
363 }
364 if self.dirs.contains(directory) {
364 if self.dirs.contains(directory) {
365 return VisitChildrenSet::This;
365 return VisitChildrenSet::This;
366 }
366 }
367 if dir_ancestors(directory).any(|parent_dir| {
367 if dir_ancestors(directory).any(|parent_dir| {
368 self.files.contains(parent_dir)
368 self.files.contains(parent_dir)
369 || self.dirs_explicit.contains(parent_dir)
369 || self.dirs_explicit.contains(parent_dir)
370 }) {
370 }) {
371 VisitChildrenSet::This
371 VisitChildrenSet::This
372 } else {
372 } else {
373 VisitChildrenSet::Empty
373 VisitChildrenSet::Empty
374 }
374 }
375 }
375 }
376
376
377 fn matches_everything(&self) -> bool {
377 fn matches_everything(&self) -> bool {
378 false
378 false
379 }
379 }
380
380
381 fn is_exact(&self) -> bool {
381 fn is_exact(&self) -> bool {
382 false
382 false
383 }
383 }
384 }
384 }
385
385
386 /// Matches files that are included in the ignore rules.
386 /// Matches files that are included in the ignore rules.
387 /// ```
387 /// ```
388 /// use hg::{
388 /// use hg::{
389 /// matchers::{IncludeMatcher, Matcher},
389 /// matchers::{IncludeMatcher, Matcher},
390 /// IgnorePattern,
390 /// IgnorePattern,
391 /// PatternSyntax,
391 /// PatternSyntax,
392 /// utils::hg_path::HgPath
392 /// utils::hg_path::HgPath
393 /// };
393 /// };
394 /// use std::path::Path;
394 /// use std::path::Path;
395 /// ///
395 /// ///
396 /// let ignore_patterns =
396 /// let ignore_patterns =
397 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
397 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
398 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
398 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
399 /// ///
399 /// ///
400 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
400 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
401 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
401 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
402 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
402 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
403 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
403 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
404 /// ///
404 /// ///
405 /// let ignore_patterns =
405 /// let ignore_patterns =
406 /// vec![IgnorePattern::new(PatternSyntax::RootFilesIn, b"dir/subdir", Path::new(""))];
406 /// vec![IgnorePattern::new(PatternSyntax::RootFilesIn, b"dir/subdir", Path::new(""))];
407 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
407 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
408 /// ///
408 /// ///
409 /// assert!(!matcher.matches(HgPath::new(b"file")));
409 /// assert!(!matcher.matches(HgPath::new(b"file")));
410 /// assert!(!matcher.matches(HgPath::new(b"dir/file")));
410 /// assert!(!matcher.matches(HgPath::new(b"dir/file")));
411 /// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
411 /// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
412 /// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
412 /// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
413 /// ```
413 /// ```
414 pub struct IncludeMatcher<'a> {
414 pub struct IncludeMatcher<'a> {
415 patterns: Vec<u8>,
415 patterns: Vec<u8>,
416 match_fn: IgnoreFnType<'a>,
416 match_fn: IgnoreFnType<'a>,
417 /// Whether all the patterns match a prefix (i.e. recursively)
417 /// Whether all the patterns match a prefix (i.e. recursively)
418 prefix: bool,
418 prefix: bool,
419 roots: HashSet<HgPathBuf>,
419 roots: HashSet<HgPathBuf>,
420 dirs: HashSet<HgPathBuf>,
420 dirs: HashSet<HgPathBuf>,
421 parents: DirsMultiset,
421 parents: DirsMultiset,
422 }
422 }
423
423
424 impl core::fmt::Debug for IncludeMatcher<'_> {
424 impl core::fmt::Debug for IncludeMatcher<'_> {
425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426 f.debug_struct("IncludeMatcher")
426 f.debug_struct("IncludeMatcher")
427 .field("patterns", &String::from_utf8_lossy(&self.patterns))
427 .field("patterns", &String::from_utf8_lossy(&self.patterns))
428 .field("prefix", &self.prefix)
428 .field("prefix", &self.prefix)
429 .field("roots", &self.roots)
429 .field("roots", &self.roots)
430 .field("dirs", &self.dirs)
430 .field("dirs", &self.dirs)
431 .field("parents", &self.parents)
431 .field("parents", &self.parents)
432 .finish()
432 .finish()
433 }
433 }
434 }
434 }
435
435
436 impl<'a> Matcher for IncludeMatcher<'a> {
436 impl<'a> Matcher for IncludeMatcher<'a> {
437 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
437 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
438 None
438 None
439 }
439 }
440
440
441 fn exact_match(&self, _filename: &HgPath) -> bool {
441 fn exact_match(&self, _filename: &HgPath) -> bool {
442 false
442 false
443 }
443 }
444
444
445 fn matches(&self, filename: &HgPath) -> bool {
445 fn matches(&self, filename: &HgPath) -> bool {
446 (self.match_fn)(filename)
446 (self.match_fn)(filename)
447 }
447 }
448
448
449 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
449 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
450 let dir = directory;
450 let dir = directory;
451 if self.prefix && self.roots.contains(dir) {
451 if self.prefix && self.roots.contains(dir) {
452 return VisitChildrenSet::Recursive;
452 return VisitChildrenSet::Recursive;
453 }
453 }
454 if self.roots.contains(HgPath::new(b""))
454 if self.roots.contains(HgPath::new(b""))
455 || self.roots.contains(dir)
455 || self.roots.contains(dir)
456 || self.dirs.contains(dir)
456 || self.dirs.contains(dir)
457 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
457 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
458 {
458 {
459 return VisitChildrenSet::This;
459 return VisitChildrenSet::This;
460 }
460 }
461
461
462 if self.parents.contains(dir.as_ref()) {
462 if self.parents.contains(dir.as_ref()) {
463 let multiset = self.get_all_parents_children();
463 let multiset = self.get_all_parents_children();
464 if let Some(children) = multiset.get(dir) {
464 if let Some(children) = multiset.get(dir) {
465 return VisitChildrenSet::Set(
465 return VisitChildrenSet::Set(
466 children.iter().map(HgPathBuf::from).collect(),
466 children.iter().map(HgPathBuf::from).collect(),
467 );
467 );
468 }
468 }
469 }
469 }
470 VisitChildrenSet::Empty
470 VisitChildrenSet::Empty
471 }
471 }
472
472
473 fn matches_everything(&self) -> bool {
473 fn matches_everything(&self) -> bool {
474 false
474 false
475 }
475 }
476
476
477 fn is_exact(&self) -> bool {
477 fn is_exact(&self) -> bool {
478 false
478 false
479 }
479 }
480 }
480 }
481
481
482 /// The union of multiple matchers. Will match if any of the matchers match.
482 /// The union of multiple matchers. Will match if any of the matchers match.
483 #[derive(Debug)]
483 #[derive(Debug)]
484 pub struct UnionMatcher {
484 pub struct UnionMatcher {
485 matchers: Vec<Box<dyn Matcher + Sync>>,
485 matchers: Vec<Box<dyn Matcher + Sync>>,
486 }
486 }
487
487
488 impl Matcher for UnionMatcher {
488 impl Matcher for UnionMatcher {
489 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
489 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
490 None
490 None
491 }
491 }
492
492
493 fn exact_match(&self, _filename: &HgPath) -> bool {
493 fn exact_match(&self, _filename: &HgPath) -> bool {
494 false
494 false
495 }
495 }
496
496
497 fn matches(&self, filename: &HgPath) -> bool {
497 fn matches(&self, filename: &HgPath) -> bool {
498 self.matchers.iter().any(|m| m.matches(filename))
498 self.matchers.iter().any(|m| m.matches(filename))
499 }
499 }
500
500
501 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
501 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
502 let mut result = HashSet::new();
502 let mut result = HashSet::new();
503 let mut this = false;
503 let mut this = false;
504 for matcher in self.matchers.iter() {
504 for matcher in self.matchers.iter() {
505 let visit = matcher.visit_children_set(directory);
505 let visit = matcher.visit_children_set(directory);
506 match visit {
506 match visit {
507 VisitChildrenSet::Empty => continue,
507 VisitChildrenSet::Empty => continue,
508 VisitChildrenSet::This => {
508 VisitChildrenSet::This => {
509 this = true;
509 this = true;
510 // Don't break, we might have an 'all' in here.
510 // Don't break, we might have an 'all' in here.
511 continue;
511 continue;
512 }
512 }
513 VisitChildrenSet::Set(set) => {
513 VisitChildrenSet::Set(set) => {
514 result.extend(set);
514 result.extend(set);
515 }
515 }
516 VisitChildrenSet::Recursive => {
516 VisitChildrenSet::Recursive => {
517 return visit;
517 return visit;
518 }
518 }
519 }
519 }
520 }
520 }
521 if this {
521 if this {
522 return VisitChildrenSet::This;
522 return VisitChildrenSet::This;
523 }
523 }
524 if result.is_empty() {
524 if result.is_empty() {
525 VisitChildrenSet::Empty
525 VisitChildrenSet::Empty
526 } else {
526 } else {
527 VisitChildrenSet::Set(result)
527 VisitChildrenSet::Set(result)
528 }
528 }
529 }
529 }
530
530
531 fn matches_everything(&self) -> bool {
531 fn matches_everything(&self) -> bool {
532 // TODO Maybe if all are AlwaysMatcher?
532 // TODO Maybe if all are AlwaysMatcher?
533 false
533 false
534 }
534 }
535
535
536 fn is_exact(&self) -> bool {
536 fn is_exact(&self) -> bool {
537 false
537 false
538 }
538 }
539 }
539 }
540
540
541 impl UnionMatcher {
541 impl UnionMatcher {
542 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
542 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
543 Self { matchers }
543 Self { matchers }
544 }
544 }
545 }
545 }
546
546
547 #[derive(Debug)]
547 #[derive(Debug)]
548 pub struct IntersectionMatcher {
548 pub struct IntersectionMatcher {
549 m1: Box<dyn Matcher + Sync>,
549 m1: Box<dyn Matcher + Sync>,
550 m2: Box<dyn Matcher + Sync>,
550 m2: Box<dyn Matcher + Sync>,
551 files: Option<HashSet<HgPathBuf>>,
551 files: Option<HashSet<HgPathBuf>>,
552 }
552 }
553
553
554 impl Matcher for IntersectionMatcher {
554 impl Matcher for IntersectionMatcher {
555 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
555 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
556 self.files.as_ref()
556 self.files.as_ref()
557 }
557 }
558
558
559 fn exact_match(&self, filename: &HgPath) -> bool {
559 fn exact_match(&self, filename: &HgPath) -> bool {
560 self.files.as_ref().map_or(false, |f| f.contains(filename))
560 self.files.as_ref().map_or(false, |f| f.contains(filename))
561 }
561 }
562
562
563 fn matches(&self, filename: &HgPath) -> bool {
563 fn matches(&self, filename: &HgPath) -> bool {
564 self.m1.matches(filename) && self.m2.matches(filename)
564 self.m1.matches(filename) && self.m2.matches(filename)
565 }
565 }
566
566
567 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
567 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
568 let m1_set = self.m1.visit_children_set(directory);
568 let m1_set = self.m1.visit_children_set(directory);
569 if m1_set == VisitChildrenSet::Empty {
569 if m1_set == VisitChildrenSet::Empty {
570 return VisitChildrenSet::Empty;
570 return VisitChildrenSet::Empty;
571 }
571 }
572 let m2_set = self.m2.visit_children_set(directory);
572 let m2_set = self.m2.visit_children_set(directory);
573 if m2_set == VisitChildrenSet::Empty {
573 if m2_set == VisitChildrenSet::Empty {
574 return VisitChildrenSet::Empty;
574 return VisitChildrenSet::Empty;
575 }
575 }
576
576
577 if m1_set == VisitChildrenSet::Recursive {
577 if m1_set == VisitChildrenSet::Recursive {
578 return m2_set;
578 return m2_set;
579 } else if m2_set == VisitChildrenSet::Recursive {
579 } else if m2_set == VisitChildrenSet::Recursive {
580 return m1_set;
580 return m1_set;
581 }
581 }
582
582
583 match (&m1_set, &m2_set) {
583 match (&m1_set, &m2_set) {
584 (VisitChildrenSet::Recursive, _) => m2_set,
584 (VisitChildrenSet::Recursive, _) => m2_set,
585 (_, VisitChildrenSet::Recursive) => m1_set,
585 (_, VisitChildrenSet::Recursive) => m1_set,
586 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
586 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
587 VisitChildrenSet::This
587 VisitChildrenSet::This
588 }
588 }
589 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
589 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
590 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
590 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
591 if set.is_empty() {
591 if set.is_empty() {
592 VisitChildrenSet::Empty
592 VisitChildrenSet::Empty
593 } else {
593 } else {
594 VisitChildrenSet::Set(set)
594 VisitChildrenSet::Set(set)
595 }
595 }
596 }
596 }
597 _ => unreachable!(),
597 _ => unreachable!(),
598 }
598 }
599 }
599 }
600
600
601 fn matches_everything(&self) -> bool {
601 fn matches_everything(&self) -> bool {
602 self.m1.matches_everything() && self.m2.matches_everything()
602 self.m1.matches_everything() && self.m2.matches_everything()
603 }
603 }
604
604
605 fn is_exact(&self) -> bool {
605 fn is_exact(&self) -> bool {
606 self.m1.is_exact() || self.m2.is_exact()
606 self.m1.is_exact() || self.m2.is_exact()
607 }
607 }
608 }
608 }
609
609
610 impl IntersectionMatcher {
610 impl IntersectionMatcher {
611 pub fn new(
611 pub fn new(
612 mut m1: Box<dyn Matcher + Sync>,
612 mut m1: Box<dyn Matcher + Sync>,
613 mut m2: Box<dyn Matcher + Sync>,
613 mut m2: Box<dyn Matcher + Sync>,
614 ) -> Self {
614 ) -> Self {
615 let files = if m1.is_exact() || m2.is_exact() {
615 let files = if m1.is_exact() || m2.is_exact() {
616 if !m1.is_exact() {
616 if !m1.is_exact() {
617 std::mem::swap(&mut m1, &mut m2);
617 std::mem::swap(&mut m1, &mut m2);
618 }
618 }
619 m1.file_set().map(|m1_files| {
619 m1.file_set().map(|m1_files| {
620 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
620 m1_files
621 .iter()
622 .filter(|&f| m2.matches(f))
623 .cloned()
624 .collect()
621 })
625 })
622 } else {
626 } else {
623 // without exact input file sets, we can't do an exact
627 // without exact input file sets, we can't do an exact
624 // intersection, so we must over-approximate by
628 // intersection, so we must over-approximate by
625 // unioning instead
629 // unioning instead
626 m1.file_set().map(|m1_files| match m2.file_set() {
630 m1.file_set().map(|m1_files| match m2.file_set() {
627 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
631 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
628 None => m1_files.iter().cloned().collect(),
632 None => m1_files.iter().cloned().collect(),
629 })
633 })
630 };
634 };
631 Self { m1, m2, files }
635 Self { m1, m2, files }
632 }
636 }
633 }
637 }
634
638
635 #[derive(Debug)]
639 #[derive(Debug)]
636 pub struct DifferenceMatcher {
640 pub struct DifferenceMatcher {
637 base: Box<dyn Matcher + Sync>,
641 base: Box<dyn Matcher + Sync>,
638 excluded: Box<dyn Matcher + Sync>,
642 excluded: Box<dyn Matcher + Sync>,
639 files: Option<HashSet<HgPathBuf>>,
643 files: Option<HashSet<HgPathBuf>>,
640 }
644 }
641
645
642 impl Matcher for DifferenceMatcher {
646 impl Matcher for DifferenceMatcher {
643 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
647 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
644 self.files.as_ref()
648 self.files.as_ref()
645 }
649 }
646
650
647 fn exact_match(&self, filename: &HgPath) -> bool {
651 fn exact_match(&self, filename: &HgPath) -> bool {
648 self.files.as_ref().map_or(false, |f| f.contains(filename))
652 self.files.as_ref().map_or(false, |f| f.contains(filename))
649 }
653 }
650
654
651 fn matches(&self, filename: &HgPath) -> bool {
655 fn matches(&self, filename: &HgPath) -> bool {
652 self.base.matches(filename) && !self.excluded.matches(filename)
656 self.base.matches(filename) && !self.excluded.matches(filename)
653 }
657 }
654
658
655 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
659 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
656 let excluded_set = self.excluded.visit_children_set(directory);
660 let excluded_set = self.excluded.visit_children_set(directory);
657 if excluded_set == VisitChildrenSet::Recursive {
661 if excluded_set == VisitChildrenSet::Recursive {
658 return VisitChildrenSet::Empty;
662 return VisitChildrenSet::Empty;
659 }
663 }
660 let base_set = self.base.visit_children_set(directory);
664 let base_set = self.base.visit_children_set(directory);
661 // Possible values for base: 'recursive', 'this', set(...), set()
665 // Possible values for base: 'recursive', 'this', set(...), set()
662 // Possible values for excluded: 'this', set(...), set()
666 // Possible values for excluded: 'this', set(...), set()
663 // If excluded has nothing under here that we care about, return base,
667 // If excluded has nothing under here that we care about, return base,
664 // even if it's 'recursive'.
668 // even if it's 'recursive'.
665 if excluded_set == VisitChildrenSet::Empty {
669 if excluded_set == VisitChildrenSet::Empty {
666 return base_set;
670 return base_set;
667 }
671 }
668 match base_set {
672 match base_set {
669 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
673 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
670 // Never return 'recursive' here if excluded_set is any kind of
674 // Never return 'recursive' here if excluded_set is any kind of
671 // non-empty (either 'this' or set(foo)), since excluded might
675 // non-empty (either 'this' or set(foo)), since excluded might
672 // return set() for a subdirectory.
676 // return set() for a subdirectory.
673 VisitChildrenSet::This
677 VisitChildrenSet::This
674 }
678 }
675 set => {
679 set => {
676 // Possible values for base: set(...), set()
680 // Possible values for base: set(...), set()
677 // Possible values for excluded: 'this', set(...)
681 // Possible values for excluded: 'this', set(...)
678 // We ignore excluded set results. They're possibly incorrect:
682 // We ignore excluded set results. They're possibly incorrect:
679 // base = path:dir/subdir
683 // base = path:dir/subdir
680 // excluded=rootfilesin:dir,
684 // excluded=rootfilesin:dir,
681 // visit_children_set(''):
685 // visit_children_set(''):
682 // base returns {'dir'}, excluded returns {'dir'}, if we
686 // base returns {'dir'}, excluded returns {'dir'}, if we
683 // subtracted we'd return set(), which is *not* correct, we
687 // subtracted we'd return set(), which is *not* correct, we
684 // still need to visit 'dir'!
688 // still need to visit 'dir'!
685 set
689 set
686 }
690 }
687 }
691 }
688 }
692 }
689
693
690 fn matches_everything(&self) -> bool {
694 fn matches_everything(&self) -> bool {
691 false
695 false
692 }
696 }
693
697
694 fn is_exact(&self) -> bool {
698 fn is_exact(&self) -> bool {
695 self.base.is_exact()
699 self.base.is_exact()
696 }
700 }
697 }
701 }
698
702
699 impl DifferenceMatcher {
703 impl DifferenceMatcher {
700 pub fn new(
704 pub fn new(
701 base: Box<dyn Matcher + Sync>,
705 base: Box<dyn Matcher + Sync>,
702 excluded: Box<dyn Matcher + Sync>,
706 excluded: Box<dyn Matcher + Sync>,
703 ) -> Self {
707 ) -> Self {
704 let base_is_exact = base.is_exact();
708 let base_is_exact = base.is_exact();
705 let base_files = base.file_set().map(ToOwned::to_owned);
709 let base_files = base.file_set().map(ToOwned::to_owned);
706 let mut new = Self {
710 let mut new = Self {
707 base,
711 base,
708 excluded,
712 excluded,
709 files: None,
713 files: None,
710 };
714 };
711 if base_is_exact {
715 if base_is_exact {
712 new.files = base_files.map(|files| {
716 new.files = base_files.map(|files| {
713 files.iter().cloned().filter(|f| new.matches(f)).collect()
717 files.iter().filter(|&f| new.matches(f)).cloned().collect()
714 });
718 });
715 }
719 }
716 new
720 new
717 }
721 }
718 }
722 }
719
723
720 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
724 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
721 /// contexts.
725 /// contexts.
722 ///
726 ///
723 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
727 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
724 /// from many threads at once is prone to contention, probably within the
728 /// from many threads at once is prone to contention, probably within the
725 /// scratch space needed as the regex DFA is built lazily.
729 /// scratch space needed as the regex DFA is built lazily.
726 ///
730 ///
727 /// We are in the process of raising the issue upstream, but for now
731 /// We are in the process of raising the issue upstream, but for now
728 /// the workaround used here is to store the `Regex` in a lazily populated
732 /// the workaround used here is to store the `Regex` in a lazily populated
729 /// thread-local variable, sharing the initial read-only compilation, but
733 /// thread-local variable, sharing the initial read-only compilation, but
730 /// not the lazy dfa scratch space mentioned above.
734 /// not the lazy dfa scratch space mentioned above.
731 ///
735 ///
732 /// This reduces the contention observed with 16+ threads, but does not
736 /// This reduces the contention observed with 16+ threads, but does not
733 /// completely remove it. Hopefully this can be addressed upstream.
737 /// completely remove it. Hopefully this can be addressed upstream.
734 struct RegexMatcher {
738 struct RegexMatcher {
735 /// Compiled at the start of the status algorithm, used as a base for
739 /// Compiled at the start of the status algorithm, used as a base for
736 /// cloning in each thread-local `self.local`, thus sharing the expensive
740 /// cloning in each thread-local `self.local`, thus sharing the expensive
737 /// first compilation.
741 /// first compilation.
738 base: regex::bytes::Regex,
742 base: regex::bytes::Regex,
739 /// Thread-local variable that holds the `Regex` that is actually queried
743 /// Thread-local variable that holds the `Regex` that is actually queried
740 /// from each thread.
744 /// from each thread.
741 local: thread_local::ThreadLocal<regex::bytes::Regex>,
745 local: thread_local::ThreadLocal<regex::bytes::Regex>,
742 }
746 }
743
747
744 impl RegexMatcher {
748 impl RegexMatcher {
745 /// Returns whether the path matches the stored `Regex`.
749 /// Returns whether the path matches the stored `Regex`.
746 pub fn is_match(&self, path: &HgPath) -> bool {
750 pub fn is_match(&self, path: &HgPath) -> bool {
747 self.local
751 self.local
748 .get_or(|| self.base.clone())
752 .get_or(|| self.base.clone())
749 .is_match(path.as_bytes())
753 .is_match(path.as_bytes())
750 }
754 }
751 }
755 }
752
756
753 /// Return a `RegexBuilder` from a bytes pattern
757 /// Return a `RegexBuilder` from a bytes pattern
754 ///
758 ///
755 /// This works around the fact that even if it works on byte haysacks,
759 /// This works around the fact that even if it works on byte haysacks,
756 /// [`regex::bytes::Regex`] still uses UTF-8 patterns.
760 /// [`regex::bytes::Regex`] still uses UTF-8 patterns.
757 pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder {
761 pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder {
758 use std::io::Write;
762 use std::io::Write;
759
763
760 // The `regex` crate adds `.*` to the start and end of expressions if there
764 // The `regex` crate adds `.*` to the start and end of expressions if there
761 // are no anchors, so add the start anchor.
765 // are no anchors, so add the start anchor.
762 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
766 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
763 for byte in pattern {
767 for byte in pattern {
764 if *byte > 127 {
768 if *byte > 127 {
765 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
769 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
766 } else {
770 } else {
767 escaped_bytes.push(*byte);
771 escaped_bytes.push(*byte);
768 }
772 }
769 }
773 }
770 escaped_bytes.push(b')');
774 escaped_bytes.push(b')');
771
775
772 // Avoid the cost of UTF8 checking
776 // Avoid the cost of UTF8 checking
773 //
777 //
774 // # Safety
778 // # Safety
775 // This is safe because we escaped all non-ASCII bytes.
779 // This is safe because we escaped all non-ASCII bytes.
776 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
780 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
777 regex::bytes::RegexBuilder::new(&pattern_string)
781 regex::bytes::RegexBuilder::new(&pattern_string)
778 }
782 }
779
783
780 /// Returns a function that matches an `HgPath` against the given regex
784 /// Returns a function that matches an `HgPath` against the given regex
781 /// pattern.
785 /// pattern.
782 ///
786 ///
783 /// This can fail when the pattern is invalid or not supported by the
787 /// This can fail when the pattern is invalid or not supported by the
784 /// underlying engine (the `regex` crate), for instance anything with
788 /// underlying engine (the `regex` crate), for instance anything with
785 /// back-references.
789 /// back-references.
786 #[logging_timer::time("trace")]
790 #[logging_timer::time("trace")]
787 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
791 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
788 let re = re_bytes_builder(pattern)
792 let re = re_bytes_builder(pattern)
789 .unicode(false)
793 .unicode(false)
790 // Big repos with big `.hgignore` will hit the default limit and
794 // Big repos with big `.hgignore` will hit the default limit and
791 // incur a significant performance hit. One repo's `hg status` hit
795 // incur a significant performance hit. One repo's `hg status` hit
792 // multiple *minutes*.
796 // multiple *minutes*.
793 .dfa_size_limit(50 * (1 << 20))
797 .dfa_size_limit(50 * (1 << 20))
794 .build()
798 .build()
795 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
799 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
796
800
797 Ok(RegexMatcher {
801 Ok(RegexMatcher {
798 base: re,
802 base: re,
799 local: Default::default(),
803 local: Default::default(),
800 })
804 })
801 }
805 }
802
806
803 /// Returns the regex pattern and a function that matches an `HgPath` against
807 /// Returns the regex pattern and a function that matches an `HgPath` against
804 /// said regex formed by the given ignore patterns.
808 /// said regex formed by the given ignore patterns.
805 fn build_regex_match<'a>(
809 fn build_regex_match<'a>(
806 ignore_patterns: &[IgnorePattern],
810 ignore_patterns: &[IgnorePattern],
807 glob_suffix: &[u8],
811 glob_suffix: &[u8],
808 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
812 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
809 let mut regexps = vec![];
813 let mut regexps = vec![];
810 let mut exact_set = HashSet::new();
814 let mut exact_set = HashSet::new();
811
815
812 for pattern in ignore_patterns {
816 for pattern in ignore_patterns {
813 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
817 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
814 regexps.push(re);
818 regexps.push(re);
815 } else {
819 } else {
816 let exact = normalize_path_bytes(&pattern.pattern);
820 let exact = normalize_path_bytes(&pattern.pattern);
817 exact_set.insert(HgPathBuf::from_bytes(&exact));
821 exact_set.insert(HgPathBuf::from_bytes(&exact));
818 }
822 }
819 }
823 }
820
824
821 let full_regex = regexps.join(&b'|');
825 let full_regex = regexps.join(&b'|');
822
826
823 // An empty pattern would cause the regex engine to incorrectly match the
827 // An empty pattern would cause the regex engine to incorrectly match the
824 // (empty) root directory
828 // (empty) root directory
825 let func = if !(regexps.is_empty()) {
829 let func = if !(regexps.is_empty()) {
826 let matcher = re_matcher(&full_regex)?;
830 let matcher = re_matcher(&full_regex)?;
827 let func = move |filename: &HgPath| {
831 let func = move |filename: &HgPath| {
828 exact_set.contains(filename) || matcher.is_match(filename)
832 exact_set.contains(filename) || matcher.is_match(filename)
829 };
833 };
830 Box::new(func) as IgnoreFnType
834 Box::new(func) as IgnoreFnType
831 } else {
835 } else {
832 let func = move |filename: &HgPath| exact_set.contains(filename);
836 let func = move |filename: &HgPath| exact_set.contains(filename);
833 Box::new(func) as IgnoreFnType
837 Box::new(func) as IgnoreFnType
834 };
838 };
835
839
836 Ok((full_regex, func))
840 Ok((full_regex, func))
837 }
841 }
838
842
839 /// Returns roots and directories corresponding to each pattern.
843 /// Returns roots and directories corresponding to each pattern.
840 ///
844 ///
841 /// This calculates the roots and directories exactly matching the patterns and
845 /// This calculates the roots and directories exactly matching the patterns and
842 /// returns a tuple of (roots, dirs). It does not return other directories
846 /// returns a tuple of (roots, dirs). It does not return other directories
843 /// which may also need to be considered, like the parent directories.
847 /// which may also need to be considered, like the parent directories.
844 fn roots_and_dirs(
848 fn roots_and_dirs(
845 ignore_patterns: &[IgnorePattern],
849 ignore_patterns: &[IgnorePattern],
846 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
850 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
847 let mut roots = Vec::new();
851 let mut roots = Vec::new();
848 let mut dirs = Vec::new();
852 let mut dirs = Vec::new();
849
853
850 for ignore_pattern in ignore_patterns {
854 for ignore_pattern in ignore_patterns {
851 let IgnorePattern {
855 let IgnorePattern {
852 syntax, pattern, ..
856 syntax, pattern, ..
853 } = ignore_pattern;
857 } = ignore_pattern;
854 match syntax {
858 match syntax {
855 PatternSyntax::RootGlob | PatternSyntax::Glob => {
859 PatternSyntax::RootGlob | PatternSyntax::Glob => {
856 let mut root = HgPathBuf::new();
860 let mut root = HgPathBuf::new();
857 for p in pattern.split(|c| *c == b'/') {
861 for p in pattern.split(|c| *c == b'/') {
858 if p.iter()
862 if p.iter()
859 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
863 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
860 {
864 {
861 break;
865 break;
862 }
866 }
863 root.push(HgPathBuf::from_bytes(p).as_ref());
867 root.push(HgPathBuf::from_bytes(p).as_ref());
864 }
868 }
865 roots.push(root);
869 roots.push(root);
866 }
870 }
867 PatternSyntax::Path
871 PatternSyntax::Path
868 | PatternSyntax::RelPath
872 | PatternSyntax::RelPath
869 | PatternSyntax::FilePath => {
873 | PatternSyntax::FilePath => {
870 let pat = HgPath::new(if pattern == b"." {
874 let pat = HgPath::new(if pattern == b"." {
871 &[] as &[u8]
875 &[] as &[u8]
872 } else {
876 } else {
873 pattern
877 pattern
874 });
878 });
875 roots.push(pat.to_owned());
879 roots.push(pat.to_owned());
876 }
880 }
877 PatternSyntax::RootFilesIn => {
881 PatternSyntax::RootFilesIn => {
878 let pat = if pattern == b"." {
882 let pat = if pattern == b"." {
879 &[] as &[u8]
883 &[] as &[u8]
880 } else {
884 } else {
881 pattern
885 pattern
882 };
886 };
883 dirs.push(HgPathBuf::from_bytes(pat));
887 dirs.push(HgPathBuf::from_bytes(pat));
884 }
888 }
885 _ => {
889 _ => {
886 roots.push(HgPathBuf::new());
890 roots.push(HgPathBuf::new());
887 }
891 }
888 }
892 }
889 }
893 }
890 (roots, dirs)
894 (roots, dirs)
891 }
895 }
892
896
/// Paths extracted from patterns
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories to match recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    pub parents: DirsMultiset,
}
903
907
904 /// Extract roots, dirs and parents from patterns.
908 /// Extract roots, dirs and parents from patterns.
905 fn roots_dirs_and_parents(
909 fn roots_dirs_and_parents(
906 ignore_patterns: &[IgnorePattern],
910 ignore_patterns: &[IgnorePattern],
907 ) -> PatternResult<RootsDirsAndParents> {
911 ) -> PatternResult<RootsDirsAndParents> {
908 let (roots, dirs) = roots_and_dirs(ignore_patterns);
912 let (roots, dirs) = roots_and_dirs(ignore_patterns);
909
913
910 let mut parents = DirsMultiset::from_manifest(&dirs)?;
914 let mut parents = DirsMultiset::from_manifest(&dirs)?;
911
915
912 for path in &roots {
916 for path in &roots {
913 parents.add_path(path)?
917 parents.add_path(path)?
914 }
918 }
915
919
916 Ok(RootsDirsAndParents {
920 Ok(RootsDirsAndParents {
917 roots: HashSet::from_iter(roots),
921 roots: HashSet::from_iter(roots),
918 dirs: HashSet::from_iter(dirs),
922 dirs: HashSet::from_iter(dirs),
919 parents,
923 parents,
920 })
924 })
921 }
925 }
922
926
923 /// Returns a function that checks whether a given file (in the general sense)
927 /// Returns a function that checks whether a given file (in the general sense)
924 /// should be matched.
928 /// should be matched.
925 fn build_match<'a>(
929 fn build_match<'a>(
926 ignore_patterns: Vec<IgnorePattern>,
930 ignore_patterns: Vec<IgnorePattern>,
927 glob_suffix: &[u8],
931 glob_suffix: &[u8],
928 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
932 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
929 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
933 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
930 // For debugging and printing
934 // For debugging and printing
931 let mut patterns = vec![];
935 let mut patterns = vec![];
932
936
933 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
937 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
934
938
935 if !subincludes.is_empty() {
939 if !subincludes.is_empty() {
936 // Build prefix-based matcher functions for subincludes
940 // Build prefix-based matcher functions for subincludes
937 let mut submatchers = FastHashMap::default();
941 let mut submatchers = FastHashMap::default();
938 let mut prefixes = vec![];
942 let mut prefixes = vec![];
939
943
940 for sub_include in subincludes {
944 for sub_include in subincludes {
941 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
945 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
942 let match_fn =
946 let match_fn =
943 Box::new(move |path: &HgPath| matcher.matches(path));
947 Box::new(move |path: &HgPath| matcher.matches(path));
944 prefixes.push(sub_include.prefix.clone());
948 prefixes.push(sub_include.prefix.clone());
945 submatchers.insert(sub_include.prefix.clone(), match_fn);
949 submatchers.insert(sub_include.prefix.clone(), match_fn);
946 }
950 }
947
951
948 let match_subinclude = move |filename: &HgPath| {
952 let match_subinclude = move |filename: &HgPath| {
949 for prefix in prefixes.iter() {
953 for prefix in prefixes.iter() {
950 if let Some(rel) = filename.relative_to(prefix) {
954 if let Some(rel) = filename.relative_to(prefix) {
951 if (submatchers[prefix])(rel) {
955 if (submatchers[prefix])(rel) {
952 return true;
956 return true;
953 }
957 }
954 }
958 }
955 }
959 }
956 false
960 false
957 };
961 };
958
962
959 match_funcs.push(Box::new(match_subinclude));
963 match_funcs.push(Box::new(match_subinclude));
960 }
964 }
961
965
962 if !ignore_patterns.is_empty() {
966 if !ignore_patterns.is_empty() {
963 // Either do dumb matching if all patterns are rootfiles, or match
967 // Either do dumb matching if all patterns are rootfiles, or match
964 // with a regex.
968 // with a regex.
965 if ignore_patterns
969 if ignore_patterns
966 .iter()
970 .iter()
967 .all(|k| k.syntax == PatternSyntax::RootFilesIn)
971 .all(|k| k.syntax == PatternSyntax::RootFilesIn)
968 {
972 {
969 let dirs: HashSet<_> = ignore_patterns
973 let dirs: HashSet<_> = ignore_patterns
970 .iter()
974 .iter()
971 .map(|k| k.pattern.to_owned())
975 .map(|k| k.pattern.to_owned())
972 .collect();
976 .collect();
973 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
977 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
974
978
975 let match_func = move |path: &HgPath| -> bool {
979 let match_func = move |path: &HgPath| -> bool {
976 let path = path.as_bytes();
980 let path = path.as_bytes();
977 let i = path.iter().rposition(|a| *a == b'/');
981 let i = path.iter().rposition(|a| *a == b'/');
978 let dir = if let Some(i) = i { &path[..i] } else { b"." };
982 let dir = if let Some(i) = i { &path[..i] } else { b"." };
979 dirs.contains(dir)
983 dirs.contains(dir)
980 };
984 };
981 match_funcs.push(Box::new(match_func));
985 match_funcs.push(Box::new(match_func));
982
986
983 patterns.extend(b"rootfilesin: ");
987 patterns.extend(b"rootfilesin: ");
984 dirs_vec.sort();
988 dirs_vec.sort();
985 patterns.extend(dirs_vec.escaped_bytes());
989 patterns.extend(dirs_vec.escaped_bytes());
986 } else {
990 } else {
987 let (new_re, match_func) =
991 let (new_re, match_func) =
988 build_regex_match(&ignore_patterns, glob_suffix)?;
992 build_regex_match(&ignore_patterns, glob_suffix)?;
989 patterns = new_re;
993 patterns = new_re;
990 match_funcs.push(match_func)
994 match_funcs.push(match_func)
991 }
995 }
992 }
996 }
993
997
994 Ok(if match_funcs.len() == 1 {
998 Ok(if match_funcs.len() == 1 {
995 (patterns, match_funcs.remove(0))
999 (patterns, match_funcs.remove(0))
996 } else {
1000 } else {
997 (
1001 (
998 patterns,
1002 patterns,
999 Box::new(move |f: &HgPath| -> bool {
1003 Box::new(move |f: &HgPath| -> bool {
1000 match_funcs.iter().any(|match_func| match_func(f))
1004 match_funcs.iter().any(|match_func| match_func(f))
1001 }),
1005 }),
1002 )
1006 )
1003 })
1007 })
1004 }
1008 }
1005
1009
1006 /// Parses all "ignore" files with their recursive includes and returns a
1010 /// Parses all "ignore" files with their recursive includes and returns a
1007 /// function that checks whether a given file (in the general sense) should be
1011 /// function that checks whether a given file (in the general sense) should be
1008 /// ignored.
1012 /// ignored.
1009 pub fn get_ignore_matcher<'a>(
1013 pub fn get_ignore_matcher<'a>(
1010 mut all_pattern_files: Vec<PathBuf>,
1014 mut all_pattern_files: Vec<PathBuf>,
1011 root_dir: &Path,
1015 root_dir: &Path,
1012 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1016 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1013 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
1017 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
1014 let mut all_patterns = vec![];
1018 let mut all_patterns = vec![];
1015 let mut all_warnings = vec![];
1019 let mut all_warnings = vec![];
1016
1020
1017 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1021 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1018 // deterministic even if the ordering of `all_pattern_files` is not (such
1022 // deterministic even if the ordering of `all_pattern_files` is not (such
1019 // as when a iteration order of a Python dict or Rust HashMap is involved).
1023 // as when a iteration order of a Python dict or Rust HashMap is involved).
1020 // Sort by "string" representation instead of the default by component
1024 // Sort by "string" representation instead of the default by component
1021 // (with a Rust-specific definition of a component)
1025 // (with a Rust-specific definition of a component)
1022 all_pattern_files
1026 all_pattern_files
1023 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1027 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1024
1028
1025 for pattern_file in &all_pattern_files {
1029 for pattern_file in &all_pattern_files {
1026 let (patterns, warnings) = get_patterns_from_file(
1030 let (patterns, warnings) = get_patterns_from_file(
1027 pattern_file,
1031 pattern_file,
1028 root_dir,
1032 root_dir,
1029 inspect_pattern_bytes,
1033 inspect_pattern_bytes,
1030 )?;
1034 )?;
1031
1035
1032 all_patterns.extend(patterns.to_owned());
1036 all_patterns.extend(patterns.to_owned());
1033 all_warnings.extend(warnings);
1037 all_warnings.extend(warnings);
1034 }
1038 }
1035 let matcher = IncludeMatcher::new(all_patterns)?;
1039 let matcher = IncludeMatcher::new(all_patterns)?;
1036 Ok((matcher, all_warnings))
1040 Ok((matcher, all_warnings))
1037 }
1041 }
1038
1042
1039 /// Parses all "ignore" files with their recursive includes and returns a
1043 /// Parses all "ignore" files with their recursive includes and returns a
1040 /// function that checks whether a given file (in the general sense) should be
1044 /// function that checks whether a given file (in the general sense) should be
1041 /// ignored.
1045 /// ignored.
1042 pub fn get_ignore_function<'a>(
1046 pub fn get_ignore_function<'a>(
1043 all_pattern_files: Vec<PathBuf>,
1047 all_pattern_files: Vec<PathBuf>,
1044 root_dir: &Path,
1048 root_dir: &Path,
1045 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1049 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1046 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1050 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1047 let res =
1051 let res =
1048 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1052 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1049 res.map(|(matcher, all_warnings)| {
1053 res.map(|(matcher, all_warnings)| {
1050 let res: IgnoreFnType<'a> =
1054 let res: IgnoreFnType<'a> =
1051 Box::new(move |path: &HgPath| matcher.matches(path));
1055 Box::new(move |path: &HgPath| matcher.matches(path));
1052
1056
1053 (res, all_warnings)
1057 (res, all_warnings)
1054 })
1058 })
1055 }
1059 }
1056
1060
1057 impl<'a> IncludeMatcher<'a> {
1061 impl<'a> IncludeMatcher<'a> {
1058 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1062 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1059 let RootsDirsAndParents {
1063 let RootsDirsAndParents {
1060 roots,
1064 roots,
1061 dirs,
1065 dirs,
1062 parents,
1066 parents,
1063 } = roots_dirs_and_parents(&ignore_patterns)?;
1067 } = roots_dirs_and_parents(&ignore_patterns)?;
1064 let prefix = ignore_patterns.iter().all(|k| {
1068 let prefix = ignore_patterns.iter().all(|k| {
1065 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1069 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1066 });
1070 });
1067 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1071 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1068
1072
1069 Ok(Self {
1073 Ok(Self {
1070 patterns,
1074 patterns,
1071 match_fn,
1075 match_fn,
1072 prefix,
1076 prefix,
1073 roots,
1077 roots,
1074 dirs,
1078 dirs,
1075 parents,
1079 parents,
1076 })
1080 })
1077 }
1081 }
1078
1082
1079 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1083 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1080 // TODO cache
1084 // TODO cache
1081 let thing = self
1085 let thing = self
1082 .dirs
1086 .dirs
1083 .iter()
1087 .iter()
1084 .chain(self.roots.iter())
1088 .chain(self.roots.iter())
1085 .chain(self.parents.iter());
1089 .chain(self.parents.iter());
1086 DirsChildrenMultiset::new(thing, Some(self.parents.iter()))
1090 DirsChildrenMultiset::new(thing, Some(self.parents.iter()))
1087 }
1091 }
1088
1092
1089 pub fn debug_get_patterns(&self) -> &[u8] {
1093 pub fn debug_get_patterns(&self) -> &[u8] {
1090 self.patterns.as_ref()
1094 self.patterns.as_ref()
1091 }
1095 }
1092 }
1096 }
1093
1097
1094 impl<'a> Display for IncludeMatcher<'a> {
1098 impl<'a> Display for IncludeMatcher<'a> {
1095 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1099 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1096 // XXX What about exact matches?
1100 // XXX What about exact matches?
1097 // I'm not sure it's worth it to clone the HashSet and keep it
1101 // I'm not sure it's worth it to clone the HashSet and keep it
1098 // around just in case someone wants to display the matcher, plus
1102 // around just in case someone wants to display the matcher, plus
1099 // it's going to be unreadable after a few entries, but we need to
1103 // it's going to be unreadable after a few entries, but we need to
1100 // inform in this display that exact matches are being used and are
1104 // inform in this display that exact matches are being used and are
1101 // (on purpose) missing from the `includes`.
1105 // (on purpose) missing from the `includes`.
1102 write!(
1106 write!(
1103 f,
1107 f,
1104 "IncludeMatcher(includes='{}')",
1108 "IncludeMatcher(includes='{}')",
1105 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1109 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1106 )
1110 )
1107 }
1111 }
1108 }
1112 }
1109
1113
1110 #[cfg(test)]
1114 #[cfg(test)]
1111 mod tests {
1115 mod tests {
1112 use super::*;
1116 use super::*;
1113 use pretty_assertions::assert_eq;
1117 use pretty_assertions::assert_eq;
1114 use std::collections::BTreeMap;
1118 use std::collections::BTreeMap;
1115 use std::collections::BTreeSet;
1119 use std::collections::BTreeSet;
1116 use std::fmt::Debug;
1120 use std::fmt::Debug;
1117 use std::path::Path;
1121 use std::path::Path;
1118
1122
1119 #[test]
1123 #[test]
1120 fn test_roots_and_dirs() {
1124 fn test_roots_and_dirs() {
1121 let pats = vec![
1125 let pats = vec![
1122 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1126 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1123 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1127 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1124 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1128 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1125 ];
1129 ];
1126 let (roots, dirs) = roots_and_dirs(&pats);
1130 let (roots, dirs) = roots_and_dirs(&pats);
1127
1131
1128 assert_eq!(
1132 assert_eq!(
1129 roots,
1133 roots,
1130 vec!(
1134 vec!(
1131 HgPathBuf::from_bytes(b"g/h"),
1135 HgPathBuf::from_bytes(b"g/h"),
1132 HgPathBuf::from_bytes(b"g/h"),
1136 HgPathBuf::from_bytes(b"g/h"),
1133 HgPathBuf::new()
1137 HgPathBuf::new()
1134 ),
1138 ),
1135 );
1139 );
1136 assert_eq!(dirs, vec!());
1140 assert_eq!(dirs, vec!());
1137 }
1141 }
1138
1142
1139 #[test]
1143 #[test]
1140 fn test_roots_dirs_and_parents() {
1144 fn test_roots_dirs_and_parents() {
1141 let pats = vec![
1145 let pats = vec![
1142 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1146 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1143 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1147 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1144 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1148 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1145 ];
1149 ];
1146
1150
1147 let mut roots = HashSet::new();
1151 let mut roots = HashSet::new();
1148 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1152 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1149 roots.insert(HgPathBuf::new());
1153 roots.insert(HgPathBuf::new());
1150
1154
1151 let dirs = HashSet::new();
1155 let dirs = HashSet::new();
1152
1156
1153 let parents = DirsMultiset::from_manifest(&[
1157 let parents = DirsMultiset::from_manifest(&[
1154 HgPathBuf::from_bytes(b"x"),
1158 HgPathBuf::from_bytes(b"x"),
1155 HgPathBuf::from_bytes(b"g/x"),
1159 HgPathBuf::from_bytes(b"g/x"),
1156 HgPathBuf::from_bytes(b"g/y"),
1160 HgPathBuf::from_bytes(b"g/y"),
1157 ])
1161 ])
1158 .unwrap();
1162 .unwrap();
1159
1163
1160 assert_eq!(
1164 assert_eq!(
1161 roots_dirs_and_parents(&pats).unwrap(),
1165 roots_dirs_and_parents(&pats).unwrap(),
1162 RootsDirsAndParents {
1166 RootsDirsAndParents {
1163 roots,
1167 roots,
1164 dirs,
1168 dirs,
1165 parents
1169 parents
1166 }
1170 }
1167 );
1171 );
1168 }
1172 }
1169
1173
1170 #[test]
1174 #[test]
1171 fn test_filematcher_visit_children_set() {
1175 fn test_filematcher_visit_children_set() {
1172 // Visitchildrenset
1176 // Visitchildrenset
1173 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1177 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1174 let matcher = FileMatcher::new(files).unwrap();
1178 let matcher = FileMatcher::new(files).unwrap();
1175
1179
1176 let mut set = HashSet::new();
1180 let mut set = HashSet::new();
1177 set.insert(HgPathBuf::from_bytes(b"dir"));
1181 set.insert(HgPathBuf::from_bytes(b"dir"));
1178 assert_eq!(
1182 assert_eq!(
1179 matcher.visit_children_set(HgPath::new(b"")),
1183 matcher.visit_children_set(HgPath::new(b"")),
1180 VisitChildrenSet::Set(set)
1184 VisitChildrenSet::Set(set)
1181 );
1185 );
1182
1186
1183 let mut set = HashSet::new();
1187 let mut set = HashSet::new();
1184 set.insert(HgPathBuf::from_bytes(b"subdir"));
1188 set.insert(HgPathBuf::from_bytes(b"subdir"));
1185 assert_eq!(
1189 assert_eq!(
1186 matcher.visit_children_set(HgPath::new(b"dir")),
1190 matcher.visit_children_set(HgPath::new(b"dir")),
1187 VisitChildrenSet::Set(set)
1191 VisitChildrenSet::Set(set)
1188 );
1192 );
1189
1193
1190 let mut set = HashSet::new();
1194 let mut set = HashSet::new();
1191 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1195 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1192 assert_eq!(
1196 assert_eq!(
1193 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1197 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1194 VisitChildrenSet::Set(set)
1198 VisitChildrenSet::Set(set)
1195 );
1199 );
1196
1200
1197 assert_eq!(
1201 assert_eq!(
1198 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1202 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1199 VisitChildrenSet::Empty
1203 VisitChildrenSet::Empty
1200 );
1204 );
1201 assert_eq!(
1205 assert_eq!(
1202 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1206 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1203 VisitChildrenSet::Empty
1207 VisitChildrenSet::Empty
1204 );
1208 );
1205 assert_eq!(
1209 assert_eq!(
1206 matcher.visit_children_set(HgPath::new(b"folder")),
1210 matcher.visit_children_set(HgPath::new(b"folder")),
1207 VisitChildrenSet::Empty
1211 VisitChildrenSet::Empty
1208 );
1212 );
1209 }
1213 }
1210
1214
1211 #[test]
1215 #[test]
1212 fn test_filematcher_visit_children_set_files_and_dirs() {
1216 fn test_filematcher_visit_children_set_files_and_dirs() {
1213 let files = vec![
1217 let files = vec![
1214 HgPathBuf::from_bytes(b"rootfile.txt"),
1218 HgPathBuf::from_bytes(b"rootfile.txt"),
1215 HgPathBuf::from_bytes(b"a/file1.txt"),
1219 HgPathBuf::from_bytes(b"a/file1.txt"),
1216 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1220 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1217 // No file in a/b/c
1221 // No file in a/b/c
1218 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1222 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1219 ];
1223 ];
1220 let matcher = FileMatcher::new(files).unwrap();
1224 let matcher = FileMatcher::new(files).unwrap();
1221
1225
1222 let mut set = HashSet::new();
1226 let mut set = HashSet::new();
1223 set.insert(HgPathBuf::from_bytes(b"a"));
1227 set.insert(HgPathBuf::from_bytes(b"a"));
1224 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1228 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1225 assert_eq!(
1229 assert_eq!(
1226 matcher.visit_children_set(HgPath::new(b"")),
1230 matcher.visit_children_set(HgPath::new(b"")),
1227 VisitChildrenSet::Set(set)
1231 VisitChildrenSet::Set(set)
1228 );
1232 );
1229
1233
1230 let mut set = HashSet::new();
1234 let mut set = HashSet::new();
1231 set.insert(HgPathBuf::from_bytes(b"b"));
1235 set.insert(HgPathBuf::from_bytes(b"b"));
1232 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1236 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1233 assert_eq!(
1237 assert_eq!(
1234 matcher.visit_children_set(HgPath::new(b"a")),
1238 matcher.visit_children_set(HgPath::new(b"a")),
1235 VisitChildrenSet::Set(set)
1239 VisitChildrenSet::Set(set)
1236 );
1240 );
1237
1241
1238 let mut set = HashSet::new();
1242 let mut set = HashSet::new();
1239 set.insert(HgPathBuf::from_bytes(b"c"));
1243 set.insert(HgPathBuf::from_bytes(b"c"));
1240 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1244 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1241 assert_eq!(
1245 assert_eq!(
1242 matcher.visit_children_set(HgPath::new(b"a/b")),
1246 matcher.visit_children_set(HgPath::new(b"a/b")),
1243 VisitChildrenSet::Set(set)
1247 VisitChildrenSet::Set(set)
1244 );
1248 );
1245
1249
1246 let mut set = HashSet::new();
1250 let mut set = HashSet::new();
1247 set.insert(HgPathBuf::from_bytes(b"d"));
1251 set.insert(HgPathBuf::from_bytes(b"d"));
1248 assert_eq!(
1252 assert_eq!(
1249 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1253 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1250 VisitChildrenSet::Set(set)
1254 VisitChildrenSet::Set(set)
1251 );
1255 );
1252 let mut set = HashSet::new();
1256 let mut set = HashSet::new();
1253 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1257 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1254 assert_eq!(
1258 assert_eq!(
1255 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1259 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1256 VisitChildrenSet::Set(set)
1260 VisitChildrenSet::Set(set)
1257 );
1261 );
1258
1262
1259 assert_eq!(
1263 assert_eq!(
1260 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1264 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1261 VisitChildrenSet::Empty
1265 VisitChildrenSet::Empty
1262 );
1266 );
1263 assert_eq!(
1267 assert_eq!(
1264 matcher.visit_children_set(HgPath::new(b"folder")),
1268 matcher.visit_children_set(HgPath::new(b"folder")),
1265 VisitChildrenSet::Empty
1269 VisitChildrenSet::Empty
1266 );
1270 );
1267 }
1271 }
1268
1272
1269 #[test]
1273 #[test]
1270 fn test_patternmatcher() {
1274 fn test_patternmatcher() {
1271 // VisitdirPrefix
1275 // VisitdirPrefix
1272 let m = PatternMatcher::new(vec![IgnorePattern::new(
1276 let m = PatternMatcher::new(vec![IgnorePattern::new(
1273 PatternSyntax::Path,
1277 PatternSyntax::Path,
1274 b"dir/subdir",
1278 b"dir/subdir",
1275 Path::new(""),
1279 Path::new(""),
1276 )])
1280 )])
1277 .unwrap();
1281 .unwrap();
1278 assert_eq!(
1282 assert_eq!(
1279 m.visit_children_set(HgPath::new(b"")),
1283 m.visit_children_set(HgPath::new(b"")),
1280 VisitChildrenSet::This
1284 VisitChildrenSet::This
1281 );
1285 );
1282 assert_eq!(
1286 assert_eq!(
1283 m.visit_children_set(HgPath::new(b"dir")),
1287 m.visit_children_set(HgPath::new(b"dir")),
1284 VisitChildrenSet::This
1288 VisitChildrenSet::This
1285 );
1289 );
1286 assert_eq!(
1290 assert_eq!(
1287 m.visit_children_set(HgPath::new(b"dir/subdir")),
1291 m.visit_children_set(HgPath::new(b"dir/subdir")),
1288 VisitChildrenSet::Recursive
1292 VisitChildrenSet::Recursive
1289 );
1293 );
1290 // OPT: This should probably be Recursive if its parent is?
1294 // OPT: This should probably be Recursive if its parent is?
1291 assert_eq!(
1295 assert_eq!(
1292 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1296 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1293 VisitChildrenSet::This
1297 VisitChildrenSet::This
1294 );
1298 );
1295 assert_eq!(
1299 assert_eq!(
1296 m.visit_children_set(HgPath::new(b"folder")),
1300 m.visit_children_set(HgPath::new(b"folder")),
1297 VisitChildrenSet::Empty
1301 VisitChildrenSet::Empty
1298 );
1302 );
1299
1303
1300 // VisitchildrensetPrefix
1304 // VisitchildrensetPrefix
1301 let m = PatternMatcher::new(vec![IgnorePattern::new(
1305 let m = PatternMatcher::new(vec![IgnorePattern::new(
1302 PatternSyntax::Path,
1306 PatternSyntax::Path,
1303 b"dir/subdir",
1307 b"dir/subdir",
1304 Path::new(""),
1308 Path::new(""),
1305 )])
1309 )])
1306 .unwrap();
1310 .unwrap();
1307 assert_eq!(
1311 assert_eq!(
1308 m.visit_children_set(HgPath::new(b"")),
1312 m.visit_children_set(HgPath::new(b"")),
1309 VisitChildrenSet::This
1313 VisitChildrenSet::This
1310 );
1314 );
1311 assert_eq!(
1315 assert_eq!(
1312 m.visit_children_set(HgPath::new(b"dir")),
1316 m.visit_children_set(HgPath::new(b"dir")),
1313 VisitChildrenSet::This
1317 VisitChildrenSet::This
1314 );
1318 );
1315 assert_eq!(
1319 assert_eq!(
1316 m.visit_children_set(HgPath::new(b"dir/subdir")),
1320 m.visit_children_set(HgPath::new(b"dir/subdir")),
1317 VisitChildrenSet::Recursive
1321 VisitChildrenSet::Recursive
1318 );
1322 );
1319 // OPT: This should probably be Recursive if its parent is?
1323 // OPT: This should probably be Recursive if its parent is?
1320 assert_eq!(
1324 assert_eq!(
1321 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1325 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1322 VisitChildrenSet::This
1326 VisitChildrenSet::This
1323 );
1327 );
1324 assert_eq!(
1328 assert_eq!(
1325 m.visit_children_set(HgPath::new(b"folder")),
1329 m.visit_children_set(HgPath::new(b"folder")),
1326 VisitChildrenSet::Empty
1330 VisitChildrenSet::Empty
1327 );
1331 );
1328
1332
1329 // VisitdirRootfilesin
1333 // VisitdirRootfilesin
1330 let m = PatternMatcher::new(vec![IgnorePattern::new(
1334 let m = PatternMatcher::new(vec![IgnorePattern::new(
1331 PatternSyntax::RootFilesIn,
1335 PatternSyntax::RootFilesIn,
1332 b"dir/subdir",
1336 b"dir/subdir",
1333 Path::new(""),
1337 Path::new(""),
1334 )])
1338 )])
1335 .unwrap();
1339 .unwrap();
1336 assert_eq!(
1340 assert_eq!(
1337 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1341 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1338 VisitChildrenSet::This
1342 VisitChildrenSet::This
1339 );
1343 );
1340 assert_eq!(
1344 assert_eq!(
1341 m.visit_children_set(HgPath::new(b"folder")),
1345 m.visit_children_set(HgPath::new(b"folder")),
1342 VisitChildrenSet::Empty
1346 VisitChildrenSet::Empty
1343 );
1347 );
1344 assert_eq!(
1348 assert_eq!(
1345 m.visit_children_set(HgPath::new(b"")),
1349 m.visit_children_set(HgPath::new(b"")),
1346 VisitChildrenSet::This
1350 VisitChildrenSet::This
1347 );
1351 );
1348 assert_eq!(
1352 assert_eq!(
1349 m.visit_children_set(HgPath::new(b"dir")),
1353 m.visit_children_set(HgPath::new(b"dir")),
1350 VisitChildrenSet::This
1354 VisitChildrenSet::This
1351 );
1355 );
1352 assert_eq!(
1356 assert_eq!(
1353 m.visit_children_set(HgPath::new(b"dir/subdir")),
1357 m.visit_children_set(HgPath::new(b"dir/subdir")),
1354 VisitChildrenSet::This
1358 VisitChildrenSet::This
1355 );
1359 );
1356
1360
1357 // VisitchildrensetRootfilesin
1361 // VisitchildrensetRootfilesin
1358 let m = PatternMatcher::new(vec![IgnorePattern::new(
1362 let m = PatternMatcher::new(vec![IgnorePattern::new(
1359 PatternSyntax::RootFilesIn,
1363 PatternSyntax::RootFilesIn,
1360 b"dir/subdir",
1364 b"dir/subdir",
1361 Path::new(""),
1365 Path::new(""),
1362 )])
1366 )])
1363 .unwrap();
1367 .unwrap();
1364 assert_eq!(
1368 assert_eq!(
1365 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1369 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1366 VisitChildrenSet::This
1370 VisitChildrenSet::This
1367 );
1371 );
1368 assert_eq!(
1372 assert_eq!(
1369 m.visit_children_set(HgPath::new(b"folder")),
1373 m.visit_children_set(HgPath::new(b"folder")),
1370 VisitChildrenSet::Empty
1374 VisitChildrenSet::Empty
1371 );
1375 );
1372 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1376 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1373 // respectively
1377 // respectively
1374 assert_eq!(
1378 assert_eq!(
1375 m.visit_children_set(HgPath::new(b"")),
1379 m.visit_children_set(HgPath::new(b"")),
1376 VisitChildrenSet::This
1380 VisitChildrenSet::This
1377 );
1381 );
1378 assert_eq!(
1382 assert_eq!(
1379 m.visit_children_set(HgPath::new(b"dir")),
1383 m.visit_children_set(HgPath::new(b"dir")),
1380 VisitChildrenSet::This
1384 VisitChildrenSet::This
1381 );
1385 );
1382 assert_eq!(
1386 assert_eq!(
1383 m.visit_children_set(HgPath::new(b"dir/subdir")),
1387 m.visit_children_set(HgPath::new(b"dir/subdir")),
1384 VisitChildrenSet::This
1388 VisitChildrenSet::This
1385 );
1389 );
1386
1390
1387 // VisitdirGlob
1391 // VisitdirGlob
1388 let m = PatternMatcher::new(vec![IgnorePattern::new(
1392 let m = PatternMatcher::new(vec![IgnorePattern::new(
1389 PatternSyntax::Glob,
1393 PatternSyntax::Glob,
1390 b"dir/z*",
1394 b"dir/z*",
1391 Path::new(""),
1395 Path::new(""),
1392 )])
1396 )])
1393 .unwrap();
1397 .unwrap();
1394 assert_eq!(
1398 assert_eq!(
1395 m.visit_children_set(HgPath::new(b"")),
1399 m.visit_children_set(HgPath::new(b"")),
1396 VisitChildrenSet::This
1400 VisitChildrenSet::This
1397 );
1401 );
1398 assert_eq!(
1402 assert_eq!(
1399 m.visit_children_set(HgPath::new(b"dir")),
1403 m.visit_children_set(HgPath::new(b"dir")),
1400 VisitChildrenSet::This
1404 VisitChildrenSet::This
1401 );
1405 );
1402 assert_eq!(
1406 assert_eq!(
1403 m.visit_children_set(HgPath::new(b"folder")),
1407 m.visit_children_set(HgPath::new(b"folder")),
1404 VisitChildrenSet::Empty
1408 VisitChildrenSet::Empty
1405 );
1409 );
1406 // OPT: these should probably be False.
1410 // OPT: these should probably be False.
1407 assert_eq!(
1411 assert_eq!(
1408 m.visit_children_set(HgPath::new(b"dir/subdir")),
1412 m.visit_children_set(HgPath::new(b"dir/subdir")),
1409 VisitChildrenSet::This
1413 VisitChildrenSet::This
1410 );
1414 );
1411 assert_eq!(
1415 assert_eq!(
1412 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1416 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1413 VisitChildrenSet::This
1417 VisitChildrenSet::This
1414 );
1418 );
1415
1419
1416 // VisitchildrensetGlob
1420 // VisitchildrensetGlob
1417 let m = PatternMatcher::new(vec![IgnorePattern::new(
1421 let m = PatternMatcher::new(vec![IgnorePattern::new(
1418 PatternSyntax::Glob,
1422 PatternSyntax::Glob,
1419 b"dir/z*",
1423 b"dir/z*",
1420 Path::new(""),
1424 Path::new(""),
1421 )])
1425 )])
1422 .unwrap();
1426 .unwrap();
1423 assert_eq!(
1427 assert_eq!(
1424 m.visit_children_set(HgPath::new(b"")),
1428 m.visit_children_set(HgPath::new(b"")),
1425 VisitChildrenSet::This
1429 VisitChildrenSet::This
1426 );
1430 );
1427 assert_eq!(
1431 assert_eq!(
1428 m.visit_children_set(HgPath::new(b"folder")),
1432 m.visit_children_set(HgPath::new(b"folder")),
1429 VisitChildrenSet::Empty
1433 VisitChildrenSet::Empty
1430 );
1434 );
1431 assert_eq!(
1435 assert_eq!(
1432 m.visit_children_set(HgPath::new(b"dir")),
1436 m.visit_children_set(HgPath::new(b"dir")),
1433 VisitChildrenSet::This
1437 VisitChildrenSet::This
1434 );
1438 );
1435 // OPT: these should probably be Empty
1439 // OPT: these should probably be Empty
1436 assert_eq!(
1440 assert_eq!(
1437 m.visit_children_set(HgPath::new(b"dir/subdir")),
1441 m.visit_children_set(HgPath::new(b"dir/subdir")),
1438 VisitChildrenSet::This
1442 VisitChildrenSet::This
1439 );
1443 );
1440 assert_eq!(
1444 assert_eq!(
1441 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1445 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1442 VisitChildrenSet::This
1446 VisitChildrenSet::This
1443 );
1447 );
1444
1448
1445 // VisitdirFilepath
1449 // VisitdirFilepath
1446 let m = PatternMatcher::new(vec![IgnorePattern::new(
1450 let m = PatternMatcher::new(vec![IgnorePattern::new(
1447 PatternSyntax::FilePath,
1451 PatternSyntax::FilePath,
1448 b"dir/z",
1452 b"dir/z",
1449 Path::new(""),
1453 Path::new(""),
1450 )])
1454 )])
1451 .unwrap();
1455 .unwrap();
1452 assert_eq!(
1456 assert_eq!(
1453 m.visit_children_set(HgPath::new(b"")),
1457 m.visit_children_set(HgPath::new(b"")),
1454 VisitChildrenSet::This
1458 VisitChildrenSet::This
1455 );
1459 );
1456 assert_eq!(
1460 assert_eq!(
1457 m.visit_children_set(HgPath::new(b"dir")),
1461 m.visit_children_set(HgPath::new(b"dir")),
1458 VisitChildrenSet::This
1462 VisitChildrenSet::This
1459 );
1463 );
1460 assert_eq!(
1464 assert_eq!(
1461 m.visit_children_set(HgPath::new(b"folder")),
1465 m.visit_children_set(HgPath::new(b"folder")),
1462 VisitChildrenSet::Empty
1466 VisitChildrenSet::Empty
1463 );
1467 );
1464 assert_eq!(
1468 assert_eq!(
1465 m.visit_children_set(HgPath::new(b"dir/subdir")),
1469 m.visit_children_set(HgPath::new(b"dir/subdir")),
1466 VisitChildrenSet::Empty
1470 VisitChildrenSet::Empty
1467 );
1471 );
1468 assert_eq!(
1472 assert_eq!(
1469 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1473 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1470 VisitChildrenSet::Empty
1474 VisitChildrenSet::Empty
1471 );
1475 );
1472
1476
1473 // VisitchildrensetFilepath
1477 // VisitchildrensetFilepath
1474 let m = PatternMatcher::new(vec![IgnorePattern::new(
1478 let m = PatternMatcher::new(vec![IgnorePattern::new(
1475 PatternSyntax::FilePath,
1479 PatternSyntax::FilePath,
1476 b"dir/z",
1480 b"dir/z",
1477 Path::new(""),
1481 Path::new(""),
1478 )])
1482 )])
1479 .unwrap();
1483 .unwrap();
1480 assert_eq!(
1484 assert_eq!(
1481 m.visit_children_set(HgPath::new(b"")),
1485 m.visit_children_set(HgPath::new(b"")),
1482 VisitChildrenSet::This
1486 VisitChildrenSet::This
1483 );
1487 );
1484 assert_eq!(
1488 assert_eq!(
1485 m.visit_children_set(HgPath::new(b"folder")),
1489 m.visit_children_set(HgPath::new(b"folder")),
1486 VisitChildrenSet::Empty
1490 VisitChildrenSet::Empty
1487 );
1491 );
1488 assert_eq!(
1492 assert_eq!(
1489 m.visit_children_set(HgPath::new(b"dir")),
1493 m.visit_children_set(HgPath::new(b"dir")),
1490 VisitChildrenSet::This
1494 VisitChildrenSet::This
1491 );
1495 );
1492 assert_eq!(
1496 assert_eq!(
1493 m.visit_children_set(HgPath::new(b"dir/subdir")),
1497 m.visit_children_set(HgPath::new(b"dir/subdir")),
1494 VisitChildrenSet::Empty
1498 VisitChildrenSet::Empty
1495 );
1499 );
1496 assert_eq!(
1500 assert_eq!(
1497 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1501 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1498 VisitChildrenSet::Empty
1502 VisitChildrenSet::Empty
1499 );
1503 );
1500 }
1504 }
1501
1505
1502 #[test]
1506 #[test]
1503 fn test_includematcher() {
1507 fn test_includematcher() {
1504 // VisitchildrensetPrefix
1508 // VisitchildrensetPrefix
1505 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1509 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1506 PatternSyntax::RelPath,
1510 PatternSyntax::RelPath,
1507 b"dir/subdir",
1511 b"dir/subdir",
1508 Path::new(""),
1512 Path::new(""),
1509 )])
1513 )])
1510 .unwrap();
1514 .unwrap();
1511
1515
1512 let mut set = HashSet::new();
1516 let mut set = HashSet::new();
1513 set.insert(HgPathBuf::from_bytes(b"dir"));
1517 set.insert(HgPathBuf::from_bytes(b"dir"));
1514 assert_eq!(
1518 assert_eq!(
1515 matcher.visit_children_set(HgPath::new(b"")),
1519 matcher.visit_children_set(HgPath::new(b"")),
1516 VisitChildrenSet::Set(set)
1520 VisitChildrenSet::Set(set)
1517 );
1521 );
1518
1522
1519 let mut set = HashSet::new();
1523 let mut set = HashSet::new();
1520 set.insert(HgPathBuf::from_bytes(b"subdir"));
1524 set.insert(HgPathBuf::from_bytes(b"subdir"));
1521 assert_eq!(
1525 assert_eq!(
1522 matcher.visit_children_set(HgPath::new(b"dir")),
1526 matcher.visit_children_set(HgPath::new(b"dir")),
1523 VisitChildrenSet::Set(set)
1527 VisitChildrenSet::Set(set)
1524 );
1528 );
1525 assert_eq!(
1529 assert_eq!(
1526 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1530 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1527 VisitChildrenSet::Recursive
1531 VisitChildrenSet::Recursive
1528 );
1532 );
1529 // OPT: This should probably be 'all' if its parent is?
1533 // OPT: This should probably be 'all' if its parent is?
1530 assert_eq!(
1534 assert_eq!(
1531 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1535 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1532 VisitChildrenSet::This
1536 VisitChildrenSet::This
1533 );
1537 );
1534 assert_eq!(
1538 assert_eq!(
1535 matcher.visit_children_set(HgPath::new(b"folder")),
1539 matcher.visit_children_set(HgPath::new(b"folder")),
1536 VisitChildrenSet::Empty
1540 VisitChildrenSet::Empty
1537 );
1541 );
1538
1542
1539 // VisitchildrensetRootfilesin
1543 // VisitchildrensetRootfilesin
1540 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1544 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1541 PatternSyntax::RootFilesIn,
1545 PatternSyntax::RootFilesIn,
1542 b"dir/subdir",
1546 b"dir/subdir",
1543 Path::new(""),
1547 Path::new(""),
1544 )])
1548 )])
1545 .unwrap();
1549 .unwrap();
1546
1550
1547 let mut set = HashSet::new();
1551 let mut set = HashSet::new();
1548 set.insert(HgPathBuf::from_bytes(b"dir"));
1552 set.insert(HgPathBuf::from_bytes(b"dir"));
1549 assert_eq!(
1553 assert_eq!(
1550 matcher.visit_children_set(HgPath::new(b"")),
1554 matcher.visit_children_set(HgPath::new(b"")),
1551 VisitChildrenSet::Set(set)
1555 VisitChildrenSet::Set(set)
1552 );
1556 );
1553
1557
1554 let mut set = HashSet::new();
1558 let mut set = HashSet::new();
1555 set.insert(HgPathBuf::from_bytes(b"subdir"));
1559 set.insert(HgPathBuf::from_bytes(b"subdir"));
1556 assert_eq!(
1560 assert_eq!(
1557 matcher.visit_children_set(HgPath::new(b"dir")),
1561 matcher.visit_children_set(HgPath::new(b"dir")),
1558 VisitChildrenSet::Set(set)
1562 VisitChildrenSet::Set(set)
1559 );
1563 );
1560
1564
1561 assert_eq!(
1565 assert_eq!(
1562 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1566 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1563 VisitChildrenSet::This
1567 VisitChildrenSet::This
1564 );
1568 );
1565 assert_eq!(
1569 assert_eq!(
1566 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1570 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1567 VisitChildrenSet::Empty
1571 VisitChildrenSet::Empty
1568 );
1572 );
1569 assert_eq!(
1573 assert_eq!(
1570 matcher.visit_children_set(HgPath::new(b"folder")),
1574 matcher.visit_children_set(HgPath::new(b"folder")),
1571 VisitChildrenSet::Empty
1575 VisitChildrenSet::Empty
1572 );
1576 );
1573
1577
1574 // VisitchildrensetGlob
1578 // VisitchildrensetGlob
1575 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1579 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1576 PatternSyntax::Glob,
1580 PatternSyntax::Glob,
1577 b"dir/z*",
1581 b"dir/z*",
1578 Path::new(""),
1582 Path::new(""),
1579 )])
1583 )])
1580 .unwrap();
1584 .unwrap();
1581
1585
1582 let mut set = HashSet::new();
1586 let mut set = HashSet::new();
1583 set.insert(HgPathBuf::from_bytes(b"dir"));
1587 set.insert(HgPathBuf::from_bytes(b"dir"));
1584 assert_eq!(
1588 assert_eq!(
1585 matcher.visit_children_set(HgPath::new(b"")),
1589 matcher.visit_children_set(HgPath::new(b"")),
1586 VisitChildrenSet::Set(set)
1590 VisitChildrenSet::Set(set)
1587 );
1591 );
1588 assert_eq!(
1592 assert_eq!(
1589 matcher.visit_children_set(HgPath::new(b"folder")),
1593 matcher.visit_children_set(HgPath::new(b"folder")),
1590 VisitChildrenSet::Empty
1594 VisitChildrenSet::Empty
1591 );
1595 );
1592 assert_eq!(
1596 assert_eq!(
1593 matcher.visit_children_set(HgPath::new(b"dir")),
1597 matcher.visit_children_set(HgPath::new(b"dir")),
1594 VisitChildrenSet::This
1598 VisitChildrenSet::This
1595 );
1599 );
1596 // OPT: these should probably be set().
1600 // OPT: these should probably be set().
1597 assert_eq!(
1601 assert_eq!(
1598 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1602 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1599 VisitChildrenSet::This
1603 VisitChildrenSet::This
1600 );
1604 );
1601 assert_eq!(
1605 assert_eq!(
1602 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1606 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1603 VisitChildrenSet::This
1607 VisitChildrenSet::This
1604 );
1608 );
1605
1609
1606 // VisitchildrensetFilePath
1610 // VisitchildrensetFilePath
1607 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1611 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1608 PatternSyntax::FilePath,
1612 PatternSyntax::FilePath,
1609 b"dir/z",
1613 b"dir/z",
1610 Path::new(""),
1614 Path::new(""),
1611 )])
1615 )])
1612 .unwrap();
1616 .unwrap();
1613
1617
1614 let mut set = HashSet::new();
1618 let mut set = HashSet::new();
1615 set.insert(HgPathBuf::from_bytes(b"dir"));
1619 set.insert(HgPathBuf::from_bytes(b"dir"));
1616 assert_eq!(
1620 assert_eq!(
1617 matcher.visit_children_set(HgPath::new(b"")),
1621 matcher.visit_children_set(HgPath::new(b"")),
1618 VisitChildrenSet::Set(set)
1622 VisitChildrenSet::Set(set)
1619 );
1623 );
1620 assert_eq!(
1624 assert_eq!(
1621 matcher.visit_children_set(HgPath::new(b"folder")),
1625 matcher.visit_children_set(HgPath::new(b"folder")),
1622 VisitChildrenSet::Empty
1626 VisitChildrenSet::Empty
1623 );
1627 );
1624 let mut set = HashSet::new();
1628 let mut set = HashSet::new();
1625 set.insert(HgPathBuf::from_bytes(b"z"));
1629 set.insert(HgPathBuf::from_bytes(b"z"));
1626 assert_eq!(
1630 assert_eq!(
1627 matcher.visit_children_set(HgPath::new(b"dir")),
1631 matcher.visit_children_set(HgPath::new(b"dir")),
1628 VisitChildrenSet::Set(set)
1632 VisitChildrenSet::Set(set)
1629 );
1633 );
1630 // OPT: these should probably be set().
1634 // OPT: these should probably be set().
1631 assert_eq!(
1635 assert_eq!(
1632 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1636 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1633 VisitChildrenSet::Empty
1637 VisitChildrenSet::Empty
1634 );
1638 );
1635 assert_eq!(
1639 assert_eq!(
1636 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1640 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1637 VisitChildrenSet::Empty
1641 VisitChildrenSet::Empty
1638 );
1642 );
1639
1643
1640 // Test multiple patterns
1644 // Test multiple patterns
1641 let matcher = IncludeMatcher::new(vec![
1645 let matcher = IncludeMatcher::new(vec![
1642 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1646 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1643 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1647 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1644 ])
1648 ])
1645 .unwrap();
1649 .unwrap();
1646
1650
1647 assert_eq!(
1651 assert_eq!(
1648 matcher.visit_children_set(HgPath::new(b"")),
1652 matcher.visit_children_set(HgPath::new(b"")),
1649 VisitChildrenSet::This
1653 VisitChildrenSet::This
1650 );
1654 );
1651
1655
1652 // Test multiple patterns
1656 // Test multiple patterns
1653 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1657 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1654 PatternSyntax::Glob,
1658 PatternSyntax::Glob,
1655 b"**/*.exe",
1659 b"**/*.exe",
1656 Path::new(""),
1660 Path::new(""),
1657 )])
1661 )])
1658 .unwrap();
1662 .unwrap();
1659
1663
1660 assert_eq!(
1664 assert_eq!(
1661 matcher.visit_children_set(HgPath::new(b"")),
1665 matcher.visit_children_set(HgPath::new(b"")),
1662 VisitChildrenSet::This
1666 VisitChildrenSet::This
1663 );
1667 );
1664 }
1668 }
1665
1669
1666 #[test]
1670 #[test]
1667 fn test_unionmatcher() {
1671 fn test_unionmatcher() {
1668 // Path + Rootfiles
1672 // Path + Rootfiles
1669 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1673 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1670 PatternSyntax::RelPath,
1674 PatternSyntax::RelPath,
1671 b"dir/subdir",
1675 b"dir/subdir",
1672 Path::new(""),
1676 Path::new(""),
1673 )])
1677 )])
1674 .unwrap();
1678 .unwrap();
1675 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1679 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1676 PatternSyntax::RootFilesIn,
1680 PatternSyntax::RootFilesIn,
1677 b"dir",
1681 b"dir",
1678 Path::new(""),
1682 Path::new(""),
1679 )])
1683 )])
1680 .unwrap();
1684 .unwrap();
1681 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1685 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1682
1686
1683 let mut set = HashSet::new();
1687 let mut set = HashSet::new();
1684 set.insert(HgPathBuf::from_bytes(b"dir"));
1688 set.insert(HgPathBuf::from_bytes(b"dir"));
1685 assert_eq!(
1689 assert_eq!(
1686 matcher.visit_children_set(HgPath::new(b"")),
1690 matcher.visit_children_set(HgPath::new(b"")),
1687 VisitChildrenSet::Set(set)
1691 VisitChildrenSet::Set(set)
1688 );
1692 );
1689 assert_eq!(
1693 assert_eq!(
1690 matcher.visit_children_set(HgPath::new(b"dir")),
1694 matcher.visit_children_set(HgPath::new(b"dir")),
1691 VisitChildrenSet::This
1695 VisitChildrenSet::This
1692 );
1696 );
1693 assert_eq!(
1697 assert_eq!(
1694 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1698 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1695 VisitChildrenSet::Recursive
1699 VisitChildrenSet::Recursive
1696 );
1700 );
1697 assert_eq!(
1701 assert_eq!(
1698 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1702 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1699 VisitChildrenSet::Empty
1703 VisitChildrenSet::Empty
1700 );
1704 );
1701 assert_eq!(
1705 assert_eq!(
1702 matcher.visit_children_set(HgPath::new(b"folder")),
1706 matcher.visit_children_set(HgPath::new(b"folder")),
1703 VisitChildrenSet::Empty
1707 VisitChildrenSet::Empty
1704 );
1708 );
1705 assert_eq!(
1709 assert_eq!(
1706 matcher.visit_children_set(HgPath::new(b"folder")),
1710 matcher.visit_children_set(HgPath::new(b"folder")),
1707 VisitChildrenSet::Empty
1711 VisitChildrenSet::Empty
1708 );
1712 );
1709
1713
1710 // OPT: These next two could be 'all' instead of 'this'.
1714 // OPT: These next two could be 'all' instead of 'this'.
1711 assert_eq!(
1715 assert_eq!(
1712 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1716 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1713 VisitChildrenSet::This
1717 VisitChildrenSet::This
1714 );
1718 );
1715 assert_eq!(
1719 assert_eq!(
1716 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1720 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1717 VisitChildrenSet::This
1721 VisitChildrenSet::This
1718 );
1722 );
1719
1723
1720 // Path + unrelated Path
1724 // Path + unrelated Path
1721 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1725 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1722 PatternSyntax::RelPath,
1726 PatternSyntax::RelPath,
1723 b"dir/subdir",
1727 b"dir/subdir",
1724 Path::new(""),
1728 Path::new(""),
1725 )])
1729 )])
1726 .unwrap();
1730 .unwrap();
1727 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1731 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1728 PatternSyntax::RelPath,
1732 PatternSyntax::RelPath,
1729 b"folder",
1733 b"folder",
1730 Path::new(""),
1734 Path::new(""),
1731 )])
1735 )])
1732 .unwrap();
1736 .unwrap();
1733 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1737 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1734
1738
1735 let mut set = HashSet::new();
1739 let mut set = HashSet::new();
1736 set.insert(HgPathBuf::from_bytes(b"folder"));
1740 set.insert(HgPathBuf::from_bytes(b"folder"));
1737 set.insert(HgPathBuf::from_bytes(b"dir"));
1741 set.insert(HgPathBuf::from_bytes(b"dir"));
1738 assert_eq!(
1742 assert_eq!(
1739 matcher.visit_children_set(HgPath::new(b"")),
1743 matcher.visit_children_set(HgPath::new(b"")),
1740 VisitChildrenSet::Set(set)
1744 VisitChildrenSet::Set(set)
1741 );
1745 );
1742 let mut set = HashSet::new();
1746 let mut set = HashSet::new();
1743 set.insert(HgPathBuf::from_bytes(b"subdir"));
1747 set.insert(HgPathBuf::from_bytes(b"subdir"));
1744 assert_eq!(
1748 assert_eq!(
1745 matcher.visit_children_set(HgPath::new(b"dir")),
1749 matcher.visit_children_set(HgPath::new(b"dir")),
1746 VisitChildrenSet::Set(set)
1750 VisitChildrenSet::Set(set)
1747 );
1751 );
1748
1752
1749 assert_eq!(
1753 assert_eq!(
1750 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1754 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1751 VisitChildrenSet::Recursive
1755 VisitChildrenSet::Recursive
1752 );
1756 );
1753 assert_eq!(
1757 assert_eq!(
1754 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1758 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1755 VisitChildrenSet::Empty
1759 VisitChildrenSet::Empty
1756 );
1760 );
1757
1761
1758 assert_eq!(
1762 assert_eq!(
1759 matcher.visit_children_set(HgPath::new(b"folder")),
1763 matcher.visit_children_set(HgPath::new(b"folder")),
1760 VisitChildrenSet::Recursive
1764 VisitChildrenSet::Recursive
1761 );
1765 );
1762 // OPT: These next two could be 'all' instead of 'this'.
1766 // OPT: These next two could be 'all' instead of 'this'.
1763 assert_eq!(
1767 assert_eq!(
1764 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1768 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1765 VisitChildrenSet::This
1769 VisitChildrenSet::This
1766 );
1770 );
1767 assert_eq!(
1771 assert_eq!(
1768 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1772 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1769 VisitChildrenSet::This
1773 VisitChildrenSet::This
1770 );
1774 );
1771
1775
1772 // Path + subpath
1776 // Path + subpath
1773 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1777 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1774 PatternSyntax::RelPath,
1778 PatternSyntax::RelPath,
1775 b"dir/subdir/x",
1779 b"dir/subdir/x",
1776 Path::new(""),
1780 Path::new(""),
1777 )])
1781 )])
1778 .unwrap();
1782 .unwrap();
1779 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1783 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1780 PatternSyntax::RelPath,
1784 PatternSyntax::RelPath,
1781 b"dir/subdir",
1785 b"dir/subdir",
1782 Path::new(""),
1786 Path::new(""),
1783 )])
1787 )])
1784 .unwrap();
1788 .unwrap();
1785 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1789 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1786
1790
1787 let mut set = HashSet::new();
1791 let mut set = HashSet::new();
1788 set.insert(HgPathBuf::from_bytes(b"dir"));
1792 set.insert(HgPathBuf::from_bytes(b"dir"));
1789 assert_eq!(
1793 assert_eq!(
1790 matcher.visit_children_set(HgPath::new(b"")),
1794 matcher.visit_children_set(HgPath::new(b"")),
1791 VisitChildrenSet::Set(set)
1795 VisitChildrenSet::Set(set)
1792 );
1796 );
1793 let mut set = HashSet::new();
1797 let mut set = HashSet::new();
1794 set.insert(HgPathBuf::from_bytes(b"subdir"));
1798 set.insert(HgPathBuf::from_bytes(b"subdir"));
1795 assert_eq!(
1799 assert_eq!(
1796 matcher.visit_children_set(HgPath::new(b"dir")),
1800 matcher.visit_children_set(HgPath::new(b"dir")),
1797 VisitChildrenSet::Set(set)
1801 VisitChildrenSet::Set(set)
1798 );
1802 );
1799
1803
1800 assert_eq!(
1804 assert_eq!(
1801 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1805 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1802 VisitChildrenSet::Recursive
1806 VisitChildrenSet::Recursive
1803 );
1807 );
1804 assert_eq!(
1808 assert_eq!(
1805 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1809 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1806 VisitChildrenSet::Empty
1810 VisitChildrenSet::Empty
1807 );
1811 );
1808
1812
1809 assert_eq!(
1813 assert_eq!(
1810 matcher.visit_children_set(HgPath::new(b"folder")),
1814 matcher.visit_children_set(HgPath::new(b"folder")),
1811 VisitChildrenSet::Empty
1815 VisitChildrenSet::Empty
1812 );
1816 );
1813 assert_eq!(
1817 assert_eq!(
1814 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1818 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1815 VisitChildrenSet::Recursive
1819 VisitChildrenSet::Recursive
1816 );
1820 );
1817 // OPT: this should probably be 'all' not 'this'.
1821 // OPT: this should probably be 'all' not 'this'.
1818 assert_eq!(
1822 assert_eq!(
1819 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1823 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1820 VisitChildrenSet::This
1824 VisitChildrenSet::This
1821 );
1825 );
1822 }
1826 }
1823
1827
1824 #[test]
1828 #[test]
1825 fn test_intersectionmatcher() {
1829 fn test_intersectionmatcher() {
1826 // Include path + Include rootfiles
1830 // Include path + Include rootfiles
1827 let m1 = Box::new(
1831 let m1 = Box::new(
1828 IncludeMatcher::new(vec![IgnorePattern::new(
1832 IncludeMatcher::new(vec![IgnorePattern::new(
1829 PatternSyntax::RelPath,
1833 PatternSyntax::RelPath,
1830 b"dir/subdir",
1834 b"dir/subdir",
1831 Path::new(""),
1835 Path::new(""),
1832 )])
1836 )])
1833 .unwrap(),
1837 .unwrap(),
1834 );
1838 );
1835 let m2 = Box::new(
1839 let m2 = Box::new(
1836 IncludeMatcher::new(vec![IgnorePattern::new(
1840 IncludeMatcher::new(vec![IgnorePattern::new(
1837 PatternSyntax::RootFilesIn,
1841 PatternSyntax::RootFilesIn,
1838 b"dir",
1842 b"dir",
1839 Path::new(""),
1843 Path::new(""),
1840 )])
1844 )])
1841 .unwrap(),
1845 .unwrap(),
1842 );
1846 );
1843 let matcher = IntersectionMatcher::new(m1, m2);
1847 let matcher = IntersectionMatcher::new(m1, m2);
1844
1848
1845 let mut set = HashSet::new();
1849 let mut set = HashSet::new();
1846 set.insert(HgPathBuf::from_bytes(b"dir"));
1850 set.insert(HgPathBuf::from_bytes(b"dir"));
1847 assert_eq!(
1851 assert_eq!(
1848 matcher.visit_children_set(HgPath::new(b"")),
1852 matcher.visit_children_set(HgPath::new(b"")),
1849 VisitChildrenSet::Set(set)
1853 VisitChildrenSet::Set(set)
1850 );
1854 );
1851 assert_eq!(
1855 assert_eq!(
1852 matcher.visit_children_set(HgPath::new(b"dir")),
1856 matcher.visit_children_set(HgPath::new(b"dir")),
1853 VisitChildrenSet::This
1857 VisitChildrenSet::This
1854 );
1858 );
1855 assert_eq!(
1859 assert_eq!(
1856 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1860 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1857 VisitChildrenSet::Empty
1861 VisitChildrenSet::Empty
1858 );
1862 );
1859 assert_eq!(
1863 assert_eq!(
1860 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1864 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1861 VisitChildrenSet::Empty
1865 VisitChildrenSet::Empty
1862 );
1866 );
1863 assert_eq!(
1867 assert_eq!(
1864 matcher.visit_children_set(HgPath::new(b"folder")),
1868 matcher.visit_children_set(HgPath::new(b"folder")),
1865 VisitChildrenSet::Empty
1869 VisitChildrenSet::Empty
1866 );
1870 );
1867 assert_eq!(
1871 assert_eq!(
1868 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1872 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1869 VisitChildrenSet::Empty
1873 VisitChildrenSet::Empty
1870 );
1874 );
1871 assert_eq!(
1875 assert_eq!(
1872 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1876 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1873 VisitChildrenSet::Empty
1877 VisitChildrenSet::Empty
1874 );
1878 );
1875
1879
1876 // Non intersecting paths
1880 // Non intersecting paths
1877 let m1 = Box::new(
1881 let m1 = Box::new(
1878 IncludeMatcher::new(vec![IgnorePattern::new(
1882 IncludeMatcher::new(vec![IgnorePattern::new(
1879 PatternSyntax::RelPath,
1883 PatternSyntax::RelPath,
1880 b"dir/subdir",
1884 b"dir/subdir",
1881 Path::new(""),
1885 Path::new(""),
1882 )])
1886 )])
1883 .unwrap(),
1887 .unwrap(),
1884 );
1888 );
1885 let m2 = Box::new(
1889 let m2 = Box::new(
1886 IncludeMatcher::new(vec![IgnorePattern::new(
1890 IncludeMatcher::new(vec![IgnorePattern::new(
1887 PatternSyntax::RelPath,
1891 PatternSyntax::RelPath,
1888 b"folder",
1892 b"folder",
1889 Path::new(""),
1893 Path::new(""),
1890 )])
1894 )])
1891 .unwrap(),
1895 .unwrap(),
1892 );
1896 );
1893 let matcher = IntersectionMatcher::new(m1, m2);
1897 let matcher = IntersectionMatcher::new(m1, m2);
1894
1898
1895 assert_eq!(
1899 assert_eq!(
1896 matcher.visit_children_set(HgPath::new(b"")),
1900 matcher.visit_children_set(HgPath::new(b"")),
1897 VisitChildrenSet::Empty
1901 VisitChildrenSet::Empty
1898 );
1902 );
1899 assert_eq!(
1903 assert_eq!(
1900 matcher.visit_children_set(HgPath::new(b"dir")),
1904 matcher.visit_children_set(HgPath::new(b"dir")),
1901 VisitChildrenSet::Empty
1905 VisitChildrenSet::Empty
1902 );
1906 );
1903 assert_eq!(
1907 assert_eq!(
1904 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1908 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1905 VisitChildrenSet::Empty
1909 VisitChildrenSet::Empty
1906 );
1910 );
1907 assert_eq!(
1911 assert_eq!(
1908 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1912 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1909 VisitChildrenSet::Empty
1913 VisitChildrenSet::Empty
1910 );
1914 );
1911 assert_eq!(
1915 assert_eq!(
1912 matcher.visit_children_set(HgPath::new(b"folder")),
1916 matcher.visit_children_set(HgPath::new(b"folder")),
1913 VisitChildrenSet::Empty
1917 VisitChildrenSet::Empty
1914 );
1918 );
1915 assert_eq!(
1919 assert_eq!(
1916 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1920 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1917 VisitChildrenSet::Empty
1921 VisitChildrenSet::Empty
1918 );
1922 );
1919 assert_eq!(
1923 assert_eq!(
1920 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1924 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1921 VisitChildrenSet::Empty
1925 VisitChildrenSet::Empty
1922 );
1926 );
1923
1927
1924 // Nested paths
1928 // Nested paths
1925 let m1 = Box::new(
1929 let m1 = Box::new(
1926 IncludeMatcher::new(vec![IgnorePattern::new(
1930 IncludeMatcher::new(vec![IgnorePattern::new(
1927 PatternSyntax::RelPath,
1931 PatternSyntax::RelPath,
1928 b"dir/subdir/x",
1932 b"dir/subdir/x",
1929 Path::new(""),
1933 Path::new(""),
1930 )])
1934 )])
1931 .unwrap(),
1935 .unwrap(),
1932 );
1936 );
1933 let m2 = Box::new(
1937 let m2 = Box::new(
1934 IncludeMatcher::new(vec![IgnorePattern::new(
1938 IncludeMatcher::new(vec![IgnorePattern::new(
1935 PatternSyntax::RelPath,
1939 PatternSyntax::RelPath,
1936 b"dir/subdir",
1940 b"dir/subdir",
1937 Path::new(""),
1941 Path::new(""),
1938 )])
1942 )])
1939 .unwrap(),
1943 .unwrap(),
1940 );
1944 );
1941 let matcher = IntersectionMatcher::new(m1, m2);
1945 let matcher = IntersectionMatcher::new(m1, m2);
1942
1946
1943 let mut set = HashSet::new();
1947 let mut set = HashSet::new();
1944 set.insert(HgPathBuf::from_bytes(b"dir"));
1948 set.insert(HgPathBuf::from_bytes(b"dir"));
1945 assert_eq!(
1949 assert_eq!(
1946 matcher.visit_children_set(HgPath::new(b"")),
1950 matcher.visit_children_set(HgPath::new(b"")),
1947 VisitChildrenSet::Set(set)
1951 VisitChildrenSet::Set(set)
1948 );
1952 );
1949
1953
1950 let mut set = HashSet::new();
1954 let mut set = HashSet::new();
1951 set.insert(HgPathBuf::from_bytes(b"subdir"));
1955 set.insert(HgPathBuf::from_bytes(b"subdir"));
1952 assert_eq!(
1956 assert_eq!(
1953 matcher.visit_children_set(HgPath::new(b"dir")),
1957 matcher.visit_children_set(HgPath::new(b"dir")),
1954 VisitChildrenSet::Set(set)
1958 VisitChildrenSet::Set(set)
1955 );
1959 );
1956 let mut set = HashSet::new();
1960 let mut set = HashSet::new();
1957 set.insert(HgPathBuf::from_bytes(b"x"));
1961 set.insert(HgPathBuf::from_bytes(b"x"));
1958 assert_eq!(
1962 assert_eq!(
1959 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1963 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1960 VisitChildrenSet::Set(set)
1964 VisitChildrenSet::Set(set)
1961 );
1965 );
1962 assert_eq!(
1966 assert_eq!(
1963 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1967 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1964 VisitChildrenSet::Empty
1968 VisitChildrenSet::Empty
1965 );
1969 );
1966 assert_eq!(
1970 assert_eq!(
1967 matcher.visit_children_set(HgPath::new(b"folder")),
1971 matcher.visit_children_set(HgPath::new(b"folder")),
1968 VisitChildrenSet::Empty
1972 VisitChildrenSet::Empty
1969 );
1973 );
1970 assert_eq!(
1974 assert_eq!(
1971 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1975 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1972 VisitChildrenSet::Empty
1976 VisitChildrenSet::Empty
1973 );
1977 );
1974 // OPT: this should probably be 'all' not 'this'.
1978 // OPT: this should probably be 'all' not 'this'.
1975 assert_eq!(
1979 assert_eq!(
1976 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1980 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1977 VisitChildrenSet::This
1981 VisitChildrenSet::This
1978 );
1982 );
1979
1983
1980 // Diverging paths
1984 // Diverging paths
1981 let m1 = Box::new(
1985 let m1 = Box::new(
1982 IncludeMatcher::new(vec![IgnorePattern::new(
1986 IncludeMatcher::new(vec![IgnorePattern::new(
1983 PatternSyntax::RelPath,
1987 PatternSyntax::RelPath,
1984 b"dir/subdir/x",
1988 b"dir/subdir/x",
1985 Path::new(""),
1989 Path::new(""),
1986 )])
1990 )])
1987 .unwrap(),
1991 .unwrap(),
1988 );
1992 );
1989 let m2 = Box::new(
1993 let m2 = Box::new(
1990 IncludeMatcher::new(vec![IgnorePattern::new(
1994 IncludeMatcher::new(vec![IgnorePattern::new(
1991 PatternSyntax::RelPath,
1995 PatternSyntax::RelPath,
1992 b"dir/subdir/z",
1996 b"dir/subdir/z",
1993 Path::new(""),
1997 Path::new(""),
1994 )])
1998 )])
1995 .unwrap(),
1999 .unwrap(),
1996 );
2000 );
1997 let matcher = IntersectionMatcher::new(m1, m2);
2001 let matcher = IntersectionMatcher::new(m1, m2);
1998
2002
1999 // OPT: these next two could probably be Empty as well.
2003 // OPT: these next two could probably be Empty as well.
2000 let mut set = HashSet::new();
2004 let mut set = HashSet::new();
2001 set.insert(HgPathBuf::from_bytes(b"dir"));
2005 set.insert(HgPathBuf::from_bytes(b"dir"));
2002 assert_eq!(
2006 assert_eq!(
2003 matcher.visit_children_set(HgPath::new(b"")),
2007 matcher.visit_children_set(HgPath::new(b"")),
2004 VisitChildrenSet::Set(set)
2008 VisitChildrenSet::Set(set)
2005 );
2009 );
2006 // OPT: these next two could probably be Empty as well.
2010 // OPT: these next two could probably be Empty as well.
2007 let mut set = HashSet::new();
2011 let mut set = HashSet::new();
2008 set.insert(HgPathBuf::from_bytes(b"subdir"));
2012 set.insert(HgPathBuf::from_bytes(b"subdir"));
2009 assert_eq!(
2013 assert_eq!(
2010 matcher.visit_children_set(HgPath::new(b"dir")),
2014 matcher.visit_children_set(HgPath::new(b"dir")),
2011 VisitChildrenSet::Set(set)
2015 VisitChildrenSet::Set(set)
2012 );
2016 );
2013 assert_eq!(
2017 assert_eq!(
2014 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2018 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2015 VisitChildrenSet::Empty
2019 VisitChildrenSet::Empty
2016 );
2020 );
2017 assert_eq!(
2021 assert_eq!(
2018 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2022 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2019 VisitChildrenSet::Empty
2023 VisitChildrenSet::Empty
2020 );
2024 );
2021 assert_eq!(
2025 assert_eq!(
2022 matcher.visit_children_set(HgPath::new(b"folder")),
2026 matcher.visit_children_set(HgPath::new(b"folder")),
2023 VisitChildrenSet::Empty
2027 VisitChildrenSet::Empty
2024 );
2028 );
2025 assert_eq!(
2029 assert_eq!(
2026 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2030 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2027 VisitChildrenSet::Empty
2031 VisitChildrenSet::Empty
2028 );
2032 );
2029 assert_eq!(
2033 assert_eq!(
2030 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2034 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2031 VisitChildrenSet::Empty
2035 VisitChildrenSet::Empty
2032 );
2036 );
2033 }
2037 }
2034
2038
2035 #[test]
2039 #[test]
2036 fn test_differencematcher() {
2040 fn test_differencematcher() {
2037 // Two alwaysmatchers should function like a nevermatcher
2041 // Two alwaysmatchers should function like a nevermatcher
2038 let m1 = AlwaysMatcher;
2042 let m1 = AlwaysMatcher;
2039 let m2 = AlwaysMatcher;
2043 let m2 = AlwaysMatcher;
2040 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2044 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2041
2045
2042 for case in &[
2046 for case in &[
2043 &b""[..],
2047 &b""[..],
2044 b"dir",
2048 b"dir",
2045 b"dir/subdir",
2049 b"dir/subdir",
2046 b"dir/subdir/z",
2050 b"dir/subdir/z",
2047 b"dir/foo",
2051 b"dir/foo",
2048 b"dir/subdir/x",
2052 b"dir/subdir/x",
2049 b"folder",
2053 b"folder",
2050 ] {
2054 ] {
2051 assert_eq!(
2055 assert_eq!(
2052 matcher.visit_children_set(HgPath::new(case)),
2056 matcher.visit_children_set(HgPath::new(case)),
2053 VisitChildrenSet::Empty
2057 VisitChildrenSet::Empty
2054 );
2058 );
2055 }
2059 }
2056
2060
2057 // One always and one never should behave the same as an always
2061 // One always and one never should behave the same as an always
2058 let m1 = AlwaysMatcher;
2062 let m1 = AlwaysMatcher;
2059 let m2 = NeverMatcher;
2063 let m2 = NeverMatcher;
2060 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2064 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2061
2065
2062 for case in &[
2066 for case in &[
2063 &b""[..],
2067 &b""[..],
2064 b"dir",
2068 b"dir",
2065 b"dir/subdir",
2069 b"dir/subdir",
2066 b"dir/subdir/z",
2070 b"dir/subdir/z",
2067 b"dir/foo",
2071 b"dir/foo",
2068 b"dir/subdir/x",
2072 b"dir/subdir/x",
2069 b"folder",
2073 b"folder",
2070 ] {
2074 ] {
2071 assert_eq!(
2075 assert_eq!(
2072 matcher.visit_children_set(HgPath::new(case)),
2076 matcher.visit_children_set(HgPath::new(case)),
2073 VisitChildrenSet::Recursive
2077 VisitChildrenSet::Recursive
2074 );
2078 );
2075 }
2079 }
2076
2080
2077 // Two include matchers
2081 // Two include matchers
2078 let m1 = Box::new(
2082 let m1 = Box::new(
2079 IncludeMatcher::new(vec![IgnorePattern::new(
2083 IncludeMatcher::new(vec![IgnorePattern::new(
2080 PatternSyntax::RelPath,
2084 PatternSyntax::RelPath,
2081 b"dir/subdir",
2085 b"dir/subdir",
2082 Path::new("/repo"),
2086 Path::new("/repo"),
2083 )])
2087 )])
2084 .unwrap(),
2088 .unwrap(),
2085 );
2089 );
2086 let m2 = Box::new(
2090 let m2 = Box::new(
2087 IncludeMatcher::new(vec![IgnorePattern::new(
2091 IncludeMatcher::new(vec![IgnorePattern::new(
2088 PatternSyntax::RootFilesIn,
2092 PatternSyntax::RootFilesIn,
2089 b"dir",
2093 b"dir",
2090 Path::new("/repo"),
2094 Path::new("/repo"),
2091 )])
2095 )])
2092 .unwrap(),
2096 .unwrap(),
2093 );
2097 );
2094
2098
2095 let matcher = DifferenceMatcher::new(m1, m2);
2099 let matcher = DifferenceMatcher::new(m1, m2);
2096
2100
2097 let mut set = HashSet::new();
2101 let mut set = HashSet::new();
2098 set.insert(HgPathBuf::from_bytes(b"dir"));
2102 set.insert(HgPathBuf::from_bytes(b"dir"));
2099 assert_eq!(
2103 assert_eq!(
2100 matcher.visit_children_set(HgPath::new(b"")),
2104 matcher.visit_children_set(HgPath::new(b"")),
2101 VisitChildrenSet::Set(set)
2105 VisitChildrenSet::Set(set)
2102 );
2106 );
2103
2107
2104 let mut set = HashSet::new();
2108 let mut set = HashSet::new();
2105 set.insert(HgPathBuf::from_bytes(b"subdir"));
2109 set.insert(HgPathBuf::from_bytes(b"subdir"));
2106 assert_eq!(
2110 assert_eq!(
2107 matcher.visit_children_set(HgPath::new(b"dir")),
2111 matcher.visit_children_set(HgPath::new(b"dir")),
2108 VisitChildrenSet::Set(set)
2112 VisitChildrenSet::Set(set)
2109 );
2113 );
2110 assert_eq!(
2114 assert_eq!(
2111 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2115 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2112 VisitChildrenSet::Recursive
2116 VisitChildrenSet::Recursive
2113 );
2117 );
2114 assert_eq!(
2118 assert_eq!(
2115 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2119 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2116 VisitChildrenSet::Empty
2120 VisitChildrenSet::Empty
2117 );
2121 );
2118 assert_eq!(
2122 assert_eq!(
2119 matcher.visit_children_set(HgPath::new(b"folder")),
2123 matcher.visit_children_set(HgPath::new(b"folder")),
2120 VisitChildrenSet::Empty
2124 VisitChildrenSet::Empty
2121 );
2125 );
2122 assert_eq!(
2126 assert_eq!(
2123 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2127 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2124 VisitChildrenSet::This
2128 VisitChildrenSet::This
2125 );
2129 );
2126 assert_eq!(
2130 assert_eq!(
2127 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2131 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2128 VisitChildrenSet::This
2132 VisitChildrenSet::This
2129 );
2133 );
2130 }
2134 }
2131
2135
2132 mod invariants {
2136 mod invariants {
2133 pub mod visit_children_set {
2137 pub mod visit_children_set {
2134
2138
2135 use crate::{
2139 use crate::{
2136 matchers::{tests::Tree, Matcher, VisitChildrenSet},
2140 matchers::{tests::Tree, Matcher, VisitChildrenSet},
2137 utils::hg_path::HgPath,
2141 utils::hg_path::HgPath,
2138 };
2142 };
2139
2143
2140 #[allow(dead_code)]
2144 #[allow(dead_code)]
2141 #[derive(Debug)]
2145 #[derive(Debug)]
2142 struct Error<'a, M> {
2146 struct Error<'a, M> {
2143 matcher: &'a M,
2147 matcher: &'a M,
2144 path: &'a HgPath,
2148 path: &'a HgPath,
2145 matching: &'a Tree,
2149 matching: &'a Tree,
2146 visit_children_set: &'a VisitChildrenSet,
2150 visit_children_set: &'a VisitChildrenSet,
2147 }
2151 }
2148
2152
2149 fn holds(
2153 fn holds(
2150 matching: &Tree,
2154 matching: &Tree,
2151 not_matching: &Tree,
2155 not_matching: &Tree,
2152 vcs: &VisitChildrenSet,
2156 vcs: &VisitChildrenSet,
2153 ) -> bool {
2157 ) -> bool {
2154 match vcs {
2158 match vcs {
2155 VisitChildrenSet::Empty => matching.is_empty(),
2159 VisitChildrenSet::Empty => matching.is_empty(),
2156 VisitChildrenSet::This => {
2160 VisitChildrenSet::This => {
2157 // `This` does not come with any obligations.
2161 // `This` does not come with any obligations.
2158 true
2162 true
2159 }
2163 }
2160 VisitChildrenSet::Recursive => {
2164 VisitChildrenSet::Recursive => {
2161 // `Recursive` requires that *everything* in the
2165 // `Recursive` requires that *everything* in the
2162 // subtree matches. This
2166 // subtree matches. This
2163 // requirement is relied on for example in
2167 // requirement is relied on for example in
2164 // DifferenceMatcher implementation.
2168 // DifferenceMatcher implementation.
2165 not_matching.is_empty()
2169 not_matching.is_empty()
2166 }
2170 }
2167 VisitChildrenSet::Set(allowed_children) => {
2171 VisitChildrenSet::Set(allowed_children) => {
2168 // `allowed_children` does not distinguish between
2172 // `allowed_children` does not distinguish between
2169 // files and directories: if it's not included, it
2173 // files and directories: if it's not included, it
2170 // must not be matched.
2174 // must not be matched.
2171 for k in matching.dirs.keys() {
2175 for k in matching.dirs.keys() {
2172 if !(allowed_children.contains(k)) {
2176 if !(allowed_children.contains(k)) {
2173 return false;
2177 return false;
2174 }
2178 }
2175 }
2179 }
2176 for k in matching.files.iter() {
2180 for k in matching.files.iter() {
2177 if !(allowed_children.contains(k)) {
2181 if !(allowed_children.contains(k)) {
2178 return false;
2182 return false;
2179 }
2183 }
2180 }
2184 }
2181 true
2185 true
2182 }
2186 }
2183 }
2187 }
2184 }
2188 }
2185
2189
2186 pub fn check<M: Matcher + std::fmt::Debug>(
2190 pub fn check<M: Matcher + std::fmt::Debug>(
2187 matcher: &M,
2191 matcher: &M,
2188 path: &HgPath,
2192 path: &HgPath,
2189 matching: &Tree,
2193 matching: &Tree,
2190 not_matching: &Tree,
2194 not_matching: &Tree,
2191 visit_children_set: &VisitChildrenSet,
2195 visit_children_set: &VisitChildrenSet,
2192 ) {
2196 ) {
2193 if !holds(matching, not_matching, visit_children_set) {
2197 if !holds(matching, not_matching, visit_children_set) {
2194 panic!(
2198 panic!(
2195 "{:#?}",
2199 "{:#?}",
2196 Error {
2200 Error {
2197 matcher,
2201 matcher,
2198 path,
2202 path,
2199 visit_children_set,
2203 visit_children_set,
2200 matching
2204 matching
2201 }
2205 }
2202 )
2206 )
2203 }
2207 }
2204 }
2208 }
2205 }
2209 }
2206 }
2210 }
2207
2211
2208 #[derive(Debug, Clone)]
2212 #[derive(Debug, Clone)]
2209 pub struct Tree {
2213 pub struct Tree {
2210 files: BTreeSet<HgPathBuf>,
2214 files: BTreeSet<HgPathBuf>,
2211 dirs: BTreeMap<HgPathBuf, Tree>,
2215 dirs: BTreeMap<HgPathBuf, Tree>,
2212 }
2216 }
2213
2217
2214 impl Tree {
2218 impl Tree {
2215 fn len(&self) -> usize {
2219 fn len(&self) -> usize {
2216 let mut n = 0;
2220 let mut n = 0;
2217 n += self.files.len();
2221 n += self.files.len();
2218 for d in self.dirs.values() {
2222 for d in self.dirs.values() {
2219 n += d.len();
2223 n += d.len();
2220 }
2224 }
2221 n
2225 n
2222 }
2226 }
2223
2227
2224 fn is_empty(&self) -> bool {
2228 fn is_empty(&self) -> bool {
2225 self.files.is_empty() && self.dirs.is_empty()
2229 self.files.is_empty() && self.dirs.is_empty()
2226 }
2230 }
2227
2231
2228 fn make(
2232 fn make(
2229 files: BTreeSet<HgPathBuf>,
2233 files: BTreeSet<HgPathBuf>,
2230 dirs: BTreeMap<HgPathBuf, Tree>,
2234 dirs: BTreeMap<HgPathBuf, Tree>,
2231 ) -> Self {
2235 ) -> Self {
2232 Self {
2236 Self {
2233 files,
2237 files,
2234 dirs: dirs
2238 dirs: dirs
2235 .into_iter()
2239 .into_iter()
2236 .filter(|(_k, v)| (!(v.is_empty())))
2240 .filter(|(_k, v)| (!(v.is_empty())))
2237 .collect(),
2241 .collect(),
2238 }
2242 }
2239 }
2243 }
2240
2244
2241 fn filter_and_check<M: Matcher + Debug>(
2245 fn filter_and_check<M: Matcher + Debug>(
2242 &self,
2246 &self,
2243 m: &M,
2247 m: &M,
2244 path: &HgPath,
2248 path: &HgPath,
2245 ) -> (Self, Self) {
2249 ) -> (Self, Self) {
2246 let (files1, files2): (BTreeSet<HgPathBuf>, BTreeSet<HgPathBuf>) =
2250 let (files1, files2): (BTreeSet<HgPathBuf>, BTreeSet<HgPathBuf>) =
2247 self.files
2251 self.files
2248 .iter()
2252 .iter()
2249 .map(|v| v.to_owned())
2253 .map(|v| v.to_owned())
2250 .partition(|v| m.matches(&path.join(v)));
2254 .partition(|v| m.matches(&path.join(v)));
2251 let (dirs1, dirs2): (
2255 let (dirs1, dirs2): (
2252 BTreeMap<HgPathBuf, Tree>,
2256 BTreeMap<HgPathBuf, Tree>,
2253 BTreeMap<HgPathBuf, Tree>,
2257 BTreeMap<HgPathBuf, Tree>,
2254 ) = self
2258 ) = self
2255 .dirs
2259 .dirs
2256 .iter()
2260 .iter()
2257 .map(|(k, v)| {
2261 .map(|(k, v)| {
2258 let path = path.join(k);
2262 let path = path.join(k);
2259 let (t1, t2) = v.filter_and_check(m, &path);
2263 let (t1, t2) = v.filter_and_check(m, &path);
2260 ((k.clone(), t1), (k.clone(), t2))
2264 ((k.clone(), t1), (k.clone(), t2))
2261 })
2265 })
2262 .unzip();
2266 .unzip();
2263 let matching = Self::make(files1, dirs1);
2267 let matching = Self::make(files1, dirs1);
2264 let not_matching = Self::make(files2, dirs2);
2268 let not_matching = Self::make(files2, dirs2);
2265 let vcs = m.visit_children_set(path);
2269 let vcs = m.visit_children_set(path);
2266 invariants::visit_children_set::check(
2270 invariants::visit_children_set::check(
2267 m,
2271 m,
2268 path,
2272 path,
2269 &matching,
2273 &matching,
2270 &not_matching,
2274 &not_matching,
2271 &vcs,
2275 &vcs,
2272 );
2276 );
2273 (matching, not_matching)
2277 (matching, not_matching)
2274 }
2278 }
2275
2279
2276 fn check_matcher<M: Matcher + Debug>(
2280 fn check_matcher<M: Matcher + Debug>(
2277 &self,
2281 &self,
2278 m: &M,
2282 m: &M,
2279 expect_count: usize,
2283 expect_count: usize,
2280 ) {
2284 ) {
2281 let res = self.filter_and_check(m, &HgPathBuf::new());
2285 let res = self.filter_and_check(m, &HgPathBuf::new());
2282 if expect_count != res.0.len() {
2286 if expect_count != res.0.len() {
2283 eprintln!(
2287 eprintln!(
2284 "warning: expected {} matches, got {} for {:#?}",
2288 "warning: expected {} matches, got {} for {:#?}",
2285 expect_count,
2289 expect_count,
2286 res.0.len(),
2290 res.0.len(),
2287 m
2291 m
2288 );
2292 );
2289 }
2293 }
2290 }
2294 }
2291 }
2295 }
2292
2296
2293 fn mkdir(children: &[(&[u8], &Tree)]) -> Tree {
2297 fn mkdir(children: &[(&[u8], &Tree)]) -> Tree {
2294 let p = HgPathBuf::from_bytes;
2298 let p = HgPathBuf::from_bytes;
2295 let names = [
2299 let names = [
2296 p(b"a"),
2300 p(b"a"),
2297 p(b"b.txt"),
2301 p(b"b.txt"),
2298 p(b"file.txt"),
2302 p(b"file.txt"),
2299 p(b"c.c"),
2303 p(b"c.c"),
2300 p(b"c.h"),
2304 p(b"c.h"),
2301 p(b"dir1"),
2305 p(b"dir1"),
2302 p(b"dir2"),
2306 p(b"dir2"),
2303 p(b"subdir"),
2307 p(b"subdir"),
2304 ];
2308 ];
2305 let files: BTreeSet<HgPathBuf> = BTreeSet::from(names);
2309 let files: BTreeSet<HgPathBuf> = BTreeSet::from(names);
2306 let dirs = children
2310 let dirs = children
2307 .iter()
2311 .iter()
2308 .map(|(name, t)| (p(name), (*t).clone()))
2312 .map(|(name, t)| (p(name), (*t).clone()))
2309 .collect();
2313 .collect();
2310 Tree { files, dirs }
2314 Tree { files, dirs }
2311 }
2315 }
2312
2316
2313 fn make_example_tree() -> Tree {
2317 fn make_example_tree() -> Tree {
2314 let leaf = mkdir(&[]);
2318 let leaf = mkdir(&[]);
2315 let abc = mkdir(&[(b"d", &leaf)]);
2319 let abc = mkdir(&[(b"d", &leaf)]);
2316 let ab = mkdir(&[(b"c", &abc)]);
2320 let ab = mkdir(&[(b"c", &abc)]);
2317 let a = mkdir(&[(b"b", &ab)]);
2321 let a = mkdir(&[(b"b", &ab)]);
2318 let dir = mkdir(&[(b"subdir", &leaf), (b"subdir.c", &leaf)]);
2322 let dir = mkdir(&[(b"subdir", &leaf), (b"subdir.c", &leaf)]);
2319 mkdir(&[(b"dir", &dir), (b"dir1", &dir), (b"dir2", &dir), (b"a", &a)])
2323 mkdir(&[(b"dir", &dir), (b"dir1", &dir), (b"dir2", &dir), (b"a", &a)])
2320 }
2324 }
2321
2325
2322 #[test]
2326 #[test]
2323 fn test_pattern_matcher_visit_children_set() {
2327 fn test_pattern_matcher_visit_children_set() {
2324 let tree = make_example_tree();
2328 let tree = make_example_tree();
2325 let pattern_dir1_glob_c =
2329 let pattern_dir1_glob_c =
2326 PatternMatcher::new(vec![IgnorePattern::new(
2330 PatternMatcher::new(vec![IgnorePattern::new(
2327 PatternSyntax::Glob,
2331 PatternSyntax::Glob,
2328 b"dir1/*.c",
2332 b"dir1/*.c",
2329 Path::new(""),
2333 Path::new(""),
2330 )])
2334 )])
2331 .unwrap();
2335 .unwrap();
2332 let pattern_dir1 = || {
2336 let pattern_dir1 = || {
2333 PatternMatcher::new(vec![IgnorePattern::new(
2337 PatternMatcher::new(vec![IgnorePattern::new(
2334 PatternSyntax::Path,
2338 PatternSyntax::Path,
2335 b"dir1",
2339 b"dir1",
2336 Path::new(""),
2340 Path::new(""),
2337 )])
2341 )])
2338 .unwrap()
2342 .unwrap()
2339 };
2343 };
2340 let pattern_dir1_a = PatternMatcher::new(vec![IgnorePattern::new(
2344 let pattern_dir1_a = PatternMatcher::new(vec![IgnorePattern::new(
2341 PatternSyntax::Glob,
2345 PatternSyntax::Glob,
2342 b"dir1/a",
2346 b"dir1/a",
2343 Path::new(""),
2347 Path::new(""),
2344 )])
2348 )])
2345 .unwrap();
2349 .unwrap();
2346 let pattern_relglob_c = || {
2350 let pattern_relglob_c = || {
2347 PatternMatcher::new(vec![IgnorePattern::new(
2351 PatternMatcher::new(vec![IgnorePattern::new(
2348 PatternSyntax::RelGlob,
2352 PatternSyntax::RelGlob,
2349 b"*.c",
2353 b"*.c",
2350 Path::new(""),
2354 Path::new(""),
2351 )])
2355 )])
2352 .unwrap()
2356 .unwrap()
2353 };
2357 };
2354 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/b.txt")];
2358 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/b.txt")];
2355 let file_dir_subdir_b = FileMatcher::new(files).unwrap();
2359 let file_dir_subdir_b = FileMatcher::new(files).unwrap();
2356
2360
2357 let files = vec![
2361 let files = vec![
2358 HgPathBuf::from_bytes(b"file.txt"),
2362 HgPathBuf::from_bytes(b"file.txt"),
2359 HgPathBuf::from_bytes(b"a/file.txt"),
2363 HgPathBuf::from_bytes(b"a/file.txt"),
2360 HgPathBuf::from_bytes(b"a/b/file.txt"),
2364 HgPathBuf::from_bytes(b"a/b/file.txt"),
2361 // No file in a/b/c
2365 // No file in a/b/c
2362 HgPathBuf::from_bytes(b"a/b/c/d/file.txt"),
2366 HgPathBuf::from_bytes(b"a/b/c/d/file.txt"),
2363 ];
2367 ];
2364 let file_abcdfile = FileMatcher::new(files).unwrap();
2368 let file_abcdfile = FileMatcher::new(files).unwrap();
2365 let rootfilesin_dir = PatternMatcher::new(vec![IgnorePattern::new(
2369 let rootfilesin_dir = PatternMatcher::new(vec![IgnorePattern::new(
2366 PatternSyntax::RootFilesIn,
2370 PatternSyntax::RootFilesIn,
2367 b"dir",
2371 b"dir",
2368 Path::new(""),
2372 Path::new(""),
2369 )])
2373 )])
2370 .unwrap();
2374 .unwrap();
2371
2375
2372 let pattern_filepath_dir_subdir =
2376 let pattern_filepath_dir_subdir =
2373 PatternMatcher::new(vec![IgnorePattern::new(
2377 PatternMatcher::new(vec![IgnorePattern::new(
2374 PatternSyntax::FilePath,
2378 PatternSyntax::FilePath,
2375 b"dir/subdir",
2379 b"dir/subdir",
2376 Path::new(""),
2380 Path::new(""),
2377 )])
2381 )])
2378 .unwrap();
2382 .unwrap();
2379
2383
2380 let include_dir_subdir =
2384 let include_dir_subdir =
2381 IncludeMatcher::new(vec![IgnorePattern::new(
2385 IncludeMatcher::new(vec![IgnorePattern::new(
2382 PatternSyntax::RelPath,
2386 PatternSyntax::RelPath,
2383 b"dir/subdir",
2387 b"dir/subdir",
2384 Path::new(""),
2388 Path::new(""),
2385 )])
2389 )])
2386 .unwrap();
2390 .unwrap();
2387
2391
2388 let more_includematchers = [
2392 let more_includematchers = [
2389 IncludeMatcher::new(vec![IgnorePattern::new(
2393 IncludeMatcher::new(vec![IgnorePattern::new(
2390 PatternSyntax::Glob,
2394 PatternSyntax::Glob,
2391 b"dir/s*",
2395 b"dir/s*",
2392 Path::new(""),
2396 Path::new(""),
2393 )])
2397 )])
2394 .unwrap(),
2398 .unwrap(),
2395 // Test multiple patterns
2399 // Test multiple patterns
2396 IncludeMatcher::new(vec![
2400 IncludeMatcher::new(vec![
2397 IgnorePattern::new(
2401 IgnorePattern::new(
2398 PatternSyntax::RelPath,
2402 PatternSyntax::RelPath,
2399 b"dir",
2403 b"dir",
2400 Path::new(""),
2404 Path::new(""),
2401 ),
2405 ),
2402 IgnorePattern::new(PatternSyntax::Glob, b"s*", Path::new("")),
2406 IgnorePattern::new(PatternSyntax::Glob, b"s*", Path::new("")),
2403 ])
2407 ])
2404 .unwrap(),
2408 .unwrap(),
2405 // Test multiple patterns
2409 // Test multiple patterns
2406 IncludeMatcher::new(vec![IgnorePattern::new(
2410 IncludeMatcher::new(vec![IgnorePattern::new(
2407 PatternSyntax::Glob,
2411 PatternSyntax::Glob,
2408 b"**/*.c",
2412 b"**/*.c",
2409 Path::new(""),
2413 Path::new(""),
2410 )])
2414 )])
2411 .unwrap(),
2415 .unwrap(),
2412 ];
2416 ];
2413
2417
2414 tree.check_matcher(&pattern_dir1(), 25);
2418 tree.check_matcher(&pattern_dir1(), 25);
2415 tree.check_matcher(&pattern_dir1_a, 1);
2419 tree.check_matcher(&pattern_dir1_a, 1);
2416 tree.check_matcher(&pattern_dir1_glob_c, 2);
2420 tree.check_matcher(&pattern_dir1_glob_c, 2);
2417 tree.check_matcher(&pattern_relglob_c(), 14);
2421 tree.check_matcher(&pattern_relglob_c(), 14);
2418 tree.check_matcher(&AlwaysMatcher, 112);
2422 tree.check_matcher(&AlwaysMatcher, 112);
2419 tree.check_matcher(&NeverMatcher, 0);
2423 tree.check_matcher(&NeverMatcher, 0);
2420 tree.check_matcher(
2424 tree.check_matcher(
2421 &IntersectionMatcher::new(
2425 &IntersectionMatcher::new(
2422 Box::new(pattern_relglob_c()),
2426 Box::new(pattern_relglob_c()),
2423 Box::new(pattern_dir1()),
2427 Box::new(pattern_dir1()),
2424 ),
2428 ),
2425 3,
2429 3,
2426 );
2430 );
2427 tree.check_matcher(
2431 tree.check_matcher(
2428 &UnionMatcher::new(vec![
2432 &UnionMatcher::new(vec![
2429 Box::new(pattern_relglob_c()),
2433 Box::new(pattern_relglob_c()),
2430 Box::new(pattern_dir1()),
2434 Box::new(pattern_dir1()),
2431 ]),
2435 ]),
2432 36,
2436 36,
2433 );
2437 );
2434 tree.check_matcher(
2438 tree.check_matcher(
2435 &DifferenceMatcher::new(
2439 &DifferenceMatcher::new(
2436 Box::new(pattern_relglob_c()),
2440 Box::new(pattern_relglob_c()),
2437 Box::new(pattern_dir1()),
2441 Box::new(pattern_dir1()),
2438 ),
2442 ),
2439 11,
2443 11,
2440 );
2444 );
2441 tree.check_matcher(&file_dir_subdir_b, 1);
2445 tree.check_matcher(&file_dir_subdir_b, 1);
2442 tree.check_matcher(&file_abcdfile, 4);
2446 tree.check_matcher(&file_abcdfile, 4);
2443 tree.check_matcher(&rootfilesin_dir, 8);
2447 tree.check_matcher(&rootfilesin_dir, 8);
2444 tree.check_matcher(&pattern_filepath_dir_subdir, 1);
2448 tree.check_matcher(&pattern_filepath_dir_subdir, 1);
2445 tree.check_matcher(&include_dir_subdir, 9);
2449 tree.check_matcher(&include_dir_subdir, 9);
2446 tree.check_matcher(&more_includematchers[0], 17);
2450 tree.check_matcher(&more_includematchers[0], 17);
2447 tree.check_matcher(&more_includematchers[1], 25);
2451 tree.check_matcher(&more_includematchers[1], 25);
2448 tree.check_matcher(&more_includematchers[2], 35);
2452 tree.check_matcher(&more_includematchers[2], 35);
2449 }
2453 }
2450 }
2454 }
@@ -1,744 +1,744
1 use std::ascii::escape_default;
1 use std::ascii::escape_default;
2 use std::borrow::Cow;
2 use std::borrow::Cow;
3 use std::collections::BTreeMap;
3 use std::collections::BTreeMap;
4 use std::fmt::{Debug, Formatter};
4 use std::fmt::{Debug, Formatter};
5 use std::{iter, str};
5 use std::{iter, str};
6
6
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
7 use chrono::{DateTime, FixedOffset, NaiveDateTime};
8 use itertools::{Either, Itertools};
8 use itertools::{Either, Itertools};
9
9
10 use crate::errors::HgError;
10 use crate::errors::HgError;
11 use crate::revlog::Revision;
11 use crate::revlog::Revision;
12 use crate::revlog::{Node, NodePrefix};
12 use crate::revlog::{Node, NodePrefix};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
13 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
14 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
15 use crate::vfs::Vfs;
15 use crate::vfs::Vfs;
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
16 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
17
17
18 /// A specialized `Revlog` to work with changelog data format.
18 /// A specialized `Revlog` to work with changelog data format.
19 pub struct Changelog {
19 pub struct Changelog {
20 /// The generic `revlog` format.
20 /// The generic `revlog` format.
21 pub(crate) revlog: Revlog,
21 pub(crate) revlog: Revlog,
22 }
22 }
23
23
24 impl Changelog {
24 impl Changelog {
25 /// Open the `changelog` of a repository given by its root.
25 /// Open the `changelog` of a repository given by its root.
26 pub fn open(
26 pub fn open(
27 store_vfs: &Vfs,
27 store_vfs: &Vfs,
28 options: RevlogOpenOptions,
28 options: RevlogOpenOptions,
29 ) -> Result<Self, HgError> {
29 ) -> Result<Self, HgError> {
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
30 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
31 Ok(Self { revlog })
31 Ok(Self { revlog })
32 }
32 }
33
33
34 /// Return the `ChangelogRevisionData` for the given node ID.
34 /// Return the `ChangelogRevisionData` for the given node ID.
35 pub fn data_for_node(
35 pub fn data_for_node(
36 &self,
36 &self,
37 node: NodePrefix,
37 node: NodePrefix,
38 ) -> Result<ChangelogRevisionData, RevlogError> {
38 ) -> Result<ChangelogRevisionData, RevlogError> {
39 let rev = self.revlog.rev_from_node(node)?;
39 let rev = self.revlog.rev_from_node(node)?;
40 self.entry_for_checked_rev(rev)?.data()
40 self.entry_for_checked_rev(rev)?.data()
41 }
41 }
42
42
43 /// Return the [`ChangelogEntry`] for the given revision number.
43 /// Return the [`ChangelogEntry`] for the given revision number.
44 pub fn entry_for_rev(
44 pub fn entry_for_rev(
45 &self,
45 &self,
46 rev: UncheckedRevision,
46 rev: UncheckedRevision,
47 ) -> Result<ChangelogEntry, RevlogError> {
47 ) -> Result<ChangelogEntry, RevlogError> {
48 let revlog_entry = self.revlog.get_entry(rev)?;
48 let revlog_entry = self.revlog.get_entry(rev)?;
49 Ok(ChangelogEntry { revlog_entry })
49 Ok(ChangelogEntry { revlog_entry })
50 }
50 }
51
51
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
52 /// Same as [`Self::entry_for_rev`] for checked revisions.
53 fn entry_for_checked_rev(
53 fn entry_for_checked_rev(
54 &self,
54 &self,
55 rev: Revision,
55 rev: Revision,
56 ) -> Result<ChangelogEntry, RevlogError> {
56 ) -> Result<ChangelogEntry, RevlogError> {
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
57 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
58 Ok(ChangelogEntry { revlog_entry })
58 Ok(ChangelogEntry { revlog_entry })
59 }
59 }
60
60
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
61 /// Return the [`ChangelogRevisionData`] for the given revision number.
62 ///
62 ///
63 /// This is a useful shortcut in case the caller does not need the
63 /// This is a useful shortcut in case the caller does not need the
64 /// generic revlog information (parents, hashes etc). Otherwise
64 /// generic revlog information (parents, hashes etc). Otherwise
65 /// consider taking a [`ChangelogEntry`] with
65 /// consider taking a [`ChangelogEntry`] with
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
66 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
67 pub fn data_for_rev(
67 pub fn data_for_rev(
68 &self,
68 &self,
69 rev: UncheckedRevision,
69 rev: UncheckedRevision,
70 ) -> Result<ChangelogRevisionData, RevlogError> {
70 ) -> Result<ChangelogRevisionData, RevlogError> {
71 self.entry_for_rev(rev)?.data()
71 self.entry_for_rev(rev)?.data()
72 }
72 }
73
73
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
74 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
75 self.revlog.node_from_rev(rev)
75 self.revlog.node_from_rev(rev)
76 }
76 }
77
77
78 pub fn rev_from_node(
78 pub fn rev_from_node(
79 &self,
79 &self,
80 node: NodePrefix,
80 node: NodePrefix,
81 ) -> Result<Revision, RevlogError> {
81 ) -> Result<Revision, RevlogError> {
82 self.revlog.rev_from_node(node)
82 self.revlog.rev_from_node(node)
83 }
83 }
84 }
84 }
85
85
86 impl Graph for Changelog {
86 impl Graph for Changelog {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
87 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
88 self.revlog.parents(rev)
88 self.revlog.parents(rev)
89 }
89 }
90 }
90 }
91
91
92 /// A specialized `RevlogEntry` for `changelog` data format
92 /// A specialized `RevlogEntry` for `changelog` data format
93 ///
93 ///
94 /// This is a `RevlogEntry` with the added semantics that the associated
94 /// This is a `RevlogEntry` with the added semantics that the associated
95 /// data should meet the requirements for `changelog`, materialized by
95 /// data should meet the requirements for `changelog`, materialized by
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
96 /// the fact that `data()` constructs a `ChangelogRevisionData`.
97 /// In case that promise would be broken, the `data` method returns an error.
97 /// In case that promise would be broken, the `data` method returns an error.
98 #[derive(Clone)]
98 #[derive(Clone)]
99 pub struct ChangelogEntry<'changelog> {
99 pub struct ChangelogEntry<'changelog> {
100 /// Same data, as a generic `RevlogEntry`.
100 /// Same data, as a generic `RevlogEntry`.
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
101 pub(crate) revlog_entry: RevlogEntry<'changelog>,
102 }
102 }
103
103
104 impl<'changelog> ChangelogEntry<'changelog> {
104 impl<'changelog> ChangelogEntry<'changelog> {
105 pub fn data<'a>(
105 pub fn data<'a>(
106 &'a self,
106 &'a self,
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
107 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
108 let bytes = self.revlog_entry.data()?;
108 let bytes = self.revlog_entry.data()?;
109 if bytes.is_empty() {
109 if bytes.is_empty() {
110 Ok(ChangelogRevisionData::null())
110 Ok(ChangelogRevisionData::null())
111 } else {
111 } else {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
112 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
113 RevlogError::Other(HgError::CorruptedRepository(format!(
113 RevlogError::Other(HgError::CorruptedRepository(format!(
114 "Invalid changelog data for revision {}: {:?}",
114 "Invalid changelog data for revision {}: {:?}",
115 self.revlog_entry.revision(),
115 self.revlog_entry.revision(),
116 err
116 err
117 )))
117 )))
118 })?)
118 })?)
119 }
119 }
120 }
120 }
121
121
122 /// Obtain a reference to the underlying `RevlogEntry`.
122 /// Obtain a reference to the underlying `RevlogEntry`.
123 ///
123 ///
124 /// This allows the caller to access the information that is common
124 /// This allows the caller to access the information that is common
125 /// to all revlog entries: revision number, node id, parent revisions etc.
125 /// to all revlog entries: revision number, node id, parent revisions etc.
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
126 pub fn as_revlog_entry(&self) -> &RevlogEntry {
127 &self.revlog_entry
127 &self.revlog_entry
128 }
128 }
129
129
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
130 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 Ok(self
131 Ok(self
132 .revlog_entry
132 .revlog_entry
133 .p1_entry()?
133 .p1_entry()?
134 .map(|revlog_entry| Self { revlog_entry }))
134 .map(|revlog_entry| Self { revlog_entry }))
135 }
135 }
136
136
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
137 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
138 Ok(self
138 Ok(self
139 .revlog_entry
139 .revlog_entry
140 .p2_entry()?
140 .p2_entry()?
141 .map(|revlog_entry| Self { revlog_entry }))
141 .map(|revlog_entry| Self { revlog_entry }))
142 }
142 }
143 }
143 }
144
144
145 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
145 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
146 #[derive(PartialEq)]
146 #[derive(PartialEq)]
147 pub struct ChangelogRevisionData<'changelog> {
147 pub struct ChangelogRevisionData<'changelog> {
148 /// The data bytes of the `changelog` entry.
148 /// The data bytes of the `changelog` entry.
149 bytes: Cow<'changelog, [u8]>,
149 bytes: Cow<'changelog, [u8]>,
150 /// The end offset for the hex manifest (not including the newline)
150 /// The end offset for the hex manifest (not including the newline)
151 manifest_end: usize,
151 manifest_end: usize,
152 /// The end offset for the user+email (not including the newline)
152 /// The end offset for the user+email (not including the newline)
153 user_end: usize,
153 user_end: usize,
154 /// The end offset for the timestamp+timezone+extras (not including the
154 /// The end offset for the timestamp+timezone+extras (not including the
155 /// newline)
155 /// newline)
156 timestamp_end: usize,
156 timestamp_end: usize,
157 /// The end offset for the file list (not including the newline)
157 /// The end offset for the file list (not including the newline)
158 files_end: usize,
158 files_end: usize,
159 }
159 }
160
160
161 impl<'changelog> ChangelogRevisionData<'changelog> {
161 impl<'changelog> ChangelogRevisionData<'changelog> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
162 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
163 let mut line_iter = bytes.split(|b| b == &b'\n');
163 let mut line_iter = bytes.split(|b| b == &b'\n');
164 let manifest_end = line_iter
164 let manifest_end = line_iter
165 .next()
165 .next()
166 .expect("Empty iterator from split()?")
166 .expect("Empty iterator from split()?")
167 .len();
167 .len();
168 let user_slice = line_iter.next().ok_or_else(|| {
168 let user_slice = line_iter.next().ok_or_else(|| {
169 HgError::corrupted("Changeset data truncated after manifest line")
169 HgError::corrupted("Changeset data truncated after manifest line")
170 })?;
170 })?;
171 let user_end = manifest_end + 1 + user_slice.len();
171 let user_end = manifest_end + 1 + user_slice.len();
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
172 let timestamp_slice = line_iter.next().ok_or_else(|| {
173 HgError::corrupted("Changeset data truncated after user line")
173 HgError::corrupted("Changeset data truncated after user line")
174 })?;
174 })?;
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
175 let timestamp_end = user_end + 1 + timestamp_slice.len();
176 let mut files_end = timestamp_end + 1;
176 let mut files_end = timestamp_end + 1;
177 loop {
177 loop {
178 let line = line_iter.next().ok_or_else(|| {
178 let line = line_iter.next().ok_or_else(|| {
179 HgError::corrupted("Changeset data truncated in files list")
179 HgError::corrupted("Changeset data truncated in files list")
180 })?;
180 })?;
181 if line.is_empty() {
181 if line.is_empty() {
182 if files_end == bytes.len() {
182 if files_end == bytes.len() {
183 // The list of files ended with a single newline (there
183 // The list of files ended with a single newline (there
184 // should be two)
184 // should be two)
185 return Err(HgError::corrupted(
185 return Err(HgError::corrupted(
186 "Changeset data truncated after files list",
186 "Changeset data truncated after files list",
187 ));
187 ));
188 }
188 }
189 files_end -= 1;
189 files_end -= 1;
190 break;
190 break;
191 }
191 }
192 files_end += line.len() + 1;
192 files_end += line.len() + 1;
193 }
193 }
194
194
195 Ok(Self {
195 Ok(Self {
196 bytes,
196 bytes,
197 manifest_end,
197 manifest_end,
198 user_end,
198 user_end,
199 timestamp_end,
199 timestamp_end,
200 files_end,
200 files_end,
201 })
201 })
202 }
202 }
203
203
204 fn null() -> Self {
204 fn null() -> Self {
205 Self::new(Cow::Borrowed(
205 Self::new(Cow::Borrowed(
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
206 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
207 ))
207 ))
208 .unwrap()
208 .unwrap()
209 }
209 }
210
210
211 /// Return an iterator over the lines of the entry.
211 /// Return an iterator over the lines of the entry.
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
212 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
213 self.bytes.split(|b| b == &b'\n')
213 self.bytes.split(|b| b == &b'\n')
214 }
214 }
215
215
216 /// Return the node id of the `manifest` referenced by this `changelog`
216 /// Return the node id of the `manifest` referenced by this `changelog`
217 /// entry.
217 /// entry.
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
218 pub fn manifest_node(&self) -> Result<Node, HgError> {
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
219 let manifest_node_hex = &self.bytes[..self.manifest_end];
220 Node::from_hex_for_repo(manifest_node_hex)
220 Node::from_hex_for_repo(manifest_node_hex)
221 }
221 }
222
222
223 /// The full user string (usually a name followed by an email enclosed in
223 /// The full user string (usually a name followed by an email enclosed in
224 /// angle brackets)
224 /// angle brackets)
225 pub fn user(&self) -> &[u8] {
225 pub fn user(&self) -> &[u8] {
226 &self.bytes[self.manifest_end + 1..self.user_end]
226 &self.bytes[self.manifest_end + 1..self.user_end]
227 }
227 }
228
228
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
229 /// The full timestamp line (timestamp in seconds, offset in seconds, and
230 /// possibly extras)
230 /// possibly extras)
231 // TODO: We should expose this in a more useful way
231 // TODO: We should expose this in a more useful way
232 pub fn timestamp_line(&self) -> &[u8] {
232 pub fn timestamp_line(&self) -> &[u8] {
233 &self.bytes[self.user_end + 1..self.timestamp_end]
233 &self.bytes[self.user_end + 1..self.timestamp_end]
234 }
234 }
235
235
236 /// Parsed timestamp.
236 /// Parsed timestamp.
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
237 pub fn timestamp(&self) -> Result<DateTime<FixedOffset>, HgError> {
238 parse_timestamp(self.timestamp_line())
238 parse_timestamp(self.timestamp_line())
239 }
239 }
240
240
241 /// Optional commit extras.
241 /// Optional commit extras.
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
242 pub fn extra(&self) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
243 parse_timestamp_line_extra(self.timestamp_line())
243 parse_timestamp_line_extra(self.timestamp_line())
244 }
244 }
245
245
246 /// The files changed in this revision.
246 /// The files changed in this revision.
247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
247 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
248 if self.timestamp_end == self.files_end {
248 if self.timestamp_end == self.files_end {
249 Either::Left(iter::empty())
249 Either::Left(iter::empty())
250 } else {
250 } else {
251 Either::Right(
251 Either::Right(
252 self.bytes[self.timestamp_end + 1..self.files_end]
252 self.bytes[self.timestamp_end + 1..self.files_end]
253 .split(|b| b == &b'\n')
253 .split(|b| b == &b'\n')
254 .map(HgPath::new),
254 .map(HgPath::new),
255 )
255 )
256 }
256 }
257 }
257 }
258
258
259 /// The change description.
259 /// The change description.
260 pub fn description(&self) -> &[u8] {
260 pub fn description(&self) -> &[u8] {
261 &self.bytes[self.files_end + 2..]
261 &self.bytes[self.files_end + 2..]
262 }
262 }
263 }
263 }
264
264
265 impl Debug for ChangelogRevisionData<'_> {
265 impl Debug for ChangelogRevisionData<'_> {
266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
266 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
267 f.debug_struct("ChangelogRevisionData")
267 f.debug_struct("ChangelogRevisionData")
268 .field("bytes", &debug_bytes(&self.bytes))
268 .field("bytes", &debug_bytes(&self.bytes))
269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
269 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
270 .field(
270 .field(
271 "user",
271 "user",
272 &debug_bytes(
272 &debug_bytes(
273 &self.bytes[self.manifest_end + 1..self.user_end],
273 &self.bytes[self.manifest_end + 1..self.user_end],
274 ),
274 ),
275 )
275 )
276 .field(
276 .field(
277 "timestamp",
277 "timestamp",
278 &debug_bytes(
278 &debug_bytes(
279 &self.bytes[self.user_end + 1..self.timestamp_end],
279 &self.bytes[self.user_end + 1..self.timestamp_end],
280 ),
280 ),
281 )
281 )
282 .field(
282 .field(
283 "files",
283 "files",
284 &debug_bytes(
284 &debug_bytes(
285 &self.bytes[self.timestamp_end + 1..self.files_end],
285 &self.bytes[self.timestamp_end + 1..self.files_end],
286 ),
286 ),
287 )
287 )
288 .field(
288 .field(
289 "description",
289 "description",
290 &debug_bytes(&self.bytes[self.files_end + 2..]),
290 &debug_bytes(&self.bytes[self.files_end + 2..]),
291 )
291 )
292 .finish()
292 .finish()
293 }
293 }
294 }
294 }
295
295
296 fn debug_bytes(bytes: &[u8]) -> String {
296 fn debug_bytes(bytes: &[u8]) -> String {
297 String::from_utf8_lossy(
297 String::from_utf8_lossy(
298 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
298 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
299 )
299 )
300 .to_string()
300 .to_string()
301 }
301 }
302
302
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
303 /// Parse the raw bytes of the timestamp line from a changelog entry.
304 ///
304 ///
305 /// According to the documentation in `hg help dates` and the
305 /// According to the documentation in `hg help dates` and the
306 /// implementation in `changelog.py`, the format of the timestamp line
306 /// implementation in `changelog.py`, the format of the timestamp line
307 /// is `time tz extra\n` where:
307 /// is `time tz extra\n` where:
308 ///
308 ///
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
309 /// - `time` is an ASCII-encoded signed int or float denoting a UTC timestamp
310 /// as seconds since the UNIX epoch.
310 /// as seconds since the UNIX epoch.
311 ///
311 ///
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
312 /// - `tz` is the timezone offset as an ASCII-encoded signed integer denoting
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
313 /// seconds WEST of UTC (so negative for timezones east of UTC, which is the
314 /// opposite of the sign in ISO 8601 timestamps).
314 /// opposite of the sign in ISO 8601 timestamps).
315 ///
315 ///
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
316 /// - `extra` is an optional set of NUL-delimited key-value pairs, with the key
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
317 /// and value in each pair separated by an ASCII colon. Keys are limited to
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
318 /// ASCII letters, digits, hyphens, and underscores, whereas values can be
319 /// arbitrary bytes.
319 /// arbitrary bytes.
320 fn parse_timestamp(
320 fn parse_timestamp(
321 timestamp_line: &[u8],
321 timestamp_line: &[u8],
322 ) -> Result<DateTime<FixedOffset>, HgError> {
322 ) -> Result<DateTime<FixedOffset>, HgError> {
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
323 let mut parts = timestamp_line.splitn(3, |c| *c == b' ');
324
324
325 let timestamp_bytes = parts
325 let timestamp_bytes = parts
326 .next()
326 .next()
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
327 .ok_or_else(|| HgError::corrupted("missing timestamp"))?;
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
328 let timestamp_str = str::from_utf8(timestamp_bytes).map_err(|e| {
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
329 HgError::corrupted(format!("timestamp is not valid UTF-8: {e}"))
330 })?;
330 })?;
331 let timestamp_utc = timestamp_str
331 let timestamp_utc = timestamp_str
332 .parse()
332 .parse()
333 .map_err(|e| {
333 .map_err(|e| {
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
334 HgError::corrupted(format!("failed to parse timestamp: {e}"))
335 })
335 })
336 .and_then(|secs| {
336 .and_then(|secs| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
337 NaiveDateTime::from_timestamp_opt(secs, 0).ok_or_else(|| {
338 HgError::corrupted(format!(
338 HgError::corrupted(format!(
339 "integer timestamp out of valid range: {secs}"
339 "integer timestamp out of valid range: {secs}"
340 ))
340 ))
341 })
341 })
342 })
342 })
343 // Attempt to parse the timestamp as a float if we can't parse
343 // Attempt to parse the timestamp as a float if we can't parse
344 // it as an int. It doesn't seem like float timestamps are actually
344 // it as an int. It doesn't seem like float timestamps are actually
345 // used in practice, but the Python code supports them.
345 // used in practice, but the Python code supports them.
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
346 .or_else(|_| parse_float_timestamp(timestamp_str))?;
347
347
348 let timezone_bytes = parts
348 let timezone_bytes = parts
349 .next()
349 .next()
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
350 .ok_or_else(|| HgError::corrupted("missing timezone"))?;
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
351 let timezone_secs: i32 = str::from_utf8(timezone_bytes)
352 .map_err(|e| {
352 .map_err(|e| {
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
353 HgError::corrupted(format!("timezone is not valid UTF-8: {e}"))
354 })?
354 })?
355 .parse()
355 .parse()
356 .map_err(|e| {
356 .map_err(|e| {
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
357 HgError::corrupted(format!("timezone is not an integer: {e}"))
358 })?;
358 })?;
359 let timezone = FixedOffset::west_opt(timezone_secs)
359 let timezone = FixedOffset::west_opt(timezone_secs)
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
360 .ok_or_else(|| HgError::corrupted("timezone offset out of bounds"))?;
361
361
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
362 Ok(DateTime::from_naive_utc_and_offset(timestamp_utc, timezone))
363 }
363 }
364
364
365 /// Attempt to parse the given string as floating-point timestamp, and
365 /// Attempt to parse the given string as floating-point timestamp, and
366 /// convert the result into a `chrono::NaiveDateTime`.
366 /// convert the result into a `chrono::NaiveDateTime`.
367 fn parse_float_timestamp(
367 fn parse_float_timestamp(
368 timestamp_str: &str,
368 timestamp_str: &str,
369 ) -> Result<NaiveDateTime, HgError> {
369 ) -> Result<NaiveDateTime, HgError> {
370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
370 let timestamp = timestamp_str.parse::<f64>().map_err(|e| {
371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
371 HgError::corrupted(format!("failed to parse timestamp: {e}"))
372 })?;
372 })?;
373
373
374 // To construct a `NaiveDateTime` we'll need to convert the float
374 // To construct a `NaiveDateTime` we'll need to convert the float
375 // into signed integer seconds and unsigned integer nanoseconds.
375 // into signed integer seconds and unsigned integer nanoseconds.
376 let mut secs = timestamp.trunc() as i64;
376 let mut secs = timestamp.trunc() as i64;
377 let mut subsecs = timestamp.fract();
377 let mut subsecs = timestamp.fract();
378
378
379 // If the timestamp is negative, we need to express the fractional
379 // If the timestamp is negative, we need to express the fractional
380 // component as positive nanoseconds since the previous second.
380 // component as positive nanoseconds since the previous second.
381 if timestamp < 0.0 {
381 if timestamp < 0.0 {
382 secs -= 1;
382 secs -= 1;
383 subsecs += 1.0;
383 subsecs += 1.0;
384 }
384 }
385
385
386 // This cast should be safe because the fractional component is
386 // This cast should be safe because the fractional component is
387 // by definition less than 1.0, so this value should not exceed
387 // by definition less than 1.0, so this value should not exceed
388 // 1 billion, which is representable as an f64 without loss of
388 // 1 billion, which is representable as an f64 without loss of
389 // precision and should fit into a u32 without overflowing.
389 // precision and should fit into a u32 without overflowing.
390 //
390 //
391 // (Any loss of precision in the fractional component will have
391 // (Any loss of precision in the fractional component will have
392 // already happened at the time of initial parsing; in general,
392 // already happened at the time of initial parsing; in general,
393 // f64s are insufficiently precise to provide nanosecond-level
393 // f64s are insufficiently precise to provide nanosecond-level
394 // precision with present-day timestamps.)
394 // precision with present-day timestamps.)
395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
395 let nsecs = (subsecs * 1_000_000_000.0) as u32;
396
396
397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
397 NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or_else(|| {
398 HgError::corrupted(format!(
398 HgError::corrupted(format!(
399 "float timestamp out of valid range: {timestamp}"
399 "float timestamp out of valid range: {timestamp}"
400 ))
400 ))
401 })
401 })
402 }
402 }
403
403
404 /// Decode changeset extra fields.
404 /// Decode changeset extra fields.
405 ///
405 ///
406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
406 /// Extras are null-delimited key-value pairs where the key consists of ASCII
407 /// alphanumeric characters plus hyphens and underscores, and the value can
407 /// alphanumeric characters plus hyphens and underscores, and the value can
408 /// contain arbitrary bytes.
408 /// contain arbitrary bytes.
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
409 fn decode_extra(extra: &[u8]) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
410 extra
410 extra
411 .split(|c| *c == b'\0')
411 .split(|c| *c == b'\0')
412 .map(|pair| {
412 .map(|pair| {
413 let pair = unescape_extra(pair);
413 let pair = unescape_extra(pair);
414 let mut iter = pair.splitn(2, |c| *c == b':');
414 let mut iter = pair.splitn(2, |c| *c == b':');
415
415
416 let key_bytes =
416 let key_bytes =
417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
417 iter.next().filter(|k| !k.is_empty()).ok_or_else(|| {
418 HgError::corrupted("empty key in changeset extras")
418 HgError::corrupted("empty key in changeset extras")
419 })?;
419 })?;
420
420
421 let key = str::from_utf8(key_bytes)
421 let key = str::from_utf8(key_bytes)
422 .ok()
422 .ok()
423 .filter(|k| {
423 .filter(|k| {
424 k.chars().all(|c| {
424 k.chars().all(|c| {
425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
425 c.is_ascii_alphanumeric() || c == '_' || c == '-'
426 })
426 })
427 })
427 })
428 .ok_or_else(|| {
428 .ok_or_else(|| {
429 let key = String::from_utf8_lossy(key_bytes);
429 let key = String::from_utf8_lossy(key_bytes);
430 HgError::corrupted(format!(
430 HgError::corrupted(format!(
431 "invalid key in changeset extras: {key}",
431 "invalid key in changeset extras: {key}",
432 ))
432 ))
433 })?
433 })?
434 .to_string();
434 .to_string();
435
435
436 let value = iter.next().map(Into::into).ok_or_else(|| {
436 let value = iter.next().map(Into::into).ok_or_else(|| {
437 HgError::corrupted(format!(
437 HgError::corrupted(format!(
438 "missing value for changeset extra: {key}"
438 "missing value for changeset extra: {key}"
439 ))
439 ))
440 })?;
440 })?;
441
441
442 Ok((key, value))
442 Ok((key, value))
443 })
443 })
444 .collect()
444 .collect()
445 }
445 }
446
446
447 /// Parse the extra fields from a changeset's timestamp line.
447 /// Parse the extra fields from a changeset's timestamp line.
448 fn parse_timestamp_line_extra(
448 fn parse_timestamp_line_extra(
449 timestamp_line: &[u8],
449 timestamp_line: &[u8],
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
450 ) -> Result<BTreeMap<String, Vec<u8>>, HgError> {
451 Ok(timestamp_line
451 Ok(timestamp_line
452 .splitn(3, |c| *c == b' ')
452 .splitn(3, |c| *c == b' ')
453 .nth(2)
453 .nth(2)
454 .map(decode_extra)
454 .map(decode_extra)
455 .transpose()?
455 .transpose()?
456 .unwrap_or_default())
456 .unwrap_or_default())
457 }
457 }
458
458
459 /// Decode Mercurial's escaping for changelog extras.
459 /// Decode Mercurial's escaping for changelog extras.
460 ///
460 ///
461 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
461 /// The `_string_escape` function in `changelog.py` only escapes 4 characters
462 /// (null, backslash, newline, and carriage return) so we only decode those.
462 /// (null, backslash, newline, and carriage return) so we only decode those.
463 ///
463 ///
464 /// The Python code also includes a workaround for decoding escaped nuls
464 /// The Python code also includes a workaround for decoding escaped nuls
465 /// that are followed by an ASCII octal digit, since Python's built-in
465 /// that are followed by an ASCII octal digit, since Python's built-in
466 /// `string_escape` codec will interpret that as an escaped octal byte value.
466 /// `string_escape` codec will interpret that as an escaped octal byte value.
467 /// That workaround is omitted here since we don't support decoding octal.
467 /// That workaround is omitted here since we don't support decoding octal.
468 fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
468 fn unescape_extra(bytes: &[u8]) -> Vec<u8> {
469 let mut output = Vec::with_capacity(bytes.len());
469 let mut output = Vec::with_capacity(bytes.len());
470 let mut input = bytes.iter().copied();
470 let mut input = bytes.iter().copied();
471
471
472 while let Some(c) = input.next() {
472 while let Some(c) = input.next() {
473 if c != b'\\' {
473 if c != b'\\' {
474 output.push(c);
474 output.push(c);
475 continue;
475 continue;
476 }
476 }
477
477
478 match input.next() {
478 match input.next() {
479 Some(b'0') => output.push(b'\0'),
479 Some(b'0') => output.push(b'\0'),
480 Some(b'\\') => output.push(b'\\'),
480 Some(b'\\') => output.push(b'\\'),
481 Some(b'n') => output.push(b'\n'),
481 Some(b'n') => output.push(b'\n'),
482 Some(b'r') => output.push(b'\r'),
482 Some(b'r') => output.push(b'\r'),
483 // The following cases should never occur in theory because any
483 // The following cases should never occur in theory because any
484 // backslashes in the original input should have been escaped
484 // backslashes in the original input should have been escaped
485 // with another backslash, so it should not be possible to
485 // with another backslash, so it should not be possible to
486 // observe an escape sequence other than the 4 above.
486 // observe an escape sequence other than the 4 above.
487 Some(c) => output.extend_from_slice(&[b'\\', c]),
487 Some(c) => output.extend_from_slice(&[b'\\', c]),
488 None => output.push(b'\\'),
488 None => output.push(b'\\'),
489 }
489 }
490 }
490 }
491
491
492 output
492 output
493 }
493 }
494
494
495 #[cfg(test)]
495 #[cfg(test)]
496 mod tests {
496 mod tests {
497 use super::*;
497 use super::*;
498 use crate::vfs::Vfs;
498 use crate::vfs::Vfs;
499 use crate::NULL_REVISION;
499 use crate::NULL_REVISION;
500 use pretty_assertions::assert_eq;
500 use pretty_assertions::assert_eq;
501
501
502 #[test]
502 #[test]
503 fn test_create_changelogrevisiondata_invalid() {
503 fn test_create_changelogrevisiondata_invalid() {
504 // Completely empty
504 // Completely empty
505 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
505 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
506 // No newline after manifest
506 // No newline after manifest
507 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
507 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
508 // No newline after user
508 // No newline after user
509 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
509 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
510 // No newline after timestamp
510 // No newline after timestamp
511 assert!(
511 assert!(
512 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
512 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
513 );
513 );
514 // Missing newline after files
514 // Missing newline after files
515 assert!(ChangelogRevisionData::new(Cow::Borrowed(
515 assert!(ChangelogRevisionData::new(Cow::Borrowed(
516 b"abcd\n\n0 0\nfile1\nfile2"
516 b"abcd\n\n0 0\nfile1\nfile2"
517 ))
517 ))
518 .is_err(),);
518 .is_err(),);
519 // Only one newline after files
519 // Only one newline after files
520 assert!(ChangelogRevisionData::new(Cow::Borrowed(
520 assert!(ChangelogRevisionData::new(Cow::Borrowed(
521 b"abcd\n\n0 0\nfile1\nfile2\n"
521 b"abcd\n\n0 0\nfile1\nfile2\n"
522 ))
522 ))
523 .is_err(),);
523 .is_err(),);
524 }
524 }
525
525
526 #[test]
526 #[test]
527 fn test_create_changelogrevisiondata() {
527 fn test_create_changelogrevisiondata() {
528 let data = ChangelogRevisionData::new(Cow::Borrowed(
528 let data = ChangelogRevisionData::new(Cow::Borrowed(
529 b"0123456789abcdef0123456789abcdef01234567
529 b"0123456789abcdef0123456789abcdef01234567
530 Some One <someone@example.com>
530 Some One <someone@example.com>
531 0 0
531 0 0
532 file1
532 file1
533 file2
533 file2
534
534
535 some
535 some
536 commit
536 commit
537 message",
537 message",
538 ))
538 ))
539 .unwrap();
539 .unwrap();
540 assert_eq!(
540 assert_eq!(
541 data.manifest_node().unwrap(),
541 data.manifest_node().unwrap(),
542 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
542 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
543 .unwrap()
543 .unwrap()
544 );
544 );
545 assert_eq!(data.user(), b"Some One <someone@example.com>");
545 assert_eq!(data.user(), b"Some One <someone@example.com>");
546 assert_eq!(data.timestamp_line(), b"0 0");
546 assert_eq!(data.timestamp_line(), b"0 0");
547 assert_eq!(
547 assert_eq!(
548 data.files().collect_vec(),
548 data.files().collect_vec(),
549 vec![HgPath::new("file1"), HgPath::new("file2")]
549 vec![HgPath::new("file1"), HgPath::new("file2")]
550 );
550 );
551 assert_eq!(data.description(), b"some\ncommit\nmessage");
551 assert_eq!(data.description(), b"some\ncommit\nmessage");
552 }
552 }
553
553
554 #[test]
554 #[test]
555 fn test_data_from_rev_null() -> Result<(), RevlogError> {
555 fn test_data_from_rev_null() -> Result<(), RevlogError> {
556 // an empty revlog will be enough for this case
556 // an empty revlog will be enough for this case
557 let temp = tempfile::tempdir().unwrap();
557 let temp = tempfile::tempdir().unwrap();
558 let vfs = Vfs { base: temp.path() };
558 let vfs = Vfs { base: temp.path() };
559 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
559 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
560 let revlog =
560 let revlog =
561 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
561 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
562 .unwrap();
562 .unwrap();
563
563
564 let changelog = Changelog { revlog };
564 let changelog = Changelog { revlog };
565 assert_eq!(
565 assert_eq!(
566 changelog.data_for_rev(NULL_REVISION.into())?,
566 changelog.data_for_rev(NULL_REVISION.into())?,
567 ChangelogRevisionData::null()
567 ChangelogRevisionData::null()
568 );
568 );
569 // same with the intermediate entry object
569 // same with the intermediate entry object
570 assert_eq!(
570 assert_eq!(
571 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
571 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
572 ChangelogRevisionData::null()
572 ChangelogRevisionData::null()
573 );
573 );
574 Ok(())
574 Ok(())
575 }
575 }
576
576
577 #[test]
577 #[test]
578 fn test_empty_files_list() {
578 fn test_empty_files_list() {
579 assert!(ChangelogRevisionData::null()
579 assert!(ChangelogRevisionData::null()
580 .files()
580 .files()
581 .collect_vec()
581 .collect_vec()
582 .is_empty());
582 .is_empty());
583 }
583 }
584
584
585 #[test]
585 #[test]
586 fn test_unescape_basic() {
586 fn test_unescape_basic() {
587 // '\0', '\\', '\n', and '\r' are correctly unescaped.
587 // '\0', '\\', '\n', and '\r' are correctly unescaped.
588 let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
588 let expected = b"AAA\0BBB\\CCC\nDDD\rEEE";
589 let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
589 let escaped = br"AAA\0BBB\\CCC\nDDD\rEEE";
590 let unescaped = unescape_extra(escaped);
590 let unescaped = unescape_extra(escaped);
591 assert_eq!(&expected[..], &unescaped[..]);
591 assert_eq!(&expected[..], &unescaped[..]);
592 }
592 }
593
593
594 #[test]
594 #[test]
595 fn test_unescape_unsupported_sequence() {
595 fn test_unescape_unsupported_sequence() {
596 // Other escape sequences are left unaltered.
596 // Other escape sequences are left unaltered.
597 for c in 0u8..255 {
597 for c in 0u8..255 {
598 match c {
598 match c {
599 b'0' | b'\\' | b'n' | b'r' => continue,
599 b'0' | b'\\' | b'n' | b'r' => continue,
600 c => {
600 c => {
601 let expected = &[b'\\', c][..];
601 let expected = &[b'\\', c][..];
602 let unescaped = unescape_extra(expected);
602 let unescaped = unescape_extra(expected);
603 assert_eq!(expected, &unescaped[..]);
603 assert_eq!(expected, &unescaped[..]);
604 }
604 }
605 }
605 }
606 }
606 }
607 }
607 }
608
608
609 #[test]
609 #[test]
610 fn test_unescape_trailing_backslash() {
610 fn test_unescape_trailing_backslash() {
611 // Trailing backslashes are OK.
611 // Trailing backslashes are OK.
612 let expected = br"hi\";
612 let expected = br"hi\";
613 let unescaped = unescape_extra(expected);
613 let unescaped = unescape_extra(expected);
614 assert_eq!(&expected[..], &unescaped[..]);
614 assert_eq!(&expected[..], &unescaped[..]);
615 }
615 }
616
616
617 #[test]
617 #[test]
618 fn test_unescape_nul_followed_by_octal() {
618 fn test_unescape_nul_followed_by_octal() {
619 // Escaped NUL chars followed by octal digits are decoded correctly.
619 // Escaped NUL chars followed by octal digits are decoded correctly.
620 let expected = b"\012";
620 let expected = b"\x0012";
621 let escaped = br"\012";
621 let escaped = br"\012";
622 let unescaped = unescape_extra(escaped);
622 let unescaped = unescape_extra(escaped);
623 assert_eq!(&expected[..], &unescaped[..]);
623 assert_eq!(&expected[..], &unescaped[..]);
624 }
624 }
625
625
626 #[test]
626 #[test]
627 fn test_parse_float_timestamp() {
627 fn test_parse_float_timestamp() {
628 let test_cases = [
628 let test_cases = [
629 // Zero should map to the UNIX epoch.
629 // Zero should map to the UNIX epoch.
630 ("0.0", "1970-01-01 00:00:00"),
630 ("0.0", "1970-01-01 00:00:00"),
631 // Negative zero should be the same as positive zero.
631 // Negative zero should be the same as positive zero.
632 ("-0.0", "1970-01-01 00:00:00"),
632 ("-0.0", "1970-01-01 00:00:00"),
633 // Values without fractional components should work like integers.
633 // Values without fractional components should work like integers.
634 // (Assuming the timestamp is within the limits of f64 precision.)
634 // (Assuming the timestamp is within the limits of f64 precision.)
635 ("1115154970.0", "2005-05-03 21:16:10"),
635 ("1115154970.0", "2005-05-03 21:16:10"),
636 // We expect some loss of precision in the fractional component
636 // We expect some loss of precision in the fractional component
637 // when parsing arbitrary floating-point values.
637 // when parsing arbitrary floating-point values.
638 ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
638 ("1115154970.123456789", "2005-05-03 21:16:10.123456716"),
639 // But representable f64 values should parse losslessly.
639 // But representable f64 values should parse losslessly.
640 ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
640 ("1115154970.123456716", "2005-05-03 21:16:10.123456716"),
641 // Negative fractional components are subtracted from the epoch.
641 // Negative fractional components are subtracted from the epoch.
642 ("-1.333", "1969-12-31 23:59:58.667"),
642 ("-1.333", "1969-12-31 23:59:58.667"),
643 ];
643 ];
644
644
645 for (input, expected) in test_cases {
645 for (input, expected) in test_cases {
646 let res = parse_float_timestamp(input).unwrap().to_string();
646 let res = parse_float_timestamp(input).unwrap().to_string();
647 assert_eq!(res, expected);
647 assert_eq!(res, expected);
648 }
648 }
649 }
649 }
650
650
651 fn escape_extra(bytes: &[u8]) -> Vec<u8> {
651 fn escape_extra(bytes: &[u8]) -> Vec<u8> {
652 let mut output = Vec::with_capacity(bytes.len());
652 let mut output = Vec::with_capacity(bytes.len());
653
653
654 for c in bytes.iter().copied() {
654 for c in bytes.iter().copied() {
655 output.extend_from_slice(match c {
655 output.extend_from_slice(match c {
656 b'\0' => &b"\\0"[..],
656 b'\0' => &b"\\0"[..],
657 b'\\' => &b"\\\\"[..],
657 b'\\' => &b"\\\\"[..],
658 b'\n' => &b"\\n"[..],
658 b'\n' => &b"\\n"[..],
659 b'\r' => &b"\\r"[..],
659 b'\r' => &b"\\r"[..],
660 _ => {
660 _ => {
661 output.push(c);
661 output.push(c);
662 continue;
662 continue;
663 }
663 }
664 });
664 });
665 }
665 }
666
666
667 output
667 output
668 }
668 }
669
669
670 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
670 fn encode_extra<K, V>(pairs: impl IntoIterator<Item = (K, V)>) -> Vec<u8>
671 where
671 where
672 K: AsRef<[u8]>,
672 K: AsRef<[u8]>,
673 V: AsRef<[u8]>,
673 V: AsRef<[u8]>,
674 {
674 {
675 let extras = pairs.into_iter().map(|(k, v)| {
675 let extras = pairs.into_iter().map(|(k, v)| {
676 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
676 escape_extra(&[k.as_ref(), b":", v.as_ref()].concat())
677 });
677 });
678 // Use fully-qualified syntax to avoid a future naming conflict with
678 // Use fully-qualified syntax to avoid a future naming conflict with
679 // the standard library: https://github.com/rust-lang/rust/issues/79524
679 // the standard library: https://github.com/rust-lang/rust/issues/79524
680 Itertools::intersperse(extras, b"\0".to_vec()).concat()
680 Itertools::intersperse(extras, b"\0".to_vec()).concat()
681 }
681 }
682
682
683 #[test]
683 #[test]
684 fn test_decode_extra() {
684 fn test_decode_extra() {
685 let extra = [
685 let extra = [
686 ("branch".into(), b"default".to_vec()),
686 ("branch".into(), b"default".to_vec()),
687 ("key-with-hyphens".into(), b"value1".to_vec()),
687 ("key-with-hyphens".into(), b"value1".to_vec()),
688 ("key_with_underscores".into(), b"value2".to_vec()),
688 ("key_with_underscores".into(), b"value2".to_vec()),
689 ("empty-value".into(), b"".to_vec()),
689 ("empty-value".into(), b"".to_vec()),
690 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
690 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
691 ]
691 ]
692 .into_iter()
692 .into_iter()
693 .collect::<BTreeMap<String, Vec<u8>>>();
693 .collect::<BTreeMap<String, Vec<u8>>>();
694
694
695 let encoded = encode_extra(&extra);
695 let encoded = encode_extra(&extra);
696 let decoded = decode_extra(&encoded).unwrap();
696 let decoded = decode_extra(&encoded).unwrap();
697
697
698 assert_eq!(extra, decoded);
698 assert_eq!(extra, decoded);
699 }
699 }
700
700
701 #[test]
701 #[test]
702 fn test_corrupt_extra() {
702 fn test_corrupt_extra() {
703 let test_cases = [
703 let test_cases = [
704 (&b""[..], "empty input"),
704 (&b""[..], "empty input"),
705 (&b"\0"[..], "unexpected null byte"),
705 (&b"\0"[..], "unexpected null byte"),
706 (&b":empty-key"[..], "empty key"),
706 (&b":empty-key"[..], "empty key"),
707 (&b"\0leading-null:"[..], "leading null"),
707 (&b"\0leading-null:"[..], "leading null"),
708 (&b"trailing-null:\0"[..], "trailing null"),
708 (&b"trailing-null:\0"[..], "trailing null"),
709 (&b"missing-value"[..], "missing value"),
709 (&b"missing-value"[..], "missing value"),
710 (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
710 (&b"$!@# non-alphanum-key:"[..], "non-alphanumeric key"),
711 (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
711 (&b"\xF0\x9F\xA6\x80 non-ascii-key:"[..], "non-ASCII key"),
712 ];
712 ];
713
713
714 for (extra, msg) in test_cases {
714 for (extra, msg) in test_cases {
715 assert!(
715 assert!(
716 decode_extra(&extra).is_err(),
716 decode_extra(extra).is_err(),
717 "corrupt extra should have failed to parse: {}",
717 "corrupt extra should have failed to parse: {}",
718 msg
718 msg
719 );
719 );
720 }
720 }
721 }
721 }
722
722
723 #[test]
723 #[test]
724 fn test_parse_timestamp_line() {
724 fn test_parse_timestamp_line() {
725 let extra = [
725 let extra = [
726 ("branch".into(), b"default".to_vec()),
726 ("branch".into(), b"default".to_vec()),
727 ("key-with-hyphens".into(), b"value1".to_vec()),
727 ("key-with-hyphens".into(), b"value1".to_vec()),
728 ("key_with_underscores".into(), b"value2".to_vec()),
728 ("key_with_underscores".into(), b"value2".to_vec()),
729 ("empty-value".into(), b"".to_vec()),
729 ("empty-value".into(), b"".to_vec()),
730 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
730 ("binary-value".into(), (0u8..=255).collect::<Vec<_>>()),
731 ]
731 ]
732 .into_iter()
732 .into_iter()
733 .collect::<BTreeMap<String, Vec<u8>>>();
733 .collect::<BTreeMap<String, Vec<u8>>>();
734
734
735 let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
735 let mut line: Vec<u8> = b"1115154970 28800 ".to_vec();
736 line.extend_from_slice(&encode_extra(&extra));
736 line.extend_from_slice(&encode_extra(&extra));
737
737
738 let timestamp = parse_timestamp(&line).unwrap();
738 let timestamp = parse_timestamp(&line).unwrap();
739 assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
739 assert_eq!(&timestamp.to_rfc3339(), "2005-05-03T13:16:10-08:00");
740
740
741 let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
741 let parsed_extra = parse_timestamp_line_extra(&line).unwrap();
742 assert_eq!(extra, parsed_extra);
742 assert_eq!(extra, parsed_extra);
743 }
743 }
744 }
744 }
@@ -1,2032 +1,2035
1 use std::collections::{HashMap, HashSet};
1 use std::collections::{HashMap, HashSet};
2 use std::fmt::Debug;
2 use std::fmt::Debug;
3 use std::ops::Deref;
3 use std::ops::Deref;
4 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
4 use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
5
5
6 use bitvec::prelude::*;
6 use bitvec::prelude::*;
7 use byteorder::{BigEndian, ByteOrder};
7 use byteorder::{BigEndian, ByteOrder};
8 use bytes_cast::{unaligned, BytesCast};
8 use bytes_cast::{unaligned, BytesCast};
9
9
10 use super::REVIDX_KNOWN_FLAGS;
10 use super::REVIDX_KNOWN_FLAGS;
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
12 use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES};
13 use crate::revlog::node::Node;
13 use crate::revlog::node::Node;
14 use crate::revlog::{Revision, NULL_REVISION};
14 use crate::revlog::{Revision, NULL_REVISION};
15 use crate::{
15 use crate::{
16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
16 dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError,
17 RevlogIndex, UncheckedRevision,
17 RevlogIndex, UncheckedRevision,
18 };
18 };
19
19
20 pub const INDEX_ENTRY_SIZE: usize = 64;
20 pub const INDEX_ENTRY_SIZE: usize = 64;
21 pub const INDEX_HEADER_SIZE: usize = 4;
21 pub const INDEX_HEADER_SIZE: usize = 4;
22 pub const COMPRESSION_MODE_INLINE: u8 = 2;
22 pub const COMPRESSION_MODE_INLINE: u8 = 2;
23
23
24 #[derive(Debug)]
24 #[derive(Debug)]
25 pub struct IndexHeader {
25 pub struct IndexHeader {
26 pub(super) header_bytes: [u8; INDEX_HEADER_SIZE],
26 pub(super) header_bytes: [u8; INDEX_HEADER_SIZE],
27 }
27 }
28
28
29 #[derive(Copy, Clone)]
29 #[derive(Copy, Clone)]
30 pub struct IndexHeaderFlags {
30 pub struct IndexHeaderFlags {
31 flags: u16,
31 flags: u16,
32 }
32 }
33
33
34 /// Corresponds to the high bits of `_format_flags` in python
34 /// Corresponds to the high bits of `_format_flags` in python
35 impl IndexHeaderFlags {
35 impl IndexHeaderFlags {
36 /// Corresponds to FLAG_INLINE_DATA in python
36 /// Corresponds to FLAG_INLINE_DATA in python
37 pub fn is_inline(self) -> bool {
37 pub fn is_inline(self) -> bool {
38 self.flags & 1 != 0
38 self.flags & 1 != 0
39 }
39 }
40 /// Corresponds to FLAG_GENERALDELTA in python
40 /// Corresponds to FLAG_GENERALDELTA in python
41 pub fn uses_generaldelta(self) -> bool {
41 pub fn uses_generaldelta(self) -> bool {
42 self.flags & 2 != 0
42 self.flags & 2 != 0
43 }
43 }
44 }
44 }
45
45
46 /// Corresponds to the INDEX_HEADER structure,
46 /// Corresponds to the INDEX_HEADER structure,
47 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
47 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
48 impl IndexHeader {
48 impl IndexHeader {
49 fn format_flags(&self) -> IndexHeaderFlags {
49 fn format_flags(&self) -> IndexHeaderFlags {
50 // No "unknown flags" check here, unlike in python. Maybe there should
50 // No "unknown flags" check here, unlike in python. Maybe there should
51 // be.
51 // be.
52 IndexHeaderFlags {
52 IndexHeaderFlags {
53 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
53 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
54 }
54 }
55 }
55 }
56
56
57 /// The only revlog version currently supported by rhg.
57 /// The only revlog version currently supported by rhg.
58 const REVLOGV1: u16 = 1;
58 const REVLOGV1: u16 = 1;
59
59
60 /// Corresponds to `_format_version` in Python.
60 /// Corresponds to `_format_version` in Python.
61 fn format_version(&self) -> u16 {
61 fn format_version(&self) -> u16 {
62 BigEndian::read_u16(&self.header_bytes[2..4])
62 BigEndian::read_u16(&self.header_bytes[2..4])
63 }
63 }
64
64
65 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
65 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
66 if index_bytes.is_empty() {
66 if index_bytes.is_empty() {
67 return Ok(None);
67 return Ok(None);
68 }
68 }
69 if index_bytes.len() < 4 {
69 if index_bytes.len() < 4 {
70 return Err(HgError::corrupted(
70 return Err(HgError::corrupted(
71 "corrupted revlog: can't read the index format header",
71 "corrupted revlog: can't read the index format header",
72 ));
72 ));
73 }
73 }
74 Ok(Some(IndexHeader {
74 Ok(Some(IndexHeader {
75 header_bytes: {
75 header_bytes: {
76 let bytes: [u8; 4] =
76 let bytes: [u8; 4] =
77 index_bytes[0..4].try_into().expect("impossible");
77 index_bytes[0..4].try_into().expect("impossible");
78 bytes
78 bytes
79 },
79 },
80 }))
80 }))
81 }
81 }
82 }
82 }
83
83
84 /// Abstracts the access to the index bytes since they can be spread between
84 /// Abstracts the access to the index bytes since they can be spread between
85 /// the immutable (bytes) part and the mutable (added) part if any appends
85 /// the immutable (bytes) part and the mutable (added) part if any appends
86 /// happened. This makes it transparent for the callers.
86 /// happened. This makes it transparent for the callers.
87 struct IndexData {
87 struct IndexData {
88 /// Immutable bytes, most likely taken from disk
88 /// Immutable bytes, most likely taken from disk
89 bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
89 bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
90 /// Used when stripping index contents, keeps track of the start of the
90 /// Used when stripping index contents, keeps track of the start of the
91 /// first stripped revision, which is used to give a slice of the
91 /// first stripped revision, which is used to give a slice of the
92 /// `bytes` field.
92 /// `bytes` field.
93 truncation: Option<usize>,
93 truncation: Option<usize>,
94 /// Bytes that were added after reading the index
94 /// Bytes that were added after reading the index
95 added: Vec<u8>,
95 added: Vec<u8>,
96 first_entry: [u8; INDEX_ENTRY_SIZE],
96 first_entry: [u8; INDEX_ENTRY_SIZE],
97 }
97 }
98
98
99 impl IndexData {
99 impl IndexData {
100 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
100 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self {
101 let mut first_entry = [0; INDEX_ENTRY_SIZE];
101 let mut first_entry = [0; INDEX_ENTRY_SIZE];
102 if bytes.len() >= INDEX_ENTRY_SIZE {
102 if bytes.len() >= INDEX_ENTRY_SIZE {
103 first_entry[INDEX_HEADER_SIZE..]
103 first_entry[INDEX_HEADER_SIZE..]
104 .copy_from_slice(&bytes[INDEX_HEADER_SIZE..INDEX_ENTRY_SIZE])
104 .copy_from_slice(&bytes[INDEX_HEADER_SIZE..INDEX_ENTRY_SIZE])
105 }
105 }
106 Self {
106 Self {
107 bytes,
107 bytes,
108 truncation: None,
108 truncation: None,
109 added: vec![],
109 added: vec![],
110 first_entry,
110 first_entry,
111 }
111 }
112 }
112 }
113
113
114 pub fn len(&self) -> usize {
114 pub fn len(&self) -> usize {
115 match self.truncation {
115 match self.truncation {
116 Some(truncation) => truncation + self.added.len(),
116 Some(truncation) => truncation + self.added.len(),
117 None => self.bytes.len() + self.added.len(),
117 None => self.bytes.len() + self.added.len(),
118 }
118 }
119 }
119 }
120
120
121 fn remove(
121 fn remove(
122 &mut self,
122 &mut self,
123 rev: Revision,
123 rev: Revision,
124 offsets: Option<&[usize]>,
124 offsets: Option<&[usize]>,
125 ) -> Result<(), RevlogError> {
125 ) -> Result<(), RevlogError> {
126 let rev = rev.0 as usize;
126 let rev = rev.0 as usize;
127 let truncation = if let Some(offsets) = offsets {
127 let truncation = if let Some(offsets) = offsets {
128 offsets[rev]
128 offsets[rev]
129 } else {
129 } else {
130 rev * INDEX_ENTRY_SIZE
130 rev * INDEX_ENTRY_SIZE
131 };
131 };
132 if truncation < self.bytes.len() {
132 if truncation < self.bytes.len() {
133 self.truncation = Some(truncation);
133 self.truncation = Some(truncation);
134 self.added.clear();
134 self.added.clear();
135 } else {
135 } else {
136 self.added.truncate(truncation - self.bytes.len());
136 self.added.truncate(truncation - self.bytes.len());
137 }
137 }
138 Ok(())
138 Ok(())
139 }
139 }
140
140
141 fn is_new(&self) -> bool {
141 fn is_new(&self) -> bool {
142 self.bytes.is_empty()
142 self.bytes.is_empty()
143 }
143 }
144 }
144 }
145
145
146 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
146 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
147 type Output = [u8];
147 type Output = [u8];
148
148
149 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
149 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
150 let start = index.start;
150 let start = index.start;
151 let end = index.end;
151 let end = index.end;
152 let immutable_len = match self.truncation {
152 let immutable_len = match self.truncation {
153 Some(truncation) => truncation,
153 Some(truncation) => truncation,
154 None => self.bytes.len(),
154 None => self.bytes.len(),
155 };
155 };
156 if start < immutable_len {
156 if start < immutable_len {
157 if end > immutable_len {
157 if end > immutable_len {
158 panic!("index data cannot span existing and added ranges");
158 panic!("index data cannot span existing and added ranges");
159 }
159 }
160 &self.bytes[index]
160 &self.bytes[index]
161 } else {
161 } else {
162 &self.added[start - immutable_len..end - immutable_len]
162 &self.added[start - immutable_len..end - immutable_len]
163 }
163 }
164 }
164 }
165 }
165 }
166
166
167 #[derive(Debug, PartialEq, Eq)]
167 #[derive(Debug, PartialEq, Eq)]
168 pub struct RevisionDataParams {
168 pub struct RevisionDataParams {
169 pub flags: u16,
169 pub flags: u16,
170 pub data_offset: u64,
170 pub data_offset: u64,
171 pub data_compressed_length: i32,
171 pub data_compressed_length: i32,
172 pub data_uncompressed_length: i32,
172 pub data_uncompressed_length: i32,
173 pub data_delta_base: i32,
173 pub data_delta_base: i32,
174 pub link_rev: i32,
174 pub link_rev: i32,
175 pub parent_rev_1: i32,
175 pub parent_rev_1: i32,
176 pub parent_rev_2: i32,
176 pub parent_rev_2: i32,
177 pub node_id: [u8; NODE_BYTES_LENGTH],
177 pub node_id: [u8; NODE_BYTES_LENGTH],
178 pub _sidedata_offset: u64,
178 pub _sidedata_offset: u64,
179 pub _sidedata_compressed_length: i32,
179 pub _sidedata_compressed_length: i32,
180 pub data_compression_mode: u8,
180 pub data_compression_mode: u8,
181 pub _sidedata_compression_mode: u8,
181 pub _sidedata_compression_mode: u8,
182 pub _rank: i32,
182 pub _rank: i32,
183 }
183 }
184
184
185 impl Default for RevisionDataParams {
185 impl Default for RevisionDataParams {
186 fn default() -> Self {
186 fn default() -> Self {
187 Self {
187 Self {
188 flags: 0,
188 flags: 0,
189 data_offset: 0,
189 data_offset: 0,
190 data_compressed_length: 0,
190 data_compressed_length: 0,
191 data_uncompressed_length: 0,
191 data_uncompressed_length: 0,
192 data_delta_base: -1,
192 data_delta_base: -1,
193 link_rev: -1,
193 link_rev: -1,
194 parent_rev_1: -1,
194 parent_rev_1: -1,
195 parent_rev_2: -1,
195 parent_rev_2: -1,
196 node_id: [0; NODE_BYTES_LENGTH],
196 node_id: [0; NODE_BYTES_LENGTH],
197 _sidedata_offset: 0,
197 _sidedata_offset: 0,
198 _sidedata_compressed_length: 0,
198 _sidedata_compressed_length: 0,
199 data_compression_mode: COMPRESSION_MODE_INLINE,
199 data_compression_mode: COMPRESSION_MODE_INLINE,
200 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
200 _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
201 _rank: -1,
201 _rank: -1,
202 }
202 }
203 }
203 }
204 }
204 }
205
205
206 #[derive(BytesCast)]
206 #[derive(BytesCast)]
207 #[repr(C)]
207 #[repr(C)]
208 pub struct RevisionDataV1 {
208 pub struct RevisionDataV1 {
209 data_offset_or_flags: unaligned::U64Be,
209 data_offset_or_flags: unaligned::U64Be,
210 data_compressed_length: unaligned::I32Be,
210 data_compressed_length: unaligned::I32Be,
211 data_uncompressed_length: unaligned::I32Be,
211 data_uncompressed_length: unaligned::I32Be,
212 data_delta_base: unaligned::I32Be,
212 data_delta_base: unaligned::I32Be,
213 link_rev: unaligned::I32Be,
213 link_rev: unaligned::I32Be,
214 parent_rev_1: unaligned::I32Be,
214 parent_rev_1: unaligned::I32Be,
215 parent_rev_2: unaligned::I32Be,
215 parent_rev_2: unaligned::I32Be,
216 node_id: [u8; STORED_NODE_ID_BYTES],
216 node_id: [u8; STORED_NODE_ID_BYTES],
217 }
217 }
218
218
219 fn _static_assert_size_of_revision_data_v1() {
219 fn _static_assert_size_of_revision_data_v1() {
220 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
220 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
221 }
221 }
222
222
223 impl RevisionDataParams {
223 impl RevisionDataParams {
224 pub fn validate(&self) -> Result<(), RevlogError> {
224 pub fn validate(&self) -> Result<(), RevlogError> {
225 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
225 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
226 return Err(RevlogError::corrupted(format!(
226 return Err(RevlogError::corrupted(format!(
227 "unknown revlog index flags: {}",
227 "unknown revlog index flags: {}",
228 self.flags
228 self.flags
229 )));
229 )));
230 }
230 }
231 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
231 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
232 return Err(RevlogError::corrupted(format!(
232 return Err(RevlogError::corrupted(format!(
233 "invalid data compression mode: {}",
233 "invalid data compression mode: {}",
234 self.data_compression_mode
234 self.data_compression_mode
235 )));
235 )));
236 }
236 }
237 // FIXME isn't this only for v2 or changelog v2?
237 // FIXME isn't this only for v2 or changelog v2?
238 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
238 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
239 return Err(RevlogError::corrupted(format!(
239 return Err(RevlogError::corrupted(format!(
240 "invalid sidedata compression mode: {}",
240 "invalid sidedata compression mode: {}",
241 self._sidedata_compression_mode
241 self._sidedata_compression_mode
242 )));
242 )));
243 }
243 }
244 Ok(())
244 Ok(())
245 }
245 }
246
246
247 pub fn into_v1(self) -> RevisionDataV1 {
247 pub fn into_v1(self) -> RevisionDataV1 {
248 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
248 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
249 let mut node_id = [0; STORED_NODE_ID_BYTES];
249 let mut node_id = [0; STORED_NODE_ID_BYTES];
250 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
250 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
251 RevisionDataV1 {
251 RevisionDataV1 {
252 data_offset_or_flags: data_offset_or_flags.into(),
252 data_offset_or_flags: data_offset_or_flags.into(),
253 data_compressed_length: self.data_compressed_length.into(),
253 data_compressed_length: self.data_compressed_length.into(),
254 data_uncompressed_length: self.data_uncompressed_length.into(),
254 data_uncompressed_length: self.data_uncompressed_length.into(),
255 data_delta_base: self.data_delta_base.into(),
255 data_delta_base: self.data_delta_base.into(),
256 link_rev: self.link_rev.into(),
256 link_rev: self.link_rev.into(),
257 parent_rev_1: self.parent_rev_1.into(),
257 parent_rev_1: self.parent_rev_1.into(),
258 parent_rev_2: self.parent_rev_2.into(),
258 parent_rev_2: self.parent_rev_2.into(),
259 node_id,
259 node_id,
260 }
260 }
261 }
261 }
262 }
262 }
263
263
264 /// A Revlog index
264 /// A Revlog index
265 pub struct Index {
265 pub struct Index {
266 bytes: IndexData,
266 bytes: IndexData,
267 /// Offsets of starts of index blocks.
267 /// Offsets of starts of index blocks.
268 /// Only needed when the index is interleaved with data.
268 /// Only needed when the index is interleaved with data.
269 offsets: RwLock<Option<Vec<usize>>>,
269 offsets: RwLock<Option<Vec<usize>>>,
270 uses_generaldelta: bool,
270 uses_generaldelta: bool,
271 is_inline: bool,
271 is_inline: bool,
272 /// Cache of (head_revisions, filtered_revisions)
272 /// Cache of (head_revisions, filtered_revisions)
273 ///
273 ///
274 /// The head revisions in this index, kept in sync. Should
274 /// The head revisions in this index, kept in sync. Should
275 /// be accessed via the [`Self::head_revs`] method.
275 /// be accessed via the [`Self::head_revs`] method.
276 /// The last filtered revisions in this index, used to make sure
276 /// The last filtered revisions in this index, used to make sure
277 /// we haven't changed filters when returning the cached `head_revs`.
277 /// we haven't changed filters when returning the cached `head_revs`.
278 head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
278 head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>,
279 }
279 }
280
280
impl Debug for Index {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only a subset of fields is shown: the raw `bytes` and the
        // `head_revs` cache are deliberately omitted from debug output.
        f.debug_struct("Index")
            .field("offsets", &self.offsets)
            .field("uses_generaldelta", &self.uses_generaldelta)
            .finish()
    }
}
289
289
290 impl Graph for Index {
290 impl Graph for Index {
291 #[inline(always)]
291 #[inline(always)]
292 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
292 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
293 let err = || GraphError::ParentOutOfRange(rev);
293 let err = || GraphError::ParentOutOfRange(rev);
294 match self.get_entry(rev) {
294 match self.get_entry(rev) {
295 Some(entry) => {
295 Some(entry) => {
296 // The C implementation checks that the parents are valid
296 // The C implementation checks that the parents are valid
297 // before returning
297 // before returning
298 Ok([
298 Ok([
299 self.check_revision(entry.p1()).ok_or_else(err)?,
299 self.check_revision(entry.p1()).ok_or_else(err)?,
300 self.check_revision(entry.p2()).ok_or_else(err)?,
300 self.check_revision(entry.p2()).ok_or_else(err)?,
301 ])
301 ])
302 }
302 }
303 None => Ok([NULL_REVISION, NULL_REVISION]),
303 None => Ok([NULL_REVISION, NULL_REVISION]),
304 }
304 }
305 }
305 }
306 }
306 }
307
307
/// A cache suitable for find_snapshots
///
/// Logically equivalent to a mapping whose keys are [`BaseRevision`] and
/// values sets of [`BaseRevision`]
///
/// TODO the dubious part is insisting that errors must be RevlogError
/// we would probably need to sprinkle some magic here, such as an associated
/// type that would be Into<RevlogError> but even that would not be
/// satisfactory, as errors potentially have nothing to do with the revlog.
pub trait SnapshotsCache {
    /// Record `value` as a member of the set associated with `rev`.
    fn insert_for(
        &mut self,
        rev: BaseRevision,
        value: BaseRevision,
    ) -> Result<(), RevlogError>;
}
324
324
325 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
325 impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> {
326 fn insert_for(
326 fn insert_for(
327 &mut self,
327 &mut self,
328 rev: BaseRevision,
328 rev: BaseRevision,
329 value: BaseRevision,
329 value: BaseRevision,
330 ) -> Result<(), RevlogError> {
330 ) -> Result<(), RevlogError> {
331 let all_values = self.entry(rev).or_default();
331 let all_values = self.entry(rev).or_default();
332 all_values.insert(value);
332 all_values.insert(value);
333 Ok(())
333 Ok(())
334 }
334 }
335 }
335 }
336
336
337 impl Index {
337 impl Index {
    /// Create an index from bytes.
    /// Calculate the start of each entry when is_inline is true.
    ///
    /// Returns a corruption error if the version is not `REVLOGV1`, or if
    /// an inline index does not end exactly on an entry boundary.
    pub fn new(
        bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>,
        default_header: IndexHeader,
    ) -> Result<Self, HgError> {
        // When no header could be parsed (presumably an empty index —
        // TODO confirm), fall back to the caller-provided default.
        let header =
            IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);

        if header.format_version() != IndexHeader::REVLOGV1 {
            // A proper new version should have had a repo/store
            // requirement.
            return Err(HgError::corrupted("unsupported revlog version"));
        }

        // This is only correct because we know version is REVLOGV1.
        // In v2 we always use generaldelta, while in v0 we never use
        // generaldelta. Similar for [is_inline] (it's only used in v1).
        let uses_generaldelta = header.format_flags().uses_generaldelta();

        if header.format_flags().is_inline() {
            let mut offset: usize = 0;
            let mut offsets = Vec::new();

            // Walk the interleaved index+data: each entry is followed by
            // its compressed revision data, so the next entry starts
            // `compressed_len` bytes after this entry's end.
            while offset + INDEX_ENTRY_SIZE <= bytes.len() {
                offsets.push(offset);
                let end = offset + INDEX_ENTRY_SIZE;
                let entry = IndexEntry {
                    bytes: &bytes[offset..end],
                };

                offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
            }

            if offset == bytes.len() {
                Ok(Self {
                    bytes: IndexData::new(bytes),
                    offsets: RwLock::new(Some(offsets)),
                    uses_generaldelta,
                    is_inline: true,
                    head_revs: RwLock::new((vec![], HashSet::new())),
                })
            } else {
                // The scan must land exactly on the end of the buffer,
                // otherwise the file is truncated or garbled.
                Err(HgError::corrupted("unexpected inline revlog length"))
            }
        } else {
            Ok(Self {
                bytes: IndexData::new(bytes),
                offsets: RwLock::new(None),
                uses_generaldelta,
                is_inline: false,
                head_revs: RwLock::new((vec![], HashSet::new())),
            })
        }
    }
393
393
    /// Whether this index uses the `generaldelta` storage scheme.
    pub fn uses_generaldelta(&self) -> bool {
        self.uses_generaldelta
    }
397
397
    /// Value of the inline flag.
    ///
    /// When set, revision data is interleaved with the index entries in
    /// the same file (see [`Self::data`]).
    pub fn is_inline(&self) -> bool {
        self.is_inline
    }
402
402
403 /// Return a slice of bytes if `revlog` is inline. Panic if not.
403 /// Return a slice of bytes if `revlog` is inline. Panic if not.
404 pub fn data(&self, start: usize, end: usize) -> &[u8] {
404 pub fn data(&self, start: usize, end: usize) -> &[u8] {
405 if !self.is_inline() {
405 if !self.is_inline() {
406 panic!("tried to access data in the index of a revlog that is not inline");
406 panic!("tried to access data in the index of a revlog that is not inline");
407 }
407 }
408 &self.bytes[start..end]
408 &self.bytes[start..end]
409 }
409 }
410
410
411 /// Return number of entries of the revlog index.
411 /// Return number of entries of the revlog index.
412 pub fn len(&self) -> usize {
412 pub fn len(&self) -> usize {
413 if self.is_inline() {
413 if self.is_inline() {
414 (*self.get_offsets())
414 (*self.get_offsets())
415 .as_ref()
415 .as_ref()
416 .expect("inline should have offsets")
416 .expect("inline should have offsets")
417 .len()
417 .len()
418 } else {
418 } else {
419 self.bytes.len() / INDEX_ENTRY_SIZE
419 self.bytes.len() / INDEX_ENTRY_SIZE
420 }
420 }
421 }
421 }
422
422
    /// Return a read guard over the (lazily populated) table of entry
    /// offsets for an inline index.
    ///
    /// Panics if the index is not inline.
    pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> {
        assert!(self.is_inline());
        {
            // Wrap in a block to drop the write guard before taking the
            // read lock below (RwLock would deadlock otherwise).
            // TODO perf?
            let mut offsets = self.offsets.write().unwrap();
            if offsets.is_none() {
                offsets.replace(inline_scan(&self.bytes.bytes).1);
            }
        }
        self.offsets.read().unwrap()
    }
435
435
436 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
436 pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> {
437 assert!(self.is_inline());
437 assert!(self.is_inline());
438 let mut offsets = self.offsets.write().unwrap();
438 let mut offsets = self.offsets.write().unwrap();
439 if offsets.is_none() {
439 if offsets.is_none() {
440 offsets.replace(inline_scan(&self.bytes.bytes).1);
440 offsets.replace(inline_scan(&self.bytes.bytes).1);
441 }
441 }
442 offsets
442 offsets
443 }
443 }
444
444
    /// Returns `true` if the `Index` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
449
449
450 /// Return the index entry corresponding to the given revision or `None`
450 /// Return the index entry corresponding to the given revision or `None`
451 /// for [`NULL_REVISION`]
451 /// for [`NULL_REVISION`]
452 ///
452 ///
453 /// The specified revision being of the checked type, it always exists
453 /// The specified revision being of the checked type, it always exists
454 /// if it was validated by this index.
454 /// if it was validated by this index.
455 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
455 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
456 if rev == NULL_REVISION {
456 if rev == NULL_REVISION {
457 return None;
457 return None;
458 }
458 }
459 if rev.0 == 0 {
459 if rev.0 == 0 {
460 Some(IndexEntry {
460 Some(IndexEntry {
461 bytes: &self.bytes.first_entry[..],
461 bytes: &self.bytes.first_entry[..],
462 })
462 })
463 } else {
463 } else {
464 Some(if self.is_inline() {
464 Some(if self.is_inline() {
465 self.get_entry_inline(rev)
465 self.get_entry_inline(rev)
466 } else {
466 } else {
467 self.get_entry_separated(rev)
467 self.get_entry_separated(rev)
468 })
468 })
469 }
469 }
470 }
470 }
471
471
472 /// Return the binary content of the index entry for the given revision
472 /// Return the binary content of the index entry for the given revision
473 ///
473 ///
474 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
474 /// See [get_entry()](`Self::get_entry()`) for cases when `None` is
475 /// returned.
475 /// returned.
476 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
476 pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> {
477 self.get_entry(rev).map(|e| {
477 self.get_entry(rev).map(|e| {
478 let bytes = e.as_bytes();
478 let bytes = e.as_bytes();
479 if rev.0 == 0 {
479 if rev.0 == 0 {
480 &bytes[4..]
480 &bytes[4..]
481 } else {
481 } else {
482 bytes
482 bytes
483 }
483 }
484 })
484 })
485 }
485 }
486
486
    /// Return the stored fields of the entry for `rev` decoded into
    /// [`RevisionDataParams`], or `None` if `rev` is not a valid revision
    /// of this index (including [`NULL_REVISION`]).
    pub fn entry_as_params(
        &self,
        rev: UncheckedRevision,
    ) -> Option<RevisionDataParams> {
        let rev = self.check_revision(rev)?;
        self.get_entry(rev).map(|e| RevisionDataParams {
            flags: e.flags(),
            // NOTE(review): for rev 0 of a pre-existing index the raw
            // offset field appears to overlap other header data, hence
            // this special case — confirm against the on-disk format.
            data_offset: if rev.0 == 0 && !self.bytes.is_new() {
                e.flags() as u64
            } else {
                e.raw_offset()
            },
            data_compressed_length: e
                .compressed_len()
                .try_into()
                .unwrap_or_else(|_| {
                    // Python's `unionrepo` sets the compressed length to be
                    // `-1` (or `u32::MAX` if transmuted to `u32`) because it
                    // cannot know the correct compressed length of a given
                    // revision. I'm not sure if this is true, but having this
                    // edge case won't hurt other use cases, let's handle it.
                    assert_eq!(e.compressed_len(), u32::MAX);
                    NULL_REVISION.0
                }),
            data_uncompressed_length: e.uncompressed_len(),
            data_delta_base: e.base_revision_or_base_of_delta_chain().0,
            link_rev: e.link_revision().0,
            parent_rev_1: e.p1().0,
            parent_rev_2: e.p2().0,
            node_id: e.hash().as_bytes().try_into().unwrap(),
            ..Default::default()
        })
    }
520
520
    /// Look up the entry for `rev` in an inline index, using the offsets
    /// table to locate where it starts.
    fn get_entry_inline(&self, rev: Revision) -> IndexEntry {
        let offsets = &self.get_offsets();
        let offsets = offsets.as_ref().expect("inline should have offsets");
        let start = offsets[rev.0 as usize];
        let end = start + INDEX_ENTRY_SIZE;
        let bytes = &self.bytes[start..end];

        IndexEntry { bytes }
    }
530
530
531 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
531 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
532 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
532 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
533 let end = start + INDEX_ENTRY_SIZE;
533 let end = start + INDEX_ENTRY_SIZE;
534 let bytes = &self.bytes[start..end];
534 let bytes = &self.bytes[start..end];
535
535
536 IndexEntry { bytes }
536 IndexEntry { bytes }
537 }
537 }
538
538
    /// Return an entry whose bytes are all zeros.
    fn null_entry(&self) -> IndexEntry {
        IndexEntry {
            bytes: &[0; INDEX_ENTRY_SIZE],
        }
    }
544
544
    /// Return the head revisions of this index
    pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> {
        // With `py_shortcut` disabled, `head_revs_filtered` always
        // returns `Some`, so the unwrap cannot fail.
        self.head_revs_filtered(&HashSet::new(), false)
            .map(|h| h.unwrap())
    }
550
550
    /// Python-specific shortcut to save on PyList creation
    ///
    /// Returns `Ok(None)` when the cached heads are still valid, meaning
    /// the Python caller can keep using its own cached copy.
    pub fn head_revs_shortcut(
        &self,
    ) -> Result<Option<Vec<Revision>>, GraphError> {
        self.head_revs_filtered(&HashSet::new(), true)
    }
557
557
    /// Return the heads removed and added by advancing from `begin` to `end`.
    /// In revset language, we compute:
    /// - `heads(:begin)-heads(:end)`
    /// - `heads(:end)-heads(:begin)`
    ///
    /// Returned as `(heads_removed, heads_added)`.
    pub fn head_revs_diff(
        &self,
        begin: Revision,
        end: Revision,
    ) -> Result<(Vec<Revision>, Vec<Revision>), GraphError> {
        let mut heads_added = vec![];
        let mut heads_removed = vec![];

        // Working set of revisions known to have children in the part of
        // the range already scanned (see the invariants below).
        let mut acc = HashSet::new();
        let Revision(begin) = begin;
        let Revision(end) = end;
        let mut i = end;

        while i > begin {
            // acc invariant:
            // `j` is in the set iff `j <= i` and it has children
            // among `i+1..end` (inclusive)
            if !acc.remove(&i) {
                // `i` has no children in `i+1..end`: it is a new head.
                heads_added.push(Revision(i));
            }
            for Revision(parent) in self.parents(Revision(i))? {
                acc.insert(parent);
            }
            i -= 1;
        }

        // At this point `acc` contains old revisions that gained new children.
        // We need to check if they had any children before. If not, those
        // revisions are the removed heads.
        while !acc.is_empty() {
            // acc invariant:
            // `j` is in the set iff `j <= i` and it has children
            // among `begin+1..end`, but not among `i+1..begin` (inclusive)

            assert!(i >= -1); // yes, `-1` can also be a head if the repo is empty
            if acc.remove(&i) {
                heads_removed.push(Revision(i));
            }
            for Revision(parent) in self.parents(Revision(i))? {
                acc.remove(&parent);
            }
            i -= 1;
        }

        Ok((heads_removed, heads_added))
    }
608
608
    /// Return the head revisions of this index
    ///
    /// Revisions in `filtered_revs` are excluded. With `py_shortcut` set,
    /// `Ok(None)` is returned when the cached answer is still valid (same
    /// filter set as last time), so the Python side can reuse its cache.
    pub fn head_revs_filtered(
        &self,
        filtered_revs: &HashSet<Revision>,
        py_shortcut: bool,
    ) -> Result<Option<Vec<Revision>>, GraphError> {
        {
            // Fast path: the cache is valid only when heads were computed
            // before (non-empty vec) *and* the filter set is unchanged.
            let guard = self
                .head_revs
                .read()
                .expect("RwLock on Index.head_revs should not be poisoned");
            let self_head_revs = &guard.0;
            let self_filtered_revs = &guard.1;
            if !self_head_revs.is_empty()
                && filtered_revs == self_filtered_revs
            {
                if py_shortcut {
                    // Don't copy the revs since we've already cached them
                    // on the Python side.
                    return Ok(None);
                } else {
                    return Ok(Some(self_head_revs.to_owned()));
                }
            }
        }

        let as_vec = if self.is_empty() {
            vec![NULL_REVISION]
        } else {
            // `retain_heads_fast` sets a bit for every revision that is
            // not a head; the unmarked indices below are the heads.
            let mut not_heads = bitvec![0; self.len()];
            dagops::retain_heads_fast(
                self,
                not_heads.as_mut_bitslice(),
                filtered_revs,
            )?;
            not_heads
                .into_iter()
                .enumerate()
                .filter_map(|(idx, is_not_head)| {
                    if is_not_head {
                        None
                    } else {
                        Some(Revision(idx as BaseRevision))
                    }
                })
                .collect()
        };
        // Refresh the cache together with the filter set it was computed
        // for, so the fast path above stays correct.
        *self
            .head_revs
            .write()
            .expect("RwLock on Index.head_revs should not be poisoned") =
            (as_vec.to_owned(), filtered_revs.to_owned());
        Ok(Some(as_vec))
    }
663
663
    /// Obtain the delta chain for a revision.
    ///
    /// `stop_rev` specifies a revision to stop at. If not specified, we
    /// stop at the base of the chain.
    ///
    /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of
    /// revs in ascending order and `stopped` is a bool indicating whether
    /// `stoprev` was hit.
    pub fn delta_chain(
        &self,
        rev: Revision,
        stop_rev: Option<Revision>,
        using_general_delta: Option<bool>,
    ) -> Result<(Vec<Revision>, bool), HgError> {
        let mut current_rev = rev;
        let mut entry = self.get_entry(rev).unwrap();
        let mut chain = vec![];
        // The caller may override the index-wide generaldelta setting.
        let using_general_delta =
            using_general_delta.unwrap_or_else(|| self.uses_generaldelta());
        // Walk towards the chain base: with generaldelta the next link is
        // the stored delta base, otherwise it is the previous revision.
        while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
            && stop_rev.map(|r| r != current_rev).unwrap_or(true)
        {
            chain.push(current_rev);
            let new_rev = if using_general_delta {
                entry.base_revision_or_base_of_delta_chain()
            } else {
                UncheckedRevision(current_rev.0 - 1)
            };
            current_rev = self.check_revision(new_rev).ok_or_else(|| {
                HgError::corrupted(format!("Revision {new_rev} out of range"))
            })?;
            if current_rev.0 == NULL_REVISION.0 {
                break;
            }
            entry = self.get_entry(current_rev).unwrap()
        }

        let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
            true
        } else {
            // The base itself is part of the chain when the walk was not
            // interrupted by `stop_rev`.
            chain.push(current_rev);
            false
        };
        // The walk collected revisions base-last; callers expect
        // ascending order.
        chain.reverse();
        Ok((chain, stopped))
    }
710
710
    /// Record into `cache` every snapshot revision found in
    /// `start_rev..=end_rev` (clamped to the index bounds), keyed by its
    /// delta base.
    pub fn find_snapshots(
        &self,
        start_rev: UncheckedRevision,
        end_rev: UncheckedRevision,
        cache: &mut impl SnapshotsCache,
    ) -> Result<(), RevlogError> {
        let mut start_rev = start_rev.0;
        let mut end_rev = end_rev.0;
        // Make the range half-open, then clamp both ends to valid
        // revision numbers.
        end_rev += 1;
        let len = self.len().try_into().unwrap();
        if end_rev > len {
            end_rev = len;
        }
        if start_rev < 0 {
            start_rev = 0;
        }
        for rev in start_rev..end_rev {
            if !self.is_snapshot_unchecked(Revision(rev))? {
                continue;
            }
            let mut base = self
                .get_entry(Revision(rev))
                .unwrap()
                .base_revision_or_base_of_delta_chain();
            if base.0 == rev {
                // A self-referencing base means "no base": key the
                // snapshot under the null revision instead.
                base = NULL_REVISION.into();
            }
            cache.insert_for(base.0, rev)?;
        }
        Ok(())
    }
742
742
743 fn clear_head_revs(&self) {
743 fn clear_head_revs(&self) {
744 self.head_revs
744 self.head_revs
745 .write()
745 .write()
746 .expect("RwLock on Index.head_revs should not be poisoined")
746 .expect("RwLock on Index.head_revs should not be poisoined")
747 .0
747 .0
748 .clear()
748 .clear()
749 }
749 }
750
750
    /// TODO move this to the trait probably, along with other things
    ///
    /// Append a new revision entry to the in-memory part of the index.
    pub fn append(
        &mut self,
        revision_data: RevisionDataParams,
    ) -> Result<(), RevlogError> {
        revision_data.validate()?;
        let entry_v1 = revision_data.into_v1();
        let entry_bytes = entry_v1.as_bytes();
        if self.bytes.len() == 0 {
            // First entry ever: also fill the dedicated rev-0 buffer,
            // skipping the first INDEX_HEADER_SIZE bytes (presumably
            // reserved for the header — TODO confirm).
            self.bytes.first_entry[INDEX_HEADER_SIZE..].copy_from_slice(
                &entry_bytes[INDEX_HEADER_SIZE..INDEX_ENTRY_SIZE],
            )
        }
        if self.is_inline() {
            // Keep the offsets table in sync with the appended entry.
            let new_offset = self.bytes.len();
            if let Some(offsets) = &mut *self.get_offsets_mut() {
                offsets.push(new_offset)
            }
        }
        self.bytes.added.extend(entry_bytes);
        // The graph changed, so the cached heads are stale.
        self.clear_head_revs();
        Ok(())
    }
774
774
    /// Pack a header value into its 4-byte big-endian on-disk form.
    pub fn pack_header(&self, header: i32) -> [u8; 4] {
        header.to_be_bytes()
    }
778
778
    /// Truncate the index so that `rev` and all later revisions are
    /// removed.
    pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
        // For inline indexes the truncation point in bytes depends on the
        // offsets table, so clone it out before mutating `self.bytes`.
        let offsets = if self.is_inline() {
            self.get_offsets().clone()
        } else {
            None
        };
        self.bytes.remove(rev, offsets.as_deref())?;
        if self.is_inline() {
            // Drop the offsets of the removed entries as well.
            if let Some(offsets) = &mut *self.get_offsets_mut() {
                offsets.truncate(rev.0 as usize)
            }
        }
        // The graph changed, so the cached heads are stale.
        self.clear_head_revs();
        Ok(())
    }
794
794
795 pub fn clear_caches(&self) {
795 pub fn clear_caches(&self) {
796 // We need to get the 'inline' value from Python at init and use this
796 // We need to get the 'inline' value from Python at init and use this
797 // instead of offsets to determine whether we're inline since we might
797 // instead of offsets to determine whether we're inline since we might
798 // clear caches. This implies re-populating the offsets on-demand.
798 // clear caches. This implies re-populating the offsets on-demand.
799 *self
799 *self
800 .offsets
800 .offsets
801 .write()
801 .write()
802 .expect("RwLock on Index.offsets should not be poisoed") = None;
802 .expect("RwLock on Index.offsets should not be poisoed") = None;
803 self.clear_head_revs();
803 self.clear_head_revs();
804 }
804 }
805
805
    /// Unchecked version of `is_snapshot`.
    /// Assumes the caller checked that `rev` is within a valid revision range.
    pub fn is_snapshot_unchecked(
        &self,
        mut rev: Revision,
    ) -> Result<bool, RevlogError> {
        // Walk down the delta-base chain: `rev` is a snapshot iff no link
        // of the chain is a delta against one of its (effective) parents
        // before the chain bottoms out at the null revision.
        while rev.0 >= 0 {
            let entry = self.get_entry(rev).unwrap();
            let mut base = entry.base_revision_or_base_of_delta_chain().0;
            if base == rev.0 {
                // A self-referencing base means "no base".
                base = NULL_REVISION.0;
            }
            if base == NULL_REVISION.0 {
                // Full snapshot: a delta against nothing.
                return Ok(true);
            }
            let [mut p1, mut p2] = self
                .parents(rev)
                .map_err(|_| RevlogError::InvalidRevision)?;
            // Skip over empty (zero compressed length) p1 ancestors to
            // find the effective revision a delta could be based on.
            while let Some(p1_entry) = self.get_entry(p1) {
                if p1_entry.compressed_len() != 0 || p1.0 == 0 {
                    break;
                }
                let parent_base =
                    p1_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p1.0 {
                    break;
                }
                p1 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            // Same skipping for the p2 side.
            while let Some(p2_entry) = self.get_entry(p2) {
                if p2_entry.compressed_len() != 0 || p2.0 == 0 {
                    break;
                }
                let parent_base =
                    p2_entry.base_revision_or_base_of_delta_chain();
                if parent_base.0 == p2.0 {
                    break;
                }
                p2 = self
                    .check_revision(parent_base)
                    .ok_or(RevlogError::InvalidRevision)?;
            }
            if base == p1.0 || base == p2.0 {
                // Delta against a parent: not a snapshot.
                return Ok(false);
            }
            // Recurse (iteratively) on the base of the chain.
            rev = self
                .check_revision(base.into())
                .ok_or(RevlogError::InvalidRevision)?;
        }
        Ok(rev == NULL_REVISION)
    }
859
859
860 /// Return whether the given revision is a snapshot. Returns an error if
860 /// Return whether the given revision is a snapshot. Returns an error if
861 /// `rev` is not within a valid revision range.
861 /// `rev` is not within a valid revision range.
862 pub fn is_snapshot(
862 pub fn is_snapshot(
863 &self,
863 &self,
864 rev: UncheckedRevision,
864 rev: UncheckedRevision,
865 ) -> Result<bool, RevlogError> {
865 ) -> Result<bool, RevlogError> {
866 let rev = self
866 let rev = self
867 .check_revision(rev)
867 .check_revision(rev)
868 .ok_or_else(|| RevlogError::corrupted("test"))?;
868 .ok_or_else(|| RevlogError::corrupted("test"))?;
869 self.is_snapshot_unchecked(rev)
869 self.is_snapshot_unchecked(rev)
870 }
870 }
871
871
    /// Slice revs to reduce the amount of unrelated data to be read from disk.
    ///
    /// The index is sliced into groups that should be read in one time.
    ///
    /// The initial chunk is sliced until the overall density
    /// (payload/chunks-span ratio) is above `target_density`.
    /// No gap smaller than `min_gap_size` is skipped.
    pub fn slice_chunk_to_density(
        &self,
        revs: &[Revision],
        target_density: f64,
        min_gap_size: usize,
    ) -> Vec<Vec<Revision>> {
        if revs.is_empty() {
            return vec![];
        }
        if revs.len() == 1 {
            return vec![revs.to_owned()];
        }
        // Total on-disk span covered by the requested revisions.
        let delta_chain_span = self.segment_span(revs);
        if delta_chain_span < min_gap_size {
            // Even reading the whole span wastes less than one
            // skippable gap: return everything as one chunk.
            return vec![revs.to_owned()];
        }
        // Pair each revision with its index entry; missing revisions
        // (e.g. the null revision) get a synthetic empty entry.
        let entries: Vec<_> = revs
            .iter()
            .map(|r| {
                (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
            })
            .collect();

        let mut read_data = delta_chain_span;
        let chain_payload: u32 =
            entries.iter().map(|(_r, e)| e.compressed_len()).sum();
        // Density = useful payload / bytes actually read from disk.
        let mut density = if delta_chain_span > 0 {
            chain_payload as f64 / delta_chain_span as f64
        } else {
            1.0
        };

        if density >= target_density {
            return vec![revs.to_owned()];
        }

        // Collect the gaps between consecutive non-empty revisions,
        // as (size, index-after-the-gap) pairs.
        let mut gaps = Vec::new();
        let mut previous_end = None;

        for (i, (_rev, entry)) in entries.iter().enumerate() {
            let start = entry.c_start() as usize;
            let length = entry.compressed_len();

            // Skip empty revisions to form larger holes
            if length == 0 {
                continue;
            }

            if let Some(end) = previous_end {
                let gap_size = start - end;
                // Only consider holes that are large enough
                if gap_size > min_gap_size {
                    gaps.push((gap_size, i));
                }
            }
            previous_end = Some(start + length as usize);
        }
        if gaps.is_empty() {
            return vec![revs.to_owned()];
        }
        // Sort ascending so that `pop()` below yields gaps from
        // largest to smallest.
        gaps.sort_unstable();

        // Collect the indices of the largest holes until
        // the density is acceptable
        let mut selected = vec![];
        while let Some((gap_size, gap_id)) = gaps.pop() {
            if density >= target_density {
                break;
            }
            selected.push(gap_id);

            // Skipping this gap reduces the number of bytes read,
            // which mechanically raises the density.
            read_data -= gap_size;
            density = if read_data > 0 {
                chain_payload as f64 / read_data as f64
            } else {
                1.0
            };
            if density >= target_density {
                break;
            }
        }
        // Restore index order, and add a sentinel cut at the end so the
        // loop below emits the final chunk boundary.
        selected.sort_unstable();
        selected.push(revs.len());

        // Cut the revs at collected indices
        let mut previous_idx = 0;
        let mut chunks = vec![];
        for idx in selected {
            let chunk = self.trim_chunk(&entries, previous_idx, idx);
            if !chunk.is_empty() {
                chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
            }
            previous_idx = idx;
        }
        // Emit whatever remains after the last selected cut.
        let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
        if !chunk.is_empty() {
            chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
        }

        chunks
    }
984
984
985 /// Get the byte span of a segment of sorted revisions.
985 /// Get the byte span of a segment of sorted revisions.
986 ///
986 ///
987 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
987 /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
988 /// the `revs` segment.
988 /// the `revs` segment.
989 ///
989 ///
990 /// panics:
990 /// panics:
991 /// - if `revs` is empty or only made of `NULL_REVISION`
991 /// - if `revs` is empty or only made of `NULL_REVISION`
992 /// - if cannot retrieve entry for the last or first not null element of
992 /// - if cannot retrieve entry for the last or first not null element of
993 /// `revs`.
993 /// `revs`.
994 fn segment_span(&self, revs: &[Revision]) -> usize {
994 fn segment_span(&self, revs: &[Revision]) -> usize {
995 if revs.is_empty() {
995 if revs.is_empty() {
996 return 0;
996 return 0;
997 }
997 }
998 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
998 let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
999 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
999 let end = last_entry.c_start() + last_entry.compressed_len() as u64;
1000 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
1000 let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
1001 let start = if first_rev.0 == 0 {
1001 let start = if first_rev.0 == 0 {
1002 0
1002 0
1003 } else {
1003 } else {
1004 self.get_entry(*first_rev).unwrap().c_start()
1004 self.get_entry(*first_rev).unwrap().c_start()
1005 };
1005 };
1006 (end - start) as usize
1006 (end - start) as usize
1007 }
1007 }
1008
1008
1009 /// Returns `&revs[startidx..endidx]` without empty trailing revs
1009 /// Returns `&revs[startidx..endidx]` without empty trailing revs
1010 fn trim_chunk<'a>(
1010 fn trim_chunk<'a>(
1011 &'a self,
1011 &'a self,
1012 revs: &'a [(Revision, IndexEntry)],
1012 revs: &'a [(Revision, IndexEntry)],
1013 start: usize,
1013 start: usize,
1014 mut end: usize,
1014 mut end: usize,
1015 ) -> &'a [(Revision, IndexEntry)] {
1015 ) -> &'a [(Revision, IndexEntry)] {
1016 // Trim empty revs at the end, except the very first rev of a chain
1016 // Trim empty revs at the end, except the very first rev of a chain
1017 let last_rev = revs[end - 1].0;
1017 let last_rev = revs[end - 1].0;
1018 if last_rev.0 < self.len() as BaseRevision {
1018 if last_rev.0 < self.len() as BaseRevision {
1019 while end > 1
1019 while end > 1
1020 && end > start
1020 && end > start
1021 && revs[end - 1].1.compressed_len() == 0
1021 && revs[end - 1].1.compressed_len() == 0
1022 {
1022 {
1023 end -= 1
1023 end -= 1
1024 }
1024 }
1025 }
1025 }
1026 &revs[start..end]
1026 &revs[start..end]
1027 }
1027 }
1028
1028
    /// Computes the set of revisions for each non-public phase from `roots`,
    /// which are the last known roots for each non-public phase.
    pub fn compute_phases_map_sets(
        &self,
        roots: HashMap<Phase, Vec<Revision>>,
    ) -> Result<(usize, RootsPerPhase), GraphError> {
        // Start with every revision public; roots and their descendants
        // are upgraded below.
        let mut phases = vec![Phase::Public; self.len()];
        // Lowest revision that can possibly be non-public
        // (NULL_REVISION means "none found yet").
        let mut min_phase_rev = NULL_REVISION;

        for phase in Phase::non_public_phases() {
            if let Some(phase_roots) = roots.get(phase) {
                // Mark the roots with their phase and track the overall
                // minimum non-public revision.
                let min_rev =
                    self.add_roots_get_min(phase_roots, &mut phases, *phase);
                if min_rev != NULL_REVISION
                    && (min_phase_rev == NULL_REVISION
                        || min_rev < min_phase_rev)
                {
                    min_phase_rev = min_rev;
                }
            } else {
                continue;
            };
        }
        let mut phase_sets: RootsPerPhase = Default::default();

        if min_phase_rev == NULL_REVISION {
            // No non-public root at all: the sweep below is empty.
            min_phase_rev = Revision(self.len() as BaseRevision);
        }

        // Sweep forward from the first possibly-non-public revision,
        // propagating the "highest" parent phase to each child
        // (phases are ordered, so a child can never be more public
        // than its parents).
        for rev in min_phase_rev.0..self.len() as BaseRevision {
            let rev = Revision(rev);
            let [p1, p2] = self.parents(rev)?;

            if p1.0 >= 0 && phases[p1.0 as usize] > phases[rev.0 as usize] {
                phases[rev.0 as usize] = phases[p1.0 as usize];
            }
            if p2.0 >= 0 && phases[p2.0 as usize] > phases[rev.0 as usize] {
                phases[rev.0 as usize] = phases[p2.0 as usize];
            }
            // Record the revision in the set of its (non-public) phase.
            // Index 0 of `phase_sets` is the first non-public phase.
            let set = match phases[rev.0 as usize] {
                Phase::Public => continue,
                phase => &mut phase_sets[phase as usize - 1],
            };
            set.push(rev);
        }

        Ok((self.len(), phase_sets))
    }
1077
1077
1078 fn add_roots_get_min(
1078 fn add_roots_get_min(
1079 &self,
1079 &self,
1080 phase_roots: &[Revision],
1080 phase_roots: &[Revision],
1081 phases: &mut [Phase],
1081 phases: &mut [Phase],
1082 phase: Phase,
1082 phase: Phase,
1083 ) -> Revision {
1083 ) -> Revision {
1084 let mut min_rev = NULL_REVISION;
1084 let mut min_rev = NULL_REVISION;
1085
1085
1086 for root in phase_roots {
1086 for root in phase_roots {
1087 phases[root.0 as usize] = phase;
1087 phases[root.0 as usize] = phase;
1088 if min_rev == NULL_REVISION || min_rev > *root {
1088 if min_rev == NULL_REVISION || min_rev > *root {
1089 min_rev = *root;
1089 min_rev = *root;
1090 }
1090 }
1091 }
1091 }
1092 min_rev
1092 min_rev
1093 }
1093 }
1094
1094
1095 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1095 /// Return `(heads(::(<roots> and <roots>::<heads>)))`
1096 /// If `include_path` is `true`, return `(<roots>::<heads>)`."""
1096 /// If `include_path` is `true`, return `(<roots>::<heads>)`."""
1097 ///
1097 ///
1098 /// `min_root` and `roots` are unchecked since they are just used as
1098 /// `min_root` and `roots` are unchecked since they are just used as
1099 /// a bound or for comparison and don't need to represent a valid revision.
1099 /// a bound or for comparison and don't need to represent a valid revision.
1100 /// In practice, the only invalid revision passed is the working directory
1100 /// In practice, the only invalid revision passed is the working directory
1101 /// revision ([`i32::MAX`]).
1101 /// revision ([`i32::MAX`]).
1102 pub fn reachable_roots(
1102 pub fn reachable_roots(
1103 &self,
1103 &self,
1104 min_root: UncheckedRevision,
1104 min_root: UncheckedRevision,
1105 mut heads: Vec<Revision>,
1105 mut heads: Vec<Revision>,
1106 roots: HashSet<UncheckedRevision>,
1106 roots: HashSet<UncheckedRevision>,
1107 include_path: bool,
1107 include_path: bool,
1108 ) -> Result<HashSet<Revision>, GraphError> {
1108 ) -> Result<HashSet<Revision>, GraphError> {
1109 if roots.is_empty() {
1109 if roots.is_empty() {
1110 return Ok(HashSet::new());
1110 return Ok(HashSet::new());
1111 }
1111 }
1112 let mut reachable = HashSet::new();
1112 let mut reachable = HashSet::new();
1113 let mut seen = HashMap::new();
1113 let mut seen = HashMap::new();
1114
1114
1115 while let Some(rev) = heads.pop() {
1115 while let Some(rev) = heads.pop() {
1116 if roots.contains(&rev.into()) {
1116 if roots.contains(&rev.into()) {
1117 reachable.insert(rev);
1117 reachable.insert(rev);
1118 if !include_path {
1118 if !include_path {
1119 continue;
1119 continue;
1120 }
1120 }
1121 }
1121 }
1122 let parents = self.parents(rev)?;
1122 let parents = self.parents(rev)?;
1123 seen.insert(rev, parents);
1123 seen.insert(rev, parents);
1124 for parent in parents {
1124 for parent in parents {
1125 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1125 if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
1126 heads.push(parent);
1126 heads.push(parent);
1127 }
1127 }
1128 }
1128 }
1129 }
1129 }
1130 if !include_path {
1130 if !include_path {
1131 return Ok(reachable);
1131 return Ok(reachable);
1132 }
1132 }
1133 let mut revs: Vec<_> = seen.keys().collect();
1133 let mut revs: Vec<_> = seen.keys().collect();
1134 revs.sort_unstable();
1134 revs.sort_unstable();
1135 for rev in revs {
1135 for rev in revs {
1136 for parent in seen[rev] {
1136 for parent in seen[rev] {
1137 if reachable.contains(&parent) {
1137 if reachable.contains(&parent) {
1138 reachable.insert(*rev);
1138 reachable.insert(*rev);
1139 }
1139 }
1140 }
1140 }
1141 }
1141 }
1142 Ok(reachable)
1142 Ok(reachable)
1143 }
1143 }
1144
1144
1145 /// Given a (possibly overlapping) set of revs, return all the
1145 /// Given a (possibly overlapping) set of revs, return all the
1146 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1146 /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
1147 pub fn common_ancestor_heads(
1147 pub fn common_ancestor_heads(
1148 &self,
1148 &self,
1149 revisions: &[Revision],
1149 revisions: &[Revision],
1150 ) -> Result<Vec<Revision>, GraphError> {
1150 ) -> Result<Vec<Revision>, GraphError> {
1151 // given that revisions is expected to be small, we find this shortcut
1151 // given that revisions is expected to be small, we find this shortcut
1152 // potentially acceptable, especially given that `hg-cpython` could
1152 // potentially acceptable, especially given that `hg-cpython` could
1153 // very much bypass this, constructing a vector of unique values from
1153 // very much bypass this, constructing a vector of unique values from
1154 // the onset.
1154 // the onset.
1155 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1155 let as_set: HashSet<Revision> = revisions.iter().copied().collect();
1156 // Besides deduplicating, the C version also implements the shortcut
1156 // Besides deduplicating, the C version also implements the shortcut
1157 // for `NULL_REVISION`:
1157 // for `NULL_REVISION`:
1158 if as_set.contains(&NULL_REVISION) {
1158 if as_set.contains(&NULL_REVISION) {
1159 return Ok(vec![]);
1159 return Ok(vec![]);
1160 }
1160 }
1161
1161
1162 let revisions: Vec<Revision> = as_set.into_iter().collect();
1162 let revisions: Vec<Revision> = as_set.into_iter().collect();
1163
1163
1164 if revisions.len() < 8 {
1164 if revisions.len() < 8 {
1165 self.find_gca_candidates::<u8>(&revisions)
1165 self.find_gca_candidates::<u8>(&revisions)
1166 } else if revisions.len() < 64 {
1166 } else if revisions.len() < 64 {
1167 self.find_gca_candidates::<u64>(&revisions)
1167 self.find_gca_candidates::<u64>(&revisions)
1168 } else {
1168 } else {
1169 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1169 self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
1170 }
1170 }
1171 }
1171 }
1172
1172
1173 pub fn ancestors(
1173 pub fn ancestors(
1174 &self,
1174 &self,
1175 revisions: &[Revision],
1175 revisions: &[Revision],
1176 ) -> Result<Vec<Revision>, GraphError> {
1176 ) -> Result<Vec<Revision>, GraphError> {
1177 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1177 self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
1178 }
1178 }
1179
1179
    /// Given a disjoint set of revs, return all candidates for the
    /// greatest common ancestor. In revset notation, this is the set
    /// `heads(::a and ::b and ...)`
    fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
        &self,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        if revs.is_empty() {
            return Ok(vec![]);
        }
        let revcount = revs.len();
        let mut candidates = vec![];
        let max_rev = revs.iter().max().unwrap();

        // One bit set per revision up to `max_rev`; bit `i` of `seen[r]`
        // means "rev `r` is an ancestor of `revs[i]`".
        let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);

        for (idx, rev) in revs.iter().enumerate() {
            seen[rev.0 as usize].add(idx);
        }
        let mut current_rev = *max_rev;
        // Number of revisions whose inspection in the main loop
        // will give a result or trigger inspection of other revisions
        let mut interesting = revcount;

        // The algorithm works on a vector of bit sets, indexed by revision
        // numbers and iterated in reverse order.
        // An entry in this vector is poisoned if and only if the corresponding
        // revision is a common, yet not maximal ancestor.

        // The principle of the algorithm is as follows:
        // For a revision `r`, when entering the loop, `seen[r]` is either
        // poisoned or the sub set of `revs` of which `r` is an ancestor.
        // If this sub set is full, then `r` is a solution and its parents
        // have to be poisoned.
        //
        // At each iteration, the bit sets of the parents are updated by
        // union with `seen[r]`.
        // As we walk the index from the end, we are sure we have encountered
        // all children of `r` before `r`, hence we know that `seen[r]` is
        // fully computed.
        //
        // On top of that there are several optimizations that make reading
        // less obvious than the comment above:
        // - The `interesting` counter allows to break early
        // - The loop starts from `max(revs)`
        // - Early return in case it is detected that one of the incoming revs
        //   is a common ancestor of all of them.
        while current_rev.0 >= 0 && interesting > 0 {
            let current_seen = seen[current_rev.0 as usize].clone();

            if current_seen.is_empty() {
                // Not an ancestor of any input rev: nothing to propagate.
                current_rev = Revision(current_rev.0 - 1);
                continue;
            }
            let mut poison = current_seen.is_poisoned();
            if !poison {
                interesting -= 1;
                if current_seen.is_full_range(revcount) {
                    // Ancestor of all input revs: a GCA candidate.
                    candidates.push(current_rev);
                    poison = true;

                    // Being a common ancestor, if `current_rev` is among
                    // the input revisions, it is *the* answer.
                    for rev in revs {
                        if *rev == current_rev {
                            return Ok(candidates);
                        }
                    }
                }
            }
            // Propagate either the poison or the ancestry bits to both
            // parents.
            for parent in self.parents(current_rev)? {
                if parent == NULL_REVISION {
                    continue;
                }
                let parent_seen = &mut seen[parent.0 as usize];
                if poison {
                    // this block is logically equivalent to poisoning parent
                    // and counting it as non interesting if it
                    // has been seen before (hence counted then as interesting)
                    if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
                        interesting -= 1;
                    }
                    parent_seen.poison();
                } else {
                    if parent_seen.is_empty() {
                        interesting += 1;
                    }
                    parent_seen.union(&current_seen);
                }
            }

            current_rev = Revision(current_rev.0 - 1);
        }

        Ok(candidates)
    }
1276
1276
    /// Given a disjoint set of revs, return the subset with the longest path
    /// to the root.
    fn find_deepest_revs(
        &self,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        // TODO replace this all with just comparing rank?
        // Also, the original implementations in C/Python are cryptic, not
        // even sure we actually need this?
        if revs.len() <= 1 {
            return Ok(revs.to_owned());
        }
        let max_rev = revs.iter().max().unwrap().0;
        // `interesting` counts, for each bit mask of input revs, how many
        // revisions currently carry that mask in `seen`.
        let mut interesting = HashMap::new();
        // `seen[r]`: bit mask of the input revs that `r` descends from
        // (0 = none). `depth[r]`: distance from the nearest input rev
        // (0 = not on any path).
        let mut seen = vec![0; max_rev as usize + 1];
        let mut depth = vec![0; max_rev as usize + 1];
        // Remember which bit was assigned to which input rev, to decode
        // the winning mask at the end.
        let mut mapping = vec![];
        let mut revs = revs.to_owned();
        revs.sort_unstable();

        for (idx, rev) in revs.iter().enumerate() {
            depth[rev.0 as usize] = 1;
            let shift = 1 << idx;
            seen[rev.0 as usize] = shift;
            interesting.insert(shift, 1);
            mapping.push((shift, *rev));
        }

        // Walk the index backwards; the loop ends once all surviving
        // revisions share a single mask (or the root is reached).
        let mut current_rev = Revision(max_rev);
        while current_rev.0 >= 0 && interesting.len() > 1 {
            let current_depth = depth[current_rev.0 as usize];
            if current_depth == 0 {
                // Not on a path from any input rev: skip.
                current_rev = Revision(current_rev.0 - 1);
                continue;
            }

            let current_seen = seen[current_rev.0 as usize];
            for parent in self.parents(current_rev)? {
                if parent == NULL_REVISION {
                    continue;
                }
                let parent_seen = seen[parent.0 as usize];
                let parent_depth = depth[parent.0 as usize];
                if parent_depth <= current_depth {
                    // The path through `current_rev` is at least as deep:
                    // the parent inherits this mask and depth + 1.
                    depth[parent.0 as usize] = current_depth + 1;
                    if parent_seen != current_seen {
                        *interesting.get_mut(&current_seen).unwrap() += 1;
                        seen[parent.0 as usize] = current_seen;
                        if parent_seen != 0 {
                            // The parent's previous mask loses a carrier.
                            let parent_interesting =
                                interesting.get_mut(&parent_seen).unwrap();
                            *parent_interesting -= 1;
                            if *parent_interesting == 0 {
                                interesting.remove(&parent_seen);
                            }
                        }
                    }
                } else if current_depth == parent_depth - 1 {
                    // Same-depth merge point: combine both masks.
                    let either_seen = parent_seen | current_seen;
                    if either_seen == parent_seen {
                        continue;
                    }
                    seen[parent.0 as usize] = either_seen;
                    interesting
                        .entry(either_seen)
                        .and_modify(|v| *v += 1)
                        .or_insert(1);
                    *interesting.get_mut(&parent_seen).unwrap() -= 1;
                    if interesting[&parent_seen] == 0 {
                        interesting.remove(&parent_seen);
                    }
                }
            }
            // `current_rev` itself is done carrying its mask.
            *interesting.get_mut(&current_seen).unwrap() -= 1;
            if interesting[&current_seen] == 0 {
                interesting.remove(&current_seen);
            }

            current_rev = Revision(current_rev.0 - 1);
        }

        if interesting.len() != 1 {
            return Ok(vec![]);
        }
        // The surviving mask identifies the deepest input revs.
        let mask = interesting.keys().next().unwrap();

        Ok(mapping
            .into_iter()
            .filter_map(|(shift, rev)| {
                if (mask & shift) != 0 {
                    return Some(rev);
                }
                None
            })
            .collect())
    }
1373 }
1373 }
1374
1374
1375 /// The kind of functionality needed by find_gca_candidates
1375 /// The kind of functionality needed by find_gca_candidates
1376 ///
1376 ///
1377 /// This is a bit mask which can be declared to be "poisoned", which callers
1377 /// This is a bit mask which can be declared to be "poisoned", which callers
1378 /// interpret to break out of some loops.
1378 /// interpret to break out of some loops.
1379 ///
1379 ///
1380 /// The maximum capacity of the bit mask is up to the actual implementation
1380 /// The maximum capacity of the bit mask is up to the actual implementation
1381 trait PoisonableBitSet: Sized + PartialEq {
1381 trait PoisonableBitSet: Sized + PartialEq {
1382 /// Return a vector of exactly n elements, initialized to be empty.
1382 /// Return a vector of exactly n elements, initialized to be empty.
1383 ///
1383 ///
1384 /// Optimization can vastly depend on implementation. Those being `Copy`
1384 /// Optimization can vastly depend on implementation. Those being `Copy`
1385 /// and having constant capacity typically can have a very simple
1385 /// and having constant capacity typically can have a very simple
1386 /// implementation.
1386 /// implementation.
1387 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1387 fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
1388
1388
1389 /// The size of the bit mask in memory
1389 /// The size of the bit mask in memory
1390 #[allow(unused)]
1390 fn size(&self) -> usize;
1391 fn size(&self) -> usize;
1391
1392
1392 /// The number of elements that can be represented in the set.
1393 /// The number of elements that can be represented in the set.
1393 ///
1394 ///
1394 /// Another way to put it is that it is the highest integer `C` such that
1395 /// Another way to put it is that it is the highest integer `C` such that
1395 /// the set is guaranteed to always be a subset of the integer range
1396 /// the set is guaranteed to always be a subset of the integer range
1396 /// `[0, C)`
1397 /// `[0, C)`
1398 #[allow(unused)]
1397 fn capacity(&self) -> usize;
1399 fn capacity(&self) -> usize;
1398
1400
1399 /// Declare `n` to belong to the set
1401 /// Declare `n` to belong to the set
1400 fn add(&mut self, n: usize);
1402 fn add(&mut self, n: usize);
1401
1403
1402 /// Declare `n` not to belong to the set
1404 /// Declare `n` not to belong to the set
1405 #[allow(unused)]
1403 fn discard(&mut self, n: usize);
1406 fn discard(&mut self, n: usize);
1404
1407
1405 /// Replace this bit set by its union with other
1408 /// Replace this bit set by its union with other
1406 fn union(&mut self, other: &Self);
1409 fn union(&mut self, other: &Self);
1407
1410
1408 /// Poison the bit set
1411 /// Poison the bit set
1409 ///
1412 ///
1410 /// Interpretation up to the caller
1413 /// Interpretation up to the caller
1411 fn poison(&mut self);
1414 fn poison(&mut self);
1412
1415
1413 /// Is the bit set poisoned?
1416 /// Is the bit set poisoned?
1414 ///
1417 ///
1415 /// Interpretation is up to the caller
1418 /// Interpretation is up to the caller
1416 fn is_poisoned(&self) -> bool;
1419 fn is_poisoned(&self) -> bool;
1417
1420
1418 /// Is the bit set empty?
1421 /// Is the bit set empty?
1419 fn is_empty(&self) -> bool;
1422 fn is_empty(&self) -> bool;
1420
1423
1421 /// return `true` if and only if the bit is the full range `[0, n)`
1424 /// return `true` if and only if the bit is the full range `[0, n)`
1422 /// of integers
1425 /// of integers
1423 fn is_full_range(&self, n: usize) -> bool;
1426 fn is_full_range(&self, n: usize) -> bool;
1424 }
1427 }
1425
1428
1426 const U64_POISON: u64 = 1 << 63;
1429 const U64_POISON: u64 = 1 << 63;
1427 const U8_POISON: u8 = 1 << 7;
1430 const U8_POISON: u8 = 1 << 7;
1428
1431
1429 impl PoisonableBitSet for u64 {
1432 impl PoisonableBitSet for u64 {
1430 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1433 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1431 vec![0u64; vec_len]
1434 vec![0u64; vec_len]
1432 }
1435 }
1433
1436
1434 fn size(&self) -> usize {
1437 fn size(&self) -> usize {
1435 8
1438 8
1436 }
1439 }
1437
1440
1438 fn capacity(&self) -> usize {
1441 fn capacity(&self) -> usize {
1439 63
1442 63
1440 }
1443 }
1441
1444
1442 fn add(&mut self, n: usize) {
1445 fn add(&mut self, n: usize) {
1443 (*self) |= 1u64 << n;
1446 (*self) |= 1u64 << n;
1444 }
1447 }
1445
1448
1446 fn discard(&mut self, n: usize) {
1449 fn discard(&mut self, n: usize) {
1447 (*self) &= u64::MAX - (1u64 << n);
1450 (*self) &= u64::MAX - (1u64 << n);
1448 }
1451 }
1449
1452
1450 fn union(&mut self, other: &Self) {
1453 fn union(&mut self, other: &Self) {
1451 if *self != *other {
1454 if *self != *other {
1452 (*self) |= *other;
1455 (*self) |= *other;
1453 }
1456 }
1454 }
1457 }
1455
1458
1456 fn is_full_range(&self, n: usize) -> bool {
1459 fn is_full_range(&self, n: usize) -> bool {
1457 *self + 1 == (1u64 << n)
1460 *self + 1 == (1u64 << n)
1458 }
1461 }
1459
1462
1460 fn is_empty(&self) -> bool {
1463 fn is_empty(&self) -> bool {
1461 *self == 0
1464 *self == 0
1462 }
1465 }
1463
1466
1464 fn poison(&mut self) {
1467 fn poison(&mut self) {
1465 *self = U64_POISON;
1468 *self = U64_POISON;
1466 }
1469 }
1467
1470
1468 fn is_poisoned(&self) -> bool {
1471 fn is_poisoned(&self) -> bool {
1469 // equality comparison would be tempting but would not resist
1472 // equality comparison would be tempting but would not resist
1470 // operations after poisoning (even if these should be bogus).
1473 // operations after poisoning (even if these should be bogus).
1471 *self >= U64_POISON
1474 *self >= U64_POISON
1472 }
1475 }
1473 }
1476 }
1474
1477
1475 impl PoisonableBitSet for u8 {
1478 impl PoisonableBitSet for u8 {
1476 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1479 fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
1477 vec![0; vec_len]
1480 vec![0; vec_len]
1478 }
1481 }
1479
1482
1480 fn size(&self) -> usize {
1483 fn size(&self) -> usize {
1481 1
1484 1
1482 }
1485 }
1483
1486
1484 fn capacity(&self) -> usize {
1487 fn capacity(&self) -> usize {
1485 7
1488 7
1486 }
1489 }
1487
1490
1488 fn add(&mut self, n: usize) {
1491 fn add(&mut self, n: usize) {
1489 (*self) |= 1 << n;
1492 (*self) |= 1 << n;
1490 }
1493 }
1491
1494
1492 fn discard(&mut self, n: usize) {
1495 fn discard(&mut self, n: usize) {
1493 (*self) &= u8::MAX - (1 << n);
1496 (*self) &= u8::MAX - (1 << n);
1494 }
1497 }
1495
1498
1496 fn union(&mut self, other: &Self) {
1499 fn union(&mut self, other: &Self) {
1497 if *self != *other {
1500 if *self != *other {
1498 (*self) |= *other;
1501 (*self) |= *other;
1499 }
1502 }
1500 }
1503 }
1501
1504
1502 fn is_full_range(&self, n: usize) -> bool {
1505 fn is_full_range(&self, n: usize) -> bool {
1503 *self + 1 == (1 << n)
1506 *self + 1 == (1 << n)
1504 }
1507 }
1505
1508
1506 fn is_empty(&self) -> bool {
1509 fn is_empty(&self) -> bool {
1507 *self == 0
1510 *self == 0
1508 }
1511 }
1509
1512
1510 fn poison(&mut self) {
1513 fn poison(&mut self) {
1511 *self = U8_POISON;
1514 *self = U8_POISON;
1512 }
1515 }
1513
1516
1514 fn is_poisoned(&self) -> bool {
1517 fn is_poisoned(&self) -> bool {
1515 // equality comparison would be tempting but would not resist
1518 // equality comparison would be tempting but would not resist
1516 // operations after poisoning (even if these should be bogus).
1519 // operations after poisoning (even if these should be bogus).
1517 *self >= U8_POISON
1520 *self >= U8_POISON
1518 }
1521 }
1519 }
1522 }
1520
1523
1521 /// A poisonable bit set whose capacity is not known at compile time but
1524 /// A poisonable bit set whose capacity is not known at compile time but
1522 /// is constant after initial construction
1525 /// is constant after initial construction
1523 ///
1526 ///
1524 /// This can be way further optimized if performance assessments (speed
1527 /// This can be way further optimized if performance assessments (speed
1525 /// and/or RAM) require it.
1528 /// and/or RAM) require it.
1526 /// As far as RAM is concerned, for large vectors of these, the main problem
1529 /// As far as RAM is concerned, for large vectors of these, the main problem
1527 /// would be the repetition of set_size in each item. We would need a trait
1530 /// would be the repetition of set_size in each item. We would need a trait
1528 /// to abstract over the idea of a vector of such bit sets to do better.
1531 /// to abstract over the idea of a vector of such bit sets to do better.
1529 #[derive(Clone, PartialEq)]
1532 #[derive(Clone, PartialEq)]
1530 struct NonStaticPoisonableBitSet {
1533 struct NonStaticPoisonableBitSet {
1531 set_size: usize,
1534 set_size: usize,
1532 bit_set: Vec<u64>,
1535 bit_set: Vec<u64>,
1533 }
1536 }
1534
1537
1535 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1538 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
1536 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1539 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
1537 1 + (set_size + 1) / 64
1540 1 + (set_size + 1) / 64
1538 }
1541 }
1539
1542
1540 impl NonStaticPoisonableBitSet {
1543 impl NonStaticPoisonableBitSet {
1541 /// The index of the sub-bit set for the given n, and the index inside
1544 /// The index of the sub-bit set for the given n, and the index inside
1542 /// the latter
1545 /// the latter
1543 fn index(&self, n: usize) -> (usize, usize) {
1546 fn index(&self, n: usize) -> (usize, usize) {
1544 (n / 64, n % 64)
1547 (n / 64, n % 64)
1545 }
1548 }
1546 }
1549 }
1547
1550
1548 /// Mock implementation to ensure that the trait makes sense
1551 /// Mock implementation to ensure that the trait makes sense
1549 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1552 impl PoisonableBitSet for NonStaticPoisonableBitSet {
1550 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1553 fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
1551 let tmpl = Self {
1554 let tmpl = Self {
1552 set_size,
1555 set_size,
1553 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1556 bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
1554 };
1557 };
1555 vec![tmpl; vec_len]
1558 vec![tmpl; vec_len]
1556 }
1559 }
1557
1560
1558 fn size(&self) -> usize {
1561 fn size(&self) -> usize {
1559 8 + self.bit_set.len() * 8
1562 8 + self.bit_set.len() * 8
1560 }
1563 }
1561
1564
1562 fn capacity(&self) -> usize {
1565 fn capacity(&self) -> usize {
1563 self.set_size
1566 self.set_size
1564 }
1567 }
1565
1568
1566 fn add(&mut self, n: usize) {
1569 fn add(&mut self, n: usize) {
1567 let (sub_bs, bit_pos) = self.index(n);
1570 let (sub_bs, bit_pos) = self.index(n);
1568 self.bit_set[sub_bs] |= 1 << bit_pos
1571 self.bit_set[sub_bs] |= 1 << bit_pos
1569 }
1572 }
1570
1573
1571 fn discard(&mut self, n: usize) {
1574 fn discard(&mut self, n: usize) {
1572 let (sub_bs, bit_pos) = self.index(n);
1575 let (sub_bs, bit_pos) = self.index(n);
1573 self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
1576 self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
1574 }
1577 }
1575
1578
1576 fn union(&mut self, other: &Self) {
1579 fn union(&mut self, other: &Self) {
1577 assert!(
1580 assert!(
1578 self.set_size == other.set_size,
1581 self.set_size == other.set_size,
1579 "Binary operations on bit sets can only be done on same size"
1582 "Binary operations on bit sets can only be done on same size"
1580 );
1583 );
1581 for i in 0..self.bit_set.len() - 1 {
1584 for i in 0..self.bit_set.len() - 1 {
1582 self.bit_set[i] |= other.bit_set[i]
1585 self.bit_set[i] |= other.bit_set[i]
1583 }
1586 }
1584 }
1587 }
1585
1588
1586 fn is_full_range(&self, n: usize) -> bool {
1589 fn is_full_range(&self, n: usize) -> bool {
1587 let (sub_bs, bit_pos) = self.index(n);
1590 let (sub_bs, bit_pos) = self.index(n);
1588 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1591 self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
1589 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1592 && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
1590 }
1593 }
1591
1594
1592 fn is_empty(&self) -> bool {
1595 fn is_empty(&self) -> bool {
1593 self.bit_set.iter().all(|bs| *bs == 0u64)
1596 self.bit_set.iter().all(|bs| *bs == 0u64)
1594 }
1597 }
1595
1598
1596 fn poison(&mut self) {
1599 fn poison(&mut self) {
1597 let (sub_bs, bit_pos) = self.index(self.set_size);
1600 let (sub_bs, bit_pos) = self.index(self.set_size);
1598 self.bit_set[sub_bs] = 1 << bit_pos;
1601 self.bit_set[sub_bs] = 1 << bit_pos;
1599 }
1602 }
1600
1603
1601 fn is_poisoned(&self) -> bool {
1604 fn is_poisoned(&self) -> bool {
1602 let (sub_bs, bit_pos) = self.index(self.set_size);
1605 let (sub_bs, bit_pos) = self.index(self.set_size);
1603 self.bit_set[sub_bs] >= 1 << bit_pos
1606 self.bit_set[sub_bs] >= 1 << bit_pos
1604 }
1607 }
1605 }
1608 }
1606
1609
1607 /// Set of roots of all non-public phases
1610 /// Set of roots of all non-public phases
1608 pub type RootsPerPhase = [Vec<Revision>; Phase::non_public_phases().len()];
1611 pub type RootsPerPhase = [Vec<Revision>; Phase::non_public_phases().len()];
1609
1612
1610 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1613 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
1611 pub enum Phase {
1614 pub enum Phase {
1612 Public = 0,
1615 Public = 0,
1613 Draft = 1,
1616 Draft = 1,
1614 Secret = 2,
1617 Secret = 2,
1615 Archived = 3,
1618 Archived = 3,
1616 Internal = 4,
1619 Internal = 4,
1617 }
1620 }
1618
1621
1619 impl TryFrom<usize> for Phase {
1622 impl TryFrom<usize> for Phase {
1620 type Error = RevlogError;
1623 type Error = RevlogError;
1621
1624
1622 fn try_from(value: usize) -> Result<Self, Self::Error> {
1625 fn try_from(value: usize) -> Result<Self, Self::Error> {
1623 Ok(match value {
1626 Ok(match value {
1624 0 => Self::Public,
1627 0 => Self::Public,
1625 1 => Self::Draft,
1628 1 => Self::Draft,
1626 2 => Self::Secret,
1629 2 => Self::Secret,
1627 32 => Self::Archived,
1630 32 => Self::Archived,
1628 96 => Self::Internal,
1631 96 => Self::Internal,
1629 v => {
1632 v => {
1630 return Err(RevlogError::corrupted(format!(
1633 return Err(RevlogError::corrupted(format!(
1631 "invalid phase value {}",
1634 "invalid phase value {}",
1632 v
1635 v
1633 )))
1636 )))
1634 }
1637 }
1635 })
1638 })
1636 }
1639 }
1637 }
1640 }
1638
1641
1639 impl Phase {
1642 impl Phase {
1640 pub const fn all_phases() -> &'static [Self] {
1643 pub const fn all_phases() -> &'static [Self] {
1641 &[
1644 &[
1642 Self::Public,
1645 Self::Public,
1643 Self::Draft,
1646 Self::Draft,
1644 Self::Secret,
1647 Self::Secret,
1645 Self::Archived,
1648 Self::Archived,
1646 Self::Internal,
1649 Self::Internal,
1647 ]
1650 ]
1648 }
1651 }
1649 pub const fn non_public_phases() -> &'static [Self] {
1652 pub const fn non_public_phases() -> &'static [Self] {
1650 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1653 &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
1651 }
1654 }
1652 }
1655 }
1653
1656
1654 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1657 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
1655 let mut offset: usize = 0;
1658 let mut offset: usize = 0;
1656 let mut offsets = Vec::new();
1659 let mut offsets = Vec::new();
1657
1660
1658 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1661 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
1659 offsets.push(offset);
1662 offsets.push(offset);
1660 let end = offset + INDEX_ENTRY_SIZE;
1663 let end = offset + INDEX_ENTRY_SIZE;
1661 let entry = IndexEntry {
1664 let entry = IndexEntry {
1662 bytes: &bytes[offset..end],
1665 bytes: &bytes[offset..end],
1663 };
1666 };
1664
1667
1665 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1668 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
1666 }
1669 }
1667 (offset, offsets)
1670 (offset, offsets)
1668 }
1671 }
1669
1672
1670 impl super::RevlogIndex for Index {
1673 impl super::RevlogIndex for Index {
1671 fn len(&self) -> usize {
1674 fn len(&self) -> usize {
1672 self.len()
1675 self.len()
1673 }
1676 }
1674
1677
1675 fn node(&self, rev: Revision) -> Option<&Node> {
1678 fn node(&self, rev: Revision) -> Option<&Node> {
1676 if rev == NULL_REVISION {
1679 if rev == NULL_REVISION {
1677 return Some(&NULL_NODE);
1680 return Some(&NULL_NODE);
1678 }
1681 }
1679 self.get_entry(rev).map(|entry| entry.hash())
1682 self.get_entry(rev).map(|entry| entry.hash())
1680 }
1683 }
1681 }
1684 }
1682
1685
1683 #[derive(Debug)]
1686 #[derive(Debug)]
1684 pub struct IndexEntry<'a> {
1687 pub struct IndexEntry<'a> {
1685 bytes: &'a [u8],
1688 bytes: &'a [u8],
1686 }
1689 }
1687
1690
1688 impl<'a> IndexEntry<'a> {
1691 impl<'a> IndexEntry<'a> {
1689 /// Return the offset of the data.
1692 /// Return the offset of the data.
1690 pub fn offset(&self) -> usize {
1693 pub fn offset(&self) -> usize {
1691 let mut bytes = [0; 8];
1694 let mut bytes = [0; 8];
1692 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1695 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
1693 BigEndian::read_u64(&bytes[..]) as usize
1696 BigEndian::read_u64(&bytes[..]) as usize
1694 }
1697 }
1695 pub fn raw_offset(&self) -> u64 {
1698 pub fn raw_offset(&self) -> u64 {
1696 BigEndian::read_u64(&self.bytes[0..8])
1699 BigEndian::read_u64(&self.bytes[0..8])
1697 }
1700 }
1698
1701
1699 /// Same result (except potentially for rev 0) as C `index_get_start()`
1702 /// Same result (except potentially for rev 0) as C `index_get_start()`
1700 fn c_start(&self) -> u64 {
1703 fn c_start(&self) -> u64 {
1701 self.raw_offset() >> 16
1704 self.raw_offset() >> 16
1702 }
1705 }
1703
1706
1704 pub fn flags(&self) -> u16 {
1707 pub fn flags(&self) -> u16 {
1705 BigEndian::read_u16(&self.bytes[6..=7])
1708 BigEndian::read_u16(&self.bytes[6..=7])
1706 }
1709 }
1707
1710
1708 /// Return the compressed length of the data.
1711 /// Return the compressed length of the data.
1709 pub fn compressed_len(&self) -> u32 {
1712 pub fn compressed_len(&self) -> u32 {
1710 BigEndian::read_u32(&self.bytes[8..=11])
1713 BigEndian::read_u32(&self.bytes[8..=11])
1711 }
1714 }
1712
1715
1713 /// Return the uncompressed length of the data.
1716 /// Return the uncompressed length of the data.
1714 pub fn uncompressed_len(&self) -> i32 {
1717 pub fn uncompressed_len(&self) -> i32 {
1715 BigEndian::read_i32(&self.bytes[12..=15])
1718 BigEndian::read_i32(&self.bytes[12..=15])
1716 }
1719 }
1717
1720
1718 /// Return the revision upon which the data has been derived.
1721 /// Return the revision upon which the data has been derived.
1719 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1722 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
1720 // TODO Maybe return an Option when base_revision == rev?
1723 // TODO Maybe return an Option when base_revision == rev?
1721 // Requires to add rev to IndexEntry
1724 // Requires to add rev to IndexEntry
1722
1725
1723 BigEndian::read_i32(&self.bytes[16..]).into()
1726 BigEndian::read_i32(&self.bytes[16..]).into()
1724 }
1727 }
1725
1728
1726 pub fn link_revision(&self) -> UncheckedRevision {
1729 pub fn link_revision(&self) -> UncheckedRevision {
1727 BigEndian::read_i32(&self.bytes[20..]).into()
1730 BigEndian::read_i32(&self.bytes[20..]).into()
1728 }
1731 }
1729
1732
1730 pub fn p1(&self) -> UncheckedRevision {
1733 pub fn p1(&self) -> UncheckedRevision {
1731 BigEndian::read_i32(&self.bytes[24..]).into()
1734 BigEndian::read_i32(&self.bytes[24..]).into()
1732 }
1735 }
1733
1736
1734 pub fn p2(&self) -> UncheckedRevision {
1737 pub fn p2(&self) -> UncheckedRevision {
1735 BigEndian::read_i32(&self.bytes[28..]).into()
1738 BigEndian::read_i32(&self.bytes[28..]).into()
1736 }
1739 }
1737
1740
1738 /// Return the hash of revision's full text.
1741 /// Return the hash of revision's full text.
1739 ///
1742 ///
1740 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1743 /// Currently, SHA-1 is used and only the first 20 bytes of this field
1741 /// are used.
1744 /// are used.
1742 pub fn hash(&self) -> &'a Node {
1745 pub fn hash(&self) -> &'a Node {
1743 (&self.bytes[32..52]).try_into().unwrap()
1746 (&self.bytes[32..52]).try_into().unwrap()
1744 }
1747 }
1745
1748
1746 pub fn as_bytes(&self) -> &'a [u8] {
1749 pub fn as_bytes(&self) -> &'a [u8] {
1747 self.bytes
1750 self.bytes
1748 }
1751 }
1749 }
1752 }
1750
1753
1751 #[cfg(test)]
1754 #[cfg(test)]
1755 pub use tests::IndexEntryBuilder;
1756
1757 #[cfg(test)]
1752 mod tests {
1758 mod tests {
1753 use super::*;
1759 use super::*;
1754 use crate::node::NULL_NODE;
1760 use crate::node::NULL_NODE;
1755
1761
1756 #[cfg(test)]
1762 #[cfg(test)]
1757 #[derive(Debug, Copy, Clone)]
1763 #[derive(Debug, Copy, Clone)]
1758 pub struct IndexEntryBuilder {
1764 pub struct IndexEntryBuilder {
1759 is_first: bool,
1765 is_first: bool,
1760 is_inline: bool,
1766 is_inline: bool,
1761 is_general_delta: bool,
1767 is_general_delta: bool,
1762 version: u16,
1768 version: u16,
1763 offset: usize,
1769 offset: usize,
1764 compressed_len: usize,
1770 compressed_len: usize,
1765 uncompressed_len: usize,
1771 uncompressed_len: usize,
1766 base_revision_or_base_of_delta_chain: Revision,
1772 base_revision_or_base_of_delta_chain: Revision,
1767 link_revision: Revision,
1773 link_revision: Revision,
1768 p1: Revision,
1774 p1: Revision,
1769 p2: Revision,
1775 p2: Revision,
1770 node: Node,
1776 node: Node,
1771 }
1777 }
1772
1778
1773 #[cfg(test)]
1779 #[cfg(test)]
1774 impl IndexEntryBuilder {
1780 impl IndexEntryBuilder {
1775 #[allow(clippy::new_without_default)]
1781 #[allow(clippy::new_without_default)]
1776 pub fn new() -> Self {
1782 pub fn new() -> Self {
1777 Self {
1783 Self {
1778 is_first: false,
1784 is_first: false,
1779 is_inline: false,
1785 is_inline: false,
1780 is_general_delta: true,
1786 is_general_delta: true,
1781 version: 1,
1787 version: 1,
1782 offset: 0,
1788 offset: 0,
1783 compressed_len: 0,
1789 compressed_len: 0,
1784 uncompressed_len: 0,
1790 uncompressed_len: 0,
1785 base_revision_or_base_of_delta_chain: Revision(0),
1791 base_revision_or_base_of_delta_chain: Revision(0),
1786 link_revision: Revision(0),
1792 link_revision: Revision(0),
1787 p1: NULL_REVISION,
1793 p1: NULL_REVISION,
1788 p2: NULL_REVISION,
1794 p2: NULL_REVISION,
1789 node: NULL_NODE,
1795 node: NULL_NODE,
1790 }
1796 }
1791 }
1797 }
1792
1798
1793 pub fn is_first(&mut self, value: bool) -> &mut Self {
1799 pub fn is_first(&mut self, value: bool) -> &mut Self {
1794 self.is_first = value;
1800 self.is_first = value;
1795 self
1801 self
1796 }
1802 }
1797
1803
1798 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1804 pub fn with_inline(&mut self, value: bool) -> &mut Self {
1799 self.is_inline = value;
1805 self.is_inline = value;
1800 self
1806 self
1801 }
1807 }
1802
1808
1803 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1809 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
1804 self.is_general_delta = value;
1810 self.is_general_delta = value;
1805 self
1811 self
1806 }
1812 }
1807
1813
1808 pub fn with_version(&mut self, value: u16) -> &mut Self {
1814 pub fn with_version(&mut self, value: u16) -> &mut Self {
1809 self.version = value;
1815 self.version = value;
1810 self
1816 self
1811 }
1817 }
1812
1818
1813 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1819 pub fn with_offset(&mut self, value: usize) -> &mut Self {
1814 self.offset = value;
1820 self.offset = value;
1815 self
1821 self
1816 }
1822 }
1817
1823
1818 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1824 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
1819 self.compressed_len = value;
1825 self.compressed_len = value;
1820 self
1826 self
1821 }
1827 }
1822
1828
1823 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1829 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
1824 self.uncompressed_len = value;
1830 self.uncompressed_len = value;
1825 self
1831 self
1826 }
1832 }
1827
1833
1828 pub fn with_base_revision_or_base_of_delta_chain(
1834 pub fn with_base_revision_or_base_of_delta_chain(
1829 &mut self,
1835 &mut self,
1830 value: Revision,
1836 value: Revision,
1831 ) -> &mut Self {
1837 ) -> &mut Self {
1832 self.base_revision_or_base_of_delta_chain = value;
1838 self.base_revision_or_base_of_delta_chain = value;
1833 self
1839 self
1834 }
1840 }
1835
1841
1836 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1842 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
1837 self.link_revision = value;
1843 self.link_revision = value;
1838 self
1844 self
1839 }
1845 }
1840
1846
1841 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1847 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
1842 self.p1 = value;
1848 self.p1 = value;
1843 self
1849 self
1844 }
1850 }
1845
1851
1846 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1852 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
1847 self.p2 = value;
1853 self.p2 = value;
1848 self
1854 self
1849 }
1855 }
1850
1856
1851 pub fn with_node(&mut self, value: Node) -> &mut Self {
1857 pub fn with_node(&mut self, value: Node) -> &mut Self {
1852 self.node = value;
1858 self.node = value;
1853 self
1859 self
1854 }
1860 }
1855
1861
1856 pub fn build(&self) -> Vec<u8> {
1862 pub fn build(&self) -> Vec<u8> {
1857 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1863 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
1858 if self.is_first {
1864 if self.is_first {
1859 bytes.extend(match (self.is_general_delta, self.is_inline) {
1865 bytes.extend(match (self.is_general_delta, self.is_inline) {
1860 (false, false) => [0u8, 0],
1866 (false, false) => [0u8, 0],
1861 (false, true) => [0u8, 1],
1867 (false, true) => [0u8, 1],
1862 (true, false) => [0u8, 2],
1868 (true, false) => [0u8, 2],
1863 (true, true) => [0u8, 3],
1869 (true, true) => [0u8, 3],
1864 });
1870 });
1865 bytes.extend(self.version.to_be_bytes());
1871 bytes.extend(self.version.to_be_bytes());
1866 // Remaining offset bytes.
1872 // Remaining offset bytes.
1867 bytes.extend([0u8; 2]);
1873 bytes.extend([0u8; 2]);
1868 } else {
1874 } else {
1869 // Offset stored on 48 bits (6 bytes)
1875 // Offset stored on 48 bits (6 bytes)
1870 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1876 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
1871 }
1877 }
1872 bytes.extend([0u8; 2]); // Revision flags.
1878 bytes.extend([0u8; 2]); // Revision flags.
1873 bytes.extend((self.compressed_len as u32).to_be_bytes());
1879 bytes.extend((self.compressed_len as u32).to_be_bytes());
1874 bytes.extend((self.uncompressed_len as u32).to_be_bytes());
1880 bytes.extend((self.uncompressed_len as u32).to_be_bytes());
1875 bytes.extend(
1881 bytes.extend(
1876 self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1882 self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
1877 );
1883 );
1878 bytes.extend(self.link_revision.0.to_be_bytes());
1884 bytes.extend(self.link_revision.0.to_be_bytes());
1879 bytes.extend(self.p1.0.to_be_bytes());
1885 bytes.extend(self.p1.0.to_be_bytes());
1880 bytes.extend(self.p2.0.to_be_bytes());
1886 bytes.extend(self.p2.0.to_be_bytes());
1881 bytes.extend(self.node.as_bytes());
1887 bytes.extend(self.node.as_bytes());
1882 bytes.extend(vec![0u8; 12]);
1888 bytes.extend(vec![0u8; 12]);
1883 bytes
1889 bytes
1884 }
1890 }
1885 }
1891 }
1886
1892
1887 pub fn is_inline(index_bytes: &[u8]) -> bool {
1893 pub fn is_inline(index_bytes: &[u8]) -> bool {
1888 IndexHeader::parse(index_bytes)
1894 IndexHeader::parse(index_bytes)
1889 .expect("too short")
1895 .expect("too short")
1890 .unwrap()
1896 .unwrap()
1891 .format_flags()
1897 .format_flags()
1892 .is_inline()
1898 .is_inline()
1893 }
1899 }
1894
1900
1895 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1901 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
1896 IndexHeader::parse(index_bytes)
1902 IndexHeader::parse(index_bytes)
1897 .expect("too short")
1903 .expect("too short")
1898 .unwrap()
1904 .unwrap()
1899 .format_flags()
1905 .format_flags()
1900 .uses_generaldelta()
1906 .uses_generaldelta()
1901 }
1907 }
1902
1908
1903 pub fn get_version(index_bytes: &[u8]) -> u16 {
1909 pub fn get_version(index_bytes: &[u8]) -> u16 {
1904 IndexHeader::parse(index_bytes)
1910 IndexHeader::parse(index_bytes)
1905 .expect("too short")
1911 .expect("too short")
1906 .unwrap()
1912 .unwrap()
1907 .format_version()
1913 .format_version()
1908 }
1914 }
1909
1915
1910 #[test]
1916 #[test]
1911 fn flags_when_no_inline_flag_test() {
1917 fn flags_when_no_inline_flag_test() {
1912 let bytes = IndexEntryBuilder::new()
1918 let bytes = IndexEntryBuilder::new()
1913 .is_first(true)
1919 .is_first(true)
1914 .with_general_delta(false)
1920 .with_general_delta(false)
1915 .with_inline(false)
1921 .with_inline(false)
1916 .build();
1922 .build();
1917
1923
1918 assert!(!is_inline(&bytes));
1924 assert!(!is_inline(&bytes));
1919 assert!(!uses_generaldelta(&bytes));
1925 assert!(!uses_generaldelta(&bytes));
1920 }
1926 }
1921
1927
1922 #[test]
1928 #[test]
1923 fn flags_when_inline_flag_test() {
1929 fn flags_when_inline_flag_test() {
1924 let bytes = IndexEntryBuilder::new()
1930 let bytes = IndexEntryBuilder::new()
1925 .is_first(true)
1931 .is_first(true)
1926 .with_general_delta(false)
1932 .with_general_delta(false)
1927 .with_inline(true)
1933 .with_inline(true)
1928 .build();
1934 .build();
1929
1935
1930 assert!(is_inline(&bytes));
1936 assert!(is_inline(&bytes));
1931 assert!(!uses_generaldelta(&bytes));
1937 assert!(!uses_generaldelta(&bytes));
1932 }
1938 }
1933
1939
1934 #[test]
1940 #[test]
1935 fn flags_when_inline_and_generaldelta_flags_test() {
1941 fn flags_when_inline_and_generaldelta_flags_test() {
1936 let bytes = IndexEntryBuilder::new()
1942 let bytes = IndexEntryBuilder::new()
1937 .is_first(true)
1943 .is_first(true)
1938 .with_general_delta(true)
1944 .with_general_delta(true)
1939 .with_inline(true)
1945 .with_inline(true)
1940 .build();
1946 .build();
1941
1947
1942 assert!(is_inline(&bytes));
1948 assert!(is_inline(&bytes));
1943 assert!(uses_generaldelta(&bytes));
1949 assert!(uses_generaldelta(&bytes));
1944 }
1950 }
1945
1951
1946 #[test]
1952 #[test]
1947 fn test_offset() {
1953 fn test_offset() {
1948 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1954 let bytes = IndexEntryBuilder::new().with_offset(1).build();
1949 let entry = IndexEntry { bytes: &bytes };
1955 let entry = IndexEntry { bytes: &bytes };
1950
1956
1951 assert_eq!(entry.offset(), 1)
1957 assert_eq!(entry.offset(), 1)
1952 }
1958 }
1953
1959
1954 #[test]
1960 #[test]
1955 fn test_compressed_len() {
1961 fn test_compressed_len() {
1956 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1962 let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
1957 let entry = IndexEntry { bytes: &bytes };
1963 let entry = IndexEntry { bytes: &bytes };
1958
1964
1959 assert_eq!(entry.compressed_len(), 1)
1965 assert_eq!(entry.compressed_len(), 1)
1960 }
1966 }
1961
1967
1962 #[test]
1968 #[test]
1963 fn test_uncompressed_len() {
1969 fn test_uncompressed_len() {
1964 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1970 let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
1965 let entry = IndexEntry { bytes: &bytes };
1971 let entry = IndexEntry { bytes: &bytes };
1966
1972
1967 assert_eq!(entry.uncompressed_len(), 1)
1973 assert_eq!(entry.uncompressed_len(), 1)
1968 }
1974 }
1969
1975
1970 #[test]
1976 #[test]
1971 fn test_base_revision_or_base_of_delta_chain() {
1977 fn test_base_revision_or_base_of_delta_chain() {
1972 let bytes = IndexEntryBuilder::new()
1978 let bytes = IndexEntryBuilder::new()
1973 .with_base_revision_or_base_of_delta_chain(Revision(1))
1979 .with_base_revision_or_base_of_delta_chain(Revision(1))
1974 .build();
1980 .build();
1975 let entry = IndexEntry { bytes: &bytes };
1981 let entry = IndexEntry { bytes: &bytes };
1976
1982
1977 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1983 assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
1978 }
1984 }
1979
1985
1980 #[test]
1986 #[test]
1981 fn link_revision_test() {
1987 fn link_revision_test() {
1982 let bytes = IndexEntryBuilder::new()
1988 let bytes = IndexEntryBuilder::new()
1983 .with_link_revision(Revision(123))
1989 .with_link_revision(Revision(123))
1984 .build();
1990 .build();
1985
1991
1986 let entry = IndexEntry { bytes: &bytes };
1992 let entry = IndexEntry { bytes: &bytes };
1987
1993
1988 assert_eq!(entry.link_revision(), 123.into());
1994 assert_eq!(entry.link_revision(), 123.into());
1989 }
1995 }
1990
1996
1991 #[test]
1997 #[test]
1992 fn p1_test() {
1998 fn p1_test() {
1993 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1999 let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();
1994
2000
1995 let entry = IndexEntry { bytes: &bytes };
2001 let entry = IndexEntry { bytes: &bytes };
1996
2002
1997 assert_eq!(entry.p1(), 123.into());
2003 assert_eq!(entry.p1(), 123.into());
1998 }
2004 }
1999
2005
2000 #[test]
2006 #[test]
2001 fn p2_test() {
2007 fn p2_test() {
2002 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
2008 let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();
2003
2009
2004 let entry = IndexEntry { bytes: &bytes };
2010 let entry = IndexEntry { bytes: &bytes };
2005
2011
2006 assert_eq!(entry.p2(), 123.into());
2012 assert_eq!(entry.p2(), 123.into());
2007 }
2013 }
2008
2014
2009 #[test]
2015 #[test]
2010 fn node_test() {
2016 fn node_test() {
2011 let node = Node::from_hex("0123456789012345678901234567890123456789")
2017 let node = Node::from_hex("0123456789012345678901234567890123456789")
2012 .unwrap();
2018 .unwrap();
2013 let bytes = IndexEntryBuilder::new().with_node(node).build();
2019 let bytes = IndexEntryBuilder::new().with_node(node).build();
2014
2020
2015 let entry = IndexEntry { bytes: &bytes };
2021 let entry = IndexEntry { bytes: &bytes };
2016
2022
2017 assert_eq!(*entry.hash(), node);
2023 assert_eq!(*entry.hash(), node);
2018 }
2024 }
2019
2025
2020 #[test]
2026 #[test]
2021 fn version_test() {
2027 fn version_test() {
2022 let bytes = IndexEntryBuilder::new()
2028 let bytes = IndexEntryBuilder::new()
2023 .is_first(true)
2029 .is_first(true)
2024 .with_version(2)
2030 .with_version(2)
2025 .build();
2031 .build();
2026
2032
2027 assert_eq!(get_version(&bytes), 2)
2033 assert_eq!(get_version(&bytes), 2)
2028 }
2034 }
2029 }
2035 }
2030
2031 #[cfg(test)]
2032 pub use tests::IndexEntryBuilder;
@@ -1,433 +1,433
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use bytes_cast::BytesCast;
12 use bytes_cast::BytesCast;
13 use std::fmt;
13 use std::fmt;
14
14
15 /// The length in bytes of a `Node`
15 /// The length in bytes of a `Node`
16 ///
16 ///
17 /// This constant is meant to ease refactors of this module, and
17 /// This constant is meant to ease refactors of this module, and
18 /// are private so that calling code does not expect all nodes have
18 /// are private so that calling code does not expect all nodes have
19 /// the same size, should we support several formats concurrently in
19 /// the same size, should we support several formats concurrently in
20 /// the future.
20 /// the future.
21 pub const NODE_BYTES_LENGTH: usize = 20;
21 pub const NODE_BYTES_LENGTH: usize = 20;
22
22
23 /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only
23 /// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only
24 /// use 20 out of those 32.
24 /// use 20 out of those 32.
25 pub const STORED_NODE_ID_BYTES: usize = 32;
25 pub const STORED_NODE_ID_BYTES: usize = 32;
26
26
27 /// Id of the null node.
27 /// Id of the null node.
28 ///
28 ///
29 /// Used to indicate the absence of node.
29 /// Used to indicate the absence of node.
30 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
30 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
31
31
32 /// The length in bytes of a `Node`
32 /// The length in bytes of a `Node`
33 ///
33 ///
34 /// see also `NODES_BYTES_LENGTH` about it being private.
34 /// see also `NODES_BYTES_LENGTH` about it being private.
35 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
35 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
36
36
37 /// Default for UI presentation
37 /// Default for UI presentation
38 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
38 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
39
39
40 /// Private alias for readability and to ease future change
40 /// Private alias for readability and to ease future change
41 type NodeData = [u8; NODE_BYTES_LENGTH];
41 type NodeData = [u8; NODE_BYTES_LENGTH];
42
42
43 /// Binary revision SHA
43 /// Binary revision SHA
44 ///
44 ///
45 /// ## Future changes of hash size
45 /// ## Future changes of hash size
46 ///
46 ///
47 /// To accomodate future changes of hash size, Rust callers
47 /// To accomodate future changes of hash size, Rust callers
48 /// should use the conversion methods at the boundaries (FFI, actual
48 /// should use the conversion methods at the boundaries (FFI, actual
49 /// computation of hashes and I/O) only, and only if required.
49 /// computation of hashes and I/O) only, and only if required.
50 ///
50 ///
51 /// All other callers outside of unit tests should just handle `Node` values
51 /// All other callers outside of unit tests should just handle `Node` values
52 /// and never make any assumption on the actual length, using [`nybbles_len`]
52 /// and never make any assumption on the actual length, using [`nybbles_len`]
53 /// if they need a loop boundary.
53 /// if they need a loop boundary.
54 ///
54 ///
55 /// All methods that create a `Node` either take a type that enforces
55 /// All methods that create a `Node` either take a type that enforces
56 /// the size or return an error at runtime.
56 /// the size or return an error at runtime.
57 ///
57 ///
58 /// [`nybbles_len`]: #method.nybbles_len
58 /// [`nybbles_len`]: #method.nybbles_len
59 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
59 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
60 #[repr(transparent)]
60 #[repr(transparent)]
61 pub struct Node {
61 pub struct Node {
62 data: NodeData,
62 data: NodeData,
63 }
63 }
64
64
65 impl fmt::Debug for Node {
65 impl fmt::Debug for Node {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67 let n = format!("{:x?}", self.data);
67 let n = format!("{:x?}", self.data);
68 // We're using debug_tuple because it makes the output a little
68 // We're using debug_tuple because it makes the output a little
69 // more compact without losing data.
69 // more compact without losing data.
70 f.debug_tuple("Node").field(&n).finish()
70 f.debug_tuple("Node").field(&n).finish()
71 }
71 }
72 }
72 }
73
73
74 /// The node value for NULL_REVISION
74 /// The node value for NULL_REVISION
75 pub const NULL_NODE: Node = Node {
75 pub const NULL_NODE: Node = Node {
76 data: [0; NODE_BYTES_LENGTH],
76 data: [0; NODE_BYTES_LENGTH],
77 };
77 };
78
78
79 /// Return an error if the slice has an unexpected length
79 /// Return an error if the slice has an unexpected length
80 impl<'a> TryFrom<&'a [u8]> for &'a Node {
80 impl<'a> TryFrom<&'a [u8]> for &'a Node {
81 type Error = ();
81 type Error = ();
82
82
83 #[inline]
83 #[inline]
84 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
84 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
85 match Node::from_bytes(bytes) {
85 match Node::from_bytes(bytes) {
86 Ok((node, rest)) if rest.is_empty() => Ok(node),
86 Ok((node, [])) => Ok(node),
87 _ => Err(()),
87 _ => Err(()),
88 }
88 }
89 }
89 }
90 }
90 }
91
91
92 /// Return an error if the slice has an unexpected length
92 /// Return an error if the slice has an unexpected length
93 impl TryFrom<&'_ [u8]> for Node {
93 impl TryFrom<&'_ [u8]> for Node {
94 type Error = std::array::TryFromSliceError;
94 type Error = std::array::TryFromSliceError;
95
95
96 #[inline]
96 #[inline]
97 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
97 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
98 let data = bytes.try_into()?;
98 let data = bytes.try_into()?;
99 Ok(Self { data })
99 Ok(Self { data })
100 }
100 }
101 }
101 }
102
102
103 impl From<&'_ NodeData> for Node {
103 impl From<&'_ NodeData> for Node {
104 #[inline]
104 #[inline]
105 fn from(data: &'_ NodeData) -> Self {
105 fn from(data: &'_ NodeData) -> Self {
106 Self { data: *data }
106 Self { data: *data }
107 }
107 }
108 }
108 }
109
109
110 impl fmt::LowerHex for Node {
110 impl fmt::LowerHex for Node {
111 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
111 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
112 for &byte in &self.data {
112 for &byte in &self.data {
113 write!(f, "{:02x}", byte)?
113 write!(f, "{:02x}", byte)?
114 }
114 }
115 Ok(())
115 Ok(())
116 }
116 }
117 }
117 }
118
118
119 #[derive(Debug)]
119 #[derive(Debug)]
120 pub struct FromHexError;
120 pub struct FromHexError;
121
121
122 /// Low level utility function, also for prefixes
122 /// Low level utility function, also for prefixes
123 fn get_nybble(s: &[u8], i: usize) -> u8 {
123 fn get_nybble(s: &[u8], i: usize) -> u8 {
124 if i % 2 == 0 {
124 if i % 2 == 0 {
125 s[i / 2] >> 4
125 s[i / 2] >> 4
126 } else {
126 } else {
127 s[i / 2] & 0x0f
127 s[i / 2] & 0x0f
128 }
128 }
129 }
129 }
130
130
131 impl Node {
131 impl Node {
132 /// Retrieve the `i`th half-byte of the binary data.
132 /// Retrieve the `i`th half-byte of the binary data.
133 ///
133 ///
134 /// This is also the `i`th hexadecimal digit in numeric form,
134 /// This is also the `i`th hexadecimal digit in numeric form,
135 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
135 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
136 pub fn get_nybble(&self, i: usize) -> u8 {
136 pub fn get_nybble(&self, i: usize) -> u8 {
137 get_nybble(&self.data, i)
137 get_nybble(&self.data, i)
138 }
138 }
139
139
140 /// Length of the data, in nybbles
140 /// Length of the data, in nybbles
141 pub fn nybbles_len(&self) -> usize {
141 pub fn nybbles_len(&self) -> usize {
142 // public exposure as an instance method only, so that we can
142 // public exposure as an instance method only, so that we can
143 // easily support several sizes of hashes if needed in the future.
143 // easily support several sizes of hashes if needed in the future.
144 NODE_NYBBLES_LENGTH
144 NODE_NYBBLES_LENGTH
145 }
145 }
146
146
147 /// Convert from hexadecimal string representation
147 /// Convert from hexadecimal string representation
148 ///
148 ///
149 /// Exact length is required.
149 /// Exact length is required.
150 ///
150 ///
151 /// To be used in FFI and I/O only, in order to facilitate future
151 /// To be used in FFI and I/O only, in order to facilitate future
152 /// changes of hash format.
152 /// changes of hash format.
153 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
153 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
154 let prefix = NodePrefix::from_hex(hex)?;
154 let prefix = NodePrefix::from_hex(hex)?;
155 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
155 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
156 Ok(Self { data: prefix.data })
156 Ok(Self { data: prefix.data })
157 } else {
157 } else {
158 Err(FromHexError)
158 Err(FromHexError)
159 }
159 }
160 }
160 }
161
161
162 /// `from_hex`, but for input from an internal file of the repository such
162 /// `from_hex`, but for input from an internal file of the repository such
163 /// as a changelog or manifest entry.
163 /// as a changelog or manifest entry.
164 ///
164 ///
165 /// An error is treated as repository corruption.
165 /// An error is treated as repository corruption.
166 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
166 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
167 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
167 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
168 HgError::CorruptedRepository(format!(
168 HgError::CorruptedRepository(format!(
169 "Expected a full hexadecimal node ID, found {}",
169 "Expected a full hexadecimal node ID, found {}",
170 String::from_utf8_lossy(hex.as_ref())
170 String::from_utf8_lossy(hex.as_ref())
171 ))
171 ))
172 })
172 })
173 }
173 }
174
174
175 /// Provide access to binary data
175 /// Provide access to binary data
176 ///
176 ///
177 /// This is needed by FFI layers, for instance to return expected
177 /// This is needed by FFI layers, for instance to return expected
178 /// binary values to Python.
178 /// binary values to Python.
179 pub fn as_bytes(&self) -> &[u8] {
179 pub fn as_bytes(&self) -> &[u8] {
180 &self.data
180 &self.data
181 }
181 }
182
182
183 pub fn short(&self) -> NodePrefix {
183 pub fn short(&self) -> NodePrefix {
184 NodePrefix {
184 NodePrefix {
185 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
185 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
186 data: self.data,
186 data: self.data,
187 }
187 }
188 }
188 }
189
189
190 pub fn pad_to_256_bits(&self) -> [u8; 32] {
190 pub fn pad_to_256_bits(&self) -> [u8; 32] {
191 let mut bits = [0; 32];
191 let mut bits = [0; 32];
192 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
192 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
193 bits
193 bits
194 }
194 }
195 }
195 }
196
196
197 /// The beginning of a binary revision SHA.
197 /// The beginning of a binary revision SHA.
198 ///
198 ///
199 /// Since it can potentially come from an hexadecimal representation with
199 /// Since it can potentially come from an hexadecimal representation with
200 /// odd length, it needs to carry around whether the last 4 bits are relevant
200 /// odd length, it needs to carry around whether the last 4 bits are relevant
201 /// or not.
201 /// or not.
202 #[derive(Debug, PartialEq, Copy, Clone)]
202 #[derive(Debug, PartialEq, Copy, Clone)]
203 pub struct NodePrefix {
203 pub struct NodePrefix {
204 /// In `1..=NODE_NYBBLES_LENGTH`
204 /// In `1..=NODE_NYBBLES_LENGTH`
205 nybbles_len: u8,
205 nybbles_len: u8,
206 /// The first `4 * length_in_nybbles` bits are used (considering bits
206 /// The first `4 * length_in_nybbles` bits are used (considering bits
207 /// within a bytes in big-endian: most significant first), the rest
207 /// within a bytes in big-endian: most significant first), the rest
208 /// are zero.
208 /// are zero.
209 data: NodeData,
209 data: NodeData,
210 }
210 }
211
211
212 impl NodePrefix {
212 impl NodePrefix {
213 /// Convert from hexadecimal string representation
213 /// Convert from hexadecimal string representation
214 ///
214 ///
215 /// Similarly to `hex::decode`, can be used with Unicode string types
215 /// Similarly to `hex::decode`, can be used with Unicode string types
216 /// (`String`, `&str`) as well as bytes.
216 /// (`String`, `&str`) as well as bytes.
217 ///
217 ///
218 /// To be used in FFI and I/O only, in order to facilitate future
218 /// To be used in FFI and I/O only, in order to facilitate future
219 /// changes of hash format.
219 /// changes of hash format.
220 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
220 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
221 let hex = hex.as_ref();
221 let hex = hex.as_ref();
222 let len = hex.len();
222 let len = hex.len();
223 if len > NODE_NYBBLES_LENGTH || len == 0 {
223 if len > NODE_NYBBLES_LENGTH || len == 0 {
224 return Err(FromHexError);
224 return Err(FromHexError);
225 }
225 }
226
226
227 let mut data = [0; NODE_BYTES_LENGTH];
227 let mut data = [0; NODE_BYTES_LENGTH];
228 let mut nybbles_len = 0;
228 let mut nybbles_len = 0;
229 for &ascii_byte in hex {
229 for &ascii_byte in hex {
230 let nybble = match char::from(ascii_byte).to_digit(16) {
230 let nybble = match char::from(ascii_byte).to_digit(16) {
231 Some(digit) => digit as u8,
231 Some(digit) => digit as u8,
232 None => return Err(FromHexError),
232 None => return Err(FromHexError),
233 };
233 };
234 // Fill in the upper half of a byte first, then the lower half.
234 // Fill in the upper half of a byte first, then the lower half.
235 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
235 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
236 data[nybbles_len as usize / 2] |= nybble << shift;
236 data[nybbles_len as usize / 2] |= nybble << shift;
237 nybbles_len += 1;
237 nybbles_len += 1;
238 }
238 }
239 Ok(Self { data, nybbles_len })
239 Ok(Self { data, nybbles_len })
240 }
240 }
241
241
242 pub fn nybbles_len(&self) -> usize {
242 pub fn nybbles_len(&self) -> usize {
243 self.nybbles_len as _
243 self.nybbles_len as _
244 }
244 }
245
245
246 pub fn is_prefix_of(&self, node: &Node) -> bool {
246 pub fn is_prefix_of(&self, node: &Node) -> bool {
247 let full_bytes = self.nybbles_len() / 2;
247 let full_bytes = self.nybbles_len() / 2;
248 if self.data[..full_bytes] != node.data[..full_bytes] {
248 if self.data[..full_bytes] != node.data[..full_bytes] {
249 return false;
249 return false;
250 }
250 }
251 if self.nybbles_len() % 2 == 0 {
251 if self.nybbles_len() % 2 == 0 {
252 return true;
252 return true;
253 }
253 }
254 let last = self.nybbles_len() - 1;
254 let last = self.nybbles_len() - 1;
255 self.get_nybble(last) == node.get_nybble(last)
255 self.get_nybble(last) == node.get_nybble(last)
256 }
256 }
257
257
258 /// Retrieve the `i`th half-byte from the prefix.
258 /// Retrieve the `i`th half-byte from the prefix.
259 ///
259 ///
260 /// This is also the `i`th hexadecimal digit in numeric form,
260 /// This is also the `i`th hexadecimal digit in numeric form,
261 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
261 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
262 pub fn get_nybble(&self, i: usize) -> u8 {
262 pub fn get_nybble(&self, i: usize) -> u8 {
263 assert!(i < self.nybbles_len());
263 assert!(i < self.nybbles_len());
264 get_nybble(&self.data, i)
264 get_nybble(&self.data, i)
265 }
265 }
266
266
267 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
267 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
268 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
268 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
269 }
269 }
270
270
271 /// Return the index first nybble that's different from `node`
271 /// Return the index first nybble that's different from `node`
272 ///
272 ///
273 /// If the return value is `None` that means that `self` is
273 /// If the return value is `None` that means that `self` is
274 /// a prefix of `node`, but the current method is a bit slower
274 /// a prefix of `node`, but the current method is a bit slower
275 /// than `is_prefix_of`.
275 /// than `is_prefix_of`.
276 ///
276 ///
277 /// Returned index is as in `get_nybble`, i.e., starting at 0.
277 /// Returned index is as in `get_nybble`, i.e., starting at 0.
278 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
278 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
279 self.iter_nybbles()
279 self.iter_nybbles()
280 .zip(NodePrefix::from(*node).iter_nybbles())
280 .zip(NodePrefix::from(*node).iter_nybbles())
281 .position(|(a, b)| a != b)
281 .position(|(a, b)| a != b)
282 }
282 }
283 }
283 }
284
284
285 impl fmt::LowerHex for NodePrefix {
285 impl fmt::LowerHex for NodePrefix {
286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
286 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
287 let full_bytes = self.nybbles_len() / 2;
287 let full_bytes = self.nybbles_len() / 2;
288 for &byte in &self.data[..full_bytes] {
288 for &byte in &self.data[..full_bytes] {
289 write!(f, "{:02x}", byte)?
289 write!(f, "{:02x}", byte)?
290 }
290 }
291 if self.nybbles_len() % 2 == 1 {
291 if self.nybbles_len() % 2 == 1 {
292 let last = self.nybbles_len() - 1;
292 let last = self.nybbles_len() - 1;
293 write!(f, "{:x}", self.get_nybble(last))?
293 write!(f, "{:x}", self.get_nybble(last))?
294 }
294 }
295 Ok(())
295 Ok(())
296 }
296 }
297 }
297 }
298
298
299 /// A shortcut for full `Node` references
299 /// A shortcut for full `Node` references
300 impl From<&'_ Node> for NodePrefix {
300 impl From<&'_ Node> for NodePrefix {
301 fn from(node: &'_ Node) -> Self {
301 fn from(node: &'_ Node) -> Self {
302 NodePrefix {
302 NodePrefix {
303 nybbles_len: node.nybbles_len() as _,
303 nybbles_len: node.nybbles_len() as _,
304 data: node.data,
304 data: node.data,
305 }
305 }
306 }
306 }
307 }
307 }
308
308
309 /// A shortcut for full `Node` references
309 /// A shortcut for full `Node` references
310 impl From<Node> for NodePrefix {
310 impl From<Node> for NodePrefix {
311 fn from(node: Node) -> Self {
311 fn from(node: Node) -> Self {
312 NodePrefix {
312 NodePrefix {
313 nybbles_len: node.nybbles_len() as _,
313 nybbles_len: node.nybbles_len() as _,
314 data: node.data,
314 data: node.data,
315 }
315 }
316 }
316 }
317 }
317 }
318
318
319 impl PartialEq<Node> for NodePrefix {
319 impl PartialEq<Node> for NodePrefix {
320 fn eq(&self, other: &Node) -> bool {
320 fn eq(&self, other: &Node) -> bool {
321 self.data == other.data && self.nybbles_len() == other.nybbles_len()
321 self.data == other.data && self.nybbles_len() == other.nybbles_len()
322 }
322 }
323 }
323 }
324
324
325 #[cfg(test)]
325 #[cfg(test)]
326 pub use tests::hex_pad_right;
327
328 #[cfg(test)]
326 mod tests {
329 mod tests {
327 use super::*;
330 use super::*;
328
331
329 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
332 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
330 const SAMPLE_NODE: Node = Node {
333 const SAMPLE_NODE: Node = Node {
331 data: [
334 data: [
332 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
335 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
333 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
336 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
334 ],
337 ],
335 };
338 };
336
339
337 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
340 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
338 /// The padding is made with zeros.
341 /// The padding is made with zeros.
339 pub fn hex_pad_right(hex: &str) -> String {
342 pub fn hex_pad_right(hex: &str) -> String {
340 let mut res = hex.to_string();
343 let mut res = hex.to_string();
341 while res.len() < NODE_NYBBLES_LENGTH {
344 while res.len() < NODE_NYBBLES_LENGTH {
342 res.push('0');
345 res.push('0');
343 }
346 }
344 res
347 res
345 }
348 }
346
349
347 #[test]
350 #[test]
348 fn test_node_from_hex() {
351 fn test_node_from_hex() {
349 let not_hex = "012... oops";
352 let not_hex = "012... oops";
350 let too_short = "0123";
353 let too_short = "0123";
351 let too_long = format!("{}0", SAMPLE_NODE_HEX);
354 let too_long = format!("{}0", SAMPLE_NODE_HEX);
352 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
355 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
353 assert!(Node::from_hex(not_hex).is_err());
356 assert!(Node::from_hex(not_hex).is_err());
354 assert!(Node::from_hex(too_short).is_err());
357 assert!(Node::from_hex(too_short).is_err());
355 assert!(Node::from_hex(too_long).is_err());
358 assert!(Node::from_hex(too_long).is_err());
356 }
359 }
357
360
358 #[test]
361 #[test]
359 fn test_node_encode_hex() {
362 fn test_node_encode_hex() {
360 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
363 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
361 }
364 }
362
365
363 #[test]
366 #[test]
364 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
367 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
365 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
368 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
366 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
369 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
367 assert_eq!(
370 assert_eq!(
368 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
371 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
369 SAMPLE_NODE_HEX
372 SAMPLE_NODE_HEX
370 );
373 );
371 Ok(())
374 Ok(())
372 }
375 }
373
376
374 #[test]
377 #[test]
375 fn test_prefix_from_hex_errors() {
378 fn test_prefix_from_hex_errors() {
376 assert!(NodePrefix::from_hex("testgr").is_err());
379 assert!(NodePrefix::from_hex("testgr").is_err());
377 let mut long = format!("{:x}", NULL_NODE);
380 let mut long = format!("{:x}", NULL_NODE);
378 long.push('c');
381 long.push('c');
379 assert!(NodePrefix::from_hex(&long).is_err())
382 assert!(NodePrefix::from_hex(&long).is_err())
380 }
383 }
381
384
382 #[test]
385 #[test]
383 fn test_is_prefix_of() -> Result<(), FromHexError> {
386 fn test_is_prefix_of() -> Result<(), FromHexError> {
384 let mut node_data = [0; NODE_BYTES_LENGTH];
387 let mut node_data = [0; NODE_BYTES_LENGTH];
385 node_data[0] = 0x12;
388 node_data[0] = 0x12;
386 node_data[1] = 0xca;
389 node_data[1] = 0xca;
387 let node = Node::from(node_data);
390 let node = Node::from(node_data);
388 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
391 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
389 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
392 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
390 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
393 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
391 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
394 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
392 Ok(())
395 Ok(())
393 }
396 }
394
397
395 #[test]
398 #[test]
396 fn test_get_nybble() -> Result<(), FromHexError> {
399 fn test_get_nybble() -> Result<(), FromHexError> {
397 let prefix = NodePrefix::from_hex("dead6789cafe")?;
400 let prefix = NodePrefix::from_hex("dead6789cafe")?;
398 assert_eq!(prefix.get_nybble(0), 13);
401 assert_eq!(prefix.get_nybble(0), 13);
399 assert_eq!(prefix.get_nybble(7), 9);
402 assert_eq!(prefix.get_nybble(7), 9);
400 Ok(())
403 Ok(())
401 }
404 }
402
405
403 #[test]
406 #[test]
404 fn test_first_different_nybble_even_prefix() {
407 fn test_first_different_nybble_even_prefix() {
405 let prefix = NodePrefix::from_hex("12ca").unwrap();
408 let prefix = NodePrefix::from_hex("12ca").unwrap();
406 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
409 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
407 assert_eq!(prefix.first_different_nybble(&node), Some(0));
410 assert_eq!(prefix.first_different_nybble(&node), Some(0));
408 node.data[0] = 0x13;
411 node.data[0] = 0x13;
409 assert_eq!(prefix.first_different_nybble(&node), Some(1));
412 assert_eq!(prefix.first_different_nybble(&node), Some(1));
410 node.data[0] = 0x12;
413 node.data[0] = 0x12;
411 assert_eq!(prefix.first_different_nybble(&node), Some(2));
414 assert_eq!(prefix.first_different_nybble(&node), Some(2));
412 node.data[1] = 0xca;
415 node.data[1] = 0xca;
413 // now it is a prefix
416 // now it is a prefix
414 assert_eq!(prefix.first_different_nybble(&node), None);
417 assert_eq!(prefix.first_different_nybble(&node), None);
415 }
418 }
416
419
417 #[test]
420 #[test]
418 fn test_first_different_nybble_odd_prefix() {
421 fn test_first_different_nybble_odd_prefix() {
419 let prefix = NodePrefix::from_hex("12c").unwrap();
422 let prefix = NodePrefix::from_hex("12c").unwrap();
420 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
423 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
421 assert_eq!(prefix.first_different_nybble(&node), Some(0));
424 assert_eq!(prefix.first_different_nybble(&node), Some(0));
422 node.data[0] = 0x13;
425 node.data[0] = 0x13;
423 assert_eq!(prefix.first_different_nybble(&node), Some(1));
426 assert_eq!(prefix.first_different_nybble(&node), Some(1));
424 node.data[0] = 0x12;
427 node.data[0] = 0x12;
425 assert_eq!(prefix.first_different_nybble(&node), Some(2));
428 assert_eq!(prefix.first_different_nybble(&node), Some(2));
426 node.data[1] = 0xca;
429 node.data[1] = 0xca;
427 // now it is a prefix
430 // now it is a prefix
428 assert_eq!(prefix.first_different_nybble(&node), None);
431 assert_eq!(prefix.first_different_nybble(&node), None);
429 }
432 }
430 }
433 }
431
432 #[cfg(test)]
433 pub use tests::hex_pad_right;
@@ -1,340 +1,341
1 use hg::testing::VecGraph;
1 use hg::testing::VecGraph;
2 use hg::Revision;
2 use hg::Revision;
3 use hg::*;
3 use hg::*;
4 use rand::distributions::{Distribution, Uniform};
4 use rand::distributions::{Distribution, Uniform};
5 use rand::{thread_rng, Rng, RngCore, SeedableRng};
5 use rand::{thread_rng, Rng, RngCore, SeedableRng};
6 use rand_distr::LogNormal;
6 use rand_distr::LogNormal;
7 use std::cmp::min;
7 use std::cmp::min;
8 use std::collections::HashSet;
8 use std::collections::HashSet;
9 use std::env;
9 use std::env;
10 use std::fmt::Debug;
10 use std::fmt::Debug;
11
11
12 fn build_random_graph(
12 fn build_random_graph(
13 nodes_opt: Option<usize>,
13 nodes_opt: Option<usize>,
14 rootprob_opt: Option<f64>,
14 rootprob_opt: Option<f64>,
15 mergeprob_opt: Option<f64>,
15 mergeprob_opt: Option<f64>,
16 prevprob_opt: Option<f64>,
16 prevprob_opt: Option<f64>,
17 ) -> VecGraph {
17 ) -> VecGraph {
18 let nodes = nodes_opt.unwrap_or(100);
18 let nodes = nodes_opt.unwrap_or(100);
19 let rootprob = rootprob_opt.unwrap_or(0.05);
19 let rootprob = rootprob_opt.unwrap_or(0.05);
20 let mergeprob = mergeprob_opt.unwrap_or(0.2);
20 let mergeprob = mergeprob_opt.unwrap_or(0.2);
21 let prevprob = prevprob_opt.unwrap_or(0.7);
21 let prevprob = prevprob_opt.unwrap_or(0.7);
22
22
23 let mut rng = thread_rng();
23 let mut rng = thread_rng();
24 let mut vg: VecGraph = Vec::with_capacity(nodes);
24 let mut vg: VecGraph = Vec::with_capacity(nodes);
25 for i in 0..nodes {
25 for i in 0..nodes {
26 if i == 0 || rng.gen_bool(rootprob) {
26 if i == 0 || rng.gen_bool(rootprob) {
27 vg.push([NULL_REVISION, NULL_REVISION])
27 vg.push([NULL_REVISION, NULL_REVISION])
28 } else if i == 1 {
28 } else if i == 1 {
29 vg.push([Revision(0), NULL_REVISION])
29 vg.push([Revision(0), NULL_REVISION])
30 } else if rng.gen_bool(mergeprob) {
30 } else if rng.gen_bool(mergeprob) {
31 let p1 = {
31 let p1 = {
32 if i == 2 || rng.gen_bool(prevprob) {
32 if i == 2 || rng.gen_bool(prevprob) {
33 Revision((i - 1) as BaseRevision)
33 Revision((i - 1) as BaseRevision)
34 } else {
34 } else {
35 Revision(rng.gen_range(0..i - 1) as BaseRevision)
35 Revision(rng.gen_range(0..i - 1) as BaseRevision)
36 }
36 }
37 };
37 };
38 // p2 is a random revision lower than i and different from p1
38 // p2 is a random revision lower than i and different from p1
39 let mut p2 = Revision(rng.gen_range(0..i - 1) as BaseRevision);
39 let mut p2 = Revision(rng.gen_range(0..i - 1) as BaseRevision);
40 if p2 >= p1 {
40 if p2 >= p1 {
41 p2.0 += 1;
41 p2.0 += 1;
42 }
42 }
43 vg.push([p1, p2]);
43 vg.push([p1, p2]);
44 } else if rng.gen_bool(prevprob) {
44 } else if rng.gen_bool(prevprob) {
45 vg.push([Revision((i - 1) as BaseRevision), NULL_REVISION])
45 vg.push([Revision((i - 1) as BaseRevision), NULL_REVISION])
46 } else {
46 } else {
47 vg.push([
47 vg.push([
48 Revision(rng.gen_range(0..i - 1) as BaseRevision),
48 Revision(rng.gen_range(0..i - 1) as BaseRevision),
49 NULL_REVISION,
49 NULL_REVISION,
50 ])
50 ])
51 }
51 }
52 }
52 }
53 vg
53 vg
54 }
54 }
55
55
56 /// Compute the ancestors set of all revisions of a VecGraph
56 /// Compute the ancestors set of all revisions of a VecGraph
57 fn ancestors_sets(vg: &VecGraph) -> Vec<HashSet<Revision>> {
57 fn ancestors_sets(vg: &VecGraph) -> Vec<HashSet<Revision>> {
58 let mut ancs: Vec<HashSet<Revision>> = Vec::new();
58 let mut ancs: Vec<HashSet<Revision>> = Vec::new();
59 (0..vg.len()).for_each(|i| {
59 (0..vg.len()).for_each(|i| {
60 let mut ancs_i = HashSet::new();
60 let mut ancs_i = HashSet::new();
61 ancs_i.insert(Revision(i as BaseRevision));
61 ancs_i.insert(Revision(i as BaseRevision));
62 for p in vg[i].iter().cloned() {
62 for p in vg[i].iter().cloned() {
63 if p != NULL_REVISION {
63 if p != NULL_REVISION {
64 ancs_i.extend(&ancs[p.0 as usize]);
64 ancs_i.extend(&ancs[p.0 as usize]);
65 }
65 }
66 }
66 }
67 ancs.push(ancs_i);
67 ancs.push(ancs_i);
68 });
68 });
69 ancs
69 ancs
70 }
70 }
71
71
/// One recorded call on a `NaiveMissingAncestors`, kept in its history
/// so a failing random test can be replayed and debugged.
#[allow(unused)] // Useful when debugging
#[derive(Clone, Debug)]
enum MissingAncestorsAction {
    // The bases the instance was constructed with.
    InitialBases(HashSet<Revision>),
    // Bases added later through `add_bases`.
    AddBases(HashSet<Revision>),
    // The argument of a `remove_ancestors_from` call (pre-mutation).
    RemoveAncestorsFrom(HashSet<Revision>),
    // The argument of a `missing_ancestors` call.
    MissingAncestors(HashSet<Revision>),
}
79
80
/// An instrumented naive yet obviously correct implementation
///
/// It also records all its actions for easy reproduction for replay
/// of problematic cases
struct NaiveMissingAncestors<'a> {
    // Precomputed transitive closure: `ancestors_sets[r]` is the full
    // (inclusive) ancestor set of revision `r`.
    ancestors_sets: &'a Vec<HashSet<Revision>>,
    graph: &'a VecGraph, // used for error reporting only
    // Current set of bases; grows through `add_bases`.
    bases: HashSet<Revision>,
    // Every action performed on this instance, for replaying failures.
    history: Vec<MissingAncestorsAction>,
    // for error reporting, assuming we are in a random test
    random_seed: String,
}
92
93
impl<'a> NaiveMissingAncestors<'a> {
    /// Create an instance over `graph`, with `bases` as initial bases.
    ///
    /// `ancestors_sets` must be the precomputed transitive closure of
    /// `graph` (as produced by `ancestors_sets()`); `random_seed` is
    /// only kept for error reporting.
    fn new(
        graph: &'a VecGraph,
        ancestors_sets: &'a Vec<HashSet<Revision>>,
        bases: &HashSet<Revision>,
        random_seed: &str,
    ) -> Self {
        Self {
            ancestors_sets,
            bases: bases.clone(),
            graph,
            history: vec![MissingAncestorsAction::InitialBases(bases.clone())],
            random_seed: random_seed.into(),
        }
    }

    /// Add `new_bases` to the current bases, recording the action.
    fn add_bases(&mut self, new_bases: HashSet<Revision>) {
        self.bases.extend(&new_bases);
        self.history
            .push(MissingAncestorsAction::AddBases(new_bases))
    }

    /// Remove from `revs` every revision that is an ancestor of one of
    /// the current bases (bases included), using the precomputed sets.
    fn remove_ancestors_from(&mut self, revs: &mut HashSet<Revision>) {
        revs.remove(&NULL_REVISION);
        self.history
            .push(MissingAncestorsAction::RemoveAncestorsFrom(revs.clone()));
        for base in self.bases.iter().cloned() {
            if base != NULL_REVISION {
                for rev in &self.ancestors_sets[base.0 as usize] {
                    revs.remove(rev);
                }
            }
        }
    }

    /// Return, sorted, the ancestors of `revs` (inclusive) that are not
    /// ancestors of any of the current bases.
    fn missing_ancestors(
        &mut self,
        revs: impl IntoIterator<Item = Revision>,
    ) -> Vec<Revision> {
        let revs_as_set: HashSet<Revision> = revs.into_iter().collect();

        // Union of the (inclusive) ancestor sets of all wanted revs.
        let mut missing: HashSet<Revision> = HashSet::new();
        for rev in revs_as_set.iter().cloned() {
            if rev != NULL_REVISION {
                missing.extend(&self.ancestors_sets[rev.0 as usize])
            }
        }
        self.history
            .push(MissingAncestorsAction::MissingAncestors(revs_as_set));

        // Subtract everything reachable from the bases.
        for base in self.bases.iter().cloned() {
            if base != NULL_REVISION {
                for rev in &self.ancestors_sets[base.0 as usize] {
                    missing.remove(rev);
                }
            }
        }
        let mut res: Vec<Revision> = missing.iter().cloned().collect();
        res.sort_unstable();
        res
    }

    /// Like `assert_eq!`, but the panic message additionally dumps the
    /// graph, bases, recorded history and random seed so the failure
    /// can be reproduced.
    fn assert_eq<T>(&self, left: T, right: T)
    where
        T: PartialEq + Debug,
    {
        if left == right {
            return;
        }
        panic!(
            "Equality assertion failed (left != right)
           left={:?}
           right={:?}
           graph={:?}
           current bases={:?}
           history={:?}
           random seed={}
           ",
            left,
            right,
            self.graph,
            self.bases,
            self.history,
            self.random_seed,
        );
    }
}
180
181
181 /// Choose a set of random revisions
182 /// Choose a set of random revisions
182 ///
183 ///
183 /// The size of the set is taken from a LogNormal distribution
184 /// The size of the set is taken from a LogNormal distribution
184 /// with default mu=1.1 and default sigma=0.8. Quoting the Python
185 /// with default mu=1.1 and default sigma=0.8. Quoting the Python
185 /// test this is taken from:
186 /// test this is taken from:
186 /// the default mu and sigma give us a nice distribution of mostly
187 /// the default mu and sigma give us a nice distribution of mostly
187 /// single-digit counts (including 0) with some higher ones
188 /// single-digit counts (including 0) with some higher ones
188 /// The sample may include NULL_REVISION
189 /// The sample may include NULL_REVISION
189 fn sample_revs<R: RngCore>(
190 fn sample_revs<R: RngCore>(
190 rng: &mut R,
191 rng: &mut R,
191 maxrev: Revision,
192 maxrev: Revision,
192 mu_opt: Option<f64>,
193 mu_opt: Option<f64>,
193 sigma_opt: Option<f64>,
194 sigma_opt: Option<f64>,
194 ) -> HashSet<Revision> {
195 ) -> HashSet<Revision> {
195 let mu = mu_opt.unwrap_or(1.1);
196 let mu = mu_opt.unwrap_or(1.1);
196 let sigma = sigma_opt.unwrap_or(0.8);
197 let sigma = sigma_opt.unwrap_or(0.8);
197
198
198 let log_normal = LogNormal::new(mu, sigma).unwrap();
199 let log_normal = LogNormal::new(mu, sigma).unwrap();
199 let nb = min(maxrev.0 as usize, log_normal.sample(rng).floor() as usize);
200 let nb = min(maxrev.0 as usize, log_normal.sample(rng).floor() as usize);
200
201
201 let dist = Uniform::from(NULL_REVISION.0..maxrev.0);
202 let dist = Uniform::from(NULL_REVISION.0..maxrev.0);
202 rng.sample_iter(&dist).take(nb).map(Revision).collect()
203 rng.sample_iter(&dist).take(nb).map(Revision).collect()
203 }
204 }
204
205
/// Produces the hexadecimal representation of a slice of bytes
///
/// Each byte is rendered as exactly two hex digits, so the output
/// always has `2 * bytes.len()` characters. This is required for
/// round-tripping through `seed_parse_in`, which demands 32 hex
/// characters for a 16-byte seed.
fn hex_bytes(bytes: &[u8]) -> String {
    let mut s = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        // `{:02x}` zero-pads; plain `{:x}` would emit a single digit
        // for bytes < 0x10 and yield a string of the wrong length.
        s.push_str(&format!("{:02x}", b));
    }
    s
}
213
214
/// Fill a random seed from its hexadecimal representation.
///
/// This signature is meant to be consistent with `RngCore::fill_bytes`
///
/// # Panics
///
/// Panics if `hex` is not exactly 32 hexadecimal characters (128 bits).
fn seed_parse_in(hex: &str, seed: &mut [u8]) {
    if hex.len() != 32 {
        panic!("Seed {} is too short for 128 bits hex", hex);
    }
    // Decode all 16 bytes. The previous loop stopped after 8, silently
    // ignoring the second half of TEST_RANDOM_SEED (and any invalid hex
    // in it), which made reported seeds non-reproducible.
    for (i, byte) in seed.iter_mut().enumerate().take(16) {
        *byte = u8::from_str_radix(&hex[2 * i..2 * (i + 1)], 16)
            .unwrap_or_else(|_e| panic!("Seed {} is not 128 bits hex", hex));
    }
}
226
227
/// Parse the parameters for `test_missing_ancestors()`
///
/// Returns (graphs, instances, calls per instance)
///
/// Panics with a usage message if the variable does not hold exactly
/// three comma-separated non-negative integers.
fn parse_test_missing_ancestors_params(var: &str) -> (usize, usize, usize) {
    let err_msg = "TEST_MISSING_ANCESTORS format: GRAPHS,INSTANCES,CALLS";
    let parsed: Vec<usize> = var
        .split(',')
        .map(|field| field.trim().parse().expect(err_msg))
        .collect();
    match parsed.as_slice() {
        [graphs, instances, calls] => (*graphs, *instances, *calls),
        _ => panic!("{}", err_msg),
    }
}
241
242
#[test]
/// This test creates lots of random VecGraphs,
/// and compare a bunch of MissingAncestors for them with
/// NaiveMissingAncestors that rely on precomputed transitive closures of
/// these VecGraphs (ancestors_sets).
///
/// For each generater graph, several instances of `MissingAncestors` are
/// created, whose methods are called and checked a given number of times.
///
/// This test can be parametrized by two environment variables:
///
/// - TEST_RANDOM_SEED: must be 128 bits in hexadecimal
/// - TEST_MISSING_ANCESTORS: "GRAPHS,INSTANCES,CALLS". The default is
///   "100,10,10"
///
/// This is slow: it runs on my workstation in about 5 seconds with the
/// default parameters with a plain `cargo --test`.
///
/// If you want to run it faster, especially if you're changing the
/// parameters, use `cargo test --release`.
/// For me, that gets it down to 0.15 seconds with the default parameters
fn test_missing_ancestors_compare_naive() {
    // Read the test size parameters, falling back to the defaults.
    let (graphcount, testcount, inccount) =
        match env::var("TEST_MISSING_ANCESTORS") {
            Err(env::VarError::NotPresent) => (100, 10, 10),
            Ok(val) => parse_test_missing_ancestors_params(&val),
            Err(env::VarError::NotUnicode(_)) => {
                panic!("TEST_MISSING_ANCESTORS is invalid");
            }
        };
    // Use the seed from the environment when given (to replay a failure),
    // otherwise draw a fresh one and print it for reproducibility.
    let mut seed: [u8; 16] = [0; 16];
    match env::var("TEST_RANDOM_SEED") {
        Ok(val) => {
            seed_parse_in(&val, &mut seed);
        }
        Err(env::VarError::NotPresent) => {
            thread_rng().fill_bytes(&mut seed);
        }
        Err(env::VarError::NotUnicode(_)) => {
            panic!("TEST_RANDOM_SEED must be 128 bits in hex");
        }
    }
    let hex_seed = hex_bytes(&seed);
    eprintln!("Random seed: {}", hex_seed);

    // A seeded PCG so the whole run is reproducible from `hex_seed`.
    let mut rng = rand_pcg::Pcg32::from_seed(seed);

    eprint!("Checking MissingAncestors against brute force implementation ");
    eprint!("for {} random graphs, ", graphcount);
    eprintln!(
        "with {} instances for each and {} calls per instance",
        testcount, inccount,
    );
    for g in 0..graphcount {
        // Progress report for long runs.
        if g != 0 && g % 100 == 0 {
            eprintln!("Tested with {} graphs", g);
        }
        let graph = build_random_graph(None, None, None, None);
        let graph_len = Revision(graph.len() as BaseRevision);
        // Brute-force transitive closure, shared by all instances below.
        let ancestors_sets = ancestors_sets(&graph);
        for _testno in 0..testcount {
            let bases: HashSet<Revision> =
                sample_revs(&mut rng, graph_len, None, None);
            // The implementation under test...
            let mut inc = MissingAncestors::<VecGraph>::new(
                graph.clone(),
                bases.clone(),
            );
            // ...and the naive oracle it is compared against.
            let mut naive = NaiveMissingAncestors::new(
                &graph,
                &ancestors_sets,
                &bases,
                &hex_seed,
            );
            for _m in 0..inccount {
                // Occasionally grow the bases on both sides.
                if rng.gen_bool(0.2) {
                    let new_bases =
                        sample_revs(&mut rng, graph_len, None, None);
                    inc.add_bases(new_bases.iter().cloned());
                    naive.add_bases(new_bases);
                }
                // Exercise either removal or missing-ancestors lookup,
                // and check both implementations agree.
                if rng.gen_bool(0.4) {
                    // larger set so that there are more revs to remove from
                    let mut hrevs =
                        sample_revs(&mut rng, graph_len, Some(1.5), None);
                    let mut rrevs = hrevs.clone();
                    inc.remove_ancestors_from(&mut hrevs).unwrap();
                    naive.remove_ancestors_from(&mut rrevs);
                    naive.assert_eq(hrevs, rrevs);
                } else {
                    let revs = sample_revs(&mut rng, graph_len, None, None);
                    let hm =
                        inc.missing_ancestors(revs.iter().cloned()).unwrap();
                    let rm = naive.missing_ancestors(revs.iter().cloned());
                    naive.assert_eq(hm, rm);
                }
            }
        }
    }
}
General Comments 0
You need to be logged in to leave comments. Login now