##// END OF EJS Templates
rust: do a clippy pass...
Raphaël Gomès -
r45500:26114bd6 default
parent child Browse files
Show More
@@ -1,787 +1,787
// ancestors.rs
//
// Copyright 2018 Georges Racinet <gracinet@anybox.fr>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Rust versions of generic DAG ancestors algorithms for Mercurial
use super::{Graph, GraphError, Revision, NULL_REVISION};
use crate::dagops;
use std::cmp::max;
use std::collections::{BinaryHeap, HashSet};
14
14
15 /// Iterator over the ancestors of a given list of revisions
15 /// Iterator over the ancestors of a given list of revisions
16 /// This is a generic type, defined and implemented for any Graph, so that
16 /// This is a generic type, defined and implemented for any Graph, so that
17 /// it's easy to
17 /// it's easy to
18 ///
18 ///
19 /// - unit test in pure Rust
19 /// - unit test in pure Rust
20 /// - bind to main Mercurial code, potentially in several ways and have these
20 /// - bind to main Mercurial code, potentially in several ways and have these
21 /// bindings evolve over time
21 /// bindings evolve over time
22 pub struct AncestorsIterator<G: Graph> {
22 pub struct AncestorsIterator<G: Graph> {
23 graph: G,
23 graph: G,
24 visit: BinaryHeap<Revision>,
24 visit: BinaryHeap<Revision>,
25 seen: HashSet<Revision>,
25 seen: HashSet<Revision>,
26 stoprev: Revision,
26 stoprev: Revision,
27 }
27 }
28
28
29 /// Lazy ancestors set, backed by AncestorsIterator
29 /// Lazy ancestors set, backed by AncestorsIterator
30 pub struct LazyAncestors<G: Graph + Clone> {
30 pub struct LazyAncestors<G: Graph + Clone> {
31 graph: G,
31 graph: G,
32 containsiter: AncestorsIterator<G>,
32 containsiter: AncestorsIterator<G>,
33 initrevs: Vec<Revision>,
33 initrevs: Vec<Revision>,
34 stoprev: Revision,
34 stoprev: Revision,
35 inclusive: bool,
35 inclusive: bool,
36 }
36 }
37
37
38 pub struct MissingAncestors<G: Graph> {
38 pub struct MissingAncestors<G: Graph> {
39 graph: G,
39 graph: G,
40 bases: HashSet<Revision>,
40 bases: HashSet<Revision>,
41 max_base: Revision,
41 max_base: Revision,
42 }
42 }
43
43
44 impl<G: Graph> AncestorsIterator<G> {
44 impl<G: Graph> AncestorsIterator<G> {
45 /// Constructor.
45 /// Constructor.
46 ///
46 ///
47 /// if `inclusive` is true, then the init revisions are emitted in
47 /// if `inclusive` is true, then the init revisions are emitted in
48 /// particular, otherwise iteration starts from their parents.
48 /// particular, otherwise iteration starts from their parents.
49 pub fn new(
49 pub fn new(
50 graph: G,
50 graph: G,
51 initrevs: impl IntoIterator<Item = Revision>,
51 initrevs: impl IntoIterator<Item = Revision>,
52 stoprev: Revision,
52 stoprev: Revision,
53 inclusive: bool,
53 inclusive: bool,
54 ) -> Result<Self, GraphError> {
54 ) -> Result<Self, GraphError> {
55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
56 if inclusive {
56 if inclusive {
57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
58 let seen = visit.iter().map(|&x| x).collect();
58 let seen = visit.iter().cloned().collect();
59 return Ok(AncestorsIterator {
59 return Ok(AncestorsIterator {
60 visit: visit,
60 visit,
61 seen: seen,
61 seen,
62 stoprev: stoprev,
62 stoprev,
63 graph: graph,
63 graph,
64 });
64 });
65 }
65 }
66 let mut this = AncestorsIterator {
66 let mut this = AncestorsIterator {
67 visit: BinaryHeap::new(),
67 visit: BinaryHeap::new(),
68 seen: HashSet::new(),
68 seen: HashSet::new(),
69 stoprev: stoprev,
69 stoprev,
70 graph: graph,
70 graph,
71 };
71 };
72 this.seen.insert(NULL_REVISION);
72 this.seen.insert(NULL_REVISION);
73 for rev in filtered_initrevs {
73 for rev in filtered_initrevs {
74 for parent in this.graph.parents(rev)?.iter().cloned() {
74 for parent in this.graph.parents(rev)?.iter().cloned() {
75 this.conditionally_push_rev(parent);
75 this.conditionally_push_rev(parent);
76 }
76 }
77 }
77 }
78 Ok(this)
78 Ok(this)
79 }
79 }
80
80
81 #[inline]
81 #[inline]
82 fn conditionally_push_rev(&mut self, rev: Revision) {
82 fn conditionally_push_rev(&mut self, rev: Revision) {
83 if self.stoprev <= rev && self.seen.insert(rev) {
83 if self.stoprev <= rev && self.seen.insert(rev) {
84 self.visit.push(rev);
84 self.visit.push(rev);
85 }
85 }
86 }
86 }
87
87
88 /// Consumes partially the iterator to tell if the given target
88 /// Consumes partially the iterator to tell if the given target
89 /// revision
89 /// revision
90 /// is in the ancestors it emits.
90 /// is in the ancestors it emits.
91 /// This is meant for iterators actually dedicated to that kind of
91 /// This is meant for iterators actually dedicated to that kind of
92 /// purpose
92 /// purpose
93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
94 if self.seen.contains(&target) && target != NULL_REVISION {
94 if self.seen.contains(&target) && target != NULL_REVISION {
95 return Ok(true);
95 return Ok(true);
96 }
96 }
97 for item in self {
97 for item in self {
98 let rev = item?;
98 let rev = item?;
99 if rev == target {
99 if rev == target {
100 return Ok(true);
100 return Ok(true);
101 }
101 }
102 if rev < target {
102 if rev < target {
103 return Ok(false);
103 return Ok(false);
104 }
104 }
105 }
105 }
106 Ok(false)
106 Ok(false)
107 }
107 }
108
108
109 pub fn peek(&self) -> Option<Revision> {
109 pub fn peek(&self) -> Option<Revision> {
110 self.visit.peek().map(|&r| r)
110 self.visit.peek().cloned()
111 }
111 }
112
112
113 /// Tell if the iterator is about an empty set
113 /// Tell if the iterator is about an empty set
114 ///
114 ///
115 /// The result does not depend whether the iterator has been consumed
115 /// The result does not depend whether the iterator has been consumed
116 /// or not.
116 /// or not.
117 /// This is mostly meant for iterators backing a lazy ancestors set
117 /// This is mostly meant for iterators backing a lazy ancestors set
118 pub fn is_empty(&self) -> bool {
118 pub fn is_empty(&self) -> bool {
119 if self.visit.len() > 0 {
119 if self.visit.len() > 0 {
120 return false;
120 return false;
121 }
121 }
122 if self.seen.len() > 1 {
122 if self.seen.len() > 1 {
123 return false;
123 return false;
124 }
124 }
125 // at this point, the seen set is at most a singleton.
125 // at this point, the seen set is at most a singleton.
126 // If not `self.inclusive`, it's still possible that it has only
126 // If not `self.inclusive`, it's still possible that it has only
127 // the null revision
127 // the null revision
128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
129 }
129 }
130 }
130 }
131
131
132 /// Main implementation for the iterator
132 /// Main implementation for the iterator
133 ///
133 ///
134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
135 /// with a few non crucial differences:
135 /// with a few non crucial differences:
136 ///
136 ///
137 /// - there's no filtering of invalid parent revisions. Actually, it should be
137 /// - there's no filtering of invalid parent revisions. Actually, it should be
138 /// consistent and more efficient to filter them from the end caller.
138 /// consistent and more efficient to filter them from the end caller.
139 /// - we don't have the optimization for adjacent revisions (i.e., the case
139 /// - we don't have the optimization for adjacent revisions (i.e., the case
140 /// where `p1 == rev - 1`), because it amounts to update the first element of
140 /// where `p1 == rev - 1`), because it amounts to update the first element of
141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
142 /// - we save a few pushes by comparing with `stoprev` before pushing
142 /// - we save a few pushes by comparing with `stoprev` before pushing
143 impl<G: Graph> Iterator for AncestorsIterator<G> {
143 impl<G: Graph> Iterator for AncestorsIterator<G> {
144 type Item = Result<Revision, GraphError>;
144 type Item = Result<Revision, GraphError>;
145
145
146 fn next(&mut self) -> Option<Self::Item> {
146 fn next(&mut self) -> Option<Self::Item> {
147 let current = match self.visit.peek() {
147 let current = match self.visit.peek() {
148 None => {
148 None => {
149 return None;
149 return None;
150 }
150 }
151 Some(c) => *c,
151 Some(c) => *c,
152 };
152 };
153 let [p1, p2] = match self.graph.parents(current) {
153 let [p1, p2] = match self.graph.parents(current) {
154 Ok(ps) => ps,
154 Ok(ps) => ps,
155 Err(e) => return Some(Err(e)),
155 Err(e) => return Some(Err(e)),
156 };
156 };
157 if p1 < self.stoprev || !self.seen.insert(p1) {
157 if p1 < self.stoprev || !self.seen.insert(p1) {
158 self.visit.pop();
158 self.visit.pop();
159 } else {
159 } else {
160 *(self.visit.peek_mut().unwrap()) = p1;
160 *(self.visit.peek_mut().unwrap()) = p1;
161 };
161 };
162
162
163 self.conditionally_push_rev(p2);
163 self.conditionally_push_rev(p2);
164 Some(Ok(current))
164 Some(Ok(current))
165 }
165 }
166 }
166 }
167
167
168 impl<G: Graph + Clone> LazyAncestors<G> {
168 impl<G: Graph + Clone> LazyAncestors<G> {
169 pub fn new(
169 pub fn new(
170 graph: G,
170 graph: G,
171 initrevs: impl IntoIterator<Item = Revision>,
171 initrevs: impl IntoIterator<Item = Revision>,
172 stoprev: Revision,
172 stoprev: Revision,
173 inclusive: bool,
173 inclusive: bool,
174 ) -> Result<Self, GraphError> {
174 ) -> Result<Self, GraphError> {
175 let v: Vec<Revision> = initrevs.into_iter().collect();
175 let v: Vec<Revision> = initrevs.into_iter().collect();
176 Ok(LazyAncestors {
176 Ok(LazyAncestors {
177 graph: graph.clone(),
177 graph: graph.clone(),
178 containsiter: AncestorsIterator::new(
178 containsiter: AncestorsIterator::new(
179 graph,
179 graph,
180 v.iter().cloned(),
180 v.iter().cloned(),
181 stoprev,
181 stoprev,
182 inclusive,
182 inclusive,
183 )?,
183 )?,
184 initrevs: v,
184 initrevs: v,
185 stoprev: stoprev,
185 stoprev,
186 inclusive: inclusive,
186 inclusive,
187 })
187 })
188 }
188 }
189
189
190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
191 self.containsiter.contains(rev)
191 self.containsiter.contains(rev)
192 }
192 }
193
193
194 pub fn is_empty(&self) -> bool {
194 pub fn is_empty(&self) -> bool {
195 self.containsiter.is_empty()
195 self.containsiter.is_empty()
196 }
196 }
197
197
198 pub fn iter(&self) -> AncestorsIterator<G> {
198 pub fn iter(&self) -> AncestorsIterator<G> {
199 // the arguments being the same as for self.containsiter, we know
199 // the arguments being the same as for self.containsiter, we know
200 // for sure that AncestorsIterator constructor can't fail
200 // for sure that AncestorsIterator constructor can't fail
201 AncestorsIterator::new(
201 AncestorsIterator::new(
202 self.graph.clone(),
202 self.graph.clone(),
203 self.initrevs.iter().cloned(),
203 self.initrevs.iter().cloned(),
204 self.stoprev,
204 self.stoprev,
205 self.inclusive,
205 self.inclusive,
206 )
206 )
207 .unwrap()
207 .unwrap()
208 }
208 }
209 }
209 }
210
210
211 impl<G: Graph> MissingAncestors<G> {
211 impl<G: Graph> MissingAncestors<G> {
212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
213 let mut created = MissingAncestors {
213 let mut created = MissingAncestors {
214 graph: graph,
214 graph,
215 bases: HashSet::new(),
215 bases: HashSet::new(),
216 max_base: NULL_REVISION,
216 max_base: NULL_REVISION,
217 };
217 };
218 created.add_bases(bases);
218 created.add_bases(bases);
219 created
219 created
220 }
220 }
221
221
222 pub fn has_bases(&self) -> bool {
222 pub fn has_bases(&self) -> bool {
223 !self.bases.is_empty()
223 !self.bases.is_empty()
224 }
224 }
225
225
226 /// Return a reference to current bases.
226 /// Return a reference to current bases.
227 ///
227 ///
228 /// This is useful in unit tests, but also setdiscovery.py does
228 /// This is useful in unit tests, but also setdiscovery.py does
229 /// read the bases attribute of a ancestor.missingancestors instance.
229 /// read the bases attribute of a ancestor.missingancestors instance.
230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
231 &self.bases
231 &self.bases
232 }
232 }
233
233
234 /// Computes the relative heads of current bases.
234 /// Computes the relative heads of current bases.
235 ///
235 ///
236 /// The object is still usable after this.
236 /// The object is still usable after this.
237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
238 dagops::heads(&self.graph, self.bases.iter())
238 dagops::heads(&self.graph, self.bases.iter())
239 }
239 }
240
240
241 /// Consumes the object and returns the relative heads of its bases.
241 /// Consumes the object and returns the relative heads of its bases.
242 pub fn into_bases_heads(
242 pub fn into_bases_heads(
243 mut self,
243 mut self,
244 ) -> Result<HashSet<Revision>, GraphError> {
244 ) -> Result<HashSet<Revision>, GraphError> {
245 dagops::retain_heads(&self.graph, &mut self.bases)?;
245 dagops::retain_heads(&self.graph, &mut self.bases)?;
246 Ok(self.bases)
246 Ok(self.bases)
247 }
247 }
248
248
249 /// Add some revisions to `self.bases`
249 /// Add some revisions to `self.bases`
250 ///
250 ///
251 /// Takes care of keeping `self.max_base` up to date.
251 /// Takes care of keeping `self.max_base` up to date.
252 pub fn add_bases(
252 pub fn add_bases(
253 &mut self,
253 &mut self,
254 new_bases: impl IntoIterator<Item = Revision>,
254 new_bases: impl IntoIterator<Item = Revision>,
255 ) {
255 ) {
256 let mut max_base = self.max_base;
256 let mut max_base = self.max_base;
257 self.bases.extend(
257 self.bases.extend(
258 new_bases
258 new_bases
259 .into_iter()
259 .into_iter()
260 .filter(|&rev| rev != NULL_REVISION)
260 .filter(|&rev| rev != NULL_REVISION)
261 .map(|r| {
261 .map(|r| {
262 if r > max_base {
262 if r > max_base {
263 max_base = r;
263 max_base = r;
264 }
264 }
265 r
265 r
266 }),
266 }),
267 );
267 );
268 self.max_base = max_base;
268 self.max_base = max_base;
269 }
269 }
270
270
271 /// Remove all ancestors of self.bases from the revs set (in place)
271 /// Remove all ancestors of self.bases from the revs set (in place)
272 pub fn remove_ancestors_from(
272 pub fn remove_ancestors_from(
273 &mut self,
273 &mut self,
274 revs: &mut HashSet<Revision>,
274 revs: &mut HashSet<Revision>,
275 ) -> Result<(), GraphError> {
275 ) -> Result<(), GraphError> {
276 revs.retain(|r| !self.bases.contains(r));
276 revs.retain(|r| !self.bases.contains(r));
277 // the null revision is always an ancestor. Logically speaking
277 // the null revision is always an ancestor. Logically speaking
278 // it's debatable in case bases is empty, but the Python
278 // it's debatable in case bases is empty, but the Python
279 // implementation always adds NULL_REVISION to bases, making it
279 // implementation always adds NULL_REVISION to bases, making it
280 // unconditionnally true.
280 // unconditionnally true.
281 revs.remove(&NULL_REVISION);
281 revs.remove(&NULL_REVISION);
282 if revs.is_empty() {
282 if revs.is_empty() {
283 return Ok(());
283 return Ok(());
284 }
284 }
285 // anything in revs > start is definitely not an ancestor of bases
285 // anything in revs > start is definitely not an ancestor of bases
286 // revs <= start need to be investigated
286 // revs <= start need to be investigated
287 if self.max_base == NULL_REVISION {
287 if self.max_base == NULL_REVISION {
288 return Ok(());
288 return Ok(());
289 }
289 }
290
290
291 // whatever happens, we'll keep at least keepcount of them
291 // whatever happens, we'll keep at least keepcount of them
292 // knowing this gives us a earlier stop condition than
292 // knowing this gives us a earlier stop condition than
293 // going all the way to the root
293 // going all the way to the root
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
295
295
296 let mut curr = self.max_base;
296 let mut curr = self.max_base;
297 while curr != NULL_REVISION && revs.len() > keepcount {
297 while curr != NULL_REVISION && revs.len() > keepcount {
298 if self.bases.contains(&curr) {
298 if self.bases.contains(&curr) {
299 revs.remove(&curr);
299 revs.remove(&curr);
300 self.add_parents(curr)?;
300 self.add_parents(curr)?;
301 }
301 }
302 curr -= 1;
302 curr -= 1;
303 }
303 }
304 Ok(())
304 Ok(())
305 }
305 }
306
306
307 /// Add the parents of `rev` to `self.bases`
307 /// Add the parents of `rev` to `self.bases`
308 ///
308 ///
309 /// This has no effect on `self.max_base`
309 /// This has no effect on `self.max_base`
310 #[inline]
310 #[inline]
311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
312 if rev == NULL_REVISION {
312 if rev == NULL_REVISION {
313 return Ok(());
313 return Ok(());
314 }
314 }
315 for p in self.graph.parents(rev)?.iter().cloned() {
315 for p in self.graph.parents(rev)?.iter().cloned() {
316 // No need to bother the set with inserting NULL_REVISION over and
316 // No need to bother the set with inserting NULL_REVISION over and
317 // over
317 // over
318 if p != NULL_REVISION {
318 if p != NULL_REVISION {
319 self.bases.insert(p);
319 self.bases.insert(p);
320 }
320 }
321 }
321 }
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 /// Return all the ancestors of revs that are not ancestors of self.bases
325 /// Return all the ancestors of revs that are not ancestors of self.bases
326 ///
326 ///
327 /// This may include elements from revs.
327 /// This may include elements from revs.
328 ///
328 ///
329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
330 /// revision number order, which is a topological order.
330 /// revision number order, which is a topological order.
331 pub fn missing_ancestors(
331 pub fn missing_ancestors(
332 &mut self,
332 &mut self,
333 revs: impl IntoIterator<Item = Revision>,
333 revs: impl IntoIterator<Item = Revision>,
334 ) -> Result<Vec<Revision>, GraphError> {
334 ) -> Result<Vec<Revision>, GraphError> {
335 // just for convenience and comparison with Python version
335 // just for convenience and comparison with Python version
336 let bases_visit = &mut self.bases;
336 let bases_visit = &mut self.bases;
337 let mut revs: HashSet<Revision> = revs
337 let mut revs: HashSet<Revision> = revs
338 .into_iter()
338 .into_iter()
339 .filter(|r| !bases_visit.contains(r))
339 .filter(|r| !bases_visit.contains(r))
340 .collect();
340 .collect();
341 let revs_visit = &mut revs;
341 let revs_visit = &mut revs;
342 let mut both_visit: HashSet<Revision> =
342 let mut both_visit: HashSet<Revision> =
343 revs_visit.intersection(&bases_visit).cloned().collect();
343 revs_visit.intersection(&bases_visit).cloned().collect();
344 if revs_visit.is_empty() {
344 if revs_visit.is_empty() {
345 return Ok(Vec::new());
345 return Ok(Vec::new());
346 }
346 }
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
348 let start = max(self.max_base, max_revs);
348 let start = max(self.max_base, max_revs);
349
349
350 // TODO heuristics for with_capacity()?
350 // TODO heuristics for with_capacity()?
351 let mut missing: Vec<Revision> = Vec::new();
351 let mut missing: Vec<Revision> = Vec::new();
352 for curr in (0..=start).rev() {
352 for curr in (0..=start).rev() {
353 if revs_visit.is_empty() {
353 if revs_visit.is_empty() {
354 break;
354 break;
355 }
355 }
356 if both_visit.remove(&curr) {
356 if both_visit.remove(&curr) {
357 // curr's parents might have made it into revs_visit through
357 // curr's parents might have made it into revs_visit through
358 // another path
358 // another path
359 for p in self.graph.parents(curr)?.iter().cloned() {
359 for p in self.graph.parents(curr)?.iter().cloned() {
360 if p == NULL_REVISION {
360 if p == NULL_REVISION {
361 continue;
361 continue;
362 }
362 }
363 revs_visit.remove(&p);
363 revs_visit.remove(&p);
364 bases_visit.insert(p);
364 bases_visit.insert(p);
365 both_visit.insert(p);
365 both_visit.insert(p);
366 }
366 }
367 } else if revs_visit.remove(&curr) {
367 } else if revs_visit.remove(&curr) {
368 missing.push(curr);
368 missing.push(curr);
369 for p in self.graph.parents(curr)?.iter().cloned() {
369 for p in self.graph.parents(curr)?.iter().cloned() {
370 if p == NULL_REVISION {
370 if p == NULL_REVISION {
371 continue;
371 continue;
372 }
372 }
373 if bases_visit.contains(&p) {
373 if bases_visit.contains(&p) {
374 // p is already known to be an ancestor of revs_visit
374 // p is already known to be an ancestor of revs_visit
375 revs_visit.remove(&p);
375 revs_visit.remove(&p);
376 both_visit.insert(p);
376 both_visit.insert(p);
377 } else if both_visit.contains(&p) {
377 } else if both_visit.contains(&p) {
378 // p should have been in bases_visit
378 // p should have been in bases_visit
379 revs_visit.remove(&p);
379 revs_visit.remove(&p);
380 bases_visit.insert(p);
380 bases_visit.insert(p);
381 } else {
381 } else {
382 // visit later
382 // visit later
383 revs_visit.insert(p);
383 revs_visit.insert(p);
384 }
384 }
385 }
385 }
386 } else if bases_visit.contains(&curr) {
386 } else if bases_visit.contains(&curr) {
387 for p in self.graph.parents(curr)?.iter().cloned() {
387 for p in self.graph.parents(curr)?.iter().cloned() {
388 if p == NULL_REVISION {
388 if p == NULL_REVISION {
389 continue;
389 continue;
390 }
390 }
391 if revs_visit.remove(&p) || both_visit.contains(&p) {
391 if revs_visit.remove(&p) || both_visit.contains(&p) {
392 // p is an ancestor of bases_visit, and is implicitly
392 // p is an ancestor of bases_visit, and is implicitly
393 // in revs_visit, which means p is ::revs & ::bases.
393 // in revs_visit, which means p is ::revs & ::bases.
394 bases_visit.insert(p);
394 bases_visit.insert(p);
395 both_visit.insert(p);
395 both_visit.insert(p);
396 } else {
396 } else {
397 bases_visit.insert(p);
397 bases_visit.insert(p);
398 }
398 }
399 }
399 }
400 }
400 }
401 }
401 }
402 missing.reverse();
402 missing.reverse();
403 Ok(missing)
403 Ok(missing)
404 }
404 }
405 }
405 }
406
406
407 #[cfg(test)]
407 #[cfg(test)]
408 mod tests {
408 mod tests {
409
409
410 use super::*;
410 use super::*;
411 use crate::testing::{SampleGraph, VecGraph};
411 use crate::testing::{SampleGraph, VecGraph};
412 use std::iter::FromIterator;
412 use std::iter::FromIterator;
413
413
414 fn list_ancestors<G: Graph>(
414 fn list_ancestors<G: Graph>(
415 graph: G,
415 graph: G,
416 initrevs: Vec<Revision>,
416 initrevs: Vec<Revision>,
417 stoprev: Revision,
417 stoprev: Revision,
418 inclusive: bool,
418 inclusive: bool,
419 ) -> Vec<Revision> {
419 ) -> Vec<Revision> {
420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
421 .unwrap()
421 .unwrap()
422 .map(|res| res.unwrap())
422 .map(|res| res.unwrap())
423 .collect()
423 .collect()
424 }
424 }
425
425
426 #[test]
426 #[test]
427 /// Same tests as test-ancestor.py, without membership
427 /// Same tests as test-ancestor.py, without membership
428 /// (see also test-ancestor.py.out)
428 /// (see also test-ancestor.py.out)
429 fn test_list_ancestor() {
429 fn test_list_ancestor() {
430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
431 assert_eq!(
431 assert_eq!(
432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
433 vec![8, 7, 4, 3, 2, 1, 0]
433 vec![8, 7, 4, 3, 2, 1, 0]
434 );
434 );
435 assert_eq!(
435 assert_eq!(
436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
437 vec![1, 0]
437 vec![1, 0]
438 );
438 );
439 assert_eq!(
439 assert_eq!(
440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
442 );
442 );
443 assert_eq!(
443 assert_eq!(
444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
445 vec![8, 7]
445 vec![8, 7]
446 );
446 );
447 assert_eq!(
447 assert_eq!(
448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
449 vec![13, 11, 8, 7]
449 vec![13, 11, 8, 7]
450 );
450 );
451 assert_eq!(
451 assert_eq!(
452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
453 vec![13, 11]
453 vec![13, 11]
454 );
454 );
455 assert_eq!(
455 assert_eq!(
456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
457 vec![13]
457 vec![13]
458 );
458 );
459 assert_eq!(
459 assert_eq!(
460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
461 vec![10, 5, 4, 2, 1, 0]
461 vec![10, 5, 4, 2, 1, 0]
462 );
462 );
463 }
463 }
464
464
465 #[test]
465 #[test]
466 /// Corner case that's not directly in test-ancestors.py, but
466 /// Corner case that's not directly in test-ancestors.py, but
467 /// that happens quite often, as demonstrated by running the whole
467 /// that happens quite often, as demonstrated by running the whole
468 /// suite.
468 /// suite.
469 /// For instance, run tests/test-obsolete-checkheads.t
469 /// For instance, run tests/test-obsolete-checkheads.t
470 fn test_nullrev_input() {
470 fn test_nullrev_input() {
471 let mut iter =
471 let mut iter =
472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
473 assert_eq!(iter.next(), None)
473 assert_eq!(iter.next(), None)
474 }
474 }
475
475
476 #[test]
476 #[test]
477 fn test_contains() {
477 fn test_contains() {
478 let mut lazy =
478 let mut lazy =
479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
480 assert!(lazy.contains(1).unwrap());
480 assert!(lazy.contains(1).unwrap());
481 assert!(!lazy.contains(3).unwrap());
481 assert!(!lazy.contains(3).unwrap());
482
482
483 let mut lazy =
483 let mut lazy =
484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
485 assert!(!lazy.contains(NULL_REVISION).unwrap());
485 assert!(!lazy.contains(NULL_REVISION).unwrap());
486 }
486 }
487
487
488 #[test]
488 #[test]
489 fn test_peek() {
489 fn test_peek() {
490 let mut iter =
490 let mut iter =
491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
492 // peek() gives us the next value
492 // peek() gives us the next value
493 assert_eq!(iter.peek(), Some(10));
493 assert_eq!(iter.peek(), Some(10));
494 // but it's not been consumed
494 // but it's not been consumed
495 assert_eq!(iter.next(), Some(Ok(10)));
495 assert_eq!(iter.next(), Some(Ok(10)));
496 // and iteration resumes normally
496 // and iteration resumes normally
497 assert_eq!(iter.next(), Some(Ok(5)));
497 assert_eq!(iter.next(), Some(Ok(5)));
498
498
499 // let's drain the iterator to test peek() at the end
499 // let's drain the iterator to test peek() at the end
500 while iter.next().is_some() {}
500 while iter.next().is_some() {}
501 assert_eq!(iter.peek(), None);
501 assert_eq!(iter.peek(), None);
502 }
502 }
503
503
504 #[test]
504 #[test]
505 fn test_empty() {
505 fn test_empty() {
506 let mut iter =
506 let mut iter =
507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
508 assert!(!iter.is_empty());
508 assert!(!iter.is_empty());
509 while iter.next().is_some() {}
509 while iter.next().is_some() {}
510 assert!(!iter.is_empty());
510 assert!(!iter.is_empty());
511
511
512 let iter =
512 let iter =
513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
514 assert!(iter.is_empty());
514 assert!(iter.is_empty());
515
515
516 // case where iter.seen == {NULL_REVISION}
516 // case where iter.seen == {NULL_REVISION}
517 let iter =
517 let iter =
518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
519 assert!(iter.is_empty());
519 assert!(iter.is_empty());
520 }
520 }
521
521
522 /// A corrupted Graph, supporting error handling tests
522 /// A corrupted Graph, supporting error handling tests
523 #[derive(Clone, Debug)]
523 #[derive(Clone, Debug)]
524 struct Corrupted;
524 struct Corrupted;
525
525
526 impl Graph for Corrupted {
526 impl Graph for Corrupted {
527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
528 match rev {
528 match rev {
529 1 => Ok([0, -1]),
529 1 => Ok([0, -1]),
530 r => Err(GraphError::ParentOutOfRange(r)),
530 r => Err(GraphError::ParentOutOfRange(r)),
531 }
531 }
532 }
532 }
533 }
533 }
534
534
535 #[test]
535 #[test]
536 fn test_initrev_out_of_range() {
536 fn test_initrev_out_of_range() {
537 // inclusive=false looks up initrev's parents right away
537 // inclusive=false looks up initrev's parents right away
538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
539 Ok(_) => panic!("Should have been ParentOutOfRange"),
539 Ok(_) => panic!("Should have been ParentOutOfRange"),
540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
541 }
541 }
542 }
542 }
543
543
544 #[test]
544 #[test]
545 fn test_next_out_of_range() {
545 fn test_next_out_of_range() {
546 // inclusive=false looks up initrev's parents right away
546 // inclusive=false looks up initrev's parents right away
547 let mut iter =
547 let mut iter =
548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
550 }
550 }
551
551
552 #[test]
552 #[test]
553 fn test_lazy_iter_contains() {
553 fn test_lazy_iter_contains() {
554 let mut lazy =
554 let mut lazy =
555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
556
556
557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
558 // compare with iterator tests on the same initial revisions
558 // compare with iterator tests on the same initial revisions
559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
560
560
561 // contains() results are correct, unaffected by the fact that
561 // contains() results are correct, unaffected by the fact that
562 // we consumed entirely an iterator out of lazy
562 // we consumed entirely an iterator out of lazy
563 assert_eq!(lazy.contains(2), Ok(true));
563 assert_eq!(lazy.contains(2), Ok(true));
564 assert_eq!(lazy.contains(9), Ok(false));
564 assert_eq!(lazy.contains(9), Ok(false));
565 }
565 }
566
566
567 #[test]
567 #[test]
568 fn test_lazy_contains_iter() {
568 fn test_lazy_contains_iter() {
569 let mut lazy =
569 let mut lazy =
570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
571
571
572 assert_eq!(lazy.contains(2), Ok(true));
572 assert_eq!(lazy.contains(2), Ok(true));
573 assert_eq!(lazy.contains(6), Ok(false));
573 assert_eq!(lazy.contains(6), Ok(false));
574
574
575 // after consumption of 2 by the inner iterator, results stay
575 // after consumption of 2 by the inner iterator, results stay
576 // consistent
576 // consistent
577 assert_eq!(lazy.contains(2), Ok(true));
577 assert_eq!(lazy.contains(2), Ok(true));
578 assert_eq!(lazy.contains(5), Ok(false));
578 assert_eq!(lazy.contains(5), Ok(false));
579
579
580 // iter() still gives us a fresh iterator
580 // iter() still gives us a fresh iterator
581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
583 }
583 }
584
584
585 #[test]
585 #[test]
586 /// Test constructor, add/get bases and heads
586 /// Test constructor, add/get bases and heads
587 fn test_missing_bases() -> Result<(), GraphError> {
587 fn test_missing_bases() -> Result<(), GraphError> {
588 let mut missing_ancestors =
588 let mut missing_ancestors =
589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
590 let mut as_vec: Vec<Revision> =
590 let mut as_vec: Vec<Revision> =
591 missing_ancestors.get_bases().iter().cloned().collect();
591 missing_ancestors.get_bases().iter().cloned().collect();
592 as_vec.sort();
592 as_vec.sort();
593 assert_eq!(as_vec, [1, 3, 5]);
593 assert_eq!(as_vec, [1, 3, 5]);
594 assert_eq!(missing_ancestors.max_base, 5);
594 assert_eq!(missing_ancestors.max_base, 5);
595
595
596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
598 as_vec.sort();
598 as_vec.sort();
599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
600 assert_eq!(missing_ancestors.max_base, 8);
600 assert_eq!(missing_ancestors.max_base, 8);
601
601
602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
603 as_vec.sort();
603 as_vec.sort();
604 assert_eq!(as_vec, [3, 5, 7, 8]);
604 assert_eq!(as_vec, [3, 5, 7, 8]);
605 Ok(())
605 Ok(())
606 }
606 }
607
607
608 fn assert_missing_remove(
608 fn assert_missing_remove(
609 bases: &[Revision],
609 bases: &[Revision],
610 revs: &[Revision],
610 revs: &[Revision],
611 expected: &[Revision],
611 expected: &[Revision],
612 ) {
612 ) {
613 let mut missing_ancestors =
613 let mut missing_ancestors =
614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
616 missing_ancestors
616 missing_ancestors
617 .remove_ancestors_from(&mut revset)
617 .remove_ancestors_from(&mut revset)
618 .unwrap();
618 .unwrap();
619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
620 as_vec.sort();
620 as_vec.sort();
621 assert_eq!(as_vec.as_slice(), expected);
621 assert_eq!(as_vec.as_slice(), expected);
622 }
622 }
623
623
624 #[test]
624 #[test]
625 fn test_missing_remove() {
625 fn test_missing_remove() {
626 assert_missing_remove(
626 assert_missing_remove(
627 &[1, 2, 3, 4, 7],
627 &[1, 2, 3, 4, 7],
628 Vec::from_iter(1..10).as_slice(),
628 Vec::from_iter(1..10).as_slice(),
629 &[5, 6, 8, 9],
629 &[5, 6, 8, 9],
630 );
630 );
631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
633 }
633 }
634
634
635 fn assert_missing_ancestors(
635 fn assert_missing_ancestors(
636 bases: &[Revision],
636 bases: &[Revision],
637 revs: &[Revision],
637 revs: &[Revision],
638 expected: &[Revision],
638 expected: &[Revision],
639 ) {
639 ) {
640 let mut missing_ancestors =
640 let mut missing_ancestors =
641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
642 let missing = missing_ancestors
642 let missing = missing_ancestors
643 .missing_ancestors(revs.iter().cloned())
643 .missing_ancestors(revs.iter().cloned())
644 .unwrap();
644 .unwrap();
645 assert_eq!(missing.as_slice(), expected);
645 assert_eq!(missing.as_slice(), expected);
646 }
646 }
647
647
648 #[test]
648 #[test]
649 fn test_missing_ancestors() {
649 fn test_missing_ancestors() {
650 // examples taken from test-ancestors.py by having it run
650 // examples taken from test-ancestors.py by having it run
651 // on the same graph (both naive and fast Python algs)
651 // on the same graph (both naive and fast Python algs)
652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
655 }
655 }
656
656
657 /// An interesting case found by a random generator similar to
657 /// An interesting case found by a random generator similar to
658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
659 /// failed this, yet none of the integration tests of the whole suite
659 /// failed this, yet none of the integration tests of the whole suite
660 /// catched it.
660 /// catched it.
661 #[test]
661 #[test]
662 fn test_remove_ancestors_from_case1() {
662 fn test_remove_ancestors_from_case1() {
663 let graph: VecGraph = vec![
663 let graph: VecGraph = vec![
664 [NULL_REVISION, NULL_REVISION],
664 [NULL_REVISION, NULL_REVISION],
665 [0, NULL_REVISION],
665 [0, NULL_REVISION],
666 [1, 0],
666 [1, 0],
667 [2, 1],
667 [2, 1],
668 [3, NULL_REVISION],
668 [3, NULL_REVISION],
669 [4, NULL_REVISION],
669 [4, NULL_REVISION],
670 [5, 1],
670 [5, 1],
671 [2, NULL_REVISION],
671 [2, NULL_REVISION],
672 [7, NULL_REVISION],
672 [7, NULL_REVISION],
673 [8, NULL_REVISION],
673 [8, NULL_REVISION],
674 [9, NULL_REVISION],
674 [9, NULL_REVISION],
675 [10, 1],
675 [10, 1],
676 [3, NULL_REVISION],
676 [3, NULL_REVISION],
677 [12, NULL_REVISION],
677 [12, NULL_REVISION],
678 [13, NULL_REVISION],
678 [13, NULL_REVISION],
679 [14, NULL_REVISION],
679 [14, NULL_REVISION],
680 [4, NULL_REVISION],
680 [4, NULL_REVISION],
681 [16, NULL_REVISION],
681 [16, NULL_REVISION],
682 [17, NULL_REVISION],
682 [17, NULL_REVISION],
683 [18, NULL_REVISION],
683 [18, NULL_REVISION],
684 [19, 11],
684 [19, 11],
685 [20, NULL_REVISION],
685 [20, NULL_REVISION],
686 [21, NULL_REVISION],
686 [21, NULL_REVISION],
687 [22, NULL_REVISION],
687 [22, NULL_REVISION],
688 [23, NULL_REVISION],
688 [23, NULL_REVISION],
689 [2, NULL_REVISION],
689 [2, NULL_REVISION],
690 [3, NULL_REVISION],
690 [3, NULL_REVISION],
691 [26, 24],
691 [26, 24],
692 [27, NULL_REVISION],
692 [27, NULL_REVISION],
693 [28, NULL_REVISION],
693 [28, NULL_REVISION],
694 [12, NULL_REVISION],
694 [12, NULL_REVISION],
695 [1, NULL_REVISION],
695 [1, NULL_REVISION],
696 [1, 9],
696 [1, 9],
697 [32, NULL_REVISION],
697 [32, NULL_REVISION],
698 [33, NULL_REVISION],
698 [33, NULL_REVISION],
699 [34, 31],
699 [34, 31],
700 [35, NULL_REVISION],
700 [35, NULL_REVISION],
701 [36, 26],
701 [36, 26],
702 [37, NULL_REVISION],
702 [37, NULL_REVISION],
703 [38, NULL_REVISION],
703 [38, NULL_REVISION],
704 [39, NULL_REVISION],
704 [39, NULL_REVISION],
705 [40, NULL_REVISION],
705 [40, NULL_REVISION],
706 [41, NULL_REVISION],
706 [41, NULL_REVISION],
707 [42, 26],
707 [42, 26],
708 [0, NULL_REVISION],
708 [0, NULL_REVISION],
709 [44, NULL_REVISION],
709 [44, NULL_REVISION],
710 [45, 4],
710 [45, 4],
711 [40, NULL_REVISION],
711 [40, NULL_REVISION],
712 [47, NULL_REVISION],
712 [47, NULL_REVISION],
713 [36, 0],
713 [36, 0],
714 [49, NULL_REVISION],
714 [49, NULL_REVISION],
715 [NULL_REVISION, NULL_REVISION],
715 [NULL_REVISION, NULL_REVISION],
716 [51, NULL_REVISION],
716 [51, NULL_REVISION],
717 [52, NULL_REVISION],
717 [52, NULL_REVISION],
718 [53, NULL_REVISION],
718 [53, NULL_REVISION],
719 [14, NULL_REVISION],
719 [14, NULL_REVISION],
720 [55, NULL_REVISION],
720 [55, NULL_REVISION],
721 [15, NULL_REVISION],
721 [15, NULL_REVISION],
722 [23, NULL_REVISION],
722 [23, NULL_REVISION],
723 [58, NULL_REVISION],
723 [58, NULL_REVISION],
724 [59, NULL_REVISION],
724 [59, NULL_REVISION],
725 [2, NULL_REVISION],
725 [2, NULL_REVISION],
726 [61, 59],
726 [61, 59],
727 [62, NULL_REVISION],
727 [62, NULL_REVISION],
728 [63, NULL_REVISION],
728 [63, NULL_REVISION],
729 [NULL_REVISION, NULL_REVISION],
729 [NULL_REVISION, NULL_REVISION],
730 [65, NULL_REVISION],
730 [65, NULL_REVISION],
731 [66, NULL_REVISION],
731 [66, NULL_REVISION],
732 [67, NULL_REVISION],
732 [67, NULL_REVISION],
733 [68, NULL_REVISION],
733 [68, NULL_REVISION],
734 [37, 28],
734 [37, 28],
735 [69, 25],
735 [69, 25],
736 [71, NULL_REVISION],
736 [71, NULL_REVISION],
737 [72, NULL_REVISION],
737 [72, NULL_REVISION],
738 [50, 2],
738 [50, 2],
739 [74, NULL_REVISION],
739 [74, NULL_REVISION],
740 [12, NULL_REVISION],
740 [12, NULL_REVISION],
741 [18, NULL_REVISION],
741 [18, NULL_REVISION],
742 [77, NULL_REVISION],
742 [77, NULL_REVISION],
743 [78, NULL_REVISION],
743 [78, NULL_REVISION],
744 [79, NULL_REVISION],
744 [79, NULL_REVISION],
745 [43, 33],
745 [43, 33],
746 [81, NULL_REVISION],
746 [81, NULL_REVISION],
747 [82, NULL_REVISION],
747 [82, NULL_REVISION],
748 [83, NULL_REVISION],
748 [83, NULL_REVISION],
749 [84, 45],
749 [84, 45],
750 [85, NULL_REVISION],
750 [85, NULL_REVISION],
751 [86, NULL_REVISION],
751 [86, NULL_REVISION],
752 [NULL_REVISION, NULL_REVISION],
752 [NULL_REVISION, NULL_REVISION],
753 [88, NULL_REVISION],
753 [88, NULL_REVISION],
754 [NULL_REVISION, NULL_REVISION],
754 [NULL_REVISION, NULL_REVISION],
755 [76, 83],
755 [76, 83],
756 [44, NULL_REVISION],
756 [44, NULL_REVISION],
757 [92, NULL_REVISION],
757 [92, NULL_REVISION],
758 [93, NULL_REVISION],
758 [93, NULL_REVISION],
759 [9, NULL_REVISION],
759 [9, NULL_REVISION],
760 [95, 67],
760 [95, 67],
761 [96, NULL_REVISION],
761 [96, NULL_REVISION],
762 [97, NULL_REVISION],
762 [97, NULL_REVISION],
763 [NULL_REVISION, NULL_REVISION],
763 [NULL_REVISION, NULL_REVISION],
764 ];
764 ];
765 let problem_rev = 28 as Revision;
765 let problem_rev = 28 as Revision;
766 let problem_base = 70 as Revision;
766 let problem_base = 70 as Revision;
767 // making the problem obvious: problem_rev is a parent of problem_base
767 // making the problem obvious: problem_rev is a parent of problem_base
768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
769
769
770 let mut missing_ancestors: MissingAncestors<VecGraph> =
770 let mut missing_ancestors: MissingAncestors<VecGraph> =
771 MissingAncestors::new(
771 MissingAncestors::new(
772 graph,
772 graph,
773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
774 .iter()
774 .iter()
775 .cloned(),
775 .cloned(),
776 );
776 );
777 assert!(missing_ancestors.bases.contains(&problem_base));
777 assert!(missing_ancestors.bases.contains(&problem_base));
778
778
779 let mut revs: HashSet<Revision> =
779 let mut revs: HashSet<Revision> =
780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
781 .iter()
781 .iter()
782 .cloned()
782 .cloned()
783 .collect();
783 .collect();
784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
785 assert!(!revs.contains(&problem_rev));
785 assert!(!revs.contains(&problem_rev));
786 }
786 }
787 }
787 }
@@ -1,275 +1,276
1 // dagops.rs
1 // dagops.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Miscellaneous DAG operations
8 //! Miscellaneous DAG operations
9 //!
9 //!
10 //! # Terminology
10 //! # Terminology
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 //! mean those revisions that have no children among the collection.
12 //! mean those revisions that have no children among the collection.
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
14 //! whose parents, if any, don't belong to the collection.
14 //! whose parents, if any, don't belong to the collection.
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 use crate::ancestors::AncestorsIterator;
16 use crate::ancestors::AncestorsIterator;
17 use std::collections::{BTreeSet, HashSet};
17 use std::collections::{BTreeSet, HashSet};
18
18
19 fn remove_parents(
19 fn remove_parents<S: std::hash::BuildHasher>(
20 graph: &impl Graph,
20 graph: &impl Graph,
21 rev: Revision,
21 rev: Revision,
22 set: &mut HashSet<Revision>,
22 set: &mut HashSet<Revision, S>,
23 ) -> Result<(), GraphError> {
23 ) -> Result<(), GraphError> {
24 for parent in graph.parents(rev)?.iter() {
24 for parent in graph.parents(rev)?.iter() {
25 if *parent != NULL_REVISION {
25 if *parent != NULL_REVISION {
26 set.remove(parent);
26 set.remove(parent);
27 }
27 }
28 }
28 }
29 Ok(())
29 Ok(())
30 }
30 }
31
31
32 /// Relative heads out of some revisions, passed as an iterator.
32 /// Relative heads out of some revisions, passed as an iterator.
33 ///
33 ///
34 /// These heads are defined as those revisions that have no children
34 /// These heads are defined as those revisions that have no children
35 /// among those emitted by the iterator.
35 /// among those emitted by the iterator.
36 ///
36 ///
37 /// # Performance notes
37 /// # Performance notes
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 ///
39 ///
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 /// guarantee that cloning the iterator doesn't result in cloning the full
41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 /// construct it comes from.
42 /// construct it comes from.
43 pub fn heads<'a>(
43 pub fn heads<'a>(
44 graph: &impl Graph,
44 graph: &impl Graph,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 ) -> Result<HashSet<Revision>, GraphError> {
46 ) -> Result<HashSet<Revision>, GraphError> {
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 heads.remove(&NULL_REVISION);
48 heads.remove(&NULL_REVISION);
49 for rev in iter_revs {
49 for rev in iter_revs {
50 if *rev != NULL_REVISION {
50 if *rev != NULL_REVISION {
51 remove_parents(graph, *rev, &mut heads)?;
51 remove_parents(graph, *rev, &mut heads)?;
52 }
52 }
53 }
53 }
54 Ok(heads)
54 Ok(heads)
55 }
55 }
56
56
57 /// Retain in `revs` only its relative heads.
57 /// Retain in `revs` only its relative heads.
58 ///
58 ///
59 /// This is an in-place operation, so that control of the incoming
59 /// This is an in-place operation, so that control of the incoming
60 /// set is left to the caller.
60 /// set is left to the caller.
61 /// - a direct Python binding would probably need to build its own `HashSet`
61 /// - a direct Python binding would probably need to build its own `HashSet`
62 /// from an incoming iterable, even if its sole purpose is to extract the
62 /// from an incoming iterable, even if its sole purpose is to extract the
63 /// heads.
63 /// heads.
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 ///
65 ///
66 /// # Performance notes
66 /// # Performance notes
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 pub fn retain_heads(
68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 graph: &impl Graph,
69 graph: &impl Graph,
70 revs: &mut HashSet<Revision>,
70 revs: &mut HashSet<Revision, S>,
71 ) -> Result<(), GraphError> {
71 ) -> Result<(), GraphError> {
72 revs.remove(&NULL_REVISION);
72 revs.remove(&NULL_REVISION);
73 // we need to construct an iterable copy of revs to avoid itering while
73 // we need to construct an iterable copy of revs to avoid itering while
74 // mutating
74 // mutating
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 for rev in as_vec {
76 for rev in as_vec {
77 if rev != NULL_REVISION {
77 if rev != NULL_REVISION {
78 remove_parents(graph, rev, revs)?;
78 remove_parents(graph, rev, revs)?;
79 }
79 }
80 }
80 }
81 Ok(())
81 Ok(())
82 }
82 }
83
83
84 /// Roots of `revs`, passed as a `HashSet`
84 /// Roots of `revs`, passed as a `HashSet`
85 ///
85 ///
86 /// They are returned in arbitrary order
86 /// They are returned in arbitrary order
87 pub fn roots<G: Graph>(
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 graph: &G,
88 graph: &G,
89 revs: &HashSet<Revision>,
89 revs: &HashSet<Revision, S>,
90 ) -> Result<Vec<Revision>, GraphError> {
90 ) -> Result<Vec<Revision>, GraphError> {
91 let mut roots: Vec<Revision> = Vec::new();
91 let mut roots: Vec<Revision> = Vec::new();
92 for rev in revs {
92 for rev in revs {
93 if graph
93 if graph
94 .parents(*rev)?
94 .parents(*rev)?
95 .iter()
95 .iter()
96 .filter(|p| **p != NULL_REVISION)
96 .filter(|p| **p != NULL_REVISION)
97 .all(|p| !revs.contains(p))
97 .all(|p| !revs.contains(p))
98 {
98 {
99 roots.push(*rev);
99 roots.push(*rev);
100 }
100 }
101 }
101 }
102 Ok(roots)
102 Ok(roots)
103 }
103 }
104
104
105 /// Compute the topological range between two collections of revisions
105 /// Compute the topological range between two collections of revisions
106 ///
106 ///
107 /// This is equivalent to the revset `<roots>::<heads>`.
107 /// This is equivalent to the revset `<roots>::<heads>`.
108 ///
108 ///
109 /// Currently, the given `Graph` has to implement `Clone`, which means
109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 /// actually cloning just a reference-counted Python pointer if
110 /// actually cloning just a reference-counted Python pointer if
111 /// it's passed over through `rust-cpython`. This is due to the internal
111 /// it's passed over through `rust-cpython`. This is due to the internal
112 /// use of `AncestorsIterator`
112 /// use of `AncestorsIterator`
113 ///
113 ///
114 /// # Algorithmic details
114 /// # Algorithmic details
115 ///
115 ///
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 /// `mercurial.cext.parsers` does to obtain the same results.
117 /// `mercurial.cext.parsers` does to obtain the same results.
118 ///
118 ///
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 /// them in the vector `heads_ancestors` vector, and adding any element of
120 /// them in the vector `heads_ancestors` vector, and adding any element of
121 /// `roots` we find among them to the resulting range.
121 /// `roots` we find among them to the resulting range.
122 /// - Then, we iterate on that recorded vector so that a revision is always
122 /// - Then, we iterate on that recorded vector so that a revision is always
123 /// emitted after its parents and add all revisions whose parents are already
123 /// emitted after its parents and add all revisions whose parents are already
124 /// in the range to the results.
124 /// in the range to the results.
125 ///
125 ///
126 /// # Performance notes
126 /// # Performance notes
127 ///
127 ///
128 /// The main difference with the C implementation is that
128 /// The main difference with the C implementation is that
129 /// the latter uses a flat array with bit flags, instead of complex structures
129 /// the latter uses a flat array with bit flags, instead of complex structures
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 /// possible that the present implementation could be more memory efficient
131 /// possible that the present implementation could be more memory efficient
132 /// for very large repositories with many branches.
132 /// for very large repositories with many branches.
133 pub fn range(
133 pub fn range(
134 graph: &(impl Graph + Clone),
134 graph: &(impl Graph + Clone),
135 roots: impl IntoIterator<Item = Revision>,
135 roots: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
136 heads: impl IntoIterator<Item = Revision>,
137 ) -> Result<BTreeSet<Revision>, GraphError> {
137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 let mut range = BTreeSet::new();
138 let mut range = BTreeSet::new();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 let min_root: Revision = match roots.iter().cloned().min() {
140 let min_root: Revision = match roots.iter().cloned().min() {
141 None => {
141 None => {
142 return Ok(range);
142 return Ok(range);
143 }
143 }
144 Some(r) => r,
144 Some(r) => r,
145 };
145 };
146
146
147 // Internally, AncestorsIterator currently maintains a `HashSet`
147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 // of all seen revision, which is also what we record, albeit in an ordered
148 // of all seen revision, which is also what we record, albeit in an ordered
149 // way. There's room for improvement on this duplication.
149 // way. There's room for improvement on this duplication.
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 for revres in ait {
152 for revres in ait {
153 let rev = revres?;
153 let rev = revres?;
154 if roots.contains(&rev) {
154 if roots.contains(&rev) {
155 range.insert(rev);
155 range.insert(rev);
156 }
156 }
157 heads_ancestors.push(rev);
157 heads_ancestors.push(rev);
158 }
158 }
159
159
160 for rev in heads_ancestors.into_iter().rev() {
160 for rev in heads_ancestors.into_iter().rev() {
161 for parent in graph.parents(rev)?.iter() {
161 for parent in graph.parents(rev)?.iter() {
162 if *parent != NULL_REVISION && range.contains(parent) {
162 if *parent != NULL_REVISION && range.contains(parent) {
163 range.insert(rev);
163 range.insert(rev);
164 }
164 }
165 }
165 }
166 }
166 }
167 Ok(range)
167 Ok(range)
168 }
168 }
169
169
170 #[cfg(test)]
170 #[cfg(test)]
171 mod tests {
171 mod tests {
172
172
173 use super::*;
173 use super::*;
174 use crate::testing::SampleGraph;
174 use crate::testing::SampleGraph;
175
175
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 fn retain_heads_sorted(
177 fn retain_heads_sorted(
178 graph: &impl Graph,
178 graph: &impl Graph,
179 revs: &[Revision],
179 revs: &[Revision],
180 ) -> Result<Vec<Revision>, GraphError> {
180 ) -> Result<Vec<Revision>, GraphError> {
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
182 retain_heads(graph, &mut revs)?;
182 retain_heads(graph, &mut revs)?;
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
184 as_vec.sort();
184 as_vec.sort();
185 Ok(as_vec)
185 Ok(as_vec)
186 }
186 }
187
187
188 #[test]
188 #[test]
189 fn test_retain_heads() -> Result<(), GraphError> {
189 fn test_retain_heads() -> Result<(), GraphError> {
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
191 assert_eq!(
191 assert_eq!(
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
193 vec![1, 6, 12]
193 vec![1, 6, 12]
194 );
194 );
195 assert_eq!(
195 assert_eq!(
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
197 vec![3, 5, 8, 9]
197 vec![3, 5, 8, 9]
198 );
198 );
199 Ok(())
199 Ok(())
200 }
200 }
201
201
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
203 fn heads_sorted(
203 fn heads_sorted(
204 graph: &impl Graph,
204 graph: &impl Graph,
205 revs: &[Revision],
205 revs: &[Revision],
206 ) -> Result<Vec<Revision>, GraphError> {
206 ) -> Result<Vec<Revision>, GraphError> {
207 let heads = heads(graph, revs.iter())?;
207 let heads = heads(graph, revs.iter())?;
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
209 as_vec.sort();
209 as_vec.sort();
210 Ok(as_vec)
210 Ok(as_vec)
211 }
211 }
212
212
213 #[test]
213 #[test]
214 fn test_heads() -> Result<(), GraphError> {
214 fn test_heads() -> Result<(), GraphError> {
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
216 assert_eq!(
216 assert_eq!(
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
218 vec![1, 6, 12]
218 vec![1, 6, 12]
219 );
219 );
220 assert_eq!(
220 assert_eq!(
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
222 vec![3, 5, 8, 9]
222 vec![3, 5, 8, 9]
223 );
223 );
224 Ok(())
224 Ok(())
225 }
225 }
226
226
227 /// Apply `roots()` and sort the result for easier comparison
227 /// Apply `roots()` and sort the result for easier comparison
228 fn roots_sorted(
228 fn roots_sorted(
229 graph: &impl Graph,
229 graph: &impl Graph,
230 revs: &[Revision],
230 revs: &[Revision],
231 ) -> Result<Vec<Revision>, GraphError> {
231 ) -> Result<Vec<Revision>, GraphError> {
232 let mut as_vec = roots(graph, &revs.iter().cloned().collect())?;
232 let set: HashSet<_> = revs.iter().cloned().collect();
233 let mut as_vec = roots(graph, &set)?;
233 as_vec.sort();
234 as_vec.sort();
234 Ok(as_vec)
235 Ok(as_vec)
235 }
236 }
236
237
237 #[test]
238 #[test]
238 fn test_roots() -> Result<(), GraphError> {
239 fn test_roots() -> Result<(), GraphError> {
239 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 assert_eq!(
241 assert_eq!(
241 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 vec![0, 4, 12]
243 vec![0, 4, 12]
243 );
244 );
244 assert_eq!(
245 assert_eq!(
245 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 vec![1, 8]
247 vec![1, 8]
247 );
248 );
248 Ok(())
249 Ok(())
249 }
250 }
250
251
251 /// Apply `range()` and convert the result into a Vec for easier comparison
252 /// Apply `range()` and convert the result into a Vec for easier comparison
252 fn range_vec(
253 fn range_vec(
253 graph: impl Graph + Clone,
254 graph: impl Graph + Clone,
254 roots: &[Revision],
255 roots: &[Revision],
255 heads: &[Revision],
256 heads: &[Revision],
256 ) -> Result<Vec<Revision>, GraphError> {
257 ) -> Result<Vec<Revision>, GraphError> {
257 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 .map(|bs| bs.into_iter().collect())
259 .map(|bs| bs.into_iter().collect())
259 }
260 }
260
261
261 #[test]
262 #[test]
262 fn test_range() -> Result<(), GraphError> {
263 fn test_range() -> Result<(), GraphError> {
263 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 assert_eq!(
266 assert_eq!(
266 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 vec![5, 10]
268 vec![5, 10]
268 );
269 );
269 assert_eq!(
270 assert_eq!(
270 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 vec![5, 6, 9, 10, 12]
272 vec![5, 6, 9, 10, 12]
272 );
273 );
273 Ok(())
274 Ok(())
274 }
275 }
275 }
276 }
@@ -1,418 +1,422
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! A multiset of directory names.
8 //! A multiset of directory names.
9 //!
9 //!
10 //! Used to counts the references to directories in a manifest or dirstate.
10 //! Used to counts the references to directories in a manifest or dirstate.
11 use crate::{
11 use crate::{
12 dirstate::EntryState,
12 dirstate::EntryState,
13 utils::{
13 utils::{
14 files,
14 files,
15 hg_path::{HgPath, HgPathBuf, HgPathError},
15 hg_path::{HgPath, HgPathBuf, HgPathError},
16 },
16 },
17 DirstateEntry, DirstateMapError, FastHashMap,
17 DirstateEntry, DirstateMapError, FastHashMap,
18 };
18 };
19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
20
20
21 // could be encapsulated if we care API stability more seriously
21 // could be encapsulated if we care API stability more seriously
22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
23
23
24 #[derive(PartialEq, Debug)]
24 #[derive(PartialEq, Debug)]
25 pub struct DirsMultiset {
25 pub struct DirsMultiset {
26 inner: FastHashMap<HgPathBuf, u32>,
26 inner: FastHashMap<HgPathBuf, u32>,
27 }
27 }
28
28
29 impl DirsMultiset {
29 impl DirsMultiset {
30 /// Initializes the multiset from a dirstate.
30 /// Initializes the multiset from a dirstate.
31 ///
31 ///
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 pub fn from_dirstate(
33 pub fn from_dirstate(
34 dirstate: &FastHashMap<HgPathBuf, DirstateEntry>,
34 dirstate: &FastHashMap<HgPathBuf, DirstateEntry>,
35 skip_state: Option<EntryState>,
35 skip_state: Option<EntryState>,
36 ) -> Result<Self, DirstateMapError> {
36 ) -> Result<Self, DirstateMapError> {
37 let mut multiset = DirsMultiset {
37 let mut multiset = DirsMultiset {
38 inner: FastHashMap::default(),
38 inner: FastHashMap::default(),
39 };
39 };
40
40
41 for (filename, DirstateEntry { state, .. }) in dirstate {
41 for (filename, DirstateEntry { state, .. }) in dirstate {
42 // This `if` is optimized out of the loop
42 // This `if` is optimized out of the loop
43 if let Some(skip) = skip_state {
43 if let Some(skip) = skip_state {
44 if skip != *state {
44 if skip != *state {
45 multiset.add_path(filename)?;
45 multiset.add_path(filename)?;
46 }
46 }
47 } else {
47 } else {
48 multiset.add_path(filename)?;
48 multiset.add_path(filename)?;
49 }
49 }
50 }
50 }
51
51
52 Ok(multiset)
52 Ok(multiset)
53 }
53 }
54
54
55 /// Initializes the multiset from a manifest.
55 /// Initializes the multiset from a manifest.
56 pub fn from_manifest(
56 pub fn from_manifest(
57 manifest: &[impl AsRef<HgPath>],
57 manifest: &[impl AsRef<HgPath>],
58 ) -> Result<Self, DirstateMapError> {
58 ) -> Result<Self, DirstateMapError> {
59 let mut multiset = DirsMultiset {
59 let mut multiset = DirsMultiset {
60 inner: FastHashMap::default(),
60 inner: FastHashMap::default(),
61 };
61 };
62
62
63 for filename in manifest {
63 for filename in manifest {
64 multiset.add_path(filename.as_ref())?;
64 multiset.add_path(filename.as_ref())?;
65 }
65 }
66
66
67 Ok(multiset)
67 Ok(multiset)
68 }
68 }
69
69
70 /// Increases the count of deepest directory contained in the path.
70 /// Increases the count of deepest directory contained in the path.
71 ///
71 ///
72 /// If the directory is not yet in the map, adds its parents.
72 /// If the directory is not yet in the map, adds its parents.
73 pub fn add_path(
73 pub fn add_path(
74 &mut self,
74 &mut self,
75 path: impl AsRef<HgPath>,
75 path: impl AsRef<HgPath>,
76 ) -> Result<(), DirstateMapError> {
76 ) -> Result<(), DirstateMapError> {
77 for subpath in files::find_dirs(path.as_ref()) {
77 for subpath in files::find_dirs(path.as_ref()) {
78 if subpath.as_bytes().last() == Some(&b'/') {
78 if subpath.as_bytes().last() == Some(&b'/') {
79 // TODO Remove this once PathAuditor is certified
79 // TODO Remove this once PathAuditor is certified
80 // as the only entrypoint for path data
80 // as the only entrypoint for path data
81 let second_slash_index = subpath.len() - 1;
81 let second_slash_index = subpath.len() - 1;
82
82
83 return Err(DirstateMapError::InvalidPath(
83 return Err(DirstateMapError::InvalidPath(
84 HgPathError::ConsecutiveSlashes {
84 HgPathError::ConsecutiveSlashes {
85 bytes: path.as_ref().as_bytes().to_owned(),
85 bytes: path.as_ref().as_bytes().to_owned(),
86 second_slash_index,
86 second_slash_index,
87 },
87 },
88 ));
88 ));
89 }
89 }
90 if let Some(val) = self.inner.get_mut(subpath) {
90 if let Some(val) = self.inner.get_mut(subpath) {
91 *val += 1;
91 *val += 1;
92 break;
92 break;
93 }
93 }
94 self.inner.insert(subpath.to_owned(), 1);
94 self.inner.insert(subpath.to_owned(), 1);
95 }
95 }
96 Ok(())
96 Ok(())
97 }
97 }
98
98
99 /// Decreases the count of deepest directory contained in the path.
99 /// Decreases the count of deepest directory contained in the path.
100 ///
100 ///
101 /// If it is the only reference, decreases all parents until one is
101 /// If it is the only reference, decreases all parents until one is
102 /// removed.
102 /// removed.
103 /// If the directory is not in the map, something horrible has happened.
103 /// If the directory is not in the map, something horrible has happened.
104 pub fn delete_path(
104 pub fn delete_path(
105 &mut self,
105 &mut self,
106 path: impl AsRef<HgPath>,
106 path: impl AsRef<HgPath>,
107 ) -> Result<(), DirstateMapError> {
107 ) -> Result<(), DirstateMapError> {
108 for subpath in files::find_dirs(path.as_ref()) {
108 for subpath in files::find_dirs(path.as_ref()) {
109 match self.inner.entry(subpath.to_owned()) {
109 match self.inner.entry(subpath.to_owned()) {
110 Entry::Occupied(mut entry) => {
110 Entry::Occupied(mut entry) => {
111 let val = entry.get().clone();
111 let val = *entry.get();
112 if val > 1 {
112 if val > 1 {
113 entry.insert(val - 1);
113 entry.insert(val - 1);
114 break;
114 break;
115 }
115 }
116 entry.remove();
116 entry.remove();
117 }
117 }
118 Entry::Vacant(_) => {
118 Entry::Vacant(_) => {
119 return Err(DirstateMapError::PathNotFound(
119 return Err(DirstateMapError::PathNotFound(
120 path.as_ref().to_owned(),
120 path.as_ref().to_owned(),
121 ))
121 ))
122 }
122 }
123 };
123 };
124 }
124 }
125
125
126 Ok(())
126 Ok(())
127 }
127 }
128
128
129 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
129 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
130 self.inner.contains_key(key.as_ref())
130 self.inner.contains_key(key.as_ref())
131 }
131 }
132
132
133 pub fn iter(&self) -> DirsMultisetIter {
133 pub fn iter(&self) -> DirsMultisetIter {
134 self.inner.keys()
134 self.inner.keys()
135 }
135 }
136
136
137 pub fn len(&self) -> usize {
137 pub fn len(&self) -> usize {
138 self.inner.len()
138 self.inner.len()
139 }
139 }
140
141 pub fn is_empty(&self) -> bool {
142 self.len() == 0
143 }
140 }
144 }
141
145
142 /// This is basically a reimplementation of `DirsMultiset` that stores the
146 /// This is basically a reimplementation of `DirsMultiset` that stores the
143 /// children instead of just a count of them, plus a small optional
147 /// children instead of just a count of them, plus a small optional
144 /// optimization to avoid some directories we don't need.
148 /// optimization to avoid some directories we don't need.
145 #[derive(PartialEq, Debug)]
149 #[derive(PartialEq, Debug)]
146 pub struct DirsChildrenMultiset<'a> {
150 pub struct DirsChildrenMultiset<'a> {
147 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
151 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
148 only_include: Option<HashSet<&'a HgPath>>,
152 only_include: Option<HashSet<&'a HgPath>>,
149 }
153 }
150
154
151 impl<'a> DirsChildrenMultiset<'a> {
155 impl<'a> DirsChildrenMultiset<'a> {
152 pub fn new(
156 pub fn new(
153 paths: impl Iterator<Item = &'a HgPathBuf>,
157 paths: impl Iterator<Item = &'a HgPathBuf>,
154 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
158 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
155 ) -> Self {
159 ) -> Self {
156 let mut new = Self {
160 let mut new = Self {
157 inner: HashMap::default(),
161 inner: HashMap::default(),
158 only_include: only_include
162 only_include: only_include
159 .map(|s| s.iter().map(|p| p.as_ref()).collect()),
163 .map(|s| s.iter().map(AsRef::as_ref).collect()),
160 };
164 };
161
165
162 for path in paths {
166 for path in paths {
163 new.add_path(path)
167 new.add_path(path)
164 }
168 }
165
169
166 new
170 new
167 }
171 }
168 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
172 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
169 if path.as_ref().is_empty() {
173 if path.as_ref().is_empty() {
170 return;
174 return;
171 }
175 }
172 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
176 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
173 {
177 {
174 if !self.is_dir_included(directory) {
178 if !self.is_dir_included(directory) {
175 continue;
179 continue;
176 }
180 }
177 self.inner
181 self.inner
178 .entry(directory)
182 .entry(directory)
179 .and_modify(|e| {
183 .and_modify(|e| {
180 e.insert(basename);
184 e.insert(basename);
181 })
185 })
182 .or_insert_with(|| {
186 .or_insert_with(|| {
183 let mut set = HashSet::new();
187 let mut set = HashSet::new();
184 set.insert(basename);
188 set.insert(basename);
185 set
189 set
186 });
190 });
187 }
191 }
188 }
192 }
189 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
193 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
190 match &self.only_include {
194 match &self.only_include {
191 None => false,
195 None => false,
192 Some(i) => i.contains(dir.as_ref()),
196 Some(i) => i.contains(dir.as_ref()),
193 }
197 }
194 }
198 }
195
199
196 pub fn get(
200 pub fn get(
197 &self,
201 &self,
198 path: impl AsRef<HgPath>,
202 path: impl AsRef<HgPath>,
199 ) -> Option<&HashSet<&'a HgPath>> {
203 ) -> Option<&HashSet<&'a HgPath>> {
200 self.inner.get(path.as_ref())
204 self.inner.get(path.as_ref())
201 }
205 }
202 }
206 }
203
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `DirsMultiset` directly from `(path, count)` pairs.
    fn multiset(counts: &[(&str, u32)]) -> DirsMultiset {
        DirsMultiset {
            inner: counts
                .iter()
                .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
                .collect(),
        }
    }

    #[test]
    fn test_delete_path_path_not_found() {
        let manifest: Vec<HgPathBuf> = vec![];
        let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
        let path = HgPathBuf::from_bytes(b"doesnotexist/");
        assert_eq!(
            Err(DirstateMapError::PathNotFound(path.to_owned())),
            map.delete_path(&path)
        );
    }

    #[test]
    fn test_delete_path_empty_path() {
        let mut map =
            DirsMultiset::from_manifest(&vec![HgPathBuf::new()]).unwrap();
        let path = HgPath::new(b"");
        // The root entry exists once, so the first deletion succeeds...
        assert_eq!(Ok(()), map.delete_path(path));
        // ...and the second one reports the path as gone.
        assert_eq!(
            Err(DirstateMapError::PathNotFound(path.to_owned())),
            map.delete_path(path)
        );
    }

    #[test]
    fn test_delete_path_successful() {
        let mut map = multiset(&[("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]);

        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
        eprintln!("{:?}", map);
        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
        eprintln!("{:?}", map);
        // "a/b" dropped to zero references and was removed entirely.
        assert_eq!(
            Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
                b"a/b/"
            ))),
            map.delete_path(HgPath::new(b"a/b/"))
        );

        assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
        eprintln!("{:?}", map);
        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/")));
        eprintln!("{:?}", map);

        assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/c/")));
        assert_eq!(
            Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
                b"a/c/"
            ))),
            map.delete_path(HgPath::new(b"a/c/"))
        );
    }

    #[test]
    fn test_add_path_empty_path() {
        let manifest: Vec<HgPathBuf> = vec![];
        let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
        let path = HgPath::new(b"");
        map.add_path(path).unwrap();

        // The empty path still registers the root directory.
        assert_eq!(1, map.len());
    }

    #[test]
    fn test_add_path_successful() {
        let manifest: Vec<HgPathBuf> = vec![];
        let mut map = DirsMultiset::from_manifest(&manifest).unwrap();

        map.add_path(HgPath::new(b"a/")).unwrap();
        assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(1, *map.inner.get(HgPath::new(b"")).unwrap());
        assert_eq!(2, map.len());

        // Non directory should be ignored
        map.add_path(HgPath::new(b"a")).unwrap();
        assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(2, map.len());

        // Non directory will still add its base
        map.add_path(HgPath::new(b"a/b")).unwrap();
        assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(2, map.len());

        // Duplicate path works
        map.add_path(HgPath::new(b"a/")).unwrap();
        assert_eq!(3, *map.inner.get(HgPath::new(b"a")).unwrap());

        // Nested dir adds to its base
        map.add_path(HgPath::new(b"a/b/")).unwrap();
        assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(1, *map.inner.get(HgPath::new(b"a/b")).unwrap());

        // but not its base's base, because it already existed
        map.add_path(HgPath::new(b"a/b/c/")).unwrap();
        assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
        assert_eq!(2, *map.inner.get(HgPath::new(b"a/b")).unwrap());

        map.add_path(HgPath::new(b"a/c/")).unwrap();
        assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());

        let expected = multiset(&[
            ("", 2),
            ("a", 5),
            ("a/b", 2),
            ("a/b/c", 1),
            ("a/c", 1),
        ]);
        assert_eq!(map, expected);
    }

    #[test]
    fn test_dirsmultiset_new_empty() {
        let manifest: Vec<HgPathBuf> = vec![];
        let new = DirsMultiset::from_manifest(&manifest).unwrap();
        assert_eq!(multiset(&[]), new);

        let new = DirsMultiset::from_dirstate(&FastHashMap::default(), None)
            .unwrap();
        assert_eq!(multiset(&[]), new);
    }

    #[test]
    fn test_dirsmultiset_new_no_skip() {
        let input_vec: Vec<HgPathBuf> = ["a/", "b/", "a/c", "a/d/"]
            .iter()
            .map(|e| HgPathBuf::from_bytes(e.as_bytes()))
            .collect();
        let expected = multiset(&[("", 2), ("a", 3), ("b", 1), ("a/d", 1)]);

        let new = DirsMultiset::from_manifest(&input_vec).unwrap();
        assert_eq!(expected, new);

        let input_map = ["a/", "b/", "a/c", "a/d/"]
            .iter()
            .map(|f| {
                (
                    HgPathBuf::from_bytes(f.as_bytes()),
                    DirstateEntry {
                        state: EntryState::Normal,
                        mode: 0,
                        mtime: 0,
                        size: 0,
                    },
                )
            })
            .collect();

        let new = DirsMultiset::from_dirstate(&input_map, None).unwrap();
        assert_eq!(expected, new);
    }

    #[test]
    fn test_dirsmultiset_new_skip() {
        let input_map = [
            ("a/", EntryState::Normal),
            ("a/b/", EntryState::Normal),
            ("a/c", EntryState::Removed),
            ("a/d/", EntryState::Merged),
        ]
        .iter()
        .map(|(f, state)| {
            (
                HgPathBuf::from_bytes(f.as_bytes()),
                DirstateEntry {
                    state: *state,
                    mode: 0,
                    mtime: 0,
                    size: 0,
                },
            )
        })
        .collect();

        // `Normal` entries are skipped, so "a" is only incremented
        // by "a/c" and "a/d/".
        let new =
            DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal))
                .unwrap();
        assert_eq!(multiset(&[("", 1), ("a", 2), ("a/d", 1)]), new);
    }
}
@@ -1,497 +1,497
1 // dirstate_map.rs
1 // dirstate_map.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT},
9 dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT},
10 pack_dirstate, parse_dirstate,
10 pack_dirstate, parse_dirstate,
11 utils::{
11 utils::{
12 files::normalize_case,
12 files::normalize_case,
13 hg_path::{HgPath, HgPathBuf},
13 hg_path::{HgPath, HgPathBuf},
14 },
14 },
15 CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
15 CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
16 DirstateParents, DirstateParseError, FastHashMap, StateMap,
16 DirstateParents, DirstateParseError, FastHashMap, StateMap,
17 };
17 };
18 use core::borrow::Borrow;
18 use core::borrow::Borrow;
19 use std::collections::HashSet;
19 use std::collections::HashSet;
20 use std::convert::TryInto;
20 use std::convert::TryInto;
21 use std::iter::FromIterator;
21 use std::iter::FromIterator;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::time::Duration;
23 use std::time::Duration;
24
24
25 pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>;
25 pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>;
26
26
27 const NULL_ID: [u8; 20] = [0; 20];
27 const NULL_ID: [u8; 20] = [0; 20];
28 const MTIME_UNSET: i32 = -1;
28 const MTIME_UNSET: i32 = -1;
29
29
30 #[derive(Default)]
30 #[derive(Default)]
31 pub struct DirstateMap {
31 pub struct DirstateMap {
32 state_map: StateMap,
32 state_map: StateMap,
33 pub copy_map: CopyMap,
33 pub copy_map: CopyMap,
34 file_fold_map: Option<FileFoldMap>,
34 file_fold_map: Option<FileFoldMap>,
35 pub dirs: Option<DirsMultiset>,
35 pub dirs: Option<DirsMultiset>,
36 pub all_dirs: Option<DirsMultiset>,
36 pub all_dirs: Option<DirsMultiset>,
37 non_normal_set: Option<HashSet<HgPathBuf>>,
37 non_normal_set: Option<HashSet<HgPathBuf>>,
38 other_parent_set: Option<HashSet<HgPathBuf>>,
38 other_parent_set: Option<HashSet<HgPathBuf>>,
39 parents: Option<DirstateParents>,
39 parents: Option<DirstateParents>,
40 dirty_parents: bool,
40 dirty_parents: bool,
41 }
41 }
42
42
43 /// Should only really be used in python interface code, for clarity
43 /// Should only really be used in python interface code, for clarity
44 impl Deref for DirstateMap {
44 impl Deref for DirstateMap {
45 type Target = StateMap;
45 type Target = StateMap;
46
46
47 fn deref(&self) -> &Self::Target {
47 fn deref(&self) -> &Self::Target {
48 &self.state_map
48 &self.state_map
49 }
49 }
50 }
50 }
51
51
52 impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap {
52 impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap {
53 fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(
53 fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(
54 iter: I,
54 iter: I,
55 ) -> Self {
55 ) -> Self {
56 Self {
56 Self {
57 state_map: iter.into_iter().collect(),
57 state_map: iter.into_iter().collect(),
58 ..Self::default()
58 ..Self::default()
59 }
59 }
60 }
60 }
61 }
61 }
62
62
63 impl DirstateMap {
63 impl DirstateMap {
64 pub fn new() -> Self {
64 pub fn new() -> Self {
65 Self::default()
65 Self::default()
66 }
66 }
67
67
68 pub fn clear(&mut self) {
68 pub fn clear(&mut self) {
69 self.state_map.clear();
69 self.state_map.clear();
70 self.copy_map.clear();
70 self.copy_map.clear();
71 self.file_fold_map = None;
71 self.file_fold_map = None;
72 self.non_normal_set = None;
72 self.non_normal_set = None;
73 self.other_parent_set = None;
73 self.other_parent_set = None;
74 self.set_parents(&DirstateParents {
74 self.set_parents(&DirstateParents {
75 p1: NULL_ID,
75 p1: NULL_ID,
76 p2: NULL_ID,
76 p2: NULL_ID,
77 })
77 })
78 }
78 }
79
79
80 /// Add a tracked file to the dirstate
80 /// Add a tracked file to the dirstate
81 pub fn add_file(
81 pub fn add_file(
82 &mut self,
82 &mut self,
83 filename: &HgPath,
83 filename: &HgPath,
84 old_state: EntryState,
84 old_state: EntryState,
85 entry: DirstateEntry,
85 entry: DirstateEntry,
86 ) -> Result<(), DirstateMapError> {
86 ) -> Result<(), DirstateMapError> {
87 if old_state == EntryState::Unknown || old_state == EntryState::Removed
87 if old_state == EntryState::Unknown || old_state == EntryState::Removed
88 {
88 {
89 if let Some(ref mut dirs) = self.dirs {
89 if let Some(ref mut dirs) = self.dirs {
90 dirs.add_path(filename)?;
90 dirs.add_path(filename)?;
91 }
91 }
92 }
92 }
93 if old_state == EntryState::Unknown {
93 if old_state == EntryState::Unknown {
94 if let Some(ref mut all_dirs) = self.all_dirs {
94 if let Some(ref mut all_dirs) = self.all_dirs {
95 all_dirs.add_path(filename)?;
95 all_dirs.add_path(filename)?;
96 }
96 }
97 }
97 }
98 self.state_map.insert(filename.to_owned(), entry.to_owned());
98 self.state_map.insert(filename.to_owned(), entry.to_owned());
99
99
100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
101 self.get_non_normal_other_parent_entries()
101 self.get_non_normal_other_parent_entries()
102 .0
102 .0
103 .insert(filename.to_owned());
103 .insert(filename.to_owned());
104 }
104 }
105
105
106 if entry.size == SIZE_FROM_OTHER_PARENT {
106 if entry.size == SIZE_FROM_OTHER_PARENT {
107 self.get_non_normal_other_parent_entries()
107 self.get_non_normal_other_parent_entries()
108 .1
108 .1
109 .insert(filename.to_owned());
109 .insert(filename.to_owned());
110 }
110 }
111 Ok(())
111 Ok(())
112 }
112 }
113
113
114 /// Mark a file as removed in the dirstate.
114 /// Mark a file as removed in the dirstate.
115 ///
115 ///
116 /// The `size` parameter is used to store sentinel values that indicate
116 /// The `size` parameter is used to store sentinel values that indicate
117 /// the file's previous state. In the future, we should refactor this
117 /// the file's previous state. In the future, we should refactor this
118 /// to be more explicit about what that state is.
118 /// to be more explicit about what that state is.
119 pub fn remove_file(
119 pub fn remove_file(
120 &mut self,
120 &mut self,
121 filename: &HgPath,
121 filename: &HgPath,
122 old_state: EntryState,
122 old_state: EntryState,
123 size: i32,
123 size: i32,
124 ) -> Result<(), DirstateMapError> {
124 ) -> Result<(), DirstateMapError> {
125 if old_state != EntryState::Unknown && old_state != EntryState::Removed
125 if old_state != EntryState::Unknown && old_state != EntryState::Removed
126 {
126 {
127 if let Some(ref mut dirs) = self.dirs {
127 if let Some(ref mut dirs) = self.dirs {
128 dirs.delete_path(filename)?;
128 dirs.delete_path(filename)?;
129 }
129 }
130 }
130 }
131 if old_state == EntryState::Unknown {
131 if old_state == EntryState::Unknown {
132 if let Some(ref mut all_dirs) = self.all_dirs {
132 if let Some(ref mut all_dirs) = self.all_dirs {
133 all_dirs.add_path(filename)?;
133 all_dirs.add_path(filename)?;
134 }
134 }
135 }
135 }
136
136
137 if let Some(ref mut file_fold_map) = self.file_fold_map {
137 if let Some(ref mut file_fold_map) = self.file_fold_map {
138 file_fold_map.remove(&normalize_case(filename));
138 file_fold_map.remove(&normalize_case(filename));
139 }
139 }
140 self.state_map.insert(
140 self.state_map.insert(
141 filename.to_owned(),
141 filename.to_owned(),
142 DirstateEntry {
142 DirstateEntry {
143 state: EntryState::Removed,
143 state: EntryState::Removed,
144 mode: 0,
144 mode: 0,
145 size,
145 size,
146 mtime: 0,
146 mtime: 0,
147 },
147 },
148 );
148 );
149 self.get_non_normal_other_parent_entries()
149 self.get_non_normal_other_parent_entries()
150 .0
150 .0
151 .insert(filename.to_owned());
151 .insert(filename.to_owned());
152 Ok(())
152 Ok(())
153 }
153 }
154
154
155 /// Remove a file from the dirstate.
155 /// Remove a file from the dirstate.
156 /// Returns `true` if the file was previously recorded.
156 /// Returns `true` if the file was previously recorded.
157 pub fn drop_file(
157 pub fn drop_file(
158 &mut self,
158 &mut self,
159 filename: &HgPath,
159 filename: &HgPath,
160 old_state: EntryState,
160 old_state: EntryState,
161 ) -> Result<bool, DirstateMapError> {
161 ) -> Result<bool, DirstateMapError> {
162 let exists = self.state_map.remove(filename).is_some();
162 let exists = self.state_map.remove(filename).is_some();
163
163
164 if exists {
164 if exists {
165 if old_state != EntryState::Removed {
165 if old_state != EntryState::Removed {
166 if let Some(ref mut dirs) = self.dirs {
166 if let Some(ref mut dirs) = self.dirs {
167 dirs.delete_path(filename)?;
167 dirs.delete_path(filename)?;
168 }
168 }
169 }
169 }
170 if let Some(ref mut all_dirs) = self.all_dirs {
170 if let Some(ref mut all_dirs) = self.all_dirs {
171 all_dirs.delete_path(filename)?;
171 all_dirs.delete_path(filename)?;
172 }
172 }
173 }
173 }
174 if let Some(ref mut file_fold_map) = self.file_fold_map {
174 if let Some(ref mut file_fold_map) = self.file_fold_map {
175 file_fold_map.remove(&normalize_case(filename));
175 file_fold_map.remove(&normalize_case(filename));
176 }
176 }
177 self.get_non_normal_other_parent_entries()
177 self.get_non_normal_other_parent_entries()
178 .0
178 .0
179 .remove(filename);
179 .remove(filename);
180
180
181 Ok(exists)
181 Ok(exists)
182 }
182 }
183
183
184 pub fn clear_ambiguous_times(
184 pub fn clear_ambiguous_times(
185 &mut self,
185 &mut self,
186 filenames: Vec<HgPathBuf>,
186 filenames: Vec<HgPathBuf>,
187 now: i32,
187 now: i32,
188 ) {
188 ) {
189 for filename in filenames {
189 for filename in filenames {
190 let mut changed = false;
190 let mut changed = false;
191 self.state_map
191 self.state_map
192 .entry(filename.to_owned())
192 .entry(filename.to_owned())
193 .and_modify(|entry| {
193 .and_modify(|entry| {
194 if entry.state == EntryState::Normal && entry.mtime == now
194 if entry.state == EntryState::Normal && entry.mtime == now
195 {
195 {
196 changed = true;
196 changed = true;
197 *entry = DirstateEntry {
197 *entry = DirstateEntry {
198 mtime: MTIME_UNSET,
198 mtime: MTIME_UNSET,
199 ..*entry
199 ..*entry
200 };
200 };
201 }
201 }
202 });
202 });
203 if changed {
203 if changed {
204 self.get_non_normal_other_parent_entries()
204 self.get_non_normal_other_parent_entries()
205 .0
205 .0
206 .insert(filename.to_owned());
206 .insert(filename.to_owned());
207 }
207 }
208 }
208 }
209 }
209 }
210
210
211 pub fn non_normal_entries_remove(
211 pub fn non_normal_entries_remove(
212 &mut self,
212 &mut self,
213 key: impl AsRef<HgPath>,
213 key: impl AsRef<HgPath>,
214 ) -> bool {
214 ) -> bool {
215 self.get_non_normal_other_parent_entries()
215 self.get_non_normal_other_parent_entries()
216 .0
216 .0
217 .remove(key.as_ref())
217 .remove(key.as_ref())
218 }
218 }
219 pub fn non_normal_entries_union(
219 pub fn non_normal_entries_union(
220 &mut self,
220 &mut self,
221 other: HashSet<HgPathBuf>,
221 other: HashSet<HgPathBuf>,
222 ) -> Vec<HgPathBuf> {
222 ) -> Vec<HgPathBuf> {
223 self.get_non_normal_other_parent_entries()
223 self.get_non_normal_other_parent_entries()
224 .0
224 .0
225 .union(&other)
225 .union(&other)
226 .map(|e| e.to_owned())
226 .map(ToOwned::to_owned)
227 .collect()
227 .collect()
228 }
228 }
229
229
230 pub fn get_non_normal_other_parent_entries(
230 pub fn get_non_normal_other_parent_entries(
231 &mut self,
231 &mut self,
232 ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) {
232 ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) {
233 self.set_non_normal_other_parent_entries(false);
233 self.set_non_normal_other_parent_entries(false);
234 (
234 (
235 self.non_normal_set.as_mut().unwrap(),
235 self.non_normal_set.as_mut().unwrap(),
236 self.other_parent_set.as_mut().unwrap(),
236 self.other_parent_set.as_mut().unwrap(),
237 )
237 )
238 }
238 }
239
239
240 /// Useful to get immutable references to those sets in contexts where
240 /// Useful to get immutable references to those sets in contexts where
241 /// you only have an immutable reference to the `DirstateMap`, like when
241 /// you only have an immutable reference to the `DirstateMap`, like when
242 /// sharing references with Python.
242 /// sharing references with Python.
243 ///
243 ///
244 /// TODO, get rid of this along with the other "setter/getter" stuff when
244 /// TODO, get rid of this along with the other "setter/getter" stuff when
245 /// a nice typestate plan is defined.
245 /// a nice typestate plan is defined.
246 ///
246 ///
247 /// # Panics
247 /// # Panics
248 ///
248 ///
249 /// Will panic if either set is `None`.
249 /// Will panic if either set is `None`.
250 pub fn get_non_normal_other_parent_entries_panic(
250 pub fn get_non_normal_other_parent_entries_panic(
251 &self,
251 &self,
252 ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) {
252 ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) {
253 (
253 (
254 self.non_normal_set.as_ref().unwrap(),
254 self.non_normal_set.as_ref().unwrap(),
255 self.other_parent_set.as_ref().unwrap(),
255 self.other_parent_set.as_ref().unwrap(),
256 )
256 )
257 }
257 }
258
258
259 pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
259 pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
260 if !force
260 if !force
261 && self.non_normal_set.is_some()
261 && self.non_normal_set.is_some()
262 && self.other_parent_set.is_some()
262 && self.other_parent_set.is_some()
263 {
263 {
264 return;
264 return;
265 }
265 }
266 let mut non_normal = HashSet::new();
266 let mut non_normal = HashSet::new();
267 let mut other_parent = HashSet::new();
267 let mut other_parent = HashSet::new();
268
268
269 for (
269 for (
270 filename,
270 filename,
271 DirstateEntry {
271 DirstateEntry {
272 state, size, mtime, ..
272 state, size, mtime, ..
273 },
273 },
274 ) in self.state_map.iter()
274 ) in self.state_map.iter()
275 {
275 {
276 if *state != EntryState::Normal || *mtime == MTIME_UNSET {
276 if *state != EntryState::Normal || *mtime == MTIME_UNSET {
277 non_normal.insert(filename.to_owned());
277 non_normal.insert(filename.to_owned());
278 }
278 }
279 if *state == EntryState::Normal && *size == SIZE_FROM_OTHER_PARENT
279 if *state == EntryState::Normal && *size == SIZE_FROM_OTHER_PARENT
280 {
280 {
281 other_parent.insert(filename.to_owned());
281 other_parent.insert(filename.to_owned());
282 }
282 }
283 }
283 }
284 self.non_normal_set = Some(non_normal);
284 self.non_normal_set = Some(non_normal);
285 self.other_parent_set = Some(other_parent);
285 self.other_parent_set = Some(other_parent);
286 }
286 }
287
287
288 /// Both of these setters and their uses appear to be the simplest way to
288 /// Both of these setters and their uses appear to be the simplest way to
289 /// emulate a Python lazy property, but it is ugly and unidiomatic.
289 /// emulate a Python lazy property, but it is ugly and unidiomatic.
290 /// TODO One day, rewriting this struct using the typestate might be a
290 /// TODO One day, rewriting this struct using the typestate might be a
291 /// good idea.
291 /// good idea.
292 pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
292 pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
293 if self.all_dirs.is_none() {
293 if self.all_dirs.is_none() {
294 self.all_dirs =
294 self.all_dirs =
295 Some(DirsMultiset::from_dirstate(&self.state_map, None)?);
295 Some(DirsMultiset::from_dirstate(&self.state_map, None)?);
296 }
296 }
297 Ok(())
297 Ok(())
298 }
298 }
299
299
300 pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
300 pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
301 if self.dirs.is_none() {
301 if self.dirs.is_none() {
302 self.dirs = Some(DirsMultiset::from_dirstate(
302 self.dirs = Some(DirsMultiset::from_dirstate(
303 &self.state_map,
303 &self.state_map,
304 Some(EntryState::Removed),
304 Some(EntryState::Removed),
305 )?);
305 )?);
306 }
306 }
307 Ok(())
307 Ok(())
308 }
308 }
309
309
310 pub fn has_tracked_dir(
310 pub fn has_tracked_dir(
311 &mut self,
311 &mut self,
312 directory: &HgPath,
312 directory: &HgPath,
313 ) -> Result<bool, DirstateMapError> {
313 ) -> Result<bool, DirstateMapError> {
314 self.set_dirs()?;
314 self.set_dirs()?;
315 Ok(self.dirs.as_ref().unwrap().contains(directory))
315 Ok(self.dirs.as_ref().unwrap().contains(directory))
316 }
316 }
317
317
318 pub fn has_dir(
318 pub fn has_dir(
319 &mut self,
319 &mut self,
320 directory: &HgPath,
320 directory: &HgPath,
321 ) -> Result<bool, DirstateMapError> {
321 ) -> Result<bool, DirstateMapError> {
322 self.set_all_dirs()?;
322 self.set_all_dirs()?;
323 Ok(self.all_dirs.as_ref().unwrap().contains(directory))
323 Ok(self.all_dirs.as_ref().unwrap().contains(directory))
324 }
324 }
325
325
326 pub fn parents(
326 pub fn parents(
327 &mut self,
327 &mut self,
328 file_contents: &[u8],
328 file_contents: &[u8],
329 ) -> Result<&DirstateParents, DirstateError> {
329 ) -> Result<&DirstateParents, DirstateError> {
330 if let Some(ref parents) = self.parents {
330 if let Some(ref parents) = self.parents {
331 return Ok(parents);
331 return Ok(parents);
332 }
332 }
333 let parents;
333 let parents;
334 if file_contents.len() == PARENT_SIZE * 2 {
334 if file_contents.len() == PARENT_SIZE * 2 {
335 parents = DirstateParents {
335 parents = DirstateParents {
336 p1: file_contents[..PARENT_SIZE].try_into().unwrap(),
336 p1: file_contents[..PARENT_SIZE].try_into().unwrap(),
337 p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2]
337 p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2]
338 .try_into()
338 .try_into()
339 .unwrap(),
339 .unwrap(),
340 };
340 };
341 } else if file_contents.is_empty() {
341 } else if file_contents.is_empty() {
342 parents = DirstateParents {
342 parents = DirstateParents {
343 p1: NULL_ID,
343 p1: NULL_ID,
344 p2: NULL_ID,
344 p2: NULL_ID,
345 };
345 };
346 } else {
346 } else {
347 return Err(DirstateError::Parse(DirstateParseError::Damaged));
347 return Err(DirstateError::Parse(DirstateParseError::Damaged));
348 }
348 }
349
349
350 self.parents = Some(parents);
350 self.parents = Some(parents);
351 Ok(self.parents.as_ref().unwrap())
351 Ok(self.parents.as_ref().unwrap())
352 }
352 }
353
353
354 pub fn set_parents(&mut self, parents: &DirstateParents) {
354 pub fn set_parents(&mut self, parents: &DirstateParents) {
355 self.parents = Some(parents.clone());
355 self.parents = Some(parents.clone());
356 self.dirty_parents = true;
356 self.dirty_parents = true;
357 }
357 }
358
358
359 pub fn read(
359 pub fn read(
360 &mut self,
360 &mut self,
361 file_contents: &[u8],
361 file_contents: &[u8],
362 ) -> Result<Option<DirstateParents>, DirstateError> {
362 ) -> Result<Option<DirstateParents>, DirstateError> {
363 if file_contents.is_empty() {
363 if file_contents.is_empty() {
364 return Ok(None);
364 return Ok(None);
365 }
365 }
366
366
367 let parents = parse_dirstate(
367 let parents = parse_dirstate(
368 &mut self.state_map,
368 &mut self.state_map,
369 &mut self.copy_map,
369 &mut self.copy_map,
370 file_contents,
370 file_contents,
371 )?;
371 )?;
372
372
373 if !self.dirty_parents {
373 if !self.dirty_parents {
374 self.set_parents(&parents);
374 self.set_parents(&parents);
375 }
375 }
376
376
377 Ok(Some(parents))
377 Ok(Some(parents))
378 }
378 }
379
379
380 pub fn pack(
380 pub fn pack(
381 &mut self,
381 &mut self,
382 parents: DirstateParents,
382 parents: DirstateParents,
383 now: Duration,
383 now: Duration,
384 ) -> Result<Vec<u8>, DirstateError> {
384 ) -> Result<Vec<u8>, DirstateError> {
385 let packed =
385 let packed =
386 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
386 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
387
387
388 self.dirty_parents = false;
388 self.dirty_parents = false;
389
389
390 self.set_non_normal_other_parent_entries(true);
390 self.set_non_normal_other_parent_entries(true);
391 Ok(packed)
391 Ok(packed)
392 }
392 }
393
393
394 pub fn build_file_fold_map(&mut self) -> &FileFoldMap {
394 pub fn build_file_fold_map(&mut self) -> &FileFoldMap {
395 if let Some(ref file_fold_map) = self.file_fold_map {
395 if let Some(ref file_fold_map) = self.file_fold_map {
396 return file_fold_map;
396 return file_fold_map;
397 }
397 }
398 let mut new_file_fold_map = FileFoldMap::default();
398 let mut new_file_fold_map = FileFoldMap::default();
399 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
399 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
400 {
400 {
401 if *state == EntryState::Removed {
401 if *state == EntryState::Removed {
402 new_file_fold_map
402 new_file_fold_map
403 .insert(normalize_case(filename), filename.to_owned());
403 .insert(normalize_case(filename), filename.to_owned());
404 }
404 }
405 }
405 }
406 self.file_fold_map = Some(new_file_fold_map);
406 self.file_fold_map = Some(new_file_fold_map);
407 self.file_fold_map.as_ref().unwrap()
407 self.file_fold_map.as_ref().unwrap()
408 }
408 }
409 }
409 }
410
410
411 #[cfg(test)]
411 #[cfg(test)]
412 mod tests {
412 mod tests {
413 use super::*;
413 use super::*;
414
414
415 #[test]
415 #[test]
416 fn test_dirs_multiset() {
416 fn test_dirs_multiset() {
417 let mut map = DirstateMap::new();
417 let mut map = DirstateMap::new();
418 assert!(map.dirs.is_none());
418 assert!(map.dirs.is_none());
419 assert!(map.all_dirs.is_none());
419 assert!(map.all_dirs.is_none());
420
420
421 assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false);
421 assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false);
422 assert!(map.all_dirs.is_some());
422 assert!(map.all_dirs.is_some());
423 assert!(map.dirs.is_none());
423 assert!(map.dirs.is_none());
424
424
425 assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false);
425 assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false);
426 assert!(map.dirs.is_some());
426 assert!(map.dirs.is_some());
427 }
427 }
428
428
429 #[test]
429 #[test]
430 fn test_add_file() {
430 fn test_add_file() {
431 let mut map = DirstateMap::new();
431 let mut map = DirstateMap::new();
432
432
433 assert_eq!(0, map.len());
433 assert_eq!(0, map.len());
434
434
435 map.add_file(
435 map.add_file(
436 HgPath::new(b"meh"),
436 HgPath::new(b"meh"),
437 EntryState::Normal,
437 EntryState::Normal,
438 DirstateEntry {
438 DirstateEntry {
439 state: EntryState::Normal,
439 state: EntryState::Normal,
440 mode: 1337,
440 mode: 1337,
441 mtime: 1337,
441 mtime: 1337,
442 size: 1337,
442 size: 1337,
443 },
443 },
444 )
444 )
445 .unwrap();
445 .unwrap();
446
446
447 assert_eq!(1, map.len());
447 assert_eq!(1, map.len());
448 assert_eq!(0, map.get_non_normal_other_parent_entries().0.len());
448 assert_eq!(0, map.get_non_normal_other_parent_entries().0.len());
449 assert_eq!(0, map.get_non_normal_other_parent_entries().1.len());
449 assert_eq!(0, map.get_non_normal_other_parent_entries().1.len());
450 }
450 }
451
451
452 #[test]
452 #[test]
453 fn test_non_normal_other_parent_entries() {
453 fn test_non_normal_other_parent_entries() {
454 let mut map: DirstateMap = [
454 let mut map: DirstateMap = [
455 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
455 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
456 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
456 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
457 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
457 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
458 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
458 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
459 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
459 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
460 (b"f6", (EntryState::Added, 1337, 1337, -1)),
460 (b"f6", (EntryState::Added, 1337, 1337, -1)),
461 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
461 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
462 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
462 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
463 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
463 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
464 (b"fa", (EntryState::Added, 1337, -2, 1337)),
464 (b"fa", (EntryState::Added, 1337, -2, 1337)),
465 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
465 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
466 ]
466 ]
467 .iter()
467 .iter()
468 .map(|(fname, (state, mode, size, mtime))| {
468 .map(|(fname, (state, mode, size, mtime))| {
469 (
469 (
470 HgPathBuf::from_bytes(fname.as_ref()),
470 HgPathBuf::from_bytes(fname.as_ref()),
471 DirstateEntry {
471 DirstateEntry {
472 state: *state,
472 state: *state,
473 mode: *mode,
473 mode: *mode,
474 size: *size,
474 size: *size,
475 mtime: *mtime,
475 mtime: *mtime,
476 },
476 },
477 )
477 )
478 })
478 })
479 .collect();
479 .collect();
480
480
481 let mut non_normal = [
481 let mut non_normal = [
482 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
482 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
483 ]
483 ]
484 .iter()
484 .iter()
485 .map(|x| HgPathBuf::from_bytes(x.as_ref()))
485 .map(|x| HgPathBuf::from_bytes(x.as_ref()))
486 .collect();
486 .collect();
487
487
488 let mut other_parent = HashSet::new();
488 let mut other_parent = HashSet::new();
489 other_parent.insert(HgPathBuf::from_bytes(b"f4"));
489 other_parent.insert(HgPathBuf::from_bytes(b"f4"));
490 let entries = map.get_non_normal_other_parent_entries();
490 let entries = map.get_non_normal_other_parent_entries();
491
491
492 assert_eq!(
492 assert_eq!(
493 (&mut non_normal, &mut other_parent),
493 (&mut non_normal, &mut other_parent),
494 (entries.0, entries.1)
494 (entries.0, entries.1)
495 );
495 );
496 }
496 }
497 }
497 }
@@ -1,435 +1,435
1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 use crate::utils::hg_path::HgPath;
6 use crate::utils::hg_path::HgPath;
7 use crate::{
7 use crate::{
8 dirstate::{CopyMap, EntryState, StateMap},
8 dirstate::{CopyMap, EntryState, StateMap},
9 DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
9 DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
10 };
10 };
11 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
11 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
12 use micro_timer::timed;
12 use micro_timer::timed;
13 use std::convert::{TryFrom, TryInto};
13 use std::convert::{TryFrom, TryInto};
14 use std::io::Cursor;
14 use std::io::Cursor;
15 use std::time::Duration;
15 use std::time::Duration;
16
16
17 /// Parents are stored in the dirstate as byte hashes.
17 /// Parents are stored in the dirstate as byte hashes.
18 pub const PARENT_SIZE: usize = 20;
18 pub const PARENT_SIZE: usize = 20;
19 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
19 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
20 const MIN_ENTRY_SIZE: usize = 17;
20 const MIN_ENTRY_SIZE: usize = 17;
21
21
22 // TODO parse/pack: is mutate-on-loop better for performance?
22 // TODO parse/pack: is mutate-on-loop better for performance?
23
23
24 #[timed]
24 #[timed]
25 pub fn parse_dirstate(
25 pub fn parse_dirstate(
26 state_map: &mut StateMap,
26 state_map: &mut StateMap,
27 copy_map: &mut CopyMap,
27 copy_map: &mut CopyMap,
28 contents: &[u8],
28 contents: &[u8],
29 ) -> Result<DirstateParents, DirstateParseError> {
29 ) -> Result<DirstateParents, DirstateParseError> {
30 if contents.len() < PARENT_SIZE * 2 {
30 if contents.len() < PARENT_SIZE * 2 {
31 return Err(DirstateParseError::TooLittleData);
31 return Err(DirstateParseError::TooLittleData);
32 }
32 }
33
33
34 let mut curr_pos = PARENT_SIZE * 2;
34 let mut curr_pos = PARENT_SIZE * 2;
35 let parents = DirstateParents {
35 let parents = DirstateParents {
36 p1: contents[..PARENT_SIZE].try_into().unwrap(),
36 p1: contents[..PARENT_SIZE].try_into().unwrap(),
37 p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
37 p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
38 };
38 };
39
39
40 while curr_pos < contents.len() {
40 while curr_pos < contents.len() {
41 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
41 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
42 return Err(DirstateParseError::Overflow);
42 return Err(DirstateParseError::Overflow);
43 }
43 }
44 let entry_bytes = &contents[curr_pos..];
44 let entry_bytes = &contents[curr_pos..];
45
45
46 let mut cursor = Cursor::new(entry_bytes);
46 let mut cursor = Cursor::new(entry_bytes);
47 let state = EntryState::try_from(cursor.read_u8()?)?;
47 let state = EntryState::try_from(cursor.read_u8()?)?;
48 let mode = cursor.read_i32::<BigEndian>()?;
48 let mode = cursor.read_i32::<BigEndian>()?;
49 let size = cursor.read_i32::<BigEndian>()?;
49 let size = cursor.read_i32::<BigEndian>()?;
50 let mtime = cursor.read_i32::<BigEndian>()?;
50 let mtime = cursor.read_i32::<BigEndian>()?;
51 let path_len = cursor.read_i32::<BigEndian>()? as usize;
51 let path_len = cursor.read_i32::<BigEndian>()? as usize;
52
52
53 if path_len > contents.len() - curr_pos {
53 if path_len > contents.len() - curr_pos {
54 return Err(DirstateParseError::Overflow);
54 return Err(DirstateParseError::Overflow);
55 }
55 }
56
56
57 // Slice instead of allocating a Vec needed for `read_exact`
57 // Slice instead of allocating a Vec needed for `read_exact`
58 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
58 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
59
59
60 let (path, copy) = match memchr::memchr(0, path) {
60 let (path, copy) = match memchr::memchr(0, path) {
61 None => (path, None),
61 None => (path, None),
62 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
62 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
63 };
63 };
64
64
65 if let Some(copy_path) = copy {
65 if let Some(copy_path) = copy {
66 copy_map.insert(
66 copy_map.insert(
67 HgPath::new(path).to_owned(),
67 HgPath::new(path).to_owned(),
68 HgPath::new(copy_path).to_owned(),
68 HgPath::new(copy_path).to_owned(),
69 );
69 );
70 };
70 };
71 state_map.insert(
71 state_map.insert(
72 HgPath::new(path).to_owned(),
72 HgPath::new(path).to_owned(),
73 DirstateEntry {
73 DirstateEntry {
74 state,
74 state,
75 mode,
75 mode,
76 size,
76 size,
77 mtime,
77 mtime,
78 },
78 },
79 );
79 );
80 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
80 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
81 }
81 }
82
82
83 Ok(parents)
83 Ok(parents)
84 }
84 }
85
85
86 /// `now` is the duration in seconds since the Unix epoch
86 /// `now` is the duration in seconds since the Unix epoch
87 pub fn pack_dirstate(
87 pub fn pack_dirstate(
88 state_map: &mut StateMap,
88 state_map: &mut StateMap,
89 copy_map: &CopyMap,
89 copy_map: &CopyMap,
90 parents: DirstateParents,
90 parents: DirstateParents,
91 now: Duration,
91 now: Duration,
92 ) -> Result<Vec<u8>, DirstatePackError> {
92 ) -> Result<Vec<u8>, DirstatePackError> {
93 // TODO move away from i32 before 2038.
93 // TODO move away from i32 before 2038.
94 let now: i32 = now.as_secs().try_into().expect("time overflow");
94 let now: i32 = now.as_secs().try_into().expect("time overflow");
95
95
96 let expected_size: usize = state_map
96 let expected_size: usize = state_map
97 .iter()
97 .iter()
98 .map(|(filename, _)| {
98 .map(|(filename, _)| {
99 let mut length = MIN_ENTRY_SIZE + filename.len();
99 let mut length = MIN_ENTRY_SIZE + filename.len();
100 if let Some(copy) = copy_map.get(filename) {
100 if let Some(copy) = copy_map.get(filename) {
101 length += copy.len() + 1;
101 length += copy.len() + 1;
102 }
102 }
103 length
103 length
104 })
104 })
105 .sum();
105 .sum();
106 let expected_size = expected_size + PARENT_SIZE * 2;
106 let expected_size = expected_size + PARENT_SIZE * 2;
107
107
108 let mut packed = Vec::with_capacity(expected_size);
108 let mut packed = Vec::with_capacity(expected_size);
109 let mut new_state_map = vec![];
109 let mut new_state_map = vec![];
110
110
111 packed.extend(&parents.p1);
111 packed.extend(&parents.p1);
112 packed.extend(&parents.p2);
112 packed.extend(&parents.p2);
113
113
114 for (filename, entry) in state_map.iter() {
114 for (filename, entry) in state_map.iter() {
115 let new_filename = filename.to_owned();
115 let new_filename = filename.to_owned();
116 let mut new_mtime: i32 = entry.mtime;
116 let mut new_mtime: i32 = entry.mtime;
117 if entry.state == EntryState::Normal && entry.mtime == now {
117 if entry.state == EntryState::Normal && entry.mtime == now {
118 // The file was last modified "simultaneously" with the current
118 // The file was last modified "simultaneously" with the current
119 // write to dirstate (i.e. within the same second for file-
119 // write to dirstate (i.e. within the same second for file-
120 // systems with a granularity of 1 sec). This commonly happens
120 // systems with a granularity of 1 sec). This commonly happens
121 // for at least a couple of files on 'update'.
121 // for at least a couple of files on 'update'.
122 // The user could change the file without changing its size
122 // The user could change the file without changing its size
123 // within the same second. Invalidate the file's mtime in
123 // within the same second. Invalidate the file's mtime in
124 // dirstate, forcing future 'status' calls to compare the
124 // dirstate, forcing future 'status' calls to compare the
125 // contents of the file if the size is the same. This prevents
125 // contents of the file if the size is the same. This prevents
126 // mistakenly treating such files as clean.
126 // mistakenly treating such files as clean.
127 new_mtime = -1;
127 new_mtime = -1;
128 new_state_map.push((
128 new_state_map.push((
129 filename.to_owned(),
129 filename.to_owned(),
130 DirstateEntry {
130 DirstateEntry {
131 mtime: new_mtime,
131 mtime: new_mtime,
132 ..*entry
132 ..*entry
133 },
133 },
134 ));
134 ));
135 }
135 }
136 let mut new_filename = new_filename.into_vec();
136 let mut new_filename = new_filename.into_vec();
137 if let Some(copy) = copy_map.get(filename) {
137 if let Some(copy) = copy_map.get(filename) {
138 new_filename.push('\0' as u8);
138 new_filename.push(b'\0');
139 new_filename.extend(copy.bytes());
139 new_filename.extend(copy.bytes());
140 }
140 }
141
141
142 packed.write_u8(entry.state.into())?;
142 packed.write_u8(entry.state.into())?;
143 packed.write_i32::<BigEndian>(entry.mode)?;
143 packed.write_i32::<BigEndian>(entry.mode)?;
144 packed.write_i32::<BigEndian>(entry.size)?;
144 packed.write_i32::<BigEndian>(entry.size)?;
145 packed.write_i32::<BigEndian>(new_mtime)?;
145 packed.write_i32::<BigEndian>(new_mtime)?;
146 packed.write_i32::<BigEndian>(new_filename.len() as i32)?;
146 packed.write_i32::<BigEndian>(new_filename.len() as i32)?;
147 packed.extend(new_filename)
147 packed.extend(new_filename)
148 }
148 }
149
149
150 if packed.len() != expected_size {
150 if packed.len() != expected_size {
151 return Err(DirstatePackError::BadSize(expected_size, packed.len()));
151 return Err(DirstatePackError::BadSize(expected_size, packed.len()));
152 }
152 }
153
153
154 state_map.extend(new_state_map);
154 state_map.extend(new_state_map);
155
155
156 Ok(packed)
156 Ok(packed)
157 }
157 }
158
158
159 #[cfg(test)]
159 #[cfg(test)]
160 mod tests {
160 mod tests {
161 use super::*;
161 use super::*;
162 use crate::{utils::hg_path::HgPathBuf, FastHashMap};
162 use crate::{utils::hg_path::HgPathBuf, FastHashMap};
163
163
164 #[test]
164 #[test]
165 fn test_pack_dirstate_empty() {
165 fn test_pack_dirstate_empty() {
166 let mut state_map: StateMap = FastHashMap::default();
166 let mut state_map: StateMap = FastHashMap::default();
167 let copymap = FastHashMap::default();
167 let copymap = FastHashMap::default();
168 let parents = DirstateParents {
168 let parents = DirstateParents {
169 p1: *b"12345678910111213141",
169 p1: *b"12345678910111213141",
170 p2: *b"00000000000000000000",
170 p2: *b"00000000000000000000",
171 };
171 };
172 let now = Duration::new(15000000, 0);
172 let now = Duration::new(15000000, 0);
173 let expected = b"1234567891011121314100000000000000000000".to_vec();
173 let expected = b"1234567891011121314100000000000000000000".to_vec();
174
174
175 assert_eq!(
175 assert_eq!(
176 expected,
176 expected,
177 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
177 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
178 );
178 );
179
179
180 assert!(state_map.is_empty())
180 assert!(state_map.is_empty())
181 }
181 }
182 #[test]
182 #[test]
183 fn test_pack_dirstate_one_entry() {
183 fn test_pack_dirstate_one_entry() {
184 let expected_state_map: StateMap = [(
184 let expected_state_map: StateMap = [(
185 HgPathBuf::from_bytes(b"f1"),
185 HgPathBuf::from_bytes(b"f1"),
186 DirstateEntry {
186 DirstateEntry {
187 state: EntryState::Normal,
187 state: EntryState::Normal,
188 mode: 0o644,
188 mode: 0o644,
189 size: 0,
189 size: 0,
190 mtime: 791231220,
190 mtime: 791231220,
191 },
191 },
192 )]
192 )]
193 .iter()
193 .iter()
194 .cloned()
194 .cloned()
195 .collect();
195 .collect();
196 let mut state_map = expected_state_map.clone();
196 let mut state_map = expected_state_map.clone();
197
197
198 let copymap = FastHashMap::default();
198 let copymap = FastHashMap::default();
199 let parents = DirstateParents {
199 let parents = DirstateParents {
200 p1: *b"12345678910111213141",
200 p1: *b"12345678910111213141",
201 p2: *b"00000000000000000000",
201 p2: *b"00000000000000000000",
202 };
202 };
203 let now = Duration::new(15000000, 0);
203 let now = Duration::new(15000000, 0);
204 let expected = [
204 let expected = [
205 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
205 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
206 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
206 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
207 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
207 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
208 41, 58, 244, 0, 0, 0, 2, 102, 49,
208 41, 58, 244, 0, 0, 0, 2, 102, 49,
209 ]
209 ]
210 .to_vec();
210 .to_vec();
211
211
212 assert_eq!(
212 assert_eq!(
213 expected,
213 expected,
214 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
214 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
215 );
215 );
216
216
217 assert_eq!(expected_state_map, state_map);
217 assert_eq!(expected_state_map, state_map);
218 }
218 }
219 #[test]
219 #[test]
220 fn test_pack_dirstate_one_entry_with_copy() {
220 fn test_pack_dirstate_one_entry_with_copy() {
221 let expected_state_map: StateMap = [(
221 let expected_state_map: StateMap = [(
222 HgPathBuf::from_bytes(b"f1"),
222 HgPathBuf::from_bytes(b"f1"),
223 DirstateEntry {
223 DirstateEntry {
224 state: EntryState::Normal,
224 state: EntryState::Normal,
225 mode: 0o644,
225 mode: 0o644,
226 size: 0,
226 size: 0,
227 mtime: 791231220,
227 mtime: 791231220,
228 },
228 },
229 )]
229 )]
230 .iter()
230 .iter()
231 .cloned()
231 .cloned()
232 .collect();
232 .collect();
233 let mut state_map = expected_state_map.clone();
233 let mut state_map = expected_state_map.clone();
234 let mut copymap = FastHashMap::default();
234 let mut copymap = FastHashMap::default();
235 copymap.insert(
235 copymap.insert(
236 HgPathBuf::from_bytes(b"f1"),
236 HgPathBuf::from_bytes(b"f1"),
237 HgPathBuf::from_bytes(b"copyname"),
237 HgPathBuf::from_bytes(b"copyname"),
238 );
238 );
239 let parents = DirstateParents {
239 let parents = DirstateParents {
240 p1: *b"12345678910111213141",
240 p1: *b"12345678910111213141",
241 p2: *b"00000000000000000000",
241 p2: *b"00000000000000000000",
242 };
242 };
243 let now = Duration::new(15000000, 0);
243 let now = Duration::new(15000000, 0);
244 let expected = [
244 let expected = [
245 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
245 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
246 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
246 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
247 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
247 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
248 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
248 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
249 109, 101,
249 109, 101,
250 ]
250 ]
251 .to_vec();
251 .to_vec();
252
252
253 assert_eq!(
253 assert_eq!(
254 expected,
254 expected,
255 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
255 pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
256 );
256 );
257 assert_eq!(expected_state_map, state_map);
257 assert_eq!(expected_state_map, state_map);
258 }
258 }
259
259
260 #[test]
260 #[test]
261 fn test_parse_pack_one_entry_with_copy() {
261 fn test_parse_pack_one_entry_with_copy() {
262 let mut state_map: StateMap = [(
262 let mut state_map: StateMap = [(
263 HgPathBuf::from_bytes(b"f1"),
263 HgPathBuf::from_bytes(b"f1"),
264 DirstateEntry {
264 DirstateEntry {
265 state: EntryState::Normal,
265 state: EntryState::Normal,
266 mode: 0o644,
266 mode: 0o644,
267 size: 0,
267 size: 0,
268 mtime: 791231220,
268 mtime: 791231220,
269 },
269 },
270 )]
270 )]
271 .iter()
271 .iter()
272 .cloned()
272 .cloned()
273 .collect();
273 .collect();
274 let mut copymap = FastHashMap::default();
274 let mut copymap = FastHashMap::default();
275 copymap.insert(
275 copymap.insert(
276 HgPathBuf::from_bytes(b"f1"),
276 HgPathBuf::from_bytes(b"f1"),
277 HgPathBuf::from_bytes(b"copyname"),
277 HgPathBuf::from_bytes(b"copyname"),
278 );
278 );
279 let parents = DirstateParents {
279 let parents = DirstateParents {
280 p1: *b"12345678910111213141",
280 p1: *b"12345678910111213141",
281 p2: *b"00000000000000000000",
281 p2: *b"00000000000000000000",
282 };
282 };
283 let now = Duration::new(15000000, 0);
283 let now = Duration::new(15000000, 0);
284 let result =
284 let result =
285 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
285 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
286 .unwrap();
286 .unwrap();
287
287
288 let mut new_state_map: StateMap = FastHashMap::default();
288 let mut new_state_map: StateMap = FastHashMap::default();
289 let mut new_copy_map: CopyMap = FastHashMap::default();
289 let mut new_copy_map: CopyMap = FastHashMap::default();
290 let new_parents = parse_dirstate(
290 let new_parents = parse_dirstate(
291 &mut new_state_map,
291 &mut new_state_map,
292 &mut new_copy_map,
292 &mut new_copy_map,
293 result.as_slice(),
293 result.as_slice(),
294 )
294 )
295 .unwrap();
295 .unwrap();
296 assert_eq!(
296 assert_eq!(
297 (parents, state_map, copymap),
297 (parents, state_map, copymap),
298 (new_parents, new_state_map, new_copy_map)
298 (new_parents, new_state_map, new_copy_map)
299 )
299 )
300 }
300 }
301
301
302 #[test]
302 #[test]
303 fn test_parse_pack_multiple_entries_with_copy() {
303 fn test_parse_pack_multiple_entries_with_copy() {
304 let mut state_map: StateMap = [
304 let mut state_map: StateMap = [
305 (
305 (
306 HgPathBuf::from_bytes(b"f1"),
306 HgPathBuf::from_bytes(b"f1"),
307 DirstateEntry {
307 DirstateEntry {
308 state: EntryState::Normal,
308 state: EntryState::Normal,
309 mode: 0o644,
309 mode: 0o644,
310 size: 0,
310 size: 0,
311 mtime: 791231220,
311 mtime: 791231220,
312 },
312 },
313 ),
313 ),
314 (
314 (
315 HgPathBuf::from_bytes(b"f2"),
315 HgPathBuf::from_bytes(b"f2"),
316 DirstateEntry {
316 DirstateEntry {
317 state: EntryState::Merged,
317 state: EntryState::Merged,
318 mode: 0o777,
318 mode: 0o777,
319 size: 1000,
319 size: 1000,
320 mtime: 791231220,
320 mtime: 791231220,
321 },
321 },
322 ),
322 ),
323 (
323 (
324 HgPathBuf::from_bytes(b"f3"),
324 HgPathBuf::from_bytes(b"f3"),
325 DirstateEntry {
325 DirstateEntry {
326 state: EntryState::Removed,
326 state: EntryState::Removed,
327 mode: 0o644,
327 mode: 0o644,
328 size: 234553,
328 size: 234553,
329 mtime: 791231220,
329 mtime: 791231220,
330 },
330 },
331 ),
331 ),
332 (
332 (
333 HgPathBuf::from_bytes(b"f4\xF6"),
333 HgPathBuf::from_bytes(b"f4\xF6"),
334 DirstateEntry {
334 DirstateEntry {
335 state: EntryState::Added,
335 state: EntryState::Added,
336 mode: 0o644,
336 mode: 0o644,
337 size: -1,
337 size: -1,
338 mtime: -1,
338 mtime: -1,
339 },
339 },
340 ),
340 ),
341 ]
341 ]
342 .iter()
342 .iter()
343 .cloned()
343 .cloned()
344 .collect();
344 .collect();
345 let mut copymap = FastHashMap::default();
345 let mut copymap = FastHashMap::default();
346 copymap.insert(
346 copymap.insert(
347 HgPathBuf::from_bytes(b"f1"),
347 HgPathBuf::from_bytes(b"f1"),
348 HgPathBuf::from_bytes(b"copyname"),
348 HgPathBuf::from_bytes(b"copyname"),
349 );
349 );
350 copymap.insert(
350 copymap.insert(
351 HgPathBuf::from_bytes(b"f4\xF6"),
351 HgPathBuf::from_bytes(b"f4\xF6"),
352 HgPathBuf::from_bytes(b"copyname2"),
352 HgPathBuf::from_bytes(b"copyname2"),
353 );
353 );
354 let parents = DirstateParents {
354 let parents = DirstateParents {
355 p1: *b"12345678910111213141",
355 p1: *b"12345678910111213141",
356 p2: *b"00000000000000000000",
356 p2: *b"00000000000000000000",
357 };
357 };
358 let now = Duration::new(15000000, 0);
358 let now = Duration::new(15000000, 0);
359 let result =
359 let result =
360 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
360 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
361 .unwrap();
361 .unwrap();
362
362
363 let mut new_state_map: StateMap = FastHashMap::default();
363 let mut new_state_map: StateMap = FastHashMap::default();
364 let mut new_copy_map: CopyMap = FastHashMap::default();
364 let mut new_copy_map: CopyMap = FastHashMap::default();
365 let new_parents = parse_dirstate(
365 let new_parents = parse_dirstate(
366 &mut new_state_map,
366 &mut new_state_map,
367 &mut new_copy_map,
367 &mut new_copy_map,
368 result.as_slice(),
368 result.as_slice(),
369 )
369 )
370 .unwrap();
370 .unwrap();
371 assert_eq!(
371 assert_eq!(
372 (parents, state_map, copymap),
372 (parents, state_map, copymap),
373 (new_parents, new_state_map, new_copy_map)
373 (new_parents, new_state_map, new_copy_map)
374 )
374 )
375 }
375 }
376
376
377 #[test]
377 #[test]
378 /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
378 /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
379 fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
379 fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
380 let mut state_map: StateMap = [(
380 let mut state_map: StateMap = [(
381 HgPathBuf::from_bytes(b"f1"),
381 HgPathBuf::from_bytes(b"f1"),
382 DirstateEntry {
382 DirstateEntry {
383 state: EntryState::Normal,
383 state: EntryState::Normal,
384 mode: 0o644,
384 mode: 0o644,
385 size: 0,
385 size: 0,
386 mtime: 15000000,
386 mtime: 15000000,
387 },
387 },
388 )]
388 )]
389 .iter()
389 .iter()
390 .cloned()
390 .cloned()
391 .collect();
391 .collect();
392 let mut copymap = FastHashMap::default();
392 let mut copymap = FastHashMap::default();
393 copymap.insert(
393 copymap.insert(
394 HgPathBuf::from_bytes(b"f1"),
394 HgPathBuf::from_bytes(b"f1"),
395 HgPathBuf::from_bytes(b"copyname"),
395 HgPathBuf::from_bytes(b"copyname"),
396 );
396 );
397 let parents = DirstateParents {
397 let parents = DirstateParents {
398 p1: *b"12345678910111213141",
398 p1: *b"12345678910111213141",
399 p2: *b"00000000000000000000",
399 p2: *b"00000000000000000000",
400 };
400 };
401 let now = Duration::new(15000000, 0);
401 let now = Duration::new(15000000, 0);
402 let result =
402 let result =
403 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
403 pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
404 .unwrap();
404 .unwrap();
405
405
406 let mut new_state_map: StateMap = FastHashMap::default();
406 let mut new_state_map: StateMap = FastHashMap::default();
407 let mut new_copy_map: CopyMap = FastHashMap::default();
407 let mut new_copy_map: CopyMap = FastHashMap::default();
408 let new_parents = parse_dirstate(
408 let new_parents = parse_dirstate(
409 &mut new_state_map,
409 &mut new_state_map,
410 &mut new_copy_map,
410 &mut new_copy_map,
411 result.as_slice(),
411 result.as_slice(),
412 )
412 )
413 .unwrap();
413 .unwrap();
414
414
415 assert_eq!(
415 assert_eq!(
416 (
416 (
417 parents,
417 parents,
418 [(
418 [(
419 HgPathBuf::from_bytes(b"f1"),
419 HgPathBuf::from_bytes(b"f1"),
420 DirstateEntry {
420 DirstateEntry {
421 state: EntryState::Normal,
421 state: EntryState::Normal,
422 mode: 0o644,
422 mode: 0o644,
423 size: 0,
423 size: 0,
424 mtime: -1
424 mtime: -1
425 }
425 }
426 )]
426 )]
427 .iter()
427 .iter()
428 .cloned()
428 .cloned()
429 .collect::<StateMap>(),
429 .collect::<StateMap>(),
430 copymap,
430 copymap,
431 ),
431 ),
432 (new_parents, new_state_map, new_copy_map)
432 (new_parents, new_state_map, new_copy_map)
433 )
433 )
434 }
434 }
435 }
435 }
@@ -1,956 +1,953
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Rust implementation of dirstate.status (dirstate.py).
8 //! Rust implementation of dirstate.status (dirstate.py).
9 //! It is currently missing a lot of functionality compared to the Python one
9 //! It is currently missing a lot of functionality compared to the Python one
10 //! and will only be triggered in narrow cases.
10 //! and will only be triggered in narrow cases.
11
11
12 use crate::{
12 use crate::{
13 dirstate::SIZE_FROM_OTHER_PARENT,
13 dirstate::SIZE_FROM_OTHER_PARENT,
14 filepatterns::PatternFileWarning,
14 filepatterns::PatternFileWarning,
15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 utils::{
16 utils::{
17 files::{find_dirs, HgMetadata},
17 files::{find_dirs, HgMetadata},
18 hg_path::{
18 hg_path::{
19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 HgPathError,
20 HgPathError,
21 },
21 },
22 path_auditor::PathAuditor,
22 path_auditor::PathAuditor,
23 },
23 },
24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 PatternError,
25 PatternError,
26 };
26 };
27 use lazy_static::lazy_static;
27 use lazy_static::lazy_static;
28 use micro_timer::timed;
28 use micro_timer::timed;
29 use rayon::prelude::*;
29 use rayon::prelude::*;
30 use std::{
30 use std::{
31 borrow::Cow,
31 borrow::Cow,
32 collections::HashSet,
32 collections::HashSet,
33 fs::{read_dir, DirEntry},
33 fs::{read_dir, DirEntry},
34 io::ErrorKind,
34 io::ErrorKind,
35 ops::Deref,
35 ops::Deref,
36 path::{Path, PathBuf},
36 path::{Path, PathBuf},
37 };
37 };
38
38
39 /// Wrong type of file from a `BadMatch`
39 /// Wrong type of file from a `BadMatch`
40 /// Note: a lot of those don't exist on all platforms.
40 /// Note: a lot of those don't exist on all platforms.
41 #[derive(Debug, Copy, Clone)]
41 #[derive(Debug, Copy, Clone)]
42 pub enum BadType {
42 pub enum BadType {
43 CharacterDevice,
43 CharacterDevice,
44 BlockDevice,
44 BlockDevice,
45 FIFO,
45 FIFO,
46 Socket,
46 Socket,
47 Directory,
47 Directory,
48 Unknown,
48 Unknown,
49 }
49 }
50
50
51 impl ToString for BadType {
51 impl ToString for BadType {
52 fn to_string(&self) -> String {
52 fn to_string(&self) -> String {
53 match self {
53 match self {
54 BadType::CharacterDevice => "character device",
54 BadType::CharacterDevice => "character device",
55 BadType::BlockDevice => "block device",
55 BadType::BlockDevice => "block device",
56 BadType::FIFO => "fifo",
56 BadType::FIFO => "fifo",
57 BadType::Socket => "socket",
57 BadType::Socket => "socket",
58 BadType::Directory => "directory",
58 BadType::Directory => "directory",
59 BadType::Unknown => "unknown",
59 BadType::Unknown => "unknown",
60 }
60 }
61 .to_string()
61 .to_string()
62 }
62 }
63 }
63 }
64
64
65 /// Was explicitly matched but cannot be found/accessed
65 /// Was explicitly matched but cannot be found/accessed
66 #[derive(Debug, Copy, Clone)]
66 #[derive(Debug, Copy, Clone)]
67 pub enum BadMatch {
67 pub enum BadMatch {
68 OsError(i32),
68 OsError(i32),
69 BadType(BadType),
69 BadType(BadType),
70 }
70 }
71
71
72 /// Marker enum used to dispatch new status entries into the right collections.
72 /// Marker enum used to dispatch new status entries into the right collections.
73 /// Is similar to `crate::EntryState`, but represents the transient state of
73 /// Is similar to `crate::EntryState`, but represents the transient state of
74 /// entries during the lifetime of a command.
74 /// entries during the lifetime of a command.
75 #[derive(Debug, Copy, Clone)]
75 #[derive(Debug, Copy, Clone)]
76 enum Dispatch {
76 enum Dispatch {
77 Unsure,
77 Unsure,
78 Modified,
78 Modified,
79 Added,
79 Added,
80 Removed,
80 Removed,
81 Deleted,
81 Deleted,
82 Clean,
82 Clean,
83 Unknown,
83 Unknown,
84 Ignored,
84 Ignored,
85 /// Empty dispatch, the file is not worth listing
85 /// Empty dispatch, the file is not worth listing
86 None,
86 None,
87 /// Was explicitly matched but cannot be found/accessed
87 /// Was explicitly matched but cannot be found/accessed
88 Bad(BadMatch),
88 Bad(BadMatch),
89 Directory {
89 Directory {
90 /// True if the directory used to be a file in the dmap so we can say
90 /// True if the directory used to be a file in the dmap so we can say
91 /// that it's been removed.
91 /// that it's been removed.
92 was_file: bool,
92 was_file: bool,
93 },
93 },
94 }
94 }
95
95
96 type IoResult<T> = std::io::Result<T>;
96 type IoResult<T> = std::io::Result<T>;
97 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
97 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
98 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
98 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
99 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
99 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
100
100
101 /// Dates and times that are outside the 31-bit signed range are compared
101 /// Dates and times that are outside the 31-bit signed range are compared
102 /// modulo 2^31. This should prevent hg from behaving badly with very large
102 /// modulo 2^31. This should prevent hg from behaving badly with very large
103 /// files or corrupt dates while still having a high probability of detecting
103 /// files or corrupt dates while still having a high probability of detecting
104 /// changes. (issue2608)
104 /// changes. (issue2608)
105 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
105 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
106 /// is not defined for `i32`, and there is no `As` trait. This forces the
106 /// is not defined for `i32`, and there is no `As` trait. This forces the
107 /// caller to cast `b` as `i32`.
107 /// caller to cast `b` as `i32`.
108 fn mod_compare(a: i32, b: i32) -> bool {
108 fn mod_compare(a: i32, b: i32) -> bool {
109 a & i32::max_value() != b & i32::max_value()
109 a & i32::max_value() != b & i32::max_value()
110 }
110 }
111
111
112 /// Return a sorted list containing information about the entries
112 /// Return a sorted list containing information about the entries
113 /// in the directory.
113 /// in the directory.
114 ///
114 ///
115 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
115 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
116 fn list_directory(
116 fn list_directory(
117 path: impl AsRef<Path>,
117 path: impl AsRef<Path>,
118 skip_dot_hg: bool,
118 skip_dot_hg: bool,
119 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
119 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
120 let mut results = vec![];
120 let mut results = vec![];
121 let entries = read_dir(path.as_ref())?;
121 let entries = read_dir(path.as_ref())?;
122
122
123 for entry in entries {
123 for entry in entries {
124 let entry = entry?;
124 let entry = entry?;
125 let filename = os_string_to_hg_path_buf(entry.file_name())?;
125 let filename = os_string_to_hg_path_buf(entry.file_name())?;
126 let file_type = entry.file_type()?;
126 let file_type = entry.file_type()?;
127 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
127 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
128 return Ok(vec![]);
128 return Ok(vec![]);
129 } else {
129 } else {
130 results.push((HgPathBuf::from(filename), entry))
130 results.push((filename, entry))
131 }
131 }
132 }
132 }
133
133
134 results.sort_unstable_by_key(|e| e.0.clone());
134 results.sort_unstable_by_key(|e| e.0.clone());
135 Ok(results)
135 Ok(results)
136 }
136 }
137
137
138 /// The file corresponding to the dirstate entry was found on the filesystem.
138 /// The file corresponding to the dirstate entry was found on the filesystem.
139 fn dispatch_found(
139 fn dispatch_found(
140 filename: impl AsRef<HgPath>,
140 filename: impl AsRef<HgPath>,
141 entry: DirstateEntry,
141 entry: DirstateEntry,
142 metadata: HgMetadata,
142 metadata: HgMetadata,
143 copy_map: &CopyMap,
143 copy_map: &CopyMap,
144 options: StatusOptions,
144 options: StatusOptions,
145 ) -> Dispatch {
145 ) -> Dispatch {
146 let DirstateEntry {
146 let DirstateEntry {
147 state,
147 state,
148 mode,
148 mode,
149 mtime,
149 mtime,
150 size,
150 size,
151 } = entry;
151 } = entry;
152
152
153 let HgMetadata {
153 let HgMetadata {
154 st_mode,
154 st_mode,
155 st_size,
155 st_size,
156 st_mtime,
156 st_mtime,
157 ..
157 ..
158 } = metadata;
158 } = metadata;
159
159
160 match state {
160 match state {
161 EntryState::Normal => {
161 EntryState::Normal => {
162 let size_changed = mod_compare(size, st_size as i32);
162 let size_changed = mod_compare(size, st_size as i32);
163 let mode_changed =
163 let mode_changed =
164 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
164 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
165 let metadata_changed = size >= 0 && (size_changed || mode_changed);
165 let metadata_changed = size >= 0 && (size_changed || mode_changed);
166 let other_parent = size == SIZE_FROM_OTHER_PARENT;
166 let other_parent = size == SIZE_FROM_OTHER_PARENT;
167
167 if metadata_changed
168 if metadata_changed
168 || other_parent
169 || other_parent
169 || copy_map.contains_key(filename.as_ref())
170 || copy_map.contains_key(filename.as_ref())
170 {
171 {
171 Dispatch::Modified
172 Dispatch::Modified
172 } else if mod_compare(mtime, st_mtime as i32) {
173 } else if mod_compare(mtime, st_mtime as i32)
173 Dispatch::Unsure
174 || st_mtime == options.last_normal_time
174 } else if st_mtime == options.last_normal_time {
175 {
175 // the file may have just been marked as normal and
176 // the file may have just been marked as normal and
176 // it may have changed in the same second without
177 // it may have changed in the same second without
177 // changing its size. This can happen if we quickly
178 // changing its size. This can happen if we quickly
178 // do multiple commits. Force lookup, so we don't
179 // do multiple commits. Force lookup, so we don't
179 // miss such a racy file change.
180 // miss such a racy file change.
180 Dispatch::Unsure
181 Dispatch::Unsure
181 } else if options.list_clean {
182 } else if options.list_clean {
182 Dispatch::Clean
183 Dispatch::Clean
183 } else {
184 } else {
184 Dispatch::None
185 Dispatch::None
185 }
186 }
186 }
187 }
187 EntryState::Merged => Dispatch::Modified,
188 EntryState::Merged => Dispatch::Modified,
188 EntryState::Added => Dispatch::Added,
189 EntryState::Added => Dispatch::Added,
189 EntryState::Removed => Dispatch::Removed,
190 EntryState::Removed => Dispatch::Removed,
190 EntryState::Unknown => Dispatch::Unknown,
191 EntryState::Unknown => Dispatch::Unknown,
191 }
192 }
192 }
193 }
193
194
194 /// The file corresponding to this Dirstate entry is missing.
195 /// The file corresponding to this Dirstate entry is missing.
195 fn dispatch_missing(state: EntryState) -> Dispatch {
196 fn dispatch_missing(state: EntryState) -> Dispatch {
196 match state {
197 match state {
197 // File was removed from the filesystem during commands
198 // File was removed from the filesystem during commands
198 EntryState::Normal | EntryState::Merged | EntryState::Added => {
199 EntryState::Normal | EntryState::Merged | EntryState::Added => {
199 Dispatch::Deleted
200 Dispatch::Deleted
200 }
201 }
201 // File was removed, everything is normal
202 // File was removed, everything is normal
202 EntryState::Removed => Dispatch::Removed,
203 EntryState::Removed => Dispatch::Removed,
203 // File is unknown to Mercurial, everything is normal
204 // File is unknown to Mercurial, everything is normal
204 EntryState::Unknown => Dispatch::Unknown,
205 EntryState::Unknown => Dispatch::Unknown,
205 }
206 }
206 }
207 }
207
208
208 lazy_static! {
209 lazy_static! {
209 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
210 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
210 let mut h = HashSet::new();
211 let mut h = HashSet::new();
211 h.insert(HgPath::new(b""));
212 h.insert(HgPath::new(b""));
212 h
213 h
213 };
214 };
214 }
215 }
215
216
216 /// Get stat data about the files explicitly specified by match.
217 /// Get stat data about the files explicitly specified by match.
217 /// TODO subrepos
218 /// TODO subrepos
218 #[timed]
219 #[timed]
219 fn walk_explicit<'a>(
220 fn walk_explicit<'a>(
220 files: Option<&'a HashSet<&HgPath>>,
221 files: Option<&'a HashSet<&HgPath>>,
221 dmap: &'a DirstateMap,
222 dmap: &'a DirstateMap,
222 root_dir: impl AsRef<Path> + Sync + Send + 'a,
223 root_dir: impl AsRef<Path> + Sync + Send + 'a,
223 options: StatusOptions,
224 options: StatusOptions,
224 traversed_sender: crossbeam::Sender<HgPathBuf>,
225 traversed_sender: crossbeam::Sender<HgPathBuf>,
225 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
226 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
226 files
227 files
227 .unwrap_or(&DEFAULT_WORK)
228 .unwrap_or(&DEFAULT_WORK)
228 .par_iter()
229 .par_iter()
229 .map(move |filename| {
230 .map(move |&filename| {
230 // TODO normalization
231 // TODO normalization
231 let normalized = filename.as_ref();
232 let normalized = filename;
232
233
233 let buf = match hg_path_to_path_buf(normalized) {
234 let buf = match hg_path_to_path_buf(normalized) {
234 Ok(x) => x,
235 Ok(x) => x,
235 Err(e) => return Some(Err(e.into())),
236 Err(e) => return Some(Err(e.into())),
236 };
237 };
237 let target = root_dir.as_ref().join(buf);
238 let target = root_dir.as_ref().join(buf);
238 let st = target.symlink_metadata();
239 let st = target.symlink_metadata();
239 let in_dmap = dmap.get(normalized);
240 let in_dmap = dmap.get(normalized);
240 match st {
241 match st {
241 Ok(meta) => {
242 Ok(meta) => {
242 let file_type = meta.file_type();
243 let file_type = meta.file_type();
243 return if file_type.is_file() || file_type.is_symlink() {
244 return if file_type.is_file() || file_type.is_symlink() {
244 if let Some(entry) = in_dmap {
245 if let Some(entry) = in_dmap {
245 return Some(Ok((
246 return Some(Ok((
246 normalized,
247 normalized,
247 dispatch_found(
248 dispatch_found(
248 &normalized,
249 &normalized,
249 *entry,
250 *entry,
250 HgMetadata::from_metadata(meta),
251 HgMetadata::from_metadata(meta),
251 &dmap.copy_map,
252 &dmap.copy_map,
252 options,
253 options,
253 ),
254 ),
254 )));
255 )));
255 }
256 }
256 Some(Ok((normalized, Dispatch::Unknown)))
257 Some(Ok((normalized, Dispatch::Unknown)))
258 } else if file_type.is_dir() {
259 if options.collect_traversed_dirs {
260 traversed_sender
261 .send(normalized.to_owned())
262 .expect("receiver should outlive sender");
263 }
264 Some(Ok((
265 normalized,
266 Dispatch::Directory {
267 was_file: in_dmap.is_some(),
268 },
269 )))
257 } else {
270 } else {
258 if file_type.is_dir() {
271 Some(Ok((
259 if options.collect_traversed_dirs {
272 normalized,
260 traversed_sender
273 Dispatch::Bad(BadMatch::BadType(
261 .send(normalized.to_owned())
274 // TODO do more than unknown
262 .expect("receiver should outlive sender");
275 // Support for all `BadType` variant
263 }
276 // varies greatly between platforms.
264 Some(Ok((
277 // So far, no tests check the type and
265 normalized,
278 // this should be good enough for most
266 Dispatch::Directory {
279 // users.
267 was_file: in_dmap.is_some(),
280 BadType::Unknown,
268 },
281 )),
269 )))
282 )))
270 } else {
271 Some(Ok((
272 normalized,
273 Dispatch::Bad(BadMatch::BadType(
274 // TODO do more than unknown
275 // Support for all `BadType` variant
276 // varies greatly between platforms.
277 // So far, no tests check the type and
278 // this should be good enough for most
279 // users.
280 BadType::Unknown,
281 )),
282 )))
283 }
284 };
283 };
285 }
284 }
286 Err(_) => {
285 Err(_) => {
287 if let Some(entry) = in_dmap {
286 if let Some(entry) = in_dmap {
288 return Some(Ok((
287 return Some(Ok((
289 normalized,
288 normalized,
290 dispatch_missing(entry.state),
289 dispatch_missing(entry.state),
291 )));
290 )));
292 }
291 }
293 }
292 }
294 };
293 };
295 None
294 None
296 })
295 })
297 .flatten()
296 .flatten()
298 }
297 }
299
298
300 #[derive(Debug, Copy, Clone)]
299 #[derive(Debug, Copy, Clone)]
301 pub struct StatusOptions {
300 pub struct StatusOptions {
302 /// Remember the most recent modification timeslot for status, to make
301 /// Remember the most recent modification timeslot for status, to make
303 /// sure we won't miss future size-preserving file content modifications
302 /// sure we won't miss future size-preserving file content modifications
304 /// that happen within the same timeslot.
303 /// that happen within the same timeslot.
305 pub last_normal_time: i64,
304 pub last_normal_time: i64,
306 /// Whether we are on a filesystem with UNIX-like exec flags
305 /// Whether we are on a filesystem with UNIX-like exec flags
307 pub check_exec: bool,
306 pub check_exec: bool,
308 pub list_clean: bool,
307 pub list_clean: bool,
309 pub list_unknown: bool,
308 pub list_unknown: bool,
310 pub list_ignored: bool,
309 pub list_ignored: bool,
311 /// Whether to collect traversed dirs for applying a callback later.
310 /// Whether to collect traversed dirs for applying a callback later.
312 /// Used by `hg purge` for example.
311 /// Used by `hg purge` for example.
313 pub collect_traversed_dirs: bool,
312 pub collect_traversed_dirs: bool,
314 }
313 }
315
314
316 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
315 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
317 /// If the entry is a folder that needs to be traversed, it will be handled
316 /// If the entry is a folder that needs to be traversed, it will be handled
318 /// in a separate thread.
317 /// in a separate thread.
319 fn handle_traversed_entry<'a>(
318 fn handle_traversed_entry<'a>(
320 scope: &rayon::Scope<'a>,
319 scope: &rayon::Scope<'a>,
321 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
320 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
322 matcher: &'a (impl Matcher + Sync),
321 matcher: &'a (impl Matcher + Sync),
323 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
322 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
324 dmap: &'a DirstateMap,
323 dmap: &'a DirstateMap,
325 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
324 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
326 ignore_fn: &'a IgnoreFnType,
325 ignore_fn: &'a IgnoreFnType,
327 dir_ignore_fn: &'a IgnoreFnType,
326 dir_ignore_fn: &'a IgnoreFnType,
328 options: StatusOptions,
327 options: StatusOptions,
329 filename: HgPathBuf,
328 filename: HgPathBuf,
330 dir_entry: DirEntry,
329 dir_entry: DirEntry,
331 traversed_sender: crossbeam::Sender<HgPathBuf>,
330 traversed_sender: crossbeam::Sender<HgPathBuf>,
332 ) -> IoResult<()> {
331 ) -> IoResult<()> {
333 let file_type = dir_entry.file_type()?;
332 let file_type = dir_entry.file_type()?;
334 let entry_option = dmap.get(&filename);
333 let entry_option = dmap.get(&filename);
335
334
336 if filename.as_bytes() == b".hg" {
335 if filename.as_bytes() == b".hg" {
337 // Could be a directory or a symlink
336 // Could be a directory or a symlink
338 return Ok(());
337 return Ok(());
339 }
338 }
340
339
341 if file_type.is_dir() {
340 if file_type.is_dir() {
342 handle_traversed_dir(
341 handle_traversed_dir(
343 scope,
342 scope,
344 files_sender,
343 files_sender,
345 matcher,
344 matcher,
346 root_dir,
345 root_dir,
347 dmap,
346 dmap,
348 old_results,
347 old_results,
349 ignore_fn,
348 ignore_fn,
350 dir_ignore_fn,
349 dir_ignore_fn,
351 options,
350 options,
352 entry_option,
351 entry_option,
353 filename,
352 filename,
354 traversed_sender,
353 traversed_sender,
355 );
354 );
356 } else if file_type.is_file() || file_type.is_symlink() {
355 } else if file_type.is_file() || file_type.is_symlink() {
357 if let Some(entry) = entry_option {
356 if let Some(entry) = entry_option {
358 if matcher.matches_everything() || matcher.matches(&filename) {
357 if matcher.matches_everything() || matcher.matches(&filename) {
359 let metadata = dir_entry.metadata()?;
358 let metadata = dir_entry.metadata()?;
360 files_sender
359 files_sender
361 .send(Ok((
360 .send(Ok((
362 filename.to_owned(),
361 filename.to_owned(),
363 dispatch_found(
362 dispatch_found(
364 &filename,
363 &filename,
365 *entry,
364 *entry,
366 HgMetadata::from_metadata(metadata),
365 HgMetadata::from_metadata(metadata),
367 &dmap.copy_map,
366 &dmap.copy_map,
368 options,
367 options,
369 ),
368 ),
370 )))
369 )))
371 .unwrap();
370 .unwrap();
372 }
371 }
373 } else if (matcher.matches_everything() || matcher.matches(&filename))
372 } else if (matcher.matches_everything() || matcher.matches(&filename))
374 && !ignore_fn(&filename)
373 && !ignore_fn(&filename)
375 {
374 {
376 if (options.list_ignored || matcher.exact_match(&filename))
375 if (options.list_ignored || matcher.exact_match(&filename))
377 && dir_ignore_fn(&filename)
376 && dir_ignore_fn(&filename)
378 {
377 {
379 if options.list_ignored {
378 if options.list_ignored {
380 files_sender
379 files_sender
381 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
380 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
382 .unwrap();
381 .unwrap();
383 }
382 }
384 } else {
383 } else if options.list_unknown {
385 if options.list_unknown {
384 files_sender
386 files_sender
385 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
387 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
386 .unwrap();
388 .unwrap();
389 }
390 }
387 }
391 } else if ignore_fn(&filename) && options.list_ignored {
388 } else if ignore_fn(&filename) && options.list_ignored {
392 files_sender
389 files_sender
393 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
390 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
394 .unwrap();
391 .unwrap();
395 }
392 }
396 } else if let Some(entry) = entry_option {
393 } else if let Some(entry) = entry_option {
397 // Used to be a file or a folder, now something else.
394 // Used to be a file or a folder, now something else.
398 if matcher.matches_everything() || matcher.matches(&filename) {
395 if matcher.matches_everything() || matcher.matches(&filename) {
399 files_sender
396 files_sender
400 .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
397 .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
401 .unwrap();
398 .unwrap();
402 }
399 }
403 }
400 }
404
401
405 Ok(())
402 Ok(())
406 }
403 }
407
404
408 /// A directory was found in the filesystem and needs to be traversed
405 /// A directory was found in the filesystem and needs to be traversed
409 fn handle_traversed_dir<'a>(
406 fn handle_traversed_dir<'a>(
410 scope: &rayon::Scope<'a>,
407 scope: &rayon::Scope<'a>,
411 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
408 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
412 matcher: &'a (impl Matcher + Sync),
409 matcher: &'a (impl Matcher + Sync),
413 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
410 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
414 dmap: &'a DirstateMap,
411 dmap: &'a DirstateMap,
415 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
412 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
416 ignore_fn: &'a IgnoreFnType,
413 ignore_fn: &'a IgnoreFnType,
417 dir_ignore_fn: &'a IgnoreFnType,
414 dir_ignore_fn: &'a IgnoreFnType,
418 options: StatusOptions,
415 options: StatusOptions,
419 entry_option: Option<&'a DirstateEntry>,
416 entry_option: Option<&'a DirstateEntry>,
420 directory: HgPathBuf,
417 directory: HgPathBuf,
421 traversed_sender: crossbeam::Sender<HgPathBuf>,
418 traversed_sender: crossbeam::Sender<HgPathBuf>,
422 ) {
419 ) {
423 scope.spawn(move |_| {
420 scope.spawn(move |_| {
424 // Nested `if` until `rust-lang/rust#53668` is stable
421 // Nested `if` until `rust-lang/rust#53668` is stable
425 if let Some(entry) = entry_option {
422 if let Some(entry) = entry_option {
426 // Used to be a file, is now a folder
423 // Used to be a file, is now a folder
427 if matcher.matches_everything() || matcher.matches(&directory) {
424 if matcher.matches_everything() || matcher.matches(&directory) {
428 files_sender
425 files_sender
429 .send(Ok((
426 .send(Ok((
430 directory.to_owned(),
427 directory.to_owned(),
431 dispatch_missing(entry.state),
428 dispatch_missing(entry.state),
432 )))
429 )))
433 .unwrap();
430 .unwrap();
434 }
431 }
435 }
432 }
436 // Do we need to traverse it?
433 // Do we need to traverse it?
437 if !ignore_fn(&directory) || options.list_ignored {
434 if !ignore_fn(&directory) || options.list_ignored {
438 traverse_dir(
435 traverse_dir(
439 files_sender,
436 files_sender,
440 matcher,
437 matcher,
441 root_dir,
438 root_dir,
442 dmap,
439 dmap,
443 directory,
440 directory,
444 &old_results,
441 &old_results,
445 ignore_fn,
442 ignore_fn,
446 dir_ignore_fn,
443 dir_ignore_fn,
447 options,
444 options,
448 traversed_sender,
445 traversed_sender,
449 )
446 )
450 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
447 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
451 }
448 }
452 });
449 });
453 }
450 }
454
451
455 /// Decides whether the directory needs to be listed, and if so handles the
452 /// Decides whether the directory needs to be listed, and if so handles the
456 /// entries in a separate thread.
453 /// entries in a separate thread.
457 fn traverse_dir<'a>(
454 fn traverse_dir<'a>(
458 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
455 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
459 matcher: &'a (impl Matcher + Sync),
456 matcher: &'a (impl Matcher + Sync),
460 root_dir: impl AsRef<Path> + Sync + Send + Copy,
457 root_dir: impl AsRef<Path> + Sync + Send + Copy,
461 dmap: &'a DirstateMap,
458 dmap: &'a DirstateMap,
462 directory: impl AsRef<HgPath>,
459 directory: impl AsRef<HgPath>,
463 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
460 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
464 ignore_fn: &IgnoreFnType,
461 ignore_fn: &IgnoreFnType,
465 dir_ignore_fn: &IgnoreFnType,
462 dir_ignore_fn: &IgnoreFnType,
466 options: StatusOptions,
463 options: StatusOptions,
467 traversed_sender: crossbeam::Sender<HgPathBuf>,
464 traversed_sender: crossbeam::Sender<HgPathBuf>,
468 ) -> IoResult<()> {
465 ) -> IoResult<()> {
469 let directory = directory.as_ref();
466 let directory = directory.as_ref();
470
467
471 if options.collect_traversed_dirs {
468 if options.collect_traversed_dirs {
472 traversed_sender
469 traversed_sender
473 .send(directory.to_owned())
470 .send(directory.to_owned())
474 .expect("receiver should outlive sender");
471 .expect("receiver should outlive sender");
475 }
472 }
476
473
477 let visit_entries = match matcher.visit_children_set(directory) {
474 let visit_entries = match matcher.visit_children_set(directory) {
478 VisitChildrenSet::Empty => return Ok(()),
475 VisitChildrenSet::Empty => return Ok(()),
479 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
476 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
480 VisitChildrenSet::Set(set) => Some(set),
477 VisitChildrenSet::Set(set) => Some(set),
481 };
478 };
482 let buf = hg_path_to_path_buf(directory)?;
479 let buf = hg_path_to_path_buf(directory)?;
483 let dir_path = root_dir.as_ref().join(buf);
480 let dir_path = root_dir.as_ref().join(buf);
484
481
485 let skip_dot_hg = !directory.as_bytes().is_empty();
482 let skip_dot_hg = !directory.as_bytes().is_empty();
486 let entries = match list_directory(dir_path, skip_dot_hg) {
483 let entries = match list_directory(dir_path, skip_dot_hg) {
487 Err(e) => match e.kind() {
484 Err(e) => match e.kind() {
488 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
485 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
489 files_sender
486 files_sender
490 .send(Ok((
487 .send(Ok((
491 directory.to_owned(),
488 directory.to_owned(),
492 Dispatch::Bad(BadMatch::OsError(
489 Dispatch::Bad(BadMatch::OsError(
493 // Unwrapping here is OK because the error always
490 // Unwrapping here is OK because the error always
494 // is a real os error
491 // is a real os error
495 e.raw_os_error().unwrap(),
492 e.raw_os_error().unwrap(),
496 )),
493 )),
497 )))
494 )))
498 .unwrap();
495 .unwrap();
499 return Ok(());
496 return Ok(());
500 }
497 }
501 _ => return Err(e),
498 _ => return Err(e),
502 },
499 },
503 Ok(entries) => entries,
500 Ok(entries) => entries,
504 };
501 };
505
502
506 rayon::scope(|scope| -> IoResult<()> {
503 rayon::scope(|scope| -> IoResult<()> {
507 for (filename, dir_entry) in entries {
504 for (filename, dir_entry) in entries {
508 if let Some(ref set) = visit_entries {
505 if let Some(ref set) = visit_entries {
509 if !set.contains(filename.deref()) {
506 if !set.contains(filename.deref()) {
510 continue;
507 continue;
511 }
508 }
512 }
509 }
513 // TODO normalize
510 // TODO normalize
514 let filename = if directory.is_empty() {
511 let filename = if directory.is_empty() {
515 filename.to_owned()
512 filename.to_owned()
516 } else {
513 } else {
517 directory.join(&filename)
514 directory.join(&filename)
518 };
515 };
519
516
520 if !old_results.contains_key(filename.deref()) {
517 if !old_results.contains_key(filename.deref()) {
521 handle_traversed_entry(
518 handle_traversed_entry(
522 scope,
519 scope,
523 files_sender,
520 files_sender,
524 matcher,
521 matcher,
525 root_dir,
522 root_dir,
526 dmap,
523 dmap,
527 old_results,
524 old_results,
528 ignore_fn,
525 ignore_fn,
529 dir_ignore_fn,
526 dir_ignore_fn,
530 options,
527 options,
531 filename,
528 filename,
532 dir_entry,
529 dir_entry,
533 traversed_sender.clone(),
530 traversed_sender.clone(),
534 )?;
531 )?;
535 }
532 }
536 }
533 }
537 Ok(())
534 Ok(())
538 })
535 })
539 }
536 }
540
537
541 /// Walk the working directory recursively to look for changes compared to the
538 /// Walk the working directory recursively to look for changes compared to the
542 /// current `DirstateMap`.
539 /// current `DirstateMap`.
543 ///
540 ///
544 /// This takes a mutable reference to the results to account for the `extend`
541 /// This takes a mutable reference to the results to account for the `extend`
545 /// in timings
542 /// in timings
546 #[timed]
543 #[timed]
547 fn traverse<'a>(
544 fn traverse<'a>(
548 matcher: &'a (impl Matcher + Sync),
545 matcher: &'a (impl Matcher + Sync),
549 root_dir: impl AsRef<Path> + Sync + Send + Copy,
546 root_dir: impl AsRef<Path> + Sync + Send + Copy,
550 dmap: &'a DirstateMap,
547 dmap: &'a DirstateMap,
551 path: impl AsRef<HgPath>,
548 path: impl AsRef<HgPath>,
552 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
549 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
553 ignore_fn: &IgnoreFnType,
550 ignore_fn: &IgnoreFnType,
554 dir_ignore_fn: &IgnoreFnType,
551 dir_ignore_fn: &IgnoreFnType,
555 options: StatusOptions,
552 options: StatusOptions,
556 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
553 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
557 traversed_sender: crossbeam::Sender<HgPathBuf>,
554 traversed_sender: crossbeam::Sender<HgPathBuf>,
558 ) -> IoResult<()> {
555 ) -> IoResult<()> {
559 let root_dir = root_dir.as_ref();
556 let root_dir = root_dir.as_ref();
560
557
561 // The traversal is done in parallel, so use a channel to gather entries.
558 // The traversal is done in parallel, so use a channel to gather entries.
562 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
559 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
563 let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();
560 let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();
564
561
565 traverse_dir(
562 traverse_dir(
566 &files_transmitter,
563 &files_transmitter,
567 matcher,
564 matcher,
568 root_dir,
565 root_dir,
569 &dmap,
566 &dmap,
570 path,
567 path,
571 &old_results,
568 &old_results,
572 &ignore_fn,
569 &ignore_fn,
573 &dir_ignore_fn,
570 &dir_ignore_fn,
574 options,
571 options,
575 traversed_sender,
572 traversed_sender,
576 )?;
573 )?;
577
574
578 // Disconnect the channel so the receiver stops waiting
575 // Disconnect the channel so the receiver stops waiting
579 drop(files_transmitter);
576 drop(files_transmitter);
580
577
581 // TODO don't collect. Find a way of replicating the behavior of
578 // TODO don't collect. Find a way of replicating the behavior of
582 // `itertools::process_results`, but for `rayon::ParallelIterator`
579 // `itertools::process_results`, but for `rayon::ParallelIterator`
583 let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
580 let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
584 files_receiver
581 files_receiver
585 .into_iter()
582 .into_iter()
586 .map(|item| {
583 .map(|item| {
587 let (f, d) = item?;
584 let (f, d) = item?;
588 Ok((Cow::Owned(f), d))
585 Ok((Cow::Owned(f), d))
589 })
586 })
590 .collect();
587 .collect();
591
588
592 results.par_extend(new_results?);
589 results.par_extend(new_results?);
593
590
594 Ok(())
591 Ok(())
595 }
592 }
596
593
597 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
594 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
598 fn stat_dmap_entries(
595 fn stat_dmap_entries(
599 dmap: &DirstateMap,
596 dmap: &DirstateMap,
600 root_dir: impl AsRef<Path> + Sync + Send,
597 root_dir: impl AsRef<Path> + Sync + Send,
601 options: StatusOptions,
598 options: StatusOptions,
602 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
599 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
603 dmap.par_iter().map(move |(filename, entry)| {
600 dmap.par_iter().map(move |(filename, entry)| {
604 let filename: &HgPath = filename;
601 let filename: &HgPath = filename;
605 let filename_as_path = hg_path_to_path_buf(filename)?;
602 let filename_as_path = hg_path_to_path_buf(filename)?;
606 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
603 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
607
604
608 match meta {
605 match meta {
609 Ok(ref m)
606 Ok(ref m)
610 if !(m.file_type().is_file()
607 if !(m.file_type().is_file()
611 || m.file_type().is_symlink()) =>
608 || m.file_type().is_symlink()) =>
612 {
609 {
613 Ok((filename, dispatch_missing(entry.state)))
610 Ok((filename, dispatch_missing(entry.state)))
614 }
611 }
615 Ok(m) => Ok((
612 Ok(m) => Ok((
616 filename,
613 filename,
617 dispatch_found(
614 dispatch_found(
618 filename,
615 filename,
619 *entry,
616 *entry,
620 HgMetadata::from_metadata(m),
617 HgMetadata::from_metadata(m),
621 &dmap.copy_map,
618 &dmap.copy_map,
622 options,
619 options,
623 ),
620 ),
624 )),
621 )),
625 Err(ref e)
622 Err(ref e)
626 if e.kind() == ErrorKind::NotFound
623 if e.kind() == ErrorKind::NotFound
627 || e.raw_os_error() == Some(20) =>
624 || e.raw_os_error() == Some(20) =>
628 {
625 {
629 // Rust does not yet have an `ErrorKind` for
626 // Rust does not yet have an `ErrorKind` for
630 // `NotADirectory` (errno 20)
627 // `NotADirectory` (errno 20)
631 // It happens if the dirstate contains `foo/bar` and
628 // It happens if the dirstate contains `foo/bar` and
632 // foo is not a directory
629 // foo is not a directory
633 Ok((filename, dispatch_missing(entry.state)))
630 Ok((filename, dispatch_missing(entry.state)))
634 }
631 }
635 Err(e) => Err(e),
632 Err(e) => Err(e),
636 }
633 }
637 })
634 })
638 }
635 }
639
636
640 /// This takes a mutable reference to the results to account for the `extend`
637 /// This takes a mutable reference to the results to account for the `extend`
641 /// in timings
638 /// in timings
642 #[timed]
639 #[timed]
643 fn extend_from_dmap<'a>(
640 fn extend_from_dmap<'a>(
644 dmap: &'a DirstateMap,
641 dmap: &'a DirstateMap,
645 root_dir: impl AsRef<Path> + Sync + Send,
642 root_dir: impl AsRef<Path> + Sync + Send,
646 options: StatusOptions,
643 options: StatusOptions,
647 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
644 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
648 ) {
645 ) {
649 results.par_extend(
646 results.par_extend(
650 stat_dmap_entries(dmap, root_dir, options)
647 stat_dmap_entries(dmap, root_dir, options)
651 .flatten()
648 .flatten()
652 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
649 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
653 );
650 );
654 }
651 }
655
652
656 #[derive(Debug)]
653 #[derive(Debug)]
657 pub struct DirstateStatus<'a> {
654 pub struct DirstateStatus<'a> {
658 pub modified: Vec<Cow<'a, HgPath>>,
655 pub modified: Vec<Cow<'a, HgPath>>,
659 pub added: Vec<Cow<'a, HgPath>>,
656 pub added: Vec<Cow<'a, HgPath>>,
660 pub removed: Vec<Cow<'a, HgPath>>,
657 pub removed: Vec<Cow<'a, HgPath>>,
661 pub deleted: Vec<Cow<'a, HgPath>>,
658 pub deleted: Vec<Cow<'a, HgPath>>,
662 pub clean: Vec<Cow<'a, HgPath>>,
659 pub clean: Vec<Cow<'a, HgPath>>,
663 pub ignored: Vec<Cow<'a, HgPath>>,
660 pub ignored: Vec<Cow<'a, HgPath>>,
664 pub unknown: Vec<Cow<'a, HgPath>>,
661 pub unknown: Vec<Cow<'a, HgPath>>,
665 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
662 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
666 /// Only filled if `collect_traversed_dirs` is `true`
663 /// Only filled if `collect_traversed_dirs` is `true`
667 pub traversed: Vec<HgPathBuf>,
664 pub traversed: Vec<HgPathBuf>,
668 }
665 }
669
666
670 #[timed]
667 #[timed]
671 fn build_response<'a>(
668 fn build_response<'a>(
672 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
669 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
673 traversed: Vec<HgPathBuf>,
670 traversed: Vec<HgPathBuf>,
674 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
671 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
675 let mut lookup = vec![];
672 let mut lookup = vec![];
676 let mut modified = vec![];
673 let mut modified = vec![];
677 let mut added = vec![];
674 let mut added = vec![];
678 let mut removed = vec![];
675 let mut removed = vec![];
679 let mut deleted = vec![];
676 let mut deleted = vec![];
680 let mut clean = vec![];
677 let mut clean = vec![];
681 let mut ignored = vec![];
678 let mut ignored = vec![];
682 let mut unknown = vec![];
679 let mut unknown = vec![];
683 let mut bad = vec![];
680 let mut bad = vec![];
684
681
685 for (filename, dispatch) in results.into_iter() {
682 for (filename, dispatch) in results.into_iter() {
686 match dispatch {
683 match dispatch {
687 Dispatch::Unknown => unknown.push(filename),
684 Dispatch::Unknown => unknown.push(filename),
688 Dispatch::Unsure => lookup.push(filename),
685 Dispatch::Unsure => lookup.push(filename),
689 Dispatch::Modified => modified.push(filename),
686 Dispatch::Modified => modified.push(filename),
690 Dispatch::Added => added.push(filename),
687 Dispatch::Added => added.push(filename),
691 Dispatch::Removed => removed.push(filename),
688 Dispatch::Removed => removed.push(filename),
692 Dispatch::Deleted => deleted.push(filename),
689 Dispatch::Deleted => deleted.push(filename),
693 Dispatch::Clean => clean.push(filename),
690 Dispatch::Clean => clean.push(filename),
694 Dispatch::Ignored => ignored.push(filename),
691 Dispatch::Ignored => ignored.push(filename),
695 Dispatch::None => {}
692 Dispatch::None => {}
696 Dispatch::Bad(reason) => bad.push((filename, reason)),
693 Dispatch::Bad(reason) => bad.push((filename, reason)),
697 Dispatch::Directory { .. } => {}
694 Dispatch::Directory { .. } => {}
698 }
695 }
699 }
696 }
700
697
701 (
698 (
702 lookup,
699 lookup,
703 DirstateStatus {
700 DirstateStatus {
704 modified,
701 modified,
705 added,
702 added,
706 removed,
703 removed,
707 deleted,
704 deleted,
708 clean,
705 clean,
709 ignored,
706 ignored,
710 unknown,
707 unknown,
711 bad,
708 bad,
712 traversed,
709 traversed,
713 },
710 },
714 )
711 )
715 }
712 }
716
713
717 #[derive(Debug)]
714 #[derive(Debug)]
718 pub enum StatusError {
715 pub enum StatusError {
719 IO(std::io::Error),
716 IO(std::io::Error),
720 Path(HgPathError),
717 Path(HgPathError),
721 Pattern(PatternError),
718 Pattern(PatternError),
722 }
719 }
723
720
724 pub type StatusResult<T> = Result<T, StatusError>;
721 pub type StatusResult<T> = Result<T, StatusError>;
725
722
726 impl From<PatternError> for StatusError {
723 impl From<PatternError> for StatusError {
727 fn from(e: PatternError) -> Self {
724 fn from(e: PatternError) -> Self {
728 StatusError::Pattern(e)
725 StatusError::Pattern(e)
729 }
726 }
730 }
727 }
731 impl From<HgPathError> for StatusError {
728 impl From<HgPathError> for StatusError {
732 fn from(e: HgPathError) -> Self {
729 fn from(e: HgPathError) -> Self {
733 StatusError::Path(e)
730 StatusError::Path(e)
734 }
731 }
735 }
732 }
736 impl From<std::io::Error> for StatusError {
733 impl From<std::io::Error> for StatusError {
737 fn from(e: std::io::Error) -> Self {
734 fn from(e: std::io::Error) -> Self {
738 StatusError::IO(e)
735 StatusError::IO(e)
739 }
736 }
740 }
737 }
741
738
742 impl ToString for StatusError {
739 impl ToString for StatusError {
743 fn to_string(&self) -> String {
740 fn to_string(&self) -> String {
744 match self {
741 match self {
745 StatusError::IO(e) => e.to_string(),
742 StatusError::IO(e) => e.to_string(),
746 StatusError::Path(e) => e.to_string(),
743 StatusError::Path(e) => e.to_string(),
747 StatusError::Pattern(e) => e.to_string(),
744 StatusError::Pattern(e) => e.to_string(),
748 }
745 }
749 }
746 }
750 }
747 }
751
748
752 /// This takes a mutable reference to the results to account for the `extend`
749 /// This takes a mutable reference to the results to account for the `extend`
753 /// in timings
750 /// in timings
754 #[timed]
751 #[timed]
755 fn handle_unknowns<'a>(
752 fn handle_unknowns<'a>(
756 dmap: &'a DirstateMap,
753 dmap: &'a DirstateMap,
757 matcher: &(impl Matcher + Sync),
754 matcher: &(impl Matcher + Sync),
758 root_dir: impl AsRef<Path> + Sync + Send + Copy,
755 root_dir: impl AsRef<Path> + Sync + Send + Copy,
759 options: StatusOptions,
756 options: StatusOptions,
760 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
757 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
761 ) -> IoResult<()> {
758 ) -> IoResult<()> {
762 let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
759 let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
763 && matcher.matches_everything()
760 && matcher.matches_everything()
764 {
761 {
765 dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
762 dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
766 } else {
763 } else {
767 // Only convert to a hashmap if needed.
764 // Only convert to a hashmap if needed.
768 let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
765 let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
769 dmap.iter()
766 dmap.iter()
770 .filter_map(move |(f, e)| {
767 .filter_map(move |(f, e)| {
771 if !old_results.contains_key(f.deref()) && matcher.matches(f) {
768 if !old_results.contains_key(f.deref()) && matcher.matches(f) {
772 Some((f.deref(), e))
769 Some((f.deref(), e))
773 } else {
770 } else {
774 None
771 None
775 }
772 }
776 })
773 })
777 .collect()
774 .collect()
778 };
775 };
779
776
780 // We walked all dirs under the roots that weren't ignored, and
777 // We walked all dirs under the roots that weren't ignored, and
781 // everything that matched was stat'ed and is already in results.
778 // everything that matched was stat'ed and is already in results.
782 // The rest must thus be ignored or under a symlink.
779 // The rest must thus be ignored or under a symlink.
783 let path_auditor = PathAuditor::new(root_dir);
780 let path_auditor = PathAuditor::new(root_dir);
784
781
785 // TODO don't collect. Find a way of replicating the behavior of
782 // TODO don't collect. Find a way of replicating the behavior of
786 // `itertools::process_results`, but for `rayon::ParallelIterator`
783 // `itertools::process_results`, but for `rayon::ParallelIterator`
787 let new_results: IoResult<Vec<_>> = to_visit
784 let new_results: IoResult<Vec<_>> = to_visit
788 .into_par_iter()
785 .into_par_iter()
789 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
786 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
790 // Report ignored items in the dmap as long as they are not
787 // Report ignored items in the dmap as long as they are not
791 // under a symlink directory.
788 // under a symlink directory.
792 if path_auditor.check(filename) {
789 if path_auditor.check(filename) {
793 // TODO normalize for case-insensitive filesystems
790 // TODO normalize for case-insensitive filesystems
794 let buf = match hg_path_to_path_buf(filename) {
791 let buf = match hg_path_to_path_buf(filename) {
795 Ok(x) => x,
792 Ok(x) => x,
796 Err(e) => return Some(Err(e.into())),
793 Err(e) => return Some(Err(e.into())),
797 };
794 };
798 Some(Ok((
795 Some(Ok((
799 Cow::Borrowed(filename),
796 Cow::Borrowed(filename),
800 match root_dir.as_ref().join(&buf).symlink_metadata() {
797 match root_dir.as_ref().join(&buf).symlink_metadata() {
801 // File was just ignored, no links, and exists
798 // File was just ignored, no links, and exists
802 Ok(meta) => {
799 Ok(meta) => {
803 let metadata = HgMetadata::from_metadata(meta);
800 let metadata = HgMetadata::from_metadata(meta);
804 dispatch_found(
801 dispatch_found(
805 filename,
802 filename,
806 *entry,
803 *entry,
807 metadata,
804 metadata,
808 &dmap.copy_map,
805 &dmap.copy_map,
809 options,
806 options,
810 )
807 )
811 }
808 }
812 // File doesn't exist
809 // File doesn't exist
813 Err(_) => dispatch_missing(entry.state),
810 Err(_) => dispatch_missing(entry.state),
814 },
811 },
815 )))
812 )))
816 } else {
813 } else {
817 // It's either missing or under a symlink directory which
814 // It's either missing or under a symlink directory which
818 // we, in this case, report as missing.
815 // we, in this case, report as missing.
819 Some(Ok((
816 Some(Ok((
820 Cow::Borrowed(filename),
817 Cow::Borrowed(filename),
821 dispatch_missing(entry.state),
818 dispatch_missing(entry.state),
822 )))
819 )))
823 }
820 }
824 })
821 })
825 .collect();
822 .collect();
826
823
827 results.par_extend(new_results?);
824 results.par_extend(new_results?);
828
825
829 Ok(())
826 Ok(())
830 }
827 }
831
828
832 /// Get the status of files in the working directory.
829 /// Get the status of files in the working directory.
833 ///
830 ///
834 /// This is the current entry-point for `hg-core` and is realistically unusable
831 /// This is the current entry-point for `hg-core` and is realistically unusable
835 /// outside of a Python context because its arguments need to provide a lot of
832 /// outside of a Python context because its arguments need to provide a lot of
836 /// information that will not be necessary in the future.
833 /// information that will not be necessary in the future.
837 #[timed]
834 #[timed]
838 pub fn status<'a: 'c, 'b: 'c, 'c>(
835 pub fn status<'a: 'c, 'b: 'c, 'c>(
839 dmap: &'a DirstateMap,
836 dmap: &'a DirstateMap,
840 matcher: &'b (impl Matcher + Sync),
837 matcher: &'b (impl Matcher + Sync),
841 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
838 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
842 ignore_files: Vec<PathBuf>,
839 ignore_files: Vec<PathBuf>,
843 options: StatusOptions,
840 options: StatusOptions,
844 ) -> StatusResult<(
841 ) -> StatusResult<(
845 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
842 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
846 Vec<PatternFileWarning>,
843 Vec<PatternFileWarning>,
847 )> {
844 )> {
848 // Needs to outlive `dir_ignore_fn` since it's captured.
845 // Needs to outlive `dir_ignore_fn` since it's captured.
849 let ignore_fn: IgnoreFnType;
846 let ignore_fn: IgnoreFnType;
850
847
851 // Only involve real ignore mechanism if we're listing unknowns or ignored.
848 // Only involve real ignore mechanism if we're listing unknowns or ignored.
852 let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
849 let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
853 || options.list_unknown
850 || options.list_unknown
854 {
851 {
855 let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
852 let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
856
853
857 ignore_fn = ignore;
854 ignore_fn = ignore;
858 let dir_ignore_fn = Box::new(|dir: &_| {
855 let dir_ignore_fn = Box::new(|dir: &_| {
859 // Is the path or one of its ancestors ignored?
856 // Is the path or one of its ancestors ignored?
860 if ignore_fn(dir) {
857 if ignore_fn(dir) {
861 true
858 true
862 } else {
859 } else {
863 for p in find_dirs(dir) {
860 for p in find_dirs(dir) {
864 if ignore_fn(p) {
861 if ignore_fn(p) {
865 return true;
862 return true;
866 }
863 }
867 }
864 }
868 false
865 false
869 }
866 }
870 });
867 });
871 (dir_ignore_fn, warnings)
868 (dir_ignore_fn, warnings)
872 } else {
869 } else {
873 ignore_fn = Box::new(|&_| true);
870 ignore_fn = Box::new(|&_| true);
874 (Box::new(|&_| true), vec![])
871 (Box::new(|&_| true), vec![])
875 };
872 };
876
873
877 let files = matcher.file_set();
874 let files = matcher.file_set();
878
875
879 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
876 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
880 let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
877 let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
881
878
882 // Step 1: check the files explicitly mentioned by the user
879 // Step 1: check the files explicitly mentioned by the user
883 let explicit = walk_explicit(
880 let explicit = walk_explicit(
884 files,
881 files,
885 &dmap,
882 &dmap,
886 root_dir,
883 root_dir,
887 options,
884 options,
888 traversed_sender.clone(),
885 traversed_sender.clone(),
889 );
886 );
890
887
891 // Collect results into a `Vec` because we do very few lookups in most
888 // Collect results into a `Vec` because we do very few lookups in most
892 // cases.
889 // cases.
893 let (work, mut results): (Vec<_>, Vec<_>) = explicit
890 let (work, mut results): (Vec<_>, Vec<_>) = explicit
894 .filter_map(Result::ok)
891 .filter_map(Result::ok)
895 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
892 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
896 .partition(|(_, dispatch)| match dispatch {
893 .partition(|(_, dispatch)| match dispatch {
897 Dispatch::Directory { .. } => true,
894 Dispatch::Directory { .. } => true,
898 _ => false,
895 _ => false,
899 });
896 });
900
897
901 if !work.is_empty() {
898 if !work.is_empty() {
902 // Hashmaps are quite a bit slower to build than vecs, so only build it
899 // Hashmaps are quite a bit slower to build than vecs, so only build it
903 // if needed.
900 // if needed.
904 let old_results = results.iter().cloned().collect();
901 let old_results = results.iter().cloned().collect();
905
902
906 // Step 2: recursively check the working directory for changes if
903 // Step 2: recursively check the working directory for changes if
907 // needed
904 // needed
908 for (dir, dispatch) in work {
905 for (dir, dispatch) in work {
909 match dispatch {
906 match dispatch {
910 Dispatch::Directory { was_file } => {
907 Dispatch::Directory { was_file } => {
911 if was_file {
908 if was_file {
912 results.push((dir.to_owned(), Dispatch::Removed));
909 results.push((dir.to_owned(), Dispatch::Removed));
913 }
910 }
914 if options.list_ignored
911 if options.list_ignored
915 || options.list_unknown && !dir_ignore_fn(&dir)
912 || options.list_unknown && !dir_ignore_fn(&dir)
916 {
913 {
917 traverse(
914 traverse(
918 matcher,
915 matcher,
919 root_dir,
916 root_dir,
920 &dmap,
917 &dmap,
921 &dir,
918 &dir,
922 &old_results,
919 &old_results,
923 &ignore_fn,
920 &ignore_fn,
924 &dir_ignore_fn,
921 &dir_ignore_fn,
925 options,
922 options,
926 &mut results,
923 &mut results,
927 traversed_sender.clone(),
924 traversed_sender.clone(),
928 )?;
925 )?;
929 }
926 }
930 }
927 }
931 _ => unreachable!("There can only be directories in `work`"),
928 _ => unreachable!("There can only be directories in `work`"),
932 }
929 }
933 }
930 }
934 }
931 }
935
932
936 if !matcher.is_exact() {
933 if !matcher.is_exact() {
937 // Step 3: Check the remaining files from the dmap.
934 // Step 3: Check the remaining files from the dmap.
938 // If a dmap file is not in results yet, it was either
935 // If a dmap file is not in results yet, it was either
939 // a) not matched b) ignored, c) missing, or d) under a
936 // a) not matched b) ignored, c) missing, or d) under a
940 // symlink directory.
937 // symlink directory.
941
938
942 if options.list_unknown {
939 if options.list_unknown {
943 handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
940 handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
944 } else {
941 } else {
945 // We may not have walked the full directory tree above, so stat
942 // We may not have walked the full directory tree above, so stat
946 // and check everything we missed.
943 // and check everything we missed.
947 extend_from_dmap(&dmap, root_dir, options, &mut results);
944 extend_from_dmap(&dmap, root_dir, options, &mut results);
948 }
945 }
949 }
946 }
950
947
951 // Close the channel
948 // Close the channel
952 drop(traversed_sender);
949 drop(traversed_sender);
953 let traversed_dirs = traversed_recv.into_iter().collect();
950 let traversed_dirs = traversed_recv.into_iter().collect();
954
951
955 Ok((build_response(results, traversed_dirs), warnings))
952 Ok((build_response(results, traversed_dirs), warnings))
956 }
953 }
@@ -1,695 +1,695
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 use rand::seq::SliceRandom;
15 use rand::seq::SliceRandom;
16 use rand::{thread_rng, RngCore, SeedableRng};
16 use rand::{thread_rng, RngCore, SeedableRng};
17 use std::cmp::{max, min};
17 use std::cmp::{max, min};
18 use std::collections::{HashSet, VecDeque};
18 use std::collections::{HashSet, VecDeque};
19
19
20 type Rng = rand_pcg::Pcg32;
20 type Rng = rand_pcg::Pcg32;
21 type Seed = [u8; 16];
21 type Seed = [u8; 16];
22
22
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
24 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
25 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
26 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
27 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
29 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
30 rng: Rng,
31 respect_size: bool,
31 respect_size: bool,
32 randomize: bool,
32 randomize: bool,
33 }
33 }
34
34
35 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
36 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
37 }
37 }
38
38
39 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
40 ///
40 ///
41 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
42 /// element of `heads`.
42 /// element of `heads`.
43 ///
43 ///
44 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 /// reached.
46 /// reached.
47 ///
47 ///
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 /// represented by `parentfn`
49 /// represented by `parentfn`
50 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
51 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
53 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
54 fn update_sample<I>(
54 fn update_sample<I>(
55 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
56 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
57 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
60 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
61 where
61 where
62 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
63 {
63 {
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
67 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
68 while let Some(current) = visit.pop_front() {
68 while let Some(current) = visit.pop_front() {
69 if !seen.insert(current) {
69 if !seen.insert(current) {
70 continue;
70 continue;
71 }
71 }
72
72
73 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
74 if d > factor {
74 if d > factor {
75 factor *= 2;
75 factor *= 2;
76 }
76 }
77 if d == factor {
77 if d == factor {
78 sample.insert(current);
78 sample.insert(current);
79 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
80 if sample.len() >= sz {
80 if sample.len() >= sz {
81 return Ok(());
81 return Ok(());
82 }
82 }
83 }
83 }
84 }
84 }
85 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
86 if let Some(revs) = revs {
86 if let Some(revs) = revs {
87 if !revs.contains(&p) {
87 if !revs.contains(&p) {
88 continue;
88 continue;
89 }
89 }
90 }
90 }
91 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
92 visit.push_back(p);
92 visit.push_back(p);
93 }
93 }
94 }
94 }
95 Ok(())
95 Ok(())
96 }
96 }
97
97
98 struct ParentsIterator {
98 struct ParentsIterator {
99 parents: [Revision; 2],
99 parents: [Revision; 2],
100 cur: usize,
100 cur: usize,
101 }
101 }
102
102
103 impl ParentsIterator {
103 impl ParentsIterator {
104 fn graph_parents(
104 fn graph_parents(
105 graph: &impl Graph,
105 graph: &impl Graph,
106 r: Revision,
106 r: Revision,
107 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
108 Ok(ParentsIterator {
108 Ok(ParentsIterator {
109 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
110 cur: 0,
110 cur: 0,
111 })
111 })
112 }
112 }
113 }
113 }
114
114
115 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
116 type Item = Revision;
116 type Item = Revision;
117
117
118 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
119 if self.cur > 1 {
119 if self.cur > 1 {
120 return None;
120 return None;
121 }
121 }
122 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
123 self.cur += 1;
123 self.cur += 1;
124 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
125 return self.next();
125 return self.next();
126 }
126 }
127 Some(rev)
127 Some(rev)
128 }
128 }
129 }
129 }
130
130
131 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
133 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
134 /// repo.
134 /// repo.
135 ///
135 ///
136 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
137 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
138 ///
138 ///
139 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
142 ///
142 ///
143 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
144 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
145 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
146 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
147 ///
147 ///
148 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
149 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
150 ///
150 ///
151 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
152 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
153 ///
153 ///
154 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
155 /// useful for integration tests.
155 /// useful for integration tests.
156 pub fn new(
156 pub fn new(
157 graph: G,
157 graph: G,
158 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
159 respect_size: bool,
159 respect_size: bool,
160 randomize: bool,
160 randomize: bool,
161 ) -> Self {
161 ) -> Self {
162 let mut seed = [0; 16];
162 let mut seed = [0; 16];
163 if randomize {
163 if randomize {
164 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
165 }
165 }
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 }
167 }
168
168
169 pub fn new_with_seed(
169 pub fn new_with_seed(
170 graph: G,
170 graph: G,
171 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
172 seed: Seed,
172 seed: Seed,
173 respect_size: bool,
173 respect_size: bool,
174 randomize: bool,
174 randomize: bool,
175 ) -> Self {
175 ) -> Self {
176 PartialDiscovery {
176 PartialDiscovery {
177 undecided: None,
177 undecided: None,
178 children_cache: None,
178 children_cache: None,
179 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
180 graph: graph.clone(),
180 graph: graph.clone(),
181 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
182 missing: HashSet::new(),
182 missing: HashSet::new(),
183 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
184 respect_size: respect_size,
184 respect_size,
185 randomize: randomize,
185 randomize,
186 }
186 }
187 }
187 }
188
188
189 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
190 /// as a vector
190 /// as a vector
191 fn limit_sample(
191 fn limit_sample(
192 &mut self,
192 &mut self,
193 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
194 size: usize,
194 size: usize,
195 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
196 if !self.randomize {
196 if !self.randomize {
197 sample.sort();
197 sample.sort();
198 sample.truncate(size);
198 sample.truncate(size);
199 return sample;
199 return sample;
200 }
200 }
201 let sample_len = sample.len();
201 let sample_len = sample.len();
202 if sample_len <= size {
202 if sample_len <= size {
203 return sample;
203 return sample;
204 }
204 }
205 let rng = &mut self.rng;
205 let rng = &mut self.rng;
206 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
207 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
208 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
209 } else {
209 } else {
210 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
211 };
211 };
212 limited_slice.to_owned()
212 limited_slice.to_owned()
213 }
213 }
214
214
215 /// Register revisions known as being common
215 /// Register revisions known as being common
216 pub fn add_common_revisions(
216 pub fn add_common_revisions(
217 &mut self,
217 &mut self,
218 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
219 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
220 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
221 self.common.add_bases(common);
221 self.common.add_bases(common);
222 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
223 return Ok(());
223 return Ok(());
224 }
224 }
225 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
226 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
227 }
227 }
228 Ok(())
228 Ok(())
229 }
229 }
230
230
231 /// Register revisions known as being missing
231 /// Register revisions known as being missing
232 ///
232 ///
233 /// # Performance note
233 /// # Performance note
234 ///
234 ///
235 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
236 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
237 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
238 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
239 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
240 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
241 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
242 &mut self,
242 &mut self,
243 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
244 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 if tovisit.is_empty() {
246 if tovisit.is_empty() {
247 return Ok(());
247 return Ok(());
248 }
248 }
249 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
250 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
251 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
252 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
253 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
254 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
255 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
256 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
257 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
258 // children (we now they are all missing)
258 // children (we now they are all missing)
259 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
260 // and its children have already been queued
260 // and its children have already been queued
261 continue;
261 continue;
262 }
262 }
263 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
264 match children.get(&rev) {
264 match children.get(&rev) {
265 None => {
265 None => {
266 continue;
266 continue;
267 }
267 }
268 Some(this_children) => {
268 Some(this_children) => {
269 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
270 if seen.insert(child) {
270 if seen.insert(child) {
271 tovisit.push_back(child);
271 tovisit.push_back(child);
272 }
272 }
273 }
273 }
274 }
274 }
275 }
275 }
276 }
276 }
277 Ok(())
277 Ok(())
278 }
278 }
279
279
280 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
281 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
282 self.common.has_bases()
282 self.common.has_bases()
283 }
283 }
284
284
285 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
286 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 }
288 }
289
289
290 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
291 ///
291 ///
292 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
293 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
294 ///
294 ///
295 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
296 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
297 ///
297 ///
298 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
299 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
300 /// if it is complete.
300 /// if it is complete.
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 self.common.bases_heads()
302 self.common.bases_heads()
303 }
303 }
304
304
305 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
306 ///
306 ///
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
309 /// any panic.
309 /// any panic.
310 ///
310 ///
311 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
312 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 if self.undecided.is_some() {
316 if self.undecided.is_some() {
317 return Ok(());
317 return Ok(());
318 }
318 }
319 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
320 self.undecided =
320 self.undecided =
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 Ok(())
322 Ok(())
323 }
323 }
324
324
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
327 return Ok(());
327 return Ok(());
328 }
328 }
329 self.ensure_undecided()?;
329 self.ensure_undecided()?;
330
330
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 FastHashMap::default();
332 FastHashMap::default();
333 for &rev in self.undecided.as_ref().unwrap() {
333 for &rev in self.undecided.as_ref().unwrap() {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 children.entry(p).or_insert_with(Vec::new).push(rev);
336 }
336 }
337 }
337 }
338 self.children_cache = Some(children);
338 self.children_cache = Some(children);
339 Ok(())
339 Ok(())
340 }
340 }
341
341
342 /// Provide statistics about the current state of the discovery process
342 /// Provide statistics about the current state of the discovery process
343 pub fn stats(&self) -> DiscoveryStats {
343 pub fn stats(&self) -> DiscoveryStats {
344 DiscoveryStats {
344 DiscoveryStats {
345 undecided: self.undecided.as_ref().map(|s| s.len()),
345 undecided: self.undecided.as_ref().map(HashSet::len),
346 }
346 }
347 }
347 }
348
348
349 pub fn take_quick_sample(
349 pub fn take_quick_sample(
350 &mut self,
350 &mut self,
351 headrevs: impl IntoIterator<Item = Revision>,
351 headrevs: impl IntoIterator<Item = Revision>,
352 size: usize,
352 size: usize,
353 ) -> Result<Vec<Revision>, GraphError> {
353 ) -> Result<Vec<Revision>, GraphError> {
354 self.ensure_undecided()?;
354 self.ensure_undecided()?;
355 let mut sample = {
355 let mut sample = {
356 let undecided = self.undecided.as_ref().unwrap();
356 let undecided = self.undecided.as_ref().unwrap();
357 if undecided.len() <= size {
357 if undecided.len() <= size {
358 return Ok(undecided.iter().cloned().collect());
358 return Ok(undecided.iter().cloned().collect());
359 }
359 }
360 dagops::heads(&self.graph, undecided.iter())?
360 dagops::heads(&self.graph, undecided.iter())?
361 };
361 };
362 if sample.len() >= size {
362 if sample.len() >= size {
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
364 }
364 }
365 update_sample(
365 update_sample(
366 None,
366 None,
367 headrevs,
367 headrevs,
368 &mut sample,
368 &mut sample,
369 |r| ParentsIterator::graph_parents(&self.graph, r),
369 |r| ParentsIterator::graph_parents(&self.graph, r),
370 Some(size),
370 Some(size),
371 )?;
371 )?;
372 Ok(sample.into_iter().collect())
372 Ok(sample.into_iter().collect())
373 }
373 }
374
374
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
376 ///
376 ///
377 /// The `size` parameter is used to avoid useless computations if
377 /// The `size` parameter is used to avoid useless computations if
378 /// it turns out to be bigger than the whole set of undecided Revisions.
378 /// it turns out to be bigger than the whole set of undecided Revisions.
379 ///
379 ///
380 /// The sample is taken by using `update_sample` from the heads, then
380 /// The sample is taken by using `update_sample` from the heads, then
381 /// from the roots, working on the reverse DAG,
381 /// from the roots, working on the reverse DAG,
382 /// expressed by `self.children_cache`.
382 /// expressed by `self.children_cache`.
383 ///
383 ///
384 /// No effort is being made to complete or limit the sample to `size`
384 /// No effort is being made to complete or limit the sample to `size`
385 /// but this method returns another interesting size that it derives
385 /// but this method returns another interesting size that it derives
386 /// from its knowledge of the structure of the various sets, leaving
386 /// from its knowledge of the structure of the various sets, leaving
387 /// to the caller the decision to use it or not.
387 /// to the caller the decision to use it or not.
388 fn bidirectional_sample(
388 fn bidirectional_sample(
389 &mut self,
389 &mut self,
390 size: usize,
390 size: usize,
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
392 self.ensure_undecided()?;
392 self.ensure_undecided()?;
393 {
393 {
394 // we don't want to compute children_cache before this
394 // we don't want to compute children_cache before this
395 // but doing it after extracting self.undecided takes a mutable
395 // but doing it after extracting self.undecided takes a mutable
396 // ref to self while a shareable one is still active.
396 // ref to self while a shareable one is still active.
397 let undecided = self.undecided.as_ref().unwrap();
397 let undecided = self.undecided.as_ref().unwrap();
398 if undecided.len() <= size {
398 if undecided.len() <= size {
399 return Ok((undecided.clone(), size));
399 return Ok((undecided.clone(), size));
400 }
400 }
401 }
401 }
402
402
403 self.ensure_children_cache()?;
403 self.ensure_children_cache()?;
404 let revs = self.undecided.as_ref().unwrap();
404 let revs = self.undecided.as_ref().unwrap();
405 let mut sample: HashSet<Revision> = revs.clone();
405 let mut sample: HashSet<Revision> = revs.clone();
406
406
407 // it's possible that leveraging the children cache would be more
407 // it's possible that leveraging the children cache would be more
408 // efficient here
408 // efficient here
409 dagops::retain_heads(&self.graph, &mut sample)?;
409 dagops::retain_heads(&self.graph, &mut sample)?;
410 let revsheads = sample.clone(); // was again heads(revs) in python
410 let revsheads = sample.clone(); // was again heads(revs) in python
411
411
412 // update from heads
412 // update from heads
413 update_sample(
413 update_sample(
414 Some(revs),
414 Some(revs),
415 revsheads.iter().cloned(),
415 revsheads.iter().cloned(),
416 &mut sample,
416 &mut sample,
417 |r| ParentsIterator::graph_parents(&self.graph, r),
417 |r| ParentsIterator::graph_parents(&self.graph, r),
418 None,
418 None,
419 )?;
419 )?;
420
420
421 // update from roots
421 // update from roots
422 let revroots: HashSet<Revision> =
422 let revroots: HashSet<Revision> =
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
425
425
426 let children = self.children_cache.as_ref().unwrap();
426 let children = self.children_cache.as_ref().unwrap();
427 let empty_vec: Vec<Revision> = Vec::new();
427 let empty_vec: Vec<Revision> = Vec::new();
428 update_sample(
428 update_sample(
429 Some(revs),
429 Some(revs),
430 revroots,
430 revroots,
431 &mut sample,
431 &mut sample,
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
433 None,
433 None,
434 )?;
434 )?;
435 Ok((sample, prescribed_size))
435 Ok((sample, prescribed_size))
436 }
436 }
437
437
438 /// Fill up sample up to the wished size with random undecided Revisions.
438 /// Fill up sample up to the wished size with random undecided Revisions.
439 ///
439 ///
440 /// This is intended to be used as a last resort completion if the
440 /// This is intended to be used as a last resort completion if the
441 /// regular sampling algorithm returns too few elements.
441 /// regular sampling algorithm returns too few elements.
442 fn random_complete_sample(
442 fn random_complete_sample(
443 &mut self,
443 &mut self,
444 sample: &mut Vec<Revision>,
444 sample: &mut Vec<Revision>,
445 size: usize,
445 size: usize,
446 ) {
446 ) {
447 let sample_len = sample.len();
447 let sample_len = sample.len();
448 if size <= sample_len {
448 if size <= sample_len {
449 return;
449 return;
450 }
450 }
451 let take_from: Vec<Revision> = self
451 let take_from: Vec<Revision> = self
452 .undecided
452 .undecided
453 .as_ref()
453 .as_ref()
454 .unwrap()
454 .unwrap()
455 .iter()
455 .iter()
456 .filter(|&r| !sample.contains(r))
456 .filter(|&r| !sample.contains(r))
457 .cloned()
457 .cloned()
458 .collect();
458 .collect();
459 sample.extend(self.limit_sample(take_from, size - sample_len));
459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 }
460 }
461
461
462 pub fn take_full_sample(
462 pub fn take_full_sample(
463 &mut self,
463 &mut self,
464 size: usize,
464 size: usize,
465 ) -> Result<Vec<Revision>, GraphError> {
465 ) -> Result<Vec<Revision>, GraphError> {
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 let size = if self.respect_size {
467 let size = if self.respect_size {
468 size
468 size
469 } else {
469 } else {
470 prescribed_size
470 prescribed_size
471 };
471 };
472 let mut sample =
472 let mut sample =
473 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.limit_sample(sample_set.into_iter().collect(), size);
474 self.random_complete_sample(&mut sample, size);
474 self.random_complete_sample(&mut sample, size);
475 Ok(sample)
475 Ok(sample)
476 }
476 }
477 }
477 }
478
478
479 #[cfg(test)]
479 #[cfg(test)]
480 mod tests {
480 mod tests {
481 use super::*;
481 use super::*;
482 use crate::testing::SampleGraph;
482 use crate::testing::SampleGraph;
483
483
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
485 ///
485 ///
486 /// To avoid actual randomness in these tests, we give it a fixed
486 /// To avoid actual randomness in these tests, we give it a fixed
487 /// random seed, but by default we'll test the random version.
487 /// random seed, but by default we'll test the random version.
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
489 PartialDiscovery::new_with_seed(
489 PartialDiscovery::new_with_seed(
490 SampleGraph,
490 SampleGraph,
491 vec![10, 11, 12, 13],
491 vec![10, 11, 12, 13],
492 [0; 16],
492 [0; 16],
493 true,
493 true,
494 true,
494 true,
495 )
495 )
496 }
496 }
497
497
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
499 ///
499 ///
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
501 fn disco12() -> PartialDiscovery<SampleGraph> {
501 fn disco12() -> PartialDiscovery<SampleGraph> {
502 PartialDiscovery::new_with_seed(
502 PartialDiscovery::new_with_seed(
503 SampleGraph,
503 SampleGraph,
504 vec![12],
504 vec![12],
505 [0; 16],
505 [0; 16],
506 true,
506 true,
507 true,
507 true,
508 )
508 )
509 }
509 }
510
510
511 fn sorted_undecided(
511 fn sorted_undecided(
512 disco: &PartialDiscovery<SampleGraph>,
512 disco: &PartialDiscovery<SampleGraph>,
513 ) -> Vec<Revision> {
513 ) -> Vec<Revision> {
514 let mut as_vec: Vec<Revision> =
514 let mut as_vec: Vec<Revision> =
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
516 as_vec.sort();
516 as_vec.sort();
517 as_vec
517 as_vec
518 }
518 }
519
519
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
521 let mut as_vec: Vec<Revision> =
521 let mut as_vec: Vec<Revision> =
522 disco.missing.iter().cloned().collect();
522 disco.missing.iter().cloned().collect();
523 as_vec.sort();
523 as_vec.sort();
524 as_vec
524 as_vec
525 }
525 }
526
526
527 fn sorted_common_heads(
527 fn sorted_common_heads(
528 disco: &PartialDiscovery<SampleGraph>,
528 disco: &PartialDiscovery<SampleGraph>,
529 ) -> Result<Vec<Revision>, GraphError> {
529 ) -> Result<Vec<Revision>, GraphError> {
530 let mut as_vec: Vec<Revision> =
530 let mut as_vec: Vec<Revision> =
531 disco.common_heads()?.iter().cloned().collect();
531 disco.common_heads()?.iter().cloned().collect();
532 as_vec.sort();
532 as_vec.sort();
533 Ok(as_vec)
533 Ok(as_vec)
534 }
534 }
535
535
536 #[test]
536 #[test]
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
538 let mut disco = full_disco();
538 let mut disco = full_disco();
539 assert_eq!(disco.undecided, None);
539 assert_eq!(disco.undecided, None);
540 assert!(!disco.has_info());
540 assert!(!disco.has_info());
541 assert_eq!(disco.stats().undecided, None);
541 assert_eq!(disco.stats().undecided, None);
542
542
543 disco.add_common_revisions(vec![11, 12])?;
543 disco.add_common_revisions(vec![11, 12])?;
544 assert!(disco.has_info());
544 assert!(disco.has_info());
545 assert!(!disco.is_complete());
545 assert!(!disco.is_complete());
546 assert!(disco.missing.is_empty());
546 assert!(disco.missing.is_empty());
547
547
548 // add_common_revisions did not trigger a premature computation
548 // add_common_revisions did not trigger a premature computation
549 // of `undecided`, let's check that and ask for them
549 // of `undecided`, let's check that and ask for them
550 assert_eq!(disco.undecided, None);
550 assert_eq!(disco.undecided, None);
551 disco.ensure_undecided()?;
551 disco.ensure_undecided()?;
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
553 assert_eq!(disco.stats().undecided, Some(4));
553 assert_eq!(disco.stats().undecided, Some(4));
554 Ok(())
554 Ok(())
555 }
555 }
556
556
557 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// in this test, we pretend that our peer misses exactly (8+10)::
558 /// and we're comparing all our repo to it (as in a bare push)
558 /// and we're comparing all our repo to it (as in a bare push)
559 #[test]
559 #[test]
560 fn test_discovery() -> Result<(), GraphError> {
560 fn test_discovery() -> Result<(), GraphError> {
561 let mut disco = full_disco();
561 let mut disco = full_disco();
562 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_common_revisions(vec![11, 12])?;
563 disco.add_missing_revisions(vec![8, 10])?;
563 disco.add_missing_revisions(vec![8, 10])?;
564 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_undecided(&disco), vec![5]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
566 assert!(!disco.is_complete());
566 assert!(!disco.is_complete());
567
567
568 disco.add_common_revisions(vec![5])?;
568 disco.add_common_revisions(vec![5])?;
569 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_undecided(&disco), vec![]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
571 assert!(disco.is_complete());
571 assert!(disco.is_complete());
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
573 Ok(())
573 Ok(())
574 }
574 }
575
575
576 #[test]
576 #[test]
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
578 eprintln!("test_add_missing_early_stop");
578 eprintln!("test_add_missing_early_stop");
579 let mut disco = full_disco();
579 let mut disco = full_disco();
580 disco.add_common_revisions(vec![13, 3, 4])?;
580 disco.add_common_revisions(vec![13, 3, 4])?;
581 disco.ensure_children_cache()?;
581 disco.ensure_children_cache()?;
582 // 12 is grand-child of 6 through 9
582 // 12 is grand-child of 6 through 9
583 // passing them in this order maximizes the chances of the
583 // passing them in this order maximizes the chances of the
584 // early continue to do the wrong thing
584 // early continue to do the wrong thing
585 disco.add_missing_revisions(vec![6, 9, 12])?;
585 disco.add_missing_revisions(vec![6, 9, 12])?;
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
588 assert!(!disco.is_complete());
588 assert!(!disco.is_complete());
589 Ok(())
589 Ok(())
590 }
590 }
591
591
592 #[test]
592 #[test]
593 fn test_limit_sample_no_need_to() {
593 fn test_limit_sample_no_need_to() {
594 let sample = vec![1, 2, 3, 4];
594 let sample = vec![1, 2, 3, 4];
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
596 }
596 }
597
597
598 #[test]
598 #[test]
599 fn test_limit_sample_less_than_half() {
599 fn test_limit_sample_less_than_half() {
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
601 }
601 }
602
602
603 #[test]
603 #[test]
604 fn test_limit_sample_more_than_half() {
604 fn test_limit_sample_more_than_half() {
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
606 }
606 }
607
607
608 #[test]
608 #[test]
609 fn test_limit_sample_no_random() {
609 fn test_limit_sample_no_random() {
610 let mut disco = full_disco();
610 let mut disco = full_disco();
611 disco.randomize = false;
611 disco.randomize = false;
612 assert_eq!(
612 assert_eq!(
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
614 vec![1, 3, 5, 7]
614 vec![1, 3, 5, 7]
615 );
615 );
616 }
616 }
617
617
618 #[test]
618 #[test]
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
620 let mut disco = full_disco();
620 let mut disco = full_disco();
621 disco.undecided = Some((1..=13).collect());
621 disco.undecided = Some((1..=13).collect());
622
622
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
624 sample_vec.sort();
624 sample_vec.sort();
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
626 Ok(())
626 Ok(())
627 }
627 }
628
628
629 #[test]
629 #[test]
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
631 let mut disco = disco12();
631 let mut disco = disco12();
632 disco.ensure_undecided()?;
632 disco.ensure_undecided()?;
633
633
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
635 sample_vec.sort();
635 sample_vec.sort();
636 // r12's only parent is r9, whose unique grand-parent through the
636 // r12's only parent is r9, whose unique grand-parent through the
637 // diamond shape is r4. This ends there because the distance from r4
637 // diamond shape is r4. This ends there because the distance from r4
638 // to the root is only 3.
638 // to the root is only 3.
639 assert_eq!(sample_vec, vec![4, 9, 12]);
639 assert_eq!(sample_vec, vec![4, 9, 12]);
640 Ok(())
640 Ok(())
641 }
641 }
642
642
643 #[test]
643 #[test]
644 fn test_children_cache() -> Result<(), GraphError> {
644 fn test_children_cache() -> Result<(), GraphError> {
645 let mut disco = full_disco();
645 let mut disco = full_disco();
646 disco.ensure_children_cache()?;
646 disco.ensure_children_cache()?;
647
647
648 let cache = disco.children_cache.unwrap();
648 let cache = disco.children_cache.unwrap();
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
650 assert_eq!(cache.get(&10).cloned(), None);
650 assert_eq!(cache.get(&10).cloned(), None);
651
651
652 let mut children_4 = cache.get(&4).cloned().unwrap();
652 let mut children_4 = cache.get(&4).cloned().unwrap();
653 children_4.sort();
653 children_4.sort();
654 assert_eq!(children_4, vec![5, 6, 7]);
654 assert_eq!(children_4, vec![5, 6, 7]);
655
655
656 let mut children_7 = cache.get(&7).cloned().unwrap();
656 let mut children_7 = cache.get(&7).cloned().unwrap();
657 children_7.sort();
657 children_7.sort();
658 assert_eq!(children_7, vec![9, 11]);
658 assert_eq!(children_7, vec![9, 11]);
659
659
660 Ok(())
660 Ok(())
661 }
661 }
662
662
663 #[test]
663 #[test]
664 fn test_complete_sample() {
664 fn test_complete_sample() {
665 let mut disco = full_disco();
665 let mut disco = full_disco();
666 let undecided: HashSet<Revision> =
666 let undecided: HashSet<Revision> =
667 [4, 7, 9, 2, 3].iter().cloned().collect();
667 [4, 7, 9, 2, 3].iter().cloned().collect();
668 disco.undecided = Some(undecided);
668 disco.undecided = Some(undecided);
669
669
670 let mut sample = vec![0];
670 let mut sample = vec![0];
671 disco.random_complete_sample(&mut sample, 3);
671 disco.random_complete_sample(&mut sample, 3);
672 assert_eq!(sample.len(), 3);
672 assert_eq!(sample.len(), 3);
673
673
674 let mut sample = vec![2, 4, 7];
674 let mut sample = vec![2, 4, 7];
675 disco.random_complete_sample(&mut sample, 1);
675 disco.random_complete_sample(&mut sample, 1);
676 assert_eq!(sample.len(), 3);
676 assert_eq!(sample.len(), 3);
677 }
677 }
678
678
679 #[test]
679 #[test]
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
681 let mut disco = full_disco();
681 let mut disco = full_disco();
682 disco.undecided = Some((0..=13).into_iter().collect());
682 disco.undecided = Some((0..=13).into_iter().collect());
683
683
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
685 assert_eq!(size, 7);
685 assert_eq!(size, 7);
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
687 sample.sort();
687 sample.sort();
688 // our DAG is a bit too small for the results to be really interesting
688 // our DAG is a bit too small for the results to be really interesting
689 // at least it shows that
689 // at least it shows that
690 // - we went both ways
690 // - we went both ways
691 // - we didn't take all Revisions (6 is not in the sample)
691 // - we didn't take all Revisions (6 is not in the sample)
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
693 Ok(())
693 Ok(())
694 }
694 }
695 }
695 }
@@ -1,669 +1,670
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::fs::File;
20 use std::fs::File;
21 use std::io::Read;
21 use std::io::Read;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24 use std::vec::Vec;
24 use std::vec::Vec;
25
25
26 lazy_static! {
26 lazy_static! {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 for byte in to_escape {
30 for byte in to_escape {
31 v[*byte as usize].insert(0, b'\\');
31 v[*byte as usize].insert(0, b'\\');
32 }
32 }
33 v
33 v
34 };
34 };
35 }
35 }
36
36
37 /// These are matched in order
37 /// These are matched in order
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40
40
41 /// Appended to the regexp of globs
41 /// Appended to the regexp of globs
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43
43
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
45 pub enum PatternSyntax {
45 pub enum PatternSyntax {
46 /// A regular expression
46 /// A regular expression
47 Regexp,
47 Regexp,
48 /// Glob that matches at the front of the path
48 /// Glob that matches at the front of the path
49 RootGlob,
49 RootGlob,
50 /// Glob that matches at any suffix of the path (still anchored at
50 /// Glob that matches at any suffix of the path (still anchored at
51 /// slashes)
51 /// slashes)
52 Glob,
52 Glob,
53 /// a path relative to repository root, which is matched recursively
53 /// a path relative to repository root, which is matched recursively
54 Path,
54 Path,
55 /// A path relative to cwd
55 /// A path relative to cwd
56 RelPath,
56 RelPath,
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
58 RelGlob,
58 RelGlob,
59 /// A regexp that needn't match the start of a name
59 /// A regexp that needn't match the start of a name
60 RelRegexp,
60 RelRegexp,
61 /// A path relative to repository root, which is matched non-recursively
61 /// A path relative to repository root, which is matched non-recursively
62 /// (will not match subdirectories)
62 /// (will not match subdirectories)
63 RootFiles,
63 RootFiles,
64 /// A file of patterns to read and include
64 /// A file of patterns to read and include
65 Include,
65 Include,
66 /// A file of patterns to match against files under the same directory
66 /// A file of patterns to match against files under the same directory
67 SubInclude,
67 SubInclude,
68 }
68 }
69
69
70 /// Transforms a glob pattern into a regex
70 /// Transforms a glob pattern into a regex
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 let mut input = pat;
72 let mut input = pat;
73 let mut res: Vec<u8> = vec![];
73 let mut res: Vec<u8> = vec![];
74 let mut group_depth = 0;
74 let mut group_depth = 0;
75
75
76 while let Some((c, rest)) = input.split_first() {
76 while let Some((c, rest)) = input.split_first() {
77 input = rest;
77 input = rest;
78
78
79 match c {
79 match c {
80 b'*' => {
80 b'*' => {
81 for (source, repl) in GLOB_REPLACEMENTS {
81 for (source, repl) in GLOB_REPLACEMENTS {
82 if let Some(rest) = input.drop_prefix(source) {
82 if let Some(rest) = input.drop_prefix(source) {
83 input = rest;
83 input = rest;
84 res.extend(*repl);
84 res.extend(*repl);
85 break;
85 break;
86 }
86 }
87 }
87 }
88 }
88 }
89 b'?' => res.extend(b"."),
89 b'?' => res.extend(b"."),
90 b'[' => {
90 b'[' => {
91 match input.iter().skip(1).position(|b| *b == b']') {
91 match input.iter().skip(1).position(|b| *b == b']') {
92 None => res.extend(b"\\["),
92 None => res.extend(b"\\["),
93 Some(end) => {
93 Some(end) => {
94 // Account for the one we skipped
94 // Account for the one we skipped
95 let end = end + 1;
95 let end = end + 1;
96
96
97 res.extend(b"[");
97 res.extend(b"[");
98
98
99 for (i, b) in input[..end].iter().enumerate() {
99 for (i, b) in input[..end].iter().enumerate() {
100 if *b == b'!' && i == 0 {
100 if *b == b'!' && i == 0 {
101 res.extend(b"^")
101 res.extend(b"^")
102 } else if *b == b'^' && i == 0 {
102 } else if *b == b'^' && i == 0 {
103 res.extend(b"\\^")
103 res.extend(b"\\^")
104 } else if *b == b'\\' {
104 } else if *b == b'\\' {
105 res.extend(b"\\\\")
105 res.extend(b"\\\\")
106 } else {
106 } else {
107 res.push(*b)
107 res.push(*b)
108 }
108 }
109 }
109 }
110 res.extend(b"]");
110 res.extend(b"]");
111 input = &input[end + 1..];
111 input = &input[end + 1..];
112 }
112 }
113 }
113 }
114 }
114 }
115 b'{' => {
115 b'{' => {
116 group_depth += 1;
116 group_depth += 1;
117 res.extend(b"(?:")
117 res.extend(b"(?:")
118 }
118 }
119 b'}' if group_depth > 0 => {
119 b'}' if group_depth > 0 => {
120 group_depth -= 1;
120 group_depth -= 1;
121 res.extend(b")");
121 res.extend(b")");
122 }
122 }
123 b',' if group_depth > 0 => res.extend(b"|"),
123 b',' if group_depth > 0 => res.extend(b"|"),
124 b'\\' => {
124 b'\\' => {
125 let c = {
125 let c = {
126 if let Some((c, rest)) = input.split_first() {
126 if let Some((c, rest)) = input.split_first() {
127 input = rest;
127 input = rest;
128 c
128 c
129 } else {
129 } else {
130 c
130 c
131 }
131 }
132 };
132 };
133 res.extend(&RE_ESCAPE[*c as usize])
133 res.extend(&RE_ESCAPE[*c as usize])
134 }
134 }
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 }
136 }
137 }
137 }
138 res
138 res
139 }
139 }
140
140
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 pattern
142 pattern
143 .iter()
143 .iter()
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 .collect()
145 .collect()
146 }
146 }
147
147
148 pub fn parse_pattern_syntax(
148 pub fn parse_pattern_syntax(
149 kind: &[u8],
149 kind: &[u8],
150 ) -> Result<PatternSyntax, PatternError> {
150 ) -> Result<PatternSyntax, PatternError> {
151 match kind {
151 match kind {
152 b"re:" => Ok(PatternSyntax::Regexp),
152 b"re:" => Ok(PatternSyntax::Regexp),
153 b"path:" => Ok(PatternSyntax::Path),
153 b"path:" => Ok(PatternSyntax::Path),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 b"glob:" => Ok(PatternSyntax::Glob),
158 b"glob:" => Ok(PatternSyntax::Glob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 b"include:" => Ok(PatternSyntax::Include),
160 b"include:" => Ok(PatternSyntax::Include),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 _ => Err(PatternError::UnsupportedSyntax(
162 _ => Err(PatternError::UnsupportedSyntax(
163 String::from_utf8_lossy(kind).to_string(),
163 String::from_utf8_lossy(kind).to_string(),
164 )),
164 )),
165 }
165 }
166 }
166 }
167
167
168 /// Builds the regex that corresponds to the given pattern.
168 /// Builds the regex that corresponds to the given pattern.
169 /// If within a `syntax: regexp` context, returns the pattern,
169 /// If within a `syntax: regexp` context, returns the pattern,
170 /// otherwise, returns the corresponding regex.
170 /// otherwise, returns the corresponding regex.
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 let IgnorePattern {
172 let IgnorePattern {
173 syntax, pattern, ..
173 syntax, pattern, ..
174 } = entry;
174 } = entry;
175 if pattern.is_empty() {
175 if pattern.is_empty() {
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 PatternSyntax::Regexp => pattern.to_owned(),
179 PatternSyntax::Regexp => pattern.to_owned(),
180 PatternSyntax::RelRegexp => {
180 PatternSyntax::RelRegexp => {
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
182 // do not. Checking for `*` correctly triggers the same error all
182 // do not. Checking for `*` correctly triggers the same error all
183 // engines.
183 // engines.
184 if pattern[0] == b'^'
184 if pattern[0] == b'^'
185 || pattern[0] == b'*'
185 || pattern[0] == b'*'
186 || pattern.starts_with(b".*")
186 || pattern.starts_with(b".*")
187 {
187 {
188 return pattern.to_owned();
188 return pattern.to_owned();
189 }
189 }
190 [&b".*"[..], pattern].concat()
190 [&b".*"[..], pattern].concat()
191 }
191 }
192 PatternSyntax::Path | PatternSyntax::RelPath => {
192 PatternSyntax::Path | PatternSyntax::RelPath => {
193 if pattern == b"." {
193 if pattern == b"." {
194 return vec![];
194 return vec![];
195 }
195 }
196 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
197 }
197 }
198 PatternSyntax::RootFiles => {
198 PatternSyntax::RootFiles => {
199 let mut res = if pattern == b"." {
199 let mut res = if pattern == b"." {
200 vec![]
200 vec![]
201 } else {
201 } else {
202 // Pattern is a directory name.
202 // Pattern is a directory name.
203 [escape_pattern(pattern).as_slice(), b"/"].concat()
203 [escape_pattern(pattern).as_slice(), b"/"].concat()
204 };
204 };
205
205
206 // Anything after the pattern must be a non-directory.
206 // Anything after the pattern must be a non-directory.
207 res.extend(b"[^/]+$");
207 res.extend(b"[^/]+$");
208 res
208 res
209 }
209 }
210 PatternSyntax::RelGlob => {
210 PatternSyntax::RelGlob => {
211 let glob_re = glob_to_re(pattern);
211 let glob_re = glob_to_re(pattern);
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
213 [b".*", rest, GLOB_SUFFIX].concat()
213 [b".*", rest, GLOB_SUFFIX].concat()
214 } else {
214 } else {
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 }
216 }
217 }
217 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 }
220 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 }
222 }
223 }
223 }
224
224
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227
227
228 /// TODO support other platforms
228 /// TODO support other platforms
229 #[cfg(unix)]
229 #[cfg(unix)]
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
231 if bytes.is_empty() {
231 if bytes.is_empty() {
232 return b".".to_vec();
232 return b".".to_vec();
233 }
233 }
234 let sep = b'/';
234 let sep = b'/';
235
235
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
237 if initial_slashes > 2 {
237 if initial_slashes > 2 {
238 // POSIX allows one or two initial slashes, but treats three or more
238 // POSIX allows one or two initial slashes, but treats three or more
239 // as single slash.
239 // as single slash.
240 initial_slashes = 1;
240 initial_slashes = 1;
241 }
241 }
242 let components = bytes
242 let components = bytes
243 .split(|b| *b == sep)
243 .split(|b| *b == sep)
244 .filter(|c| !(c.is_empty() || c == b"."))
244 .filter(|c| !(c.is_empty() || c == b"."))
245 .fold(vec![], |mut acc, component| {
245 .fold(vec![], |mut acc, component| {
246 if component != b".."
246 if component != b".."
247 || (initial_slashes == 0 && acc.is_empty())
247 || (initial_slashes == 0 && acc.is_empty())
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
249 {
249 {
250 acc.push(component)
250 acc.push(component)
251 } else if !acc.is_empty() {
251 } else if !acc.is_empty() {
252 acc.pop();
252 acc.pop();
253 }
253 }
254 acc
254 acc
255 });
255 });
256 let mut new_bytes = components.join(&sep);
256 let mut new_bytes = components.join(&sep);
257
257
258 if initial_slashes > 0 {
258 if initial_slashes > 0 {
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
260 buf.extend(new_bytes);
260 buf.extend(new_bytes);
261 new_bytes = buf;
261 new_bytes = buf;
262 }
262 }
263 if new_bytes.is_empty() {
263 if new_bytes.is_empty() {
264 b".".to_vec()
264 b".".to_vec()
265 } else {
265 } else {
266 new_bytes
266 new_bytes
267 }
267 }
268 }
268 }
269
269
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 /// that don't need to be transformed into a regex.
271 /// that don't need to be transformed into a regex.
272 pub fn build_single_regex(
272 pub fn build_single_regex(
273 entry: &IgnorePattern,
273 entry: &IgnorePattern,
274 ) -> Result<Option<Vec<u8>>, PatternError> {
274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 let IgnorePattern {
275 let IgnorePattern {
276 pattern, syntax, ..
276 pattern, syntax, ..
277 } = entry;
277 } = entry;
278 let pattern = match syntax {
278 let pattern = match syntax {
279 PatternSyntax::RootGlob
279 PatternSyntax::RootGlob
280 | PatternSyntax::Path
280 | PatternSyntax::Path
281 | PatternSyntax::RelGlob
281 | PatternSyntax::RelGlob
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 return Err(PatternError::NonRegexPattern(entry.clone()))
284 return Err(PatternError::NonRegexPattern(entry.clone()))
285 }
285 }
286 _ => pattern.to_owned(),
286 _ => pattern.to_owned(),
287 };
287 };
288 if *syntax == PatternSyntax::RootGlob
288 if *syntax == PatternSyntax::RootGlob
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 {
290 {
291 Ok(None)
291 Ok(None)
292 } else {
292 } else {
293 let mut entry = entry.clone();
293 let mut entry = entry.clone();
294 entry.pattern = pattern;
294 entry.pattern = pattern;
295 Ok(Some(_build_single_regex(&entry)))
295 Ok(Some(_build_single_regex(&entry)))
296 }
296 }
297 }
297 }
298
298
299 lazy_static! {
299 lazy_static! {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
301 let mut m = FastHashMap::default();
301 let mut m = FastHashMap::default();
302
302
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
309 m
309 m
310 };
310 };
311 }
311 }
312
312
313 #[derive(Debug)]
313 #[derive(Debug)]
314 pub enum PatternFileWarning {
314 pub enum PatternFileWarning {
315 /// (file path, syntax bytes)
315 /// (file path, syntax bytes)
316 InvalidSyntax(PathBuf, Vec<u8>),
316 InvalidSyntax(PathBuf, Vec<u8>),
317 /// File path
317 /// File path
318 NoSuchFile(PathBuf),
318 NoSuchFile(PathBuf),
319 }
319 }
320
320
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
322 lines: &[u8],
322 lines: &[u8],
323 file_path: P,
323 file_path: P,
324 warn: bool,
324 warn: bool,
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
327
328 #[allow(clippy::trivial_regex)]
327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
329 let comment_escape_regex = Regex::new(r"\\#").unwrap();
328 let mut inputs: Vec<IgnorePattern> = vec![];
330 let mut inputs: Vec<IgnorePattern> = vec![];
329 let mut warnings: Vec<PatternFileWarning> = vec![];
331 let mut warnings: Vec<PatternFileWarning> = vec![];
330
332
331 let mut current_syntax = b"relre:".as_ref();
333 let mut current_syntax = b"relre:".as_ref();
332
334
333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
335 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
334 let line_number = line_number + 1;
336 let line_number = line_number + 1;
335
337
336 let line_buf;
338 let line_buf;
337 if line.contains(&b'#') {
339 if line.contains(&b'#') {
338 if let Some(cap) = comment_regex.captures(line) {
340 if let Some(cap) = comment_regex.captures(line) {
339 line = &line[..cap.get(1).unwrap().end()]
341 line = &line[..cap.get(1).unwrap().end()]
340 }
342 }
341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
343 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
342 line = &line_buf;
344 line = &line_buf;
343 }
345 }
344
346
345 let mut line = line.trim_end();
347 let mut line = line.trim_end();
346
348
347 if line.is_empty() {
349 if line.is_empty() {
348 continue;
350 continue;
349 }
351 }
350
352
351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
353 if let Some(syntax) = line.drop_prefix(b"syntax:") {
352 let syntax = syntax.trim();
354 let syntax = syntax.trim();
353
355
354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
356 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
355 current_syntax = rel_syntax;
357 current_syntax = rel_syntax;
356 } else if warn {
358 } else if warn {
357 warnings.push(PatternFileWarning::InvalidSyntax(
359 warnings.push(PatternFileWarning::InvalidSyntax(
358 file_path.as_ref().to_owned(),
360 file_path.as_ref().to_owned(),
359 syntax.to_owned(),
361 syntax.to_owned(),
360 ));
362 ));
361 }
363 }
362 continue;
364 continue;
363 }
365 }
364
366
365 let mut line_syntax: &[u8] = &current_syntax;
367 let mut line_syntax: &[u8] = &current_syntax;
366
368
367 for (s, rels) in SYNTAXES.iter() {
369 for (s, rels) in SYNTAXES.iter() {
368 if let Some(rest) = line.drop_prefix(rels) {
370 if let Some(rest) = line.drop_prefix(rels) {
369 line_syntax = rels;
371 line_syntax = rels;
370 line = rest;
372 line = rest;
371 break;
373 break;
372 }
374 }
373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
375 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
374 line_syntax = rels;
376 line_syntax = rels;
375 line = rest;
377 line = rest;
376 break;
378 break;
377 }
379 }
378 }
380 }
379
381
380 inputs.push(IgnorePattern::new(
382 inputs.push(IgnorePattern::new(
381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
383 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
382 PatternError::UnsupportedSyntax(syntax) => {
384 PatternError::UnsupportedSyntax(syntax) => {
383 PatternError::UnsupportedSyntaxInFile(
385 PatternError::UnsupportedSyntaxInFile(
384 syntax,
386 syntax,
385 file_path.as_ref().to_string_lossy().into(),
387 file_path.as_ref().to_string_lossy().into(),
386 line_number,
388 line_number,
387 )
389 )
388 }
390 }
389 _ => e,
391 _ => e,
390 })?,
392 })?,
391 &line,
393 &line,
392 &file_path,
394 &file_path,
393 ));
395 ));
394 }
396 }
395 Ok((inputs, warnings))
397 Ok((inputs, warnings))
396 }
398 }
397
399
398 pub fn read_pattern_file<P: AsRef<Path>>(
400 pub fn read_pattern_file<P: AsRef<Path>>(
399 file_path: P,
401 file_path: P,
400 warn: bool,
402 warn: bool,
401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
403 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
402 let mut f = match File::open(file_path.as_ref()) {
404 let mut f = match File::open(file_path.as_ref()) {
403 Ok(f) => Ok(f),
405 Ok(f) => Ok(f),
404 Err(e) => match e.kind() {
406 Err(e) => match e.kind() {
405 std::io::ErrorKind::NotFound => {
407 std::io::ErrorKind::NotFound => {
406 return Ok((
408 return Ok((
407 vec![],
409 vec![],
408 vec![PatternFileWarning::NoSuchFile(
410 vec![PatternFileWarning::NoSuchFile(
409 file_path.as_ref().to_owned(),
411 file_path.as_ref().to_owned(),
410 )],
412 )],
411 ))
413 ))
412 }
414 }
413 _ => Err(e),
415 _ => Err(e),
414 },
416 },
415 }?;
417 }?;
416 let mut contents = Vec::new();
418 let mut contents = Vec::new();
417
419
418 f.read_to_end(&mut contents)?;
420 f.read_to_end(&mut contents)?;
419
421
420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
422 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
421 }
423 }
422
424
423 /// Represents an entry in an "ignore" file.
425 /// Represents an entry in an "ignore" file.
424 #[derive(Debug, Eq, PartialEq, Clone)]
426 #[derive(Debug, Eq, PartialEq, Clone)]
425 pub struct IgnorePattern {
427 pub struct IgnorePattern {
426 pub syntax: PatternSyntax,
428 pub syntax: PatternSyntax,
427 pub pattern: Vec<u8>,
429 pub pattern: Vec<u8>,
428 pub source: PathBuf,
430 pub source: PathBuf,
429 }
431 }
430
432
431 impl IgnorePattern {
433 impl IgnorePattern {
432 pub fn new(
434 pub fn new(
433 syntax: PatternSyntax,
435 syntax: PatternSyntax,
434 pattern: &[u8],
436 pattern: &[u8],
435 source: impl AsRef<Path>,
437 source: impl AsRef<Path>,
436 ) -> Self {
438 ) -> Self {
437 Self {
439 Self {
438 syntax,
440 syntax,
439 pattern: pattern.to_owned(),
441 pattern: pattern.to_owned(),
440 source: source.as_ref().to_owned(),
442 source: source.as_ref().to_owned(),
441 }
443 }
442 }
444 }
443 }
445 }
444
446
445 pub type PatternResult<T> = Result<T, PatternError>;
447 pub type PatternResult<T> = Result<T, PatternError>;
446
448
447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
449 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
448 /// patterns.
450 /// patterns.
449 ///
451 ///
450 /// `subinclude:` is not treated as a special pattern here: unraveling them
452 /// `subinclude:` is not treated as a special pattern here: unraveling them
451 /// needs to occur in the "ignore" phase.
453 /// needs to occur in the "ignore" phase.
452 pub fn get_patterns_from_file(
454 pub fn get_patterns_from_file(
453 pattern_file: impl AsRef<Path>,
455 pattern_file: impl AsRef<Path>,
454 root_dir: impl AsRef<Path>,
456 root_dir: impl AsRef<Path>,
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
457 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
458 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
457 let patterns = patterns
459 let patterns = patterns
458 .into_iter()
460 .into_iter()
459 .flat_map(|entry| -> PatternResult<_> {
461 .flat_map(|entry| -> PatternResult<_> {
460 let IgnorePattern {
462 let IgnorePattern {
461 syntax,
463 syntax, pattern, ..
462 pattern,
463 source: _,
464 } = &entry;
464 } = &entry;
465 Ok(match syntax {
465 Ok(match syntax {
466 PatternSyntax::Include => {
466 PatternSyntax::Include => {
467 let inner_include =
467 let inner_include =
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
470 &inner_include,
470 &inner_include,
471 root_dir.as_ref(),
471 root_dir.as_ref(),
472 )?;
472 )?;
473 warnings.extend(inner_warnings);
473 warnings.extend(inner_warnings);
474 inner_pats
474 inner_pats
475 }
475 }
476 _ => vec![entry],
476 _ => vec![entry],
477 })
477 })
478 })
478 })
479 .flatten()
479 .flatten()
480 .collect();
480 .collect();
481
481
482 Ok((patterns, warnings))
482 Ok((patterns, warnings))
483 }
483 }
484
484
485 /// Holds all the information needed to handle a `subinclude:` pattern.
485 /// Holds all the information needed to handle a `subinclude:` pattern.
486 pub struct SubInclude {
486 pub struct SubInclude {
487 /// Will be used for repository (hg) paths that start with this prefix.
487 /// Will be used for repository (hg) paths that start with this prefix.
488 /// It is relative to the current working directory, so comparing against
488 /// It is relative to the current working directory, so comparing against
489 /// repository paths is painless.
489 /// repository paths is painless.
490 pub prefix: HgPathBuf,
490 pub prefix: HgPathBuf,
491 /// The file itself, containing the patterns
491 /// The file itself, containing the patterns
492 pub path: PathBuf,
492 pub path: PathBuf,
493 /// Folder in the filesystem where this it applies
493 /// Folder in the filesystem where this it applies
494 pub root: PathBuf,
494 pub root: PathBuf,
495 }
495 }
496
496
497 impl SubInclude {
497 impl SubInclude {
498 pub fn new(
498 pub fn new(
499 root_dir: impl AsRef<Path>,
499 root_dir: impl AsRef<Path>,
500 pattern: &[u8],
500 pattern: &[u8],
501 source: impl AsRef<Path>,
501 source: impl AsRef<Path>,
502 ) -> Result<SubInclude, HgPathError> {
502 ) -> Result<SubInclude, HgPathError> {
503 let normalized_source =
503 let normalized_source =
504 normalize_path_bytes(&get_bytes_from_path(source));
504 normalize_path_bytes(&get_bytes_from_path(source));
505
505
506 let source_root = get_path_from_bytes(&normalized_source);
506 let source_root = get_path_from_bytes(&normalized_source);
507 let source_root = source_root.parent().unwrap_or(source_root.deref());
507 let source_root =
508 source_root.parent().unwrap_or_else(|| source_root.deref());
508
509
509 let path = source_root.join(get_path_from_bytes(pattern));
510 let path = source_root.join(get_path_from_bytes(pattern));
510 let new_root = path.parent().unwrap_or(path.deref());
511 let new_root = path.parent().unwrap_or_else(|| path.deref());
511
512
512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
513 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
513
514
514 Ok(Self {
515 Ok(Self {
515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
516 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
516 if !p.is_empty() {
517 if !p.is_empty() {
517 p.push(b'/');
518 p.push(b'/');
518 }
519 }
519 Ok(p)
520 Ok(p)
520 })?,
521 })?,
521 path: path.to_owned(),
522 path: path.to_owned(),
522 root: new_root.to_owned(),
523 root: new_root.to_owned(),
523 })
524 })
524 }
525 }
525 }
526 }
526
527
527 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 /// phase.
529 /// phase.
529 pub fn filter_subincludes(
530 pub fn filter_subincludes(
530 ignore_patterns: &[IgnorePattern],
531 ignore_patterns: &[IgnorePattern],
531 root_dir: impl AsRef<Path>,
532 root_dir: impl AsRef<Path>,
532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 let mut subincludes = vec![];
534 let mut subincludes = vec![];
534 let mut others = vec![];
535 let mut others = vec![];
535
536
536 for ignore_pattern in ignore_patterns.iter() {
537 for ignore_pattern in ignore_patterns.iter() {
537 let IgnorePattern {
538 let IgnorePattern {
538 syntax,
539 syntax,
539 pattern,
540 pattern,
540 source,
541 source,
541 } = ignore_pattern;
542 } = ignore_pattern;
542 if *syntax == PatternSyntax::SubInclude {
543 if *syntax == PatternSyntax::SubInclude {
543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 } else {
545 } else {
545 others.push(ignore_pattern)
546 others.push(ignore_pattern)
546 }
547 }
547 }
548 }
548 Ok((subincludes, others))
549 Ok((subincludes, others))
549 }
550 }
550
551
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn escape_pattern_test() {
        // Characters with no special regex meaning pass through untouched.
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax: include` is unsupported inline; both lines are dropped.
        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![]
        );
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"rust/target/",
                Path::new("")
            ))
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::Regexp,
                br"rust/target/\d+",
                Path::new("")
            ))
            .unwrap(),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // Rootiness globs that need no regex at all return `None`.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"whatever",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"*.o",
                Path::new("")
            ))
            .unwrap(),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}
@@ -1,937 +1,937
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 use crate::{
10 use crate::{
11 dirstate::dirs_multiset::DirsChildrenMultiset,
11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 filepatterns::{
12 filepatterns::{
13 build_single_regex, filter_subincludes, get_patterns_from_file,
13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 PatternFileWarning, PatternResult, SubInclude,
14 PatternFileWarning, PatternResult, SubInclude,
15 },
15 },
16 utils::{
16 utils::{
17 files::find_dirs,
17 files::find_dirs,
18 hg_path::{HgPath, HgPathBuf},
18 hg_path::{HgPath, HgPathBuf},
19 Escaped,
19 Escaped,
20 },
20 },
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 PatternSyntax,
22 PatternSyntax,
23 };
23 };
24
24
25 use crate::filepatterns::normalize_path_bytes;
25 use crate::filepatterns::normalize_path_bytes;
26 use std::borrow::ToOwned;
26 use std::borrow::ToOwned;
27 use std::collections::HashSet;
27 use std::collections::HashSet;
28 use std::fmt::{Display, Error, Formatter};
28 use std::fmt::{Display, Error, Formatter};
29 use std::iter::FromIterator;
29 use std::iter::FromIterator;
30 use std::ops::Deref;
30 use std::ops::Deref;
31 use std::path::{Path, PathBuf};
31 use std::path::{Path, PathBuf};
32
32
33 use micro_timer::timed;
33 use micro_timer::timed;
34
34
35 #[derive(Debug, PartialEq)]
35 #[derive(Debug, PartialEq)]
36 pub enum VisitChildrenSet<'a> {
36 pub enum VisitChildrenSet<'a> {
37 /// Don't visit anything
37 /// Don't visit anything
38 Empty,
38 Empty,
39 /// Only visit this directory
39 /// Only visit this directory
40 This,
40 This,
41 /// Visit this directory and these subdirectories
41 /// Visit this directory and these subdirectories
42 /// TODO Should we implement a `NonEmptyHashSet`?
42 /// TODO Should we implement a `NonEmptyHashSet`?
43 Set(HashSet<&'a HgPath>),
43 Set(HashSet<&'a HgPath>),
44 /// Visit this directory and all subdirectories
44 /// Visit this directory and all subdirectories
45 Recursive,
45 Recursive,
46 }
46 }
47
47
48 pub trait Matcher {
48 pub trait Matcher {
49 /// Explicitly listed files
49 /// Explicitly listed files
50 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
50 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
51 /// Returns whether `filename` is in `file_set`
51 /// Returns whether `filename` is in `file_set`
52 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
52 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
53 /// Returns whether `filename` is matched by this matcher
53 /// Returns whether `filename` is matched by this matcher
54 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
54 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
55 /// Decides whether a directory should be visited based on whether it
55 /// Decides whether a directory should be visited based on whether it
56 /// has potential matches in it or one of its subdirectories, and
56 /// has potential matches in it or one of its subdirectories, and
57 /// potentially lists which subdirectories of that directory should be
57 /// potentially lists which subdirectories of that directory should be
58 /// visited. This is based on the match's primary, included, and excluded
58 /// visited. This is based on the match's primary, included, and excluded
59 /// patterns.
59 /// patterns.
60 ///
60 ///
61 /// # Example
61 /// # Example
62 ///
62 ///
63 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
63 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 /// return the following values (assuming the implementation of
64 /// return the following values (assuming the implementation of
65 /// visit_children_set is capable of recognizing this; some implementations
65 /// visit_children_set is capable of recognizing this; some implementations
66 /// are not).
66 /// are not).
67 ///
67 ///
68 /// ```text
68 /// ```text
69 /// ```ignore
69 /// ```ignore
70 /// '' -> {'foo', 'qux'}
70 /// '' -> {'foo', 'qux'}
71 /// 'baz' -> set()
71 /// 'baz' -> set()
72 /// 'foo' -> {'bar'}
72 /// 'foo' -> {'bar'}
73 /// // Ideally this would be `Recursive`, but since the prefix nature of
73 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 /// // matchers is applied to the entire matcher, we have to downgrade this
74 /// // matchers is applied to the entire matcher, we have to downgrade this
75 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
75 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 /// // `RootFilesIn'-kind matcher being mixed in.
76 /// // `RootFilesIn'-kind matcher being mixed in.
77 /// 'foo/bar' -> 'this'
77 /// 'foo/bar' -> 'this'
78 /// 'qux' -> 'this'
78 /// 'qux' -> 'this'
79 /// ```
79 /// ```
80 /// # Important
80 /// # Important
81 ///
81 ///
82 /// Most matchers do not know if they're representing files or
82 /// Most matchers do not know if they're representing files or
83 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
83 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 /// file or a directory, so `visit_children_set('dir')` for most matchers
84 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
85 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
86 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 /// it may return `VisitChildrenSet::This`.
87 /// it may return `VisitChildrenSet::This`.
88 /// Do not rely on the return being a `HashSet` indicating that there are
88 /// Do not rely on the return being a `HashSet` indicating that there are
89 /// no files in this dir to investigate (or equivalently that if there are
89 /// no files in this dir to investigate (or equivalently that if there are
90 /// files to investigate in 'dir' that it will always return
90 /// files to investigate in 'dir' that it will always return
91 /// `VisitChildrenSet::This`).
91 /// `VisitChildrenSet::This`).
92 fn visit_children_set(
92 fn visit_children_set(
93 &self,
93 &self,
94 directory: impl AsRef<HgPath>,
94 directory: impl AsRef<HgPath>,
95 ) -> VisitChildrenSet;
95 ) -> VisitChildrenSet;
96 /// Matcher will match everything and `files_set()` will be empty:
96 /// Matcher will match everything and `files_set()` will be empty:
97 /// optimization might be possible.
97 /// optimization might be possible.
98 fn matches_everything(&self) -> bool;
98 fn matches_everything(&self) -> bool;
99 /// Matcher will match exactly the files in `files_set()`: optimization
99 /// Matcher will match exactly the files in `files_set()`: optimization
100 /// might be possible.
100 /// might be possible.
101 fn is_exact(&self) -> bool;
101 fn is_exact(&self) -> bool;
102 }
102 }
103
103
104 /// Matches everything.
104 /// Matches everything.
105 ///```
105 ///```
106 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
106 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
107 ///
107 ///
108 /// let matcher = AlwaysMatcher;
108 /// let matcher = AlwaysMatcher;
109 ///
109 ///
110 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
110 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
111 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
111 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
114 /// ```
114 /// ```
115 #[derive(Debug)]
115 #[derive(Debug)]
116 pub struct AlwaysMatcher;
116 pub struct AlwaysMatcher;
117
117
118 impl Matcher for AlwaysMatcher {
118 impl Matcher for AlwaysMatcher {
119 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
119 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
120 None
120 None
121 }
121 }
122 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
122 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
123 false
123 false
124 }
124 }
125 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
125 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
126 true
126 true
127 }
127 }
128 fn visit_children_set(
128 fn visit_children_set(
129 &self,
129 &self,
130 _directory: impl AsRef<HgPath>,
130 _directory: impl AsRef<HgPath>,
131 ) -> VisitChildrenSet {
131 ) -> VisitChildrenSet {
132 VisitChildrenSet::Recursive
132 VisitChildrenSet::Recursive
133 }
133 }
134 fn matches_everything(&self) -> bool {
134 fn matches_everything(&self) -> bool {
135 true
135 true
136 }
136 }
137 fn is_exact(&self) -> bool {
137 fn is_exact(&self) -> bool {
138 false
138 false
139 }
139 }
140 }
140 }
141
141
142 /// Matches the input files exactly. They are interpreted as paths, not
142 /// Matches the input files exactly. They are interpreted as paths, not
143 /// patterns.
143 /// patterns.
144 ///
144 ///
145 ///```
145 ///```
146 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
146 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
147 ///
147 ///
148 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
148 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
149 /// let matcher = FileMatcher::new(&files).unwrap();
149 /// let matcher = FileMatcher::new(&files).unwrap();
150 ///
150 ///
151 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
151 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
152 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
152 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
153 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
153 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
154 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
154 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
155 /// ```
155 /// ```
156 #[derive(Debug)]
156 #[derive(Debug)]
157 pub struct FileMatcher<'a> {
157 pub struct FileMatcher<'a> {
158 files: HashSet<&'a HgPath>,
158 files: HashSet<&'a HgPath>,
159 dirs: DirsMultiset,
159 dirs: DirsMultiset,
160 }
160 }
161
161
162 impl<'a> FileMatcher<'a> {
162 impl<'a> FileMatcher<'a> {
163 pub fn new(
163 pub fn new(
164 files: &'a [impl AsRef<HgPath>],
164 files: &'a [impl AsRef<HgPath>],
165 ) -> Result<Self, DirstateMapError> {
165 ) -> Result<Self, DirstateMapError> {
166 Ok(Self {
166 Ok(Self {
167 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
167 files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
168 dirs: DirsMultiset::from_manifest(files)?,
168 dirs: DirsMultiset::from_manifest(files)?,
169 })
169 })
170 }
170 }
171 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
171 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
172 self.files.contains(filename.as_ref())
172 self.files.contains(filename.as_ref())
173 }
173 }
174 }
174 }
175
175
176 impl<'a> Matcher for FileMatcher<'a> {
176 impl<'a> Matcher for FileMatcher<'a> {
177 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
177 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
178 Some(&self.files)
178 Some(&self.files)
179 }
179 }
180 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
180 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
181 self.inner_matches(filename)
181 self.inner_matches(filename)
182 }
182 }
183 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
183 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
184 self.inner_matches(filename)
184 self.inner_matches(filename)
185 }
185 }
186 fn visit_children_set(
186 fn visit_children_set(
187 &self,
187 &self,
188 directory: impl AsRef<HgPath>,
188 directory: impl AsRef<HgPath>,
189 ) -> VisitChildrenSet {
189 ) -> VisitChildrenSet {
190 if self.files.is_empty() || !self.dirs.contains(&directory) {
190 if self.files.is_empty() || !self.dirs.contains(&directory) {
191 return VisitChildrenSet::Empty;
191 return VisitChildrenSet::Empty;
192 }
192 }
193 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
193 let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
194
194
195 let mut candidates: HashSet<&HgPath> =
195 let mut candidates: HashSet<&HgPath> =
196 self.files.union(&dirs_as_set).map(|k| *k).collect();
196 self.files.union(&dirs_as_set).cloned().collect();
197 candidates.remove(HgPath::new(b""));
197 candidates.remove(HgPath::new(b""));
198
198
199 if !directory.as_ref().is_empty() {
199 if !directory.as_ref().is_empty() {
200 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
200 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
201 candidates = candidates
201 candidates = candidates
202 .iter()
202 .iter()
203 .filter_map(|c| {
203 .filter_map(|c| {
204 if c.as_bytes().starts_with(&directory) {
204 if c.as_bytes().starts_with(&directory) {
205 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
205 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
206 } else {
206 } else {
207 None
207 None
208 }
208 }
209 })
209 })
210 .collect();
210 .collect();
211 }
211 }
212
212
213 // `self.dirs` includes all of the directories, recursively, so if
213 // `self.dirs` includes all of the directories, recursively, so if
214 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
214 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
215 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
215 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
216 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
216 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
217 // subdir will be in there without a slash.
217 // subdir will be in there without a slash.
218 VisitChildrenSet::Set(
218 VisitChildrenSet::Set(
219 candidates
219 candidates
220 .iter()
220 .iter()
221 .filter_map(|c| {
221 .filter_map(|c| {
222 if c.bytes().all(|b| *b != b'/') {
222 if c.bytes().all(|b| *b != b'/') {
223 Some(*c)
223 Some(*c)
224 } else {
224 } else {
225 None
225 None
226 }
226 }
227 })
227 })
228 .collect(),
228 .collect(),
229 )
229 )
230 }
230 }
231 fn matches_everything(&self) -> bool {
231 fn matches_everything(&self) -> bool {
232 false
232 false
233 }
233 }
234 fn is_exact(&self) -> bool {
234 fn is_exact(&self) -> bool {
235 true
235 true
236 }
236 }
237 }
237 }
238
238
239 /// Matches files that are included in the ignore rules.
239 /// Matches files that are included in the ignore rules.
240 /// ```
240 /// ```
241 /// use hg::{
241 /// use hg::{
242 /// matchers::{IncludeMatcher, Matcher},
242 /// matchers::{IncludeMatcher, Matcher},
243 /// IgnorePattern,
243 /// IgnorePattern,
244 /// PatternSyntax,
244 /// PatternSyntax,
245 /// utils::hg_path::HgPath
245 /// utils::hg_path::HgPath
246 /// };
246 /// };
247 /// use std::path::Path;
247 /// use std::path::Path;
248 /// ///
248 /// ///
249 /// let ignore_patterns =
249 /// let ignore_patterns =
250 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
250 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
251 /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
251 /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
252 /// ///
252 /// ///
253 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
253 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
254 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
254 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
255 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
255 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
256 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
256 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
257 /// ```
257 /// ```
258 pub struct IncludeMatcher<'a> {
258 pub struct IncludeMatcher<'a> {
259 patterns: Vec<u8>,
259 patterns: Vec<u8>,
260 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
260 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
261 /// Whether all the patterns match a prefix (i.e. recursively)
261 /// Whether all the patterns match a prefix (i.e. recursively)
262 prefix: bool,
262 prefix: bool,
263 roots: HashSet<HgPathBuf>,
263 roots: HashSet<HgPathBuf>,
264 dirs: HashSet<HgPathBuf>,
264 dirs: HashSet<HgPathBuf>,
265 parents: HashSet<HgPathBuf>,
265 parents: HashSet<HgPathBuf>,
266 }
266 }
267
267
268 impl<'a> Matcher for IncludeMatcher<'a> {
268 impl<'a> Matcher for IncludeMatcher<'a> {
269 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
269 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
270 None
270 None
271 }
271 }
272
272
273 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
273 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
274 false
274 false
275 }
275 }
276
276
277 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
277 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
278 (self.match_fn)(filename.as_ref())
278 (self.match_fn)(filename.as_ref())
279 }
279 }
280
280
281 fn visit_children_set(
281 fn visit_children_set(
282 &self,
282 &self,
283 directory: impl AsRef<HgPath>,
283 directory: impl AsRef<HgPath>,
284 ) -> VisitChildrenSet {
284 ) -> VisitChildrenSet {
285 let dir = directory.as_ref();
285 let dir = directory.as_ref();
286 if self.prefix && self.roots.contains(dir) {
286 if self.prefix && self.roots.contains(dir) {
287 return VisitChildrenSet::Recursive;
287 return VisitChildrenSet::Recursive;
288 }
288 }
289 if self.roots.contains(HgPath::new(b""))
289 if self.roots.contains(HgPath::new(b""))
290 || self.roots.contains(dir)
290 || self.roots.contains(dir)
291 || self.dirs.contains(dir)
291 || self.dirs.contains(dir)
292 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
292 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
293 {
293 {
294 return VisitChildrenSet::This;
294 return VisitChildrenSet::This;
295 }
295 }
296
296
297 if self.parents.contains(directory.as_ref()) {
297 if self.parents.contains(directory.as_ref()) {
298 let multiset = self.get_all_parents_children();
298 let multiset = self.get_all_parents_children();
299 if let Some(children) = multiset.get(dir) {
299 if let Some(children) = multiset.get(dir) {
300 return VisitChildrenSet::Set(children.to_owned());
300 return VisitChildrenSet::Set(children.to_owned());
301 }
301 }
302 }
302 }
303 VisitChildrenSet::Empty
303 VisitChildrenSet::Empty
304 }
304 }
305
305
306 fn matches_everything(&self) -> bool {
306 fn matches_everything(&self) -> bool {
307 false
307 false
308 }
308 }
309
309
310 fn is_exact(&self) -> bool {
310 fn is_exact(&self) -> bool {
311 false
311 false
312 }
312 }
313 }
313 }
314
314
315 /// Returns a function that matches an `HgPath` against the given regex
315 /// Returns a function that matches an `HgPath` against the given regex
316 /// pattern.
316 /// pattern.
317 ///
317 ///
318 /// This can fail when the pattern is invalid or not supported by the
318 /// This can fail when the pattern is invalid or not supported by the
319 /// underlying engine (the `regex` crate), for instance anything with
319 /// underlying engine (the `regex` crate), for instance anything with
320 /// back-references.
320 /// back-references.
321 #[timed]
321 #[timed]
322 fn re_matcher(
322 fn re_matcher(
323 pattern: &[u8],
323 pattern: &[u8],
324 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
324 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
325 use std::io::Write;
325 use std::io::Write;
326
326
327 // The `regex` crate adds `.*` to the start and end of expressions if there
327 // The `regex` crate adds `.*` to the start and end of expressions if there
328 // are no anchors, so add the start anchor.
328 // are no anchors, so add the start anchor.
329 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
329 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
330 for byte in pattern {
330 for byte in pattern {
331 if *byte > 127 {
331 if *byte > 127 {
332 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
332 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
333 } else {
333 } else {
334 escaped_bytes.push(*byte);
334 escaped_bytes.push(*byte);
335 }
335 }
336 }
336 }
337 escaped_bytes.push(b')');
337 escaped_bytes.push(b')');
338
338
339 // Avoid the cost of UTF8 checking
339 // Avoid the cost of UTF8 checking
340 //
340 //
341 // # Safety
341 // # Safety
342 // This is safe because we escaped all non-ASCII bytes.
342 // This is safe because we escaped all non-ASCII bytes.
343 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
343 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
344 let re = regex::bytes::RegexBuilder::new(&pattern_string)
344 let re = regex::bytes::RegexBuilder::new(&pattern_string)
345 .unicode(false)
345 .unicode(false)
346 // Big repos with big `.hgignore` will hit the default limit and
346 // Big repos with big `.hgignore` will hit the default limit and
347 // incur a significant performance hit. One repo's `hg status` hit
347 // incur a significant performance hit. One repo's `hg status` hit
348 // multiple *minutes*.
348 // multiple *minutes*.
349 .dfa_size_limit(50 * (1 << 20))
349 .dfa_size_limit(50 * (1 << 20))
350 .build()
350 .build()
351 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
351 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
352
352
353 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
353 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
354 }
354 }
355
355
356 /// Returns the regex pattern and a function that matches an `HgPath` against
356 /// Returns the regex pattern and a function that matches an `HgPath` against
357 /// said regex formed by the given ignore patterns.
357 /// said regex formed by the given ignore patterns.
358 fn build_regex_match<'a>(
358 fn build_regex_match<'a>(
359 ignore_patterns: &'a [&'a IgnorePattern],
359 ignore_patterns: &'a [&'a IgnorePattern],
360 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
360 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
361 let mut regexps = vec![];
361 let mut regexps = vec![];
362 let mut exact_set = HashSet::new();
362 let mut exact_set = HashSet::new();
363
363
364 for pattern in ignore_patterns {
364 for pattern in ignore_patterns {
365 if let Some(re) = build_single_regex(pattern)? {
365 if let Some(re) = build_single_regex(pattern)? {
366 regexps.push(re);
366 regexps.push(re);
367 } else {
367 } else {
368 let exact = normalize_path_bytes(&pattern.pattern);
368 let exact = normalize_path_bytes(&pattern.pattern);
369 exact_set.insert(HgPathBuf::from_bytes(&exact));
369 exact_set.insert(HgPathBuf::from_bytes(&exact));
370 }
370 }
371 }
371 }
372
372
373 let full_regex = regexps.join(&b'|');
373 let full_regex = regexps.join(&b'|');
374
374
375 // An empty pattern would cause the regex engine to incorrectly match the
375 // An empty pattern would cause the regex engine to incorrectly match the
376 // (empty) root directory
376 // (empty) root directory
377 let func = if !(regexps.is_empty()) {
377 let func = if !(regexps.is_empty()) {
378 let matcher = re_matcher(&full_regex)?;
378 let matcher = re_matcher(&full_regex)?;
379 let func = move |filename: &HgPath| {
379 let func = move |filename: &HgPath| {
380 exact_set.contains(filename) || matcher(filename)
380 exact_set.contains(filename) || matcher(filename)
381 };
381 };
382 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
382 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
383 } else {
383 } else {
384 let func = move |filename: &HgPath| exact_set.contains(filename);
384 let func = move |filename: &HgPath| exact_set.contains(filename);
385 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
385 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
386 };
386 };
387
387
388 Ok((full_regex, func))
388 Ok((full_regex, func))
389 }
389 }
390
390
391 /// Returns roots and directories corresponding to each pattern.
391 /// Returns roots and directories corresponding to each pattern.
392 ///
392 ///
393 /// This calculates the roots and directories exactly matching the patterns and
393 /// This calculates the roots and directories exactly matching the patterns and
394 /// returns a tuple of (roots, dirs). It does not return other directories
394 /// returns a tuple of (roots, dirs). It does not return other directories
395 /// which may also need to be considered, like the parent directories.
395 /// which may also need to be considered, like the parent directories.
396 fn roots_and_dirs(
396 fn roots_and_dirs(
397 ignore_patterns: &[IgnorePattern],
397 ignore_patterns: &[IgnorePattern],
398 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
398 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
399 let mut roots = Vec::new();
399 let mut roots = Vec::new();
400 let mut dirs = Vec::new();
400 let mut dirs = Vec::new();
401
401
402 for ignore_pattern in ignore_patterns {
402 for ignore_pattern in ignore_patterns {
403 let IgnorePattern {
403 let IgnorePattern {
404 syntax, pattern, ..
404 syntax, pattern, ..
405 } = ignore_pattern;
405 } = ignore_pattern;
406 match syntax {
406 match syntax {
407 PatternSyntax::RootGlob | PatternSyntax::Glob => {
407 PatternSyntax::RootGlob | PatternSyntax::Glob => {
408 let mut root = vec![];
408 let mut root = vec![];
409
409
410 for p in pattern.split(|c| *c == b'/') {
410 for p in pattern.split(|c| *c == b'/') {
411 if p.iter().any(|c| match *c {
411 if p.iter().any(|c| match *c {
412 b'[' | b'{' | b'*' | b'?' => true,
412 b'[' | b'{' | b'*' | b'?' => true,
413 _ => false,
413 _ => false,
414 }) {
414 }) {
415 break;
415 break;
416 }
416 }
417 root.push(HgPathBuf::from_bytes(p));
417 root.push(HgPathBuf::from_bytes(p));
418 }
418 }
419 let buf =
419 let buf =
420 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
420 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
421 roots.push(buf);
421 roots.push(buf);
422 }
422 }
423 PatternSyntax::Path | PatternSyntax::RelPath => {
423 PatternSyntax::Path | PatternSyntax::RelPath => {
424 let pat = HgPath::new(if pattern == b"." {
424 let pat = HgPath::new(if pattern == b"." {
425 &[] as &[u8]
425 &[] as &[u8]
426 } else {
426 } else {
427 pattern
427 pattern
428 });
428 });
429 roots.push(pat.to_owned());
429 roots.push(pat.to_owned());
430 }
430 }
431 PatternSyntax::RootFiles => {
431 PatternSyntax::RootFiles => {
432 let pat = if pattern == b"." {
432 let pat = if pattern == b"." {
433 &[] as &[u8]
433 &[] as &[u8]
434 } else {
434 } else {
435 pattern
435 pattern
436 };
436 };
437 dirs.push(HgPathBuf::from_bytes(pat));
437 dirs.push(HgPathBuf::from_bytes(pat));
438 }
438 }
439 _ => {
439 _ => {
440 roots.push(HgPathBuf::new());
440 roots.push(HgPathBuf::new());
441 }
441 }
442 }
442 }
443 }
443 }
444 (roots, dirs)
444 (roots, dirs)
445 }
445 }
446
446
447 /// Paths extracted from patterns
447 /// Paths extracted from patterns
448 #[derive(Debug, PartialEq)]
448 #[derive(Debug, PartialEq)]
449 struct RootsDirsAndParents {
449 struct RootsDirsAndParents {
450 /// Directories to match recursively
450 /// Directories to match recursively
451 pub roots: HashSet<HgPathBuf>,
451 pub roots: HashSet<HgPathBuf>,
452 /// Directories to match non-recursively
452 /// Directories to match non-recursively
453 pub dirs: HashSet<HgPathBuf>,
453 pub dirs: HashSet<HgPathBuf>,
454 /// Implicitly required directories to go to items in either roots or dirs
454 /// Implicitly required directories to go to items in either roots or dirs
455 pub parents: HashSet<HgPathBuf>,
455 pub parents: HashSet<HgPathBuf>,
456 }
456 }
457
457
458 /// Extract roots, dirs and parents from patterns.
458 /// Extract roots, dirs and parents from patterns.
459 fn roots_dirs_and_parents(
459 fn roots_dirs_and_parents(
460 ignore_patterns: &[IgnorePattern],
460 ignore_patterns: &[IgnorePattern],
461 ) -> PatternResult<RootsDirsAndParents> {
461 ) -> PatternResult<RootsDirsAndParents> {
462 let (roots, dirs) = roots_and_dirs(ignore_patterns);
462 let (roots, dirs) = roots_and_dirs(ignore_patterns);
463
463
464 let mut parents = HashSet::new();
464 let mut parents = HashSet::new();
465
465
466 parents.extend(
466 parents.extend(
467 DirsMultiset::from_manifest(&dirs)
467 DirsMultiset::from_manifest(&dirs)
468 .map_err(|e| match e {
468 .map_err(|e| match e {
469 DirstateMapError::InvalidPath(e) => e,
469 DirstateMapError::InvalidPath(e) => e,
470 _ => unreachable!(),
470 _ => unreachable!(),
471 })?
471 })?
472 .iter()
472 .iter()
473 .map(|k| k.to_owned()),
473 .map(ToOwned::to_owned),
474 );
474 );
475 parents.extend(
475 parents.extend(
476 DirsMultiset::from_manifest(&roots)
476 DirsMultiset::from_manifest(&roots)
477 .map_err(|e| match e {
477 .map_err(|e| match e {
478 DirstateMapError::InvalidPath(e) => e,
478 DirstateMapError::InvalidPath(e) => e,
479 _ => unreachable!(),
479 _ => unreachable!(),
480 })?
480 })?
481 .iter()
481 .iter()
482 .map(|k| k.to_owned()),
482 .map(ToOwned::to_owned),
483 );
483 );
484
484
485 Ok(RootsDirsAndParents {
485 Ok(RootsDirsAndParents {
486 roots: HashSet::from_iter(roots),
486 roots: HashSet::from_iter(roots),
487 dirs: HashSet::from_iter(dirs),
487 dirs: HashSet::from_iter(dirs),
488 parents,
488 parents,
489 })
489 })
490 }
490 }
491
491
492 /// Returns a function that checks whether a given file (in the general sense)
492 /// Returns a function that checks whether a given file (in the general sense)
493 /// should be matched.
493 /// should be matched.
494 fn build_match<'a, 'b>(
494 fn build_match<'a, 'b>(
495 ignore_patterns: &'a [IgnorePattern],
495 ignore_patterns: &'a [IgnorePattern],
496 root_dir: impl AsRef<Path>,
496 root_dir: impl AsRef<Path>,
497 ) -> PatternResult<(
497 ) -> PatternResult<(
498 Vec<u8>,
498 Vec<u8>,
499 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
499 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
500 Vec<PatternFileWarning>,
500 Vec<PatternFileWarning>,
501 )> {
501 )> {
502 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
502 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
503 // For debugging and printing
503 // For debugging and printing
504 let mut patterns = vec![];
504 let mut patterns = vec![];
505 let mut all_warnings = vec![];
505 let mut all_warnings = vec![];
506
506
507 let (subincludes, ignore_patterns) =
507 let (subincludes, ignore_patterns) =
508 filter_subincludes(ignore_patterns, root_dir)?;
508 filter_subincludes(ignore_patterns, root_dir)?;
509
509
510 if !subincludes.is_empty() {
510 if !subincludes.is_empty() {
511 // Build prefix-based matcher functions for subincludes
511 // Build prefix-based matcher functions for subincludes
512 let mut submatchers = FastHashMap::default();
512 let mut submatchers = FastHashMap::default();
513 let mut prefixes = vec![];
513 let mut prefixes = vec![];
514
514
515 for SubInclude { prefix, root, path } in subincludes.into_iter() {
515 for SubInclude { prefix, root, path } in subincludes.into_iter() {
516 let (match_fn, warnings) =
516 let (match_fn, warnings) =
517 get_ignore_function(vec![path.to_path_buf()], root)?;
517 get_ignore_function(vec![path.to_path_buf()], root)?;
518 all_warnings.extend(warnings);
518 all_warnings.extend(warnings);
519 prefixes.push(prefix.to_owned());
519 prefixes.push(prefix.to_owned());
520 submatchers.insert(prefix.to_owned(), match_fn);
520 submatchers.insert(prefix.to_owned(), match_fn);
521 }
521 }
522
522
523 let match_subinclude = move |filename: &HgPath| {
523 let match_subinclude = move |filename: &HgPath| {
524 for prefix in prefixes.iter() {
524 for prefix in prefixes.iter() {
525 if let Some(rel) = filename.relative_to(prefix) {
525 if let Some(rel) = filename.relative_to(prefix) {
526 if (submatchers.get(prefix).unwrap())(rel) {
526 if (submatchers[prefix])(rel) {
527 return true;
527 return true;
528 }
528 }
529 }
529 }
530 }
530 }
531 false
531 false
532 };
532 };
533
533
534 match_funcs.push(Box::new(match_subinclude));
534 match_funcs.push(Box::new(match_subinclude));
535 }
535 }
536
536
537 if !ignore_patterns.is_empty() {
537 if !ignore_patterns.is_empty() {
538 // Either do dumb matching if all patterns are rootfiles, or match
538 // Either do dumb matching if all patterns are rootfiles, or match
539 // with a regex.
539 // with a regex.
540 if ignore_patterns
540 if ignore_patterns
541 .iter()
541 .iter()
542 .all(|k| k.syntax == PatternSyntax::RootFiles)
542 .all(|k| k.syntax == PatternSyntax::RootFiles)
543 {
543 {
544 let dirs: HashSet<_> = ignore_patterns
544 let dirs: HashSet<_> = ignore_patterns
545 .iter()
545 .iter()
546 .map(|k| k.pattern.to_owned())
546 .map(|k| k.pattern.to_owned())
547 .collect();
547 .collect();
548 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
548 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
549
549
550 let match_func = move |path: &HgPath| -> bool {
550 let match_func = move |path: &HgPath| -> bool {
551 let path = path.as_bytes();
551 let path = path.as_bytes();
552 let i = path.iter().rfind(|a| **a == b'/');
552 let i = path.iter().rfind(|a| **a == b'/');
553 let dir = if let Some(i) = i {
553 let dir = if let Some(i) = i {
554 &path[..*i as usize]
554 &path[..*i as usize]
555 } else {
555 } else {
556 b"."
556 b"."
557 };
557 };
558 dirs.contains(dir.deref())
558 dirs.contains(dir.deref())
559 };
559 };
560 match_funcs.push(Box::new(match_func));
560 match_funcs.push(Box::new(match_func));
561
561
562 patterns.extend(b"rootfilesin: ");
562 patterns.extend(b"rootfilesin: ");
563 dirs_vec.sort();
563 dirs_vec.sort();
564 patterns.extend(dirs_vec.escaped_bytes());
564 patterns.extend(dirs_vec.escaped_bytes());
565 } else {
565 } else {
566 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
566 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
567 patterns = new_re;
567 patterns = new_re;
568 match_funcs.push(match_func)
568 match_funcs.push(match_func)
569 }
569 }
570 }
570 }
571
571
572 Ok(if match_funcs.len() == 1 {
572 Ok(if match_funcs.len() == 1 {
573 (patterns, match_funcs.remove(0), all_warnings)
573 (patterns, match_funcs.remove(0), all_warnings)
574 } else {
574 } else {
575 (
575 (
576 patterns,
576 patterns,
577 Box::new(move |f: &HgPath| -> bool {
577 Box::new(move |f: &HgPath| -> bool {
578 match_funcs.iter().any(|match_func| match_func(f))
578 match_funcs.iter().any(|match_func| match_func(f))
579 }),
579 }),
580 all_warnings,
580 all_warnings,
581 )
581 )
582 })
582 })
583 }
583 }
584
584
585 /// Parses all "ignore" files with their recursive includes and returns a
585 /// Parses all "ignore" files with their recursive includes and returns a
586 /// function that checks whether a given file (in the general sense) should be
586 /// function that checks whether a given file (in the general sense) should be
587 /// ignored.
587 /// ignored.
588 pub fn get_ignore_function<'a>(
588 pub fn get_ignore_function<'a>(
589 all_pattern_files: Vec<PathBuf>,
589 all_pattern_files: Vec<PathBuf>,
590 root_dir: impl AsRef<Path>,
590 root_dir: impl AsRef<Path>,
591 ) -> PatternResult<(
591 ) -> PatternResult<(
592 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
592 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
593 Vec<PatternFileWarning>,
593 Vec<PatternFileWarning>,
594 )> {
594 )> {
595 let mut all_patterns = vec![];
595 let mut all_patterns = vec![];
596 let mut all_warnings = vec![];
596 let mut all_warnings = vec![];
597
597
598 for pattern_file in all_pattern_files.into_iter() {
598 for pattern_file in all_pattern_files.into_iter() {
599 let (patterns, warnings) =
599 let (patterns, warnings) =
600 get_patterns_from_file(pattern_file, &root_dir)?;
600 get_patterns_from_file(pattern_file, &root_dir)?;
601
601
602 all_patterns.extend(patterns.to_owned());
602 all_patterns.extend(patterns.to_owned());
603 all_warnings.extend(warnings);
603 all_warnings.extend(warnings);
604 }
604 }
605 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
605 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
606 all_warnings.extend(warnings);
606 all_warnings.extend(warnings);
607 Ok((
607 Ok((
608 Box::new(move |path: &HgPath| matcher.matches(path)),
608 Box::new(move |path: &HgPath| matcher.matches(path)),
609 all_warnings,
609 all_warnings,
610 ))
610 ))
611 }
611 }
612
612
613 impl<'a> IncludeMatcher<'a> {
613 impl<'a> IncludeMatcher<'a> {
614 pub fn new(
614 pub fn new(
615 ignore_patterns: Vec<IgnorePattern>,
615 ignore_patterns: Vec<IgnorePattern>,
616 root_dir: impl AsRef<Path>,
616 root_dir: impl AsRef<Path>,
617 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
617 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
618 let (patterns, match_fn, warnings) =
618 let (patterns, match_fn, warnings) =
619 build_match(&ignore_patterns, root_dir)?;
619 build_match(&ignore_patterns, root_dir)?;
620 let RootsDirsAndParents {
620 let RootsDirsAndParents {
621 roots,
621 roots,
622 dirs,
622 dirs,
623 parents,
623 parents,
624 } = roots_dirs_and_parents(&ignore_patterns)?;
624 } = roots_dirs_and_parents(&ignore_patterns)?;
625
625
626 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
626 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
627 PatternSyntax::Path | PatternSyntax::RelPath => true,
627 PatternSyntax::Path | PatternSyntax::RelPath => true,
628 _ => false,
628 _ => false,
629 });
629 });
630
630
631 Ok((
631 Ok((
632 Self {
632 Self {
633 patterns,
633 patterns,
634 match_fn,
634 match_fn,
635 prefix,
635 prefix,
636 roots,
636 roots,
637 dirs,
637 dirs,
638 parents,
638 parents,
639 },
639 },
640 warnings,
640 warnings,
641 ))
641 ))
642 }
642 }
643
643
644 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
644 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
645 // TODO cache
645 // TODO cache
646 let thing = self
646 let thing = self
647 .dirs
647 .dirs
648 .iter()
648 .iter()
649 .chain(self.roots.iter())
649 .chain(self.roots.iter())
650 .chain(self.parents.iter());
650 .chain(self.parents.iter());
651 DirsChildrenMultiset::new(thing, Some(&self.parents))
651 DirsChildrenMultiset::new(thing, Some(&self.parents))
652 }
652 }
653 }
653 }
654
654
655 impl<'a> Display for IncludeMatcher<'a> {
655 impl<'a> Display for IncludeMatcher<'a> {
656 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
656 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
657 // XXX What about exact matches?
657 // XXX What about exact matches?
658 // I'm not sure it's worth it to clone the HashSet and keep it
658 // I'm not sure it's worth it to clone the HashSet and keep it
659 // around just in case someone wants to display the matcher, plus
659 // around just in case someone wants to display the matcher, plus
660 // it's going to be unreadable after a few entries, but we need to
660 // it's going to be unreadable after a few entries, but we need to
661 // inform in this display that exact matches are being used and are
661 // inform in this display that exact matches are being used and are
662 // (on purpose) missing from the `includes`.
662 // (on purpose) missing from the `includes`.
663 write!(
663 write!(
664 f,
664 f,
665 "IncludeMatcher(includes='{}')",
665 "IncludeMatcher(includes='{}')",
666 String::from_utf8_lossy(&self.patterns.escaped_bytes())
666 String::from_utf8_lossy(&self.patterns.escaped_bytes())
667 )
667 )
668 }
668 }
669 }
669 }
670
670
671 #[cfg(test)]
671 #[cfg(test)]
672 mod tests {
672 mod tests {
673 use super::*;
673 use super::*;
674 use pretty_assertions::assert_eq;
674 use pretty_assertions::assert_eq;
675 use std::path::Path;
675 use std::path::Path;
676
676
677 #[test]
677 #[test]
678 fn test_roots_and_dirs() {
678 fn test_roots_and_dirs() {
679 let pats = vec![
679 let pats = vec![
680 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
680 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
681 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
681 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
682 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
682 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
683 ];
683 ];
684 let (roots, dirs) = roots_and_dirs(&pats);
684 let (roots, dirs) = roots_and_dirs(&pats);
685
685
686 assert_eq!(
686 assert_eq!(
687 roots,
687 roots,
688 vec!(
688 vec!(
689 HgPathBuf::from_bytes(b"g/h"),
689 HgPathBuf::from_bytes(b"g/h"),
690 HgPathBuf::from_bytes(b"g/h"),
690 HgPathBuf::from_bytes(b"g/h"),
691 HgPathBuf::new()
691 HgPathBuf::new()
692 ),
692 ),
693 );
693 );
694 assert_eq!(dirs, vec!());
694 assert_eq!(dirs, vec!());
695 }
695 }
696
696
697 #[test]
697 #[test]
698 fn test_roots_dirs_and_parents() {
698 fn test_roots_dirs_and_parents() {
699 let pats = vec![
699 let pats = vec![
700 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
700 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
701 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
701 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
702 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
702 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
703 ];
703 ];
704
704
705 let mut roots = HashSet::new();
705 let mut roots = HashSet::new();
706 roots.insert(HgPathBuf::from_bytes(b"g/h"));
706 roots.insert(HgPathBuf::from_bytes(b"g/h"));
707 roots.insert(HgPathBuf::new());
707 roots.insert(HgPathBuf::new());
708
708
709 let dirs = HashSet::new();
709 let dirs = HashSet::new();
710
710
711 let mut parents = HashSet::new();
711 let mut parents = HashSet::new();
712 parents.insert(HgPathBuf::new());
712 parents.insert(HgPathBuf::new());
713 parents.insert(HgPathBuf::from_bytes(b"g"));
713 parents.insert(HgPathBuf::from_bytes(b"g"));
714
714
715 assert_eq!(
715 assert_eq!(
716 roots_dirs_and_parents(&pats).unwrap(),
716 roots_dirs_and_parents(&pats).unwrap(),
717 RootsDirsAndParents {
717 RootsDirsAndParents {
718 roots,
718 roots,
719 dirs,
719 dirs,
720 parents
720 parents
721 }
721 }
722 );
722 );
723 }
723 }
724
724
725 #[test]
725 #[test]
726 fn test_filematcher_visit_children_set() {
726 fn test_filematcher_visit_children_set() {
727 // Visitchildrenset
727 // Visitchildrenset
728 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
728 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
729 let matcher = FileMatcher::new(&files).unwrap();
729 let matcher = FileMatcher::new(&files).unwrap();
730
730
731 let mut set = HashSet::new();
731 let mut set = HashSet::new();
732 set.insert(HgPath::new(b"dir"));
732 set.insert(HgPath::new(b"dir"));
733 assert_eq!(
733 assert_eq!(
734 matcher.visit_children_set(HgPath::new(b"")),
734 matcher.visit_children_set(HgPath::new(b"")),
735 VisitChildrenSet::Set(set)
735 VisitChildrenSet::Set(set)
736 );
736 );
737
737
738 let mut set = HashSet::new();
738 let mut set = HashSet::new();
739 set.insert(HgPath::new(b"subdir"));
739 set.insert(HgPath::new(b"subdir"));
740 assert_eq!(
740 assert_eq!(
741 matcher.visit_children_set(HgPath::new(b"dir")),
741 matcher.visit_children_set(HgPath::new(b"dir")),
742 VisitChildrenSet::Set(set)
742 VisitChildrenSet::Set(set)
743 );
743 );
744
744
745 let mut set = HashSet::new();
745 let mut set = HashSet::new();
746 set.insert(HgPath::new(b"foo.txt"));
746 set.insert(HgPath::new(b"foo.txt"));
747 assert_eq!(
747 assert_eq!(
748 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
748 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
749 VisitChildrenSet::Set(set)
749 VisitChildrenSet::Set(set)
750 );
750 );
751
751
752 assert_eq!(
752 assert_eq!(
753 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
753 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
754 VisitChildrenSet::Empty
754 VisitChildrenSet::Empty
755 );
755 );
756 assert_eq!(
756 assert_eq!(
757 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
757 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
758 VisitChildrenSet::Empty
758 VisitChildrenSet::Empty
759 );
759 );
760 assert_eq!(
760 assert_eq!(
761 matcher.visit_children_set(HgPath::new(b"folder")),
761 matcher.visit_children_set(HgPath::new(b"folder")),
762 VisitChildrenSet::Empty
762 VisitChildrenSet::Empty
763 );
763 );
764 }
764 }
765
765
766 #[test]
766 #[test]
767 fn test_filematcher_visit_children_set_files_and_dirs() {
767 fn test_filematcher_visit_children_set_files_and_dirs() {
768 let files = vec![
768 let files = vec![
769 HgPath::new(b"rootfile.txt"),
769 HgPath::new(b"rootfile.txt"),
770 HgPath::new(b"a/file1.txt"),
770 HgPath::new(b"a/file1.txt"),
771 HgPath::new(b"a/b/file2.txt"),
771 HgPath::new(b"a/b/file2.txt"),
772 // No file in a/b/c
772 // No file in a/b/c
773 HgPath::new(b"a/b/c/d/file4.txt"),
773 HgPath::new(b"a/b/c/d/file4.txt"),
774 ];
774 ];
775 let matcher = FileMatcher::new(&files).unwrap();
775 let matcher = FileMatcher::new(&files).unwrap();
776
776
777 let mut set = HashSet::new();
777 let mut set = HashSet::new();
778 set.insert(HgPath::new(b"a"));
778 set.insert(HgPath::new(b"a"));
779 set.insert(HgPath::new(b"rootfile.txt"));
779 set.insert(HgPath::new(b"rootfile.txt"));
780 assert_eq!(
780 assert_eq!(
781 matcher.visit_children_set(HgPath::new(b"")),
781 matcher.visit_children_set(HgPath::new(b"")),
782 VisitChildrenSet::Set(set)
782 VisitChildrenSet::Set(set)
783 );
783 );
784
784
785 let mut set = HashSet::new();
785 let mut set = HashSet::new();
786 set.insert(HgPath::new(b"b"));
786 set.insert(HgPath::new(b"b"));
787 set.insert(HgPath::new(b"file1.txt"));
787 set.insert(HgPath::new(b"file1.txt"));
788 assert_eq!(
788 assert_eq!(
789 matcher.visit_children_set(HgPath::new(b"a")),
789 matcher.visit_children_set(HgPath::new(b"a")),
790 VisitChildrenSet::Set(set)
790 VisitChildrenSet::Set(set)
791 );
791 );
792
792
793 let mut set = HashSet::new();
793 let mut set = HashSet::new();
794 set.insert(HgPath::new(b"c"));
794 set.insert(HgPath::new(b"c"));
795 set.insert(HgPath::new(b"file2.txt"));
795 set.insert(HgPath::new(b"file2.txt"));
796 assert_eq!(
796 assert_eq!(
797 matcher.visit_children_set(HgPath::new(b"a/b")),
797 matcher.visit_children_set(HgPath::new(b"a/b")),
798 VisitChildrenSet::Set(set)
798 VisitChildrenSet::Set(set)
799 );
799 );
800
800
801 let mut set = HashSet::new();
801 let mut set = HashSet::new();
802 set.insert(HgPath::new(b"d"));
802 set.insert(HgPath::new(b"d"));
803 assert_eq!(
803 assert_eq!(
804 matcher.visit_children_set(HgPath::new(b"a/b/c")),
804 matcher.visit_children_set(HgPath::new(b"a/b/c")),
805 VisitChildrenSet::Set(set)
805 VisitChildrenSet::Set(set)
806 );
806 );
807 let mut set = HashSet::new();
807 let mut set = HashSet::new();
808 set.insert(HgPath::new(b"file4.txt"));
808 set.insert(HgPath::new(b"file4.txt"));
809 assert_eq!(
809 assert_eq!(
810 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
810 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
811 VisitChildrenSet::Set(set)
811 VisitChildrenSet::Set(set)
812 );
812 );
813
813
814 assert_eq!(
814 assert_eq!(
815 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
815 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
816 VisitChildrenSet::Empty
816 VisitChildrenSet::Empty
817 );
817 );
818 assert_eq!(
818 assert_eq!(
819 matcher.visit_children_set(HgPath::new(b"folder")),
819 matcher.visit_children_set(HgPath::new(b"folder")),
820 VisitChildrenSet::Empty
820 VisitChildrenSet::Empty
821 );
821 );
822 }
822 }
823
823
824 #[test]
824 #[test]
825 fn test_includematcher() {
825 fn test_includematcher() {
826 // VisitchildrensetPrefix
826 // VisitchildrensetPrefix
827 let (matcher, _) = IncludeMatcher::new(
827 let (matcher, _) = IncludeMatcher::new(
828 vec![IgnorePattern::new(
828 vec![IgnorePattern::new(
829 PatternSyntax::RelPath,
829 PatternSyntax::RelPath,
830 b"dir/subdir",
830 b"dir/subdir",
831 Path::new(""),
831 Path::new(""),
832 )],
832 )],
833 "",
833 "",
834 )
834 )
835 .unwrap();
835 .unwrap();
836
836
837 let mut set = HashSet::new();
837 let mut set = HashSet::new();
838 set.insert(HgPath::new(b"dir"));
838 set.insert(HgPath::new(b"dir"));
839 assert_eq!(
839 assert_eq!(
840 matcher.visit_children_set(HgPath::new(b"")),
840 matcher.visit_children_set(HgPath::new(b"")),
841 VisitChildrenSet::Set(set)
841 VisitChildrenSet::Set(set)
842 );
842 );
843
843
844 let mut set = HashSet::new();
844 let mut set = HashSet::new();
845 set.insert(HgPath::new(b"subdir"));
845 set.insert(HgPath::new(b"subdir"));
846 assert_eq!(
846 assert_eq!(
847 matcher.visit_children_set(HgPath::new(b"dir")),
847 matcher.visit_children_set(HgPath::new(b"dir")),
848 VisitChildrenSet::Set(set)
848 VisitChildrenSet::Set(set)
849 );
849 );
850 assert_eq!(
850 assert_eq!(
851 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
851 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
852 VisitChildrenSet::Recursive
852 VisitChildrenSet::Recursive
853 );
853 );
854 // OPT: This should probably be 'all' if its parent is?
854 // OPT: This should probably be 'all' if its parent is?
855 assert_eq!(
855 assert_eq!(
856 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
856 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
857 VisitChildrenSet::This
857 VisitChildrenSet::This
858 );
858 );
859 assert_eq!(
859 assert_eq!(
860 matcher.visit_children_set(HgPath::new(b"folder")),
860 matcher.visit_children_set(HgPath::new(b"folder")),
861 VisitChildrenSet::Empty
861 VisitChildrenSet::Empty
862 );
862 );
863
863
864 // VisitchildrensetRootfilesin
864 // VisitchildrensetRootfilesin
865 let (matcher, _) = IncludeMatcher::new(
865 let (matcher, _) = IncludeMatcher::new(
866 vec![IgnorePattern::new(
866 vec![IgnorePattern::new(
867 PatternSyntax::RootFiles,
867 PatternSyntax::RootFiles,
868 b"dir/subdir",
868 b"dir/subdir",
869 Path::new(""),
869 Path::new(""),
870 )],
870 )],
871 "",
871 "",
872 )
872 )
873 .unwrap();
873 .unwrap();
874
874
875 let mut set = HashSet::new();
875 let mut set = HashSet::new();
876 set.insert(HgPath::new(b"dir"));
876 set.insert(HgPath::new(b"dir"));
877 assert_eq!(
877 assert_eq!(
878 matcher.visit_children_set(HgPath::new(b"")),
878 matcher.visit_children_set(HgPath::new(b"")),
879 VisitChildrenSet::Set(set)
879 VisitChildrenSet::Set(set)
880 );
880 );
881
881
882 let mut set = HashSet::new();
882 let mut set = HashSet::new();
883 set.insert(HgPath::new(b"subdir"));
883 set.insert(HgPath::new(b"subdir"));
884 assert_eq!(
884 assert_eq!(
885 matcher.visit_children_set(HgPath::new(b"dir")),
885 matcher.visit_children_set(HgPath::new(b"dir")),
886 VisitChildrenSet::Set(set)
886 VisitChildrenSet::Set(set)
887 );
887 );
888
888
889 assert_eq!(
889 assert_eq!(
890 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
890 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
891 VisitChildrenSet::This
891 VisitChildrenSet::This
892 );
892 );
893 assert_eq!(
893 assert_eq!(
894 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
894 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
895 VisitChildrenSet::Empty
895 VisitChildrenSet::Empty
896 );
896 );
897 assert_eq!(
897 assert_eq!(
898 matcher.visit_children_set(HgPath::new(b"folder")),
898 matcher.visit_children_set(HgPath::new(b"folder")),
899 VisitChildrenSet::Empty
899 VisitChildrenSet::Empty
900 );
900 );
901
901
902 // VisitchildrensetGlob
902 // VisitchildrensetGlob
903 let (matcher, _) = IncludeMatcher::new(
903 let (matcher, _) = IncludeMatcher::new(
904 vec![IgnorePattern::new(
904 vec![IgnorePattern::new(
905 PatternSyntax::Glob,
905 PatternSyntax::Glob,
906 b"dir/z*",
906 b"dir/z*",
907 Path::new(""),
907 Path::new(""),
908 )],
908 )],
909 "",
909 "",
910 )
910 )
911 .unwrap();
911 .unwrap();
912
912
913 let mut set = HashSet::new();
913 let mut set = HashSet::new();
914 set.insert(HgPath::new(b"dir"));
914 set.insert(HgPath::new(b"dir"));
915 assert_eq!(
915 assert_eq!(
916 matcher.visit_children_set(HgPath::new(b"")),
916 matcher.visit_children_set(HgPath::new(b"")),
917 VisitChildrenSet::Set(set)
917 VisitChildrenSet::Set(set)
918 );
918 );
919 assert_eq!(
919 assert_eq!(
920 matcher.visit_children_set(HgPath::new(b"folder")),
920 matcher.visit_children_set(HgPath::new(b"folder")),
921 VisitChildrenSet::Empty
921 VisitChildrenSet::Empty
922 );
922 );
923 assert_eq!(
923 assert_eq!(
924 matcher.visit_children_set(HgPath::new(b"dir")),
924 matcher.visit_children_set(HgPath::new(b"dir")),
925 VisitChildrenSet::This
925 VisitChildrenSet::This
926 );
926 );
927 // OPT: these should probably be set().
927 // OPT: these should probably be set().
928 assert_eq!(
928 assert_eq!(
929 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
929 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
930 VisitChildrenSet::This
930 VisitChildrenSet::This
931 );
931 );
932 assert_eq!(
932 assert_eq!(
933 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
933 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
934 VisitChildrenSet::This
934 VisitChildrenSet::This
935 );
935 );
936 }
936 }
937 }
937 }
@@ -1,56 +1,61
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Mercurial concepts for handling revision history
6 //! Mercurial concepts for handling revision history
7
7
8 pub mod node;
8 pub mod node;
9 pub mod nodemap;
9 pub mod nodemap;
10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
11
11
12 /// Mercurial revision numbers
12 /// Mercurial revision numbers
13 ///
13 ///
14 /// As noted in revlog.c, revision numbers are actually encoded in
14 /// As noted in revlog.c, revision numbers are actually encoded in
15 /// 4 bytes, and are liberally converted to ints, whence the i32
15 /// 4 bytes, and are liberally converted to ints, whence the i32
16 pub type Revision = i32;
16 pub type Revision = i32;
17
17
18 /// Marker expressing the absence of a parent
18 /// Marker expressing the absence of a parent
19 ///
19 ///
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
21 /// to be smaller than all existing revisions.
21 /// to be smaller than all existing revisions.
22 pub const NULL_REVISION: Revision = -1;
22 pub const NULL_REVISION: Revision = -1;
23
23
24 /// Same as `mercurial.node.wdirrev`
24 /// Same as `mercurial.node.wdirrev`
25 ///
25 ///
26 /// This is also equal to `i32::max_value()`, but it's better to spell
26 /// This is also equal to `i32::max_value()`, but it's better to spell
27 /// it out explicitely, same as in `mercurial.node`
27 /// it out explicitely, same as in `mercurial.node`
28 #[allow(clippy::unreadable_literal)]
28 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29
30
30 /// The simplest expression of what we need of Mercurial DAGs.
31 /// The simplest expression of what we need of Mercurial DAGs.
31 pub trait Graph {
32 pub trait Graph {
32 /// Return the two parents of the given `Revision`.
33 /// Return the two parents of the given `Revision`.
33 ///
34 ///
34 /// Each of the parents can be independently `NULL_REVISION`
35 /// Each of the parents can be independently `NULL_REVISION`
35 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 }
37 }
37
38
38 #[derive(Clone, Debug, PartialEq)]
39 #[derive(Clone, Debug, PartialEq)]
39 pub enum GraphError {
40 pub enum GraphError {
40 ParentOutOfRange(Revision),
41 ParentOutOfRange(Revision),
41 WorkingDirectoryUnsupported,
42 WorkingDirectoryUnsupported,
42 }
43 }
43
44
44 /// The Mercurial Revlog Index
45 /// The Mercurial Revlog Index
45 ///
46 ///
46 /// This is currently limited to the minimal interface that is needed for
47 /// This is currently limited to the minimal interface that is needed for
47 /// the [`nodemap`](nodemap/index.html) module
48 /// the [`nodemap`](nodemap/index.html) module
48 pub trait RevlogIndex {
49 pub trait RevlogIndex {
49 /// Total number of Revisions referenced in this index
50 /// Total number of Revisions referenced in this index
50 fn len(&self) -> usize;
51 fn len(&self) -> usize;
51
52
53 fn is_empty(&self) -> bool {
54 self.len() == 0
55 }
56
52 /// Return a reference to the Node or `None` if rev is out of bounds
57 /// Return a reference to the Node or `None` if rev is out of bounds
53 ///
58 ///
54 /// `NULL_REVISION` is not considered to be out of bounds.
59 /// `NULL_REVISION` is not considered to be out of bounds.
55 fn node(&self, rev: Revision) -> Option<&Node>;
60 fn node(&self, rev: Revision) -> Option<&Node>;
56 }
61 }
@@ -1,429 +1,433
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 //
2 //
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5
5
6 //! Definitions and utilities for Revision nodes
6 //! Definitions and utilities for Revision nodes
7 //!
7 //!
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 //! of a revision.
9 //! of a revision.
10
10
11 use hex::{self, FromHex, FromHexError};
11 use hex::{self, FromHex, FromHexError};
12
12
13 /// The length in bytes of a `Node`
13 /// The length in bytes of a `Node`
14 ///
14 ///
15 /// This constant is meant to ease refactors of this module, and
15 /// This constant is meant to ease refactors of this module, and
16 /// are private so that calling code does not expect all nodes have
16 /// are private so that calling code does not expect all nodes have
17 /// the same size, should we support several formats concurrently in
17 /// the same size, should we support several formats concurrently in
18 /// the future.
18 /// the future.
19 const NODE_BYTES_LENGTH: usize = 20;
19 const NODE_BYTES_LENGTH: usize = 20;
20
20
21 /// The length in bytes of a `Node`
21 /// The length in bytes of a `Node`
22 ///
22 ///
23 /// see also `NODES_BYTES_LENGTH` about it being private.
23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25
25
26 /// Private alias for readability and to ease future change
26 /// Private alias for readability and to ease future change
27 type NodeData = [u8; NODE_BYTES_LENGTH];
27 type NodeData = [u8; NODE_BYTES_LENGTH];
28
28
29 /// Binary revision SHA
29 /// Binary revision SHA
30 ///
30 ///
31 /// ## Future changes of hash size
31 /// ## Future changes of hash size
32 ///
32 ///
33 /// To accomodate future changes of hash size, Rust callers
33 /// To accomodate future changes of hash size, Rust callers
34 /// should use the conversion methods at the boundaries (FFI, actual
34 /// should use the conversion methods at the boundaries (FFI, actual
35 /// computation of hashes and I/O) only, and only if required.
35 /// computation of hashes and I/O) only, and only if required.
36 ///
36 ///
37 /// All other callers outside of unit tests should just handle `Node` values
37 /// All other callers outside of unit tests should just handle `Node` values
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 /// if they need a loop boundary.
39 /// if they need a loop boundary.
40 ///
40 ///
41 /// All methods that create a `Node` either take a type that enforces
41 /// All methods that create a `Node` either take a type that enforces
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 ///
43 ///
44 /// [`nybbles_len`]: #method.nybbles_len
44 /// [`nybbles_len`]: #method.nybbles_len
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 #[derive(Clone, Debug, PartialEq)]
46 #[derive(Clone, Debug, PartialEq)]
47 #[repr(transparent)]
47 #[repr(transparent)]
48 pub struct Node {
48 pub struct Node {
49 data: NodeData,
49 data: NodeData,
50 }
50 }
51
51
52 /// The node value for NULL_REVISION
52 /// The node value for NULL_REVISION
53 pub const NULL_NODE: Node = Node {
53 pub const NULL_NODE: Node = Node {
54 data: [0; NODE_BYTES_LENGTH],
54 data: [0; NODE_BYTES_LENGTH],
55 };
55 };
56
56
57 impl From<NodeData> for Node {
57 impl From<NodeData> for Node {
58 fn from(data: NodeData) -> Node {
58 fn from(data: NodeData) -> Node {
59 Node { data }
59 Node { data }
60 }
60 }
61 }
61 }
62
62
63 #[derive(Debug, PartialEq)]
63 #[derive(Debug, PartialEq)]
64 pub enum NodeError {
64 pub enum NodeError {
65 ExactLengthRequired(usize, String),
65 ExactLengthRequired(usize, String),
66 PrefixTooLong(String),
66 PrefixTooLong(String),
67 HexError(FromHexError, String),
67 HexError(FromHexError, String),
68 }
68 }
69
69
70 /// Low level utility function, also for prefixes
70 /// Low level utility function, also for prefixes
71 fn get_nybble(s: &[u8], i: usize) -> u8 {
71 fn get_nybble(s: &[u8], i: usize) -> u8 {
72 if i % 2 == 0 {
72 if i % 2 == 0 {
73 s[i / 2] >> 4
73 s[i / 2] >> 4
74 } else {
74 } else {
75 s[i / 2] & 0x0f
75 s[i / 2] & 0x0f
76 }
76 }
77 }
77 }
78
78
79 impl Node {
79 impl Node {
80 /// Retrieve the `i`th half-byte of the binary data.
80 /// Retrieve the `i`th half-byte of the binary data.
81 ///
81 ///
82 /// This is also the `i`th hexadecimal digit in numeric form,
82 /// This is also the `i`th hexadecimal digit in numeric form,
83 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
83 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
84 pub fn get_nybble(&self, i: usize) -> u8 {
84 pub fn get_nybble(&self, i: usize) -> u8 {
85 get_nybble(&self.data, i)
85 get_nybble(&self.data, i)
86 }
86 }
87
87
88 /// Length of the data, in nybbles
88 /// Length of the data, in nybbles
89 pub fn nybbles_len(&self) -> usize {
89 pub fn nybbles_len(&self) -> usize {
90 // public exposure as an instance method only, so that we can
90 // public exposure as an instance method only, so that we can
91 // easily support several sizes of hashes if needed in the future.
91 // easily support several sizes of hashes if needed in the future.
92 NODE_NYBBLES_LENGTH
92 NODE_NYBBLES_LENGTH
93 }
93 }
94
94
95 /// Convert from hexadecimal string representation
95 /// Convert from hexadecimal string representation
96 ///
96 ///
97 /// Exact length is required.
97 /// Exact length is required.
98 ///
98 ///
99 /// To be used in FFI and I/O only, in order to facilitate future
99 /// To be used in FFI and I/O only, in order to facilitate future
100 /// changes of hash format.
100 /// changes of hash format.
101 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
101 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
102 Ok(NodeData::from_hex(hex)
102 Ok(NodeData::from_hex(hex)
103 .map_err(|e| NodeError::from((e, hex)))?
103 .map_err(|e| NodeError::from((e, hex)))?
104 .into())
104 .into())
105 }
105 }
106
106
107 /// Convert to hexadecimal string representation
107 /// Convert to hexadecimal string representation
108 ///
108 ///
109 /// To be used in FFI and I/O only, in order to facilitate future
109 /// To be used in FFI and I/O only, in order to facilitate future
110 /// changes of hash format.
110 /// changes of hash format.
111 pub fn encode_hex(&self) -> String {
111 pub fn encode_hex(&self) -> String {
112 hex::encode(self.data)
112 hex::encode(self.data)
113 }
113 }
114
114
115 /// Provide access to binary data
115 /// Provide access to binary data
116 ///
116 ///
117 /// This is needed by FFI layers, for instance to return expected
117 /// This is needed by FFI layers, for instance to return expected
118 /// binary values to Python.
118 /// binary values to Python.
119 pub fn as_bytes(&self) -> &[u8] {
119 pub fn as_bytes(&self) -> &[u8] {
120 &self.data
120 &self.data
121 }
121 }
122 }
122 }
123
123
124 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
124 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
125 fn from(err_offender: (FromHexError, T)) -> Self {
125 fn from(err_offender: (FromHexError, T)) -> Self {
126 let (err, offender) = err_offender;
126 let (err, offender) = err_offender;
127 match err {
127 match err {
128 FromHexError::InvalidStringLength => {
128 FromHexError::InvalidStringLength => {
129 NodeError::ExactLengthRequired(
129 NodeError::ExactLengthRequired(
130 NODE_NYBBLES_LENGTH,
130 NODE_NYBBLES_LENGTH,
131 offender.as_ref().to_owned(),
131 offender.as_ref().to_owned(),
132 )
132 )
133 }
133 }
134 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
134 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
135 }
135 }
136 }
136 }
137 }
137 }
138
138
139 /// The beginning of a binary revision SHA.
139 /// The beginning of a binary revision SHA.
140 ///
140 ///
141 /// Since it can potentially come from an hexadecimal representation with
141 /// Since it can potentially come from an hexadecimal representation with
142 /// odd length, it needs to carry around whether the last 4 bits are relevant
142 /// odd length, it needs to carry around whether the last 4 bits are relevant
143 /// or not.
143 /// or not.
144 #[derive(Debug, PartialEq)]
144 #[derive(Debug, PartialEq)]
145 pub struct NodePrefix {
145 pub struct NodePrefix {
146 buf: Vec<u8>,
146 buf: Vec<u8>,
147 is_odd: bool,
147 is_odd: bool,
148 }
148 }
149
149
150 impl NodePrefix {
150 impl NodePrefix {
151 /// Convert from hexadecimal string representation
151 /// Convert from hexadecimal string representation
152 ///
152 ///
153 /// Similarly to `hex::decode`, can be used with Unicode string types
153 /// Similarly to `hex::decode`, can be used with Unicode string types
154 /// (`String`, `&str`) as well as bytes.
154 /// (`String`, `&str`) as well as bytes.
155 ///
155 ///
156 /// To be used in FFI and I/O only, in order to facilitate future
156 /// To be used in FFI and I/O only, in order to facilitate future
157 /// changes of hash format.
157 /// changes of hash format.
158 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
158 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
159 let hex = hex.as_ref();
159 let hex = hex.as_ref();
160 let len = hex.len();
160 let len = hex.len();
161 if len > NODE_NYBBLES_LENGTH {
161 if len > NODE_NYBBLES_LENGTH {
162 return Err(NodeError::PrefixTooLong(
162 return Err(NodeError::PrefixTooLong(
163 String::from_utf8_lossy(hex).to_owned().to_string(),
163 String::from_utf8_lossy(hex).to_owned().to_string(),
164 ));
164 ));
165 }
165 }
166
166
167 let is_odd = len % 2 == 1;
167 let is_odd = len % 2 == 1;
168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
169 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
169 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
170 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
170 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
171
171
172 if is_odd {
172 if is_odd {
173 let latest_char = char::from(hex[len - 1]);
173 let latest_char = char::from(hex[len - 1]);
174 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
174 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
175 (
175 (
176 FromHexError::InvalidHexCharacter {
176 FromHexError::InvalidHexCharacter {
177 c: latest_char,
177 c: latest_char,
178 index: len - 1,
178 index: len - 1,
179 },
179 },
180 String::from_utf8_lossy(hex),
180 String::from_utf8_lossy(hex),
181 )
181 )
182 })? as u8;
182 })? as u8;
183 buf.push(latest_nybble << 4);
183 buf.push(latest_nybble << 4);
184 }
184 }
185 Ok(NodePrefix { buf, is_odd })
185 Ok(NodePrefix { buf, is_odd })
186 }
186 }
187
187
188 pub fn borrow(&self) -> NodePrefixRef {
188 pub fn borrow(&self) -> NodePrefixRef {
189 NodePrefixRef {
189 NodePrefixRef {
190 buf: &self.buf,
190 buf: &self.buf,
191 is_odd: self.is_odd,
191 is_odd: self.is_odd,
192 }
192 }
193 }
193 }
194 }
194 }
195
195
196 #[derive(Clone, Debug, PartialEq)]
196 #[derive(Clone, Debug, PartialEq)]
197 pub struct NodePrefixRef<'a> {
197 pub struct NodePrefixRef<'a> {
198 buf: &'a [u8],
198 buf: &'a [u8],
199 is_odd: bool,
199 is_odd: bool,
200 }
200 }
201
201
202 impl<'a> NodePrefixRef<'a> {
202 impl<'a> NodePrefixRef<'a> {
203 pub fn len(&self) -> usize {
203 pub fn len(&self) -> usize {
204 if self.is_odd {
204 if self.is_odd {
205 self.buf.len() * 2 - 1
205 self.buf.len() * 2 - 1
206 } else {
206 } else {
207 self.buf.len() * 2
207 self.buf.len() * 2
208 }
208 }
209 }
209 }
210
210
211 pub fn is_empty(&self) -> bool {
212 self.len() == 0
213 }
214
211 pub fn is_prefix_of(&self, node: &Node) -> bool {
215 pub fn is_prefix_of(&self, node: &Node) -> bool {
212 if self.is_odd {
216 if self.is_odd {
213 let buf = self.buf;
217 let buf = self.buf;
214 let last_pos = buf.len() - 1;
218 let last_pos = buf.len() - 1;
215 node.data.starts_with(buf.split_at(last_pos).0)
219 node.data.starts_with(buf.split_at(last_pos).0)
216 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
220 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
217 } else {
221 } else {
218 node.data.starts_with(self.buf)
222 node.data.starts_with(self.buf)
219 }
223 }
220 }
224 }
221
225
222 /// Retrieve the `i`th half-byte from the prefix.
226 /// Retrieve the `i`th half-byte from the prefix.
223 ///
227 ///
224 /// This is also the `i`th hexadecimal digit in numeric form,
228 /// This is also the `i`th hexadecimal digit in numeric form,
225 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
229 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
226 pub fn get_nybble(&self, i: usize) -> u8 {
230 pub fn get_nybble(&self, i: usize) -> u8 {
227 assert!(i < self.len());
231 assert!(i < self.len());
228 get_nybble(self.buf, i)
232 get_nybble(self.buf, i)
229 }
233 }
230
234
231 /// Return the index first nybble that's different from `node`
235 /// Return the index first nybble that's different from `node`
232 ///
236 ///
233 /// If the return value is `None` that means that `self` is
237 /// If the return value is `None` that means that `self` is
234 /// a prefix of `node`, but the current method is a bit slower
238 /// a prefix of `node`, but the current method is a bit slower
235 /// than `is_prefix_of`.
239 /// than `is_prefix_of`.
236 ///
240 ///
237 /// Returned index is as in `get_nybble`, i.e., starting at 0.
241 /// Returned index is as in `get_nybble`, i.e., starting at 0.
238 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
242 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
239 let buf = self.buf;
243 let buf = self.buf;
240 let until = if self.is_odd {
244 let until = if self.is_odd {
241 buf.len() - 1
245 buf.len() - 1
242 } else {
246 } else {
243 buf.len()
247 buf.len()
244 };
248 };
245 for i in 0..until {
249 for (i, item) in buf.iter().enumerate().take(until) {
246 if buf[i] != node.data[i] {
250 if *item != node.data[i] {
247 if buf[i] & 0xf0 == node.data[i] & 0xf0 {
251 return if *item & 0xf0 == node.data[i] & 0xf0 {
248 return Some(2 * i + 1);
252 Some(2 * i + 1)
249 } else {
253 } else {
250 return Some(2 * i);
254 Some(2 * i)
251 }
255 };
252 }
256 }
253 }
257 }
254 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
258 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
255 Some(until * 2)
259 Some(until * 2)
256 } else {
260 } else {
257 None
261 None
258 }
262 }
259 }
263 }
260 }
264 }
261
265
262 /// A shortcut for full `Node` references
266 /// A shortcut for full `Node` references
263 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
267 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
264 fn from(node: &'a Node) -> Self {
268 fn from(node: &'a Node) -> Self {
265 NodePrefixRef {
269 NodePrefixRef {
266 buf: &node.data,
270 buf: &node.data,
267 is_odd: false,
271 is_odd: false,
268 }
272 }
269 }
273 }
270 }
274 }
271
275
272 #[cfg(test)]
276 #[cfg(test)]
273 mod tests {
277 mod tests {
274 use super::*;
278 use super::*;
275
279
276 fn sample_node() -> Node {
280 fn sample_node() -> Node {
277 let mut data = [0; NODE_BYTES_LENGTH];
281 let mut data = [0; NODE_BYTES_LENGTH];
278 data.copy_from_slice(&[
282 data.copy_from_slice(&[
279 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
283 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
280 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
284 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
281 ]);
285 ]);
282 data.into()
286 data.into()
283 }
287 }
284
288
285 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
289 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
286 ///
290 ///
287 /// The padding is made with zeros
291 /// The padding is made with zeros
288 pub fn hex_pad_right(hex: &str) -> String {
292 pub fn hex_pad_right(hex: &str) -> String {
289 let mut res = hex.to_string();
293 let mut res = hex.to_string();
290 while res.len() < NODE_NYBBLES_LENGTH {
294 while res.len() < NODE_NYBBLES_LENGTH {
291 res.push('0');
295 res.push('0');
292 }
296 }
293 res
297 res
294 }
298 }
295
299
296 fn sample_node_hex() -> String {
300 fn sample_node_hex() -> String {
297 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
301 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
298 }
302 }
299
303
300 #[test]
304 #[test]
301 fn test_node_from_hex() {
305 fn test_node_from_hex() {
302 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
306 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
303
307
304 let mut short = hex_pad_right("0123");
308 let mut short = hex_pad_right("0123");
305 short.pop();
309 short.pop();
306 short.pop();
310 short.pop();
307 assert_eq!(
311 assert_eq!(
308 Node::from_hex(&short),
312 Node::from_hex(&short),
309 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
313 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
310 );
314 );
311
315
312 let not_hex = hex_pad_right("012... oops");
316 let not_hex = hex_pad_right("012... oops");
313 assert_eq!(
317 assert_eq!(
314 Node::from_hex(&not_hex),
318 Node::from_hex(&not_hex),
315 Err(NodeError::HexError(
319 Err(NodeError::HexError(
316 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
320 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
317 not_hex,
321 not_hex,
318 )),
322 )),
319 );
323 );
320 }
324 }
321
325
322 #[test]
326 #[test]
323 fn test_node_encode_hex() {
327 fn test_node_encode_hex() {
324 assert_eq!(sample_node().encode_hex(), sample_node_hex());
328 assert_eq!(sample_node().encode_hex(), sample_node_hex());
325 }
329 }
326
330
327 #[test]
331 #[test]
328 fn test_prefix_from_hex() -> Result<(), NodeError> {
332 fn test_prefix_from_hex() -> Result<(), NodeError> {
329 assert_eq!(
333 assert_eq!(
330 NodePrefix::from_hex("0e1")?,
334 NodePrefix::from_hex("0e1")?,
331 NodePrefix {
335 NodePrefix {
332 buf: vec![14, 16],
336 buf: vec![14, 16],
333 is_odd: true
337 is_odd: true
334 }
338 }
335 );
339 );
336 assert_eq!(
340 assert_eq!(
337 NodePrefix::from_hex("0e1a")?,
341 NodePrefix::from_hex("0e1a")?,
338 NodePrefix {
342 NodePrefix {
339 buf: vec![14, 26],
343 buf: vec![14, 26],
340 is_odd: false
344 is_odd: false
341 }
345 }
342 );
346 );
343
347
344 // checking limit case
348 // checking limit case
345 let node_as_vec = sample_node().data.iter().cloned().collect();
349 let node_as_vec = sample_node().data.iter().cloned().collect();
346 assert_eq!(
350 assert_eq!(
347 NodePrefix::from_hex(sample_node_hex())?,
351 NodePrefix::from_hex(sample_node_hex())?,
348 NodePrefix {
352 NodePrefix {
349 buf: node_as_vec,
353 buf: node_as_vec,
350 is_odd: false
354 is_odd: false
351 }
355 }
352 );
356 );
353
357
354 Ok(())
358 Ok(())
355 }
359 }
356
360
357 #[test]
361 #[test]
358 fn test_prefix_from_hex_errors() {
362 fn test_prefix_from_hex_errors() {
359 assert_eq!(
363 assert_eq!(
360 NodePrefix::from_hex("testgr"),
364 NodePrefix::from_hex("testgr"),
361 Err(NodeError::HexError(
365 Err(NodeError::HexError(
362 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
366 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
363 "testgr".to_string()
367 "testgr".to_string()
364 ))
368 ))
365 );
369 );
366 let mut long = NULL_NODE.encode_hex();
370 let mut long = NULL_NODE.encode_hex();
367 long.push('c');
371 long.push('c');
368 match NodePrefix::from_hex(&long)
372 match NodePrefix::from_hex(&long)
369 .expect_err("should be refused as too long")
373 .expect_err("should be refused as too long")
370 {
374 {
371 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
375 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
372 err => panic!(format!("Should have been TooLong, got {:?}", err)),
376 err => panic!(format!("Should have been TooLong, got {:?}", err)),
373 }
377 }
374 }
378 }
375
379
376 #[test]
380 #[test]
377 fn test_is_prefix_of() -> Result<(), NodeError> {
381 fn test_is_prefix_of() -> Result<(), NodeError> {
378 let mut node_data = [0; NODE_BYTES_LENGTH];
382 let mut node_data = [0; NODE_BYTES_LENGTH];
379 node_data[0] = 0x12;
383 node_data[0] = 0x12;
380 node_data[1] = 0xca;
384 node_data[1] = 0xca;
381 let node = Node::from(node_data);
385 let node = Node::from(node_data);
382 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
386 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
383 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
387 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
384 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
388 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
385 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
389 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
386 Ok(())
390 Ok(())
387 }
391 }
388
392
389 #[test]
393 #[test]
390 fn test_get_nybble() -> Result<(), NodeError> {
394 fn test_get_nybble() -> Result<(), NodeError> {
391 let prefix = NodePrefix::from_hex("dead6789cafe")?;
395 let prefix = NodePrefix::from_hex("dead6789cafe")?;
392 assert_eq!(prefix.borrow().get_nybble(0), 13);
396 assert_eq!(prefix.borrow().get_nybble(0), 13);
393 assert_eq!(prefix.borrow().get_nybble(7), 9);
397 assert_eq!(prefix.borrow().get_nybble(7), 9);
394 Ok(())
398 Ok(())
395 }
399 }
396
400
397 #[test]
401 #[test]
398 fn test_first_different_nybble_even_prefix() {
402 fn test_first_different_nybble_even_prefix() {
399 let prefix = NodePrefix::from_hex("12ca").unwrap();
403 let prefix = NodePrefix::from_hex("12ca").unwrap();
400 let prefref = prefix.borrow();
404 let prefref = prefix.borrow();
401 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
405 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
402 assert_eq!(prefref.first_different_nybble(&node), Some(0));
406 assert_eq!(prefref.first_different_nybble(&node), Some(0));
403 node.data[0] = 0x13;
407 node.data[0] = 0x13;
404 assert_eq!(prefref.first_different_nybble(&node), Some(1));
408 assert_eq!(prefref.first_different_nybble(&node), Some(1));
405 node.data[0] = 0x12;
409 node.data[0] = 0x12;
406 assert_eq!(prefref.first_different_nybble(&node), Some(2));
410 assert_eq!(prefref.first_different_nybble(&node), Some(2));
407 node.data[1] = 0xca;
411 node.data[1] = 0xca;
408 // now it is a prefix
412 // now it is a prefix
409 assert_eq!(prefref.first_different_nybble(&node), None);
413 assert_eq!(prefref.first_different_nybble(&node), None);
410 }
414 }
411
415
412 #[test]
416 #[test]
413 fn test_first_different_nybble_odd_prefix() {
417 fn test_first_different_nybble_odd_prefix() {
414 let prefix = NodePrefix::from_hex("12c").unwrap();
418 let prefix = NodePrefix::from_hex("12c").unwrap();
415 let prefref = prefix.borrow();
419 let prefref = prefix.borrow();
416 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
420 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
417 assert_eq!(prefref.first_different_nybble(&node), Some(0));
421 assert_eq!(prefref.first_different_nybble(&node), Some(0));
418 node.data[0] = 0x13;
422 node.data[0] = 0x13;
419 assert_eq!(prefref.first_different_nybble(&node), Some(1));
423 assert_eq!(prefref.first_different_nybble(&node), Some(1));
420 node.data[0] = 0x12;
424 node.data[0] = 0x12;
421 assert_eq!(prefref.first_different_nybble(&node), Some(2));
425 assert_eq!(prefref.first_different_nybble(&node), Some(2));
422 node.data[1] = 0xca;
426 node.data[1] = 0xca;
423 // now it is a prefix
427 // now it is a prefix
424 assert_eq!(prefref.first_different_nybble(&node), None);
428 assert_eq!(prefref.first_different_nybble(&node), None);
425 }
429 }
426 }
430 }
427
431
428 #[cfg(test)]
432 #[cfg(test)]
429 pub use tests::hex_pad_right;
433 pub use tests::hex_pad_right;
@@ -1,1122 +1,1118
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 //!
7 //!
8 //! This provides a variation on the 16-ary radix tree that is
8 //! This provides a variation on the 16-ary radix tree that is
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 //! on disk.
10 //! on disk.
11 //!
11 //!
12 //! Following existing implicit conventions, the "nodemap" terminology
12 //! Following existing implicit conventions, the "nodemap" terminology
13 //! is used in a more abstract context.
13 //! is used in a more abstract context.
14
14
15 use super::{
15 use super::{
16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 RevlogIndex, NULL_REVISION,
17 RevlogIndex, NULL_REVISION,
18 };
18 };
19
19
20 use std::cmp::max;
20 use std::cmp::max;
21 use std::fmt;
21 use std::fmt;
22 use std::mem;
22 use std::mem;
23 use std::ops::Deref;
23 use std::ops::Deref;
24 use std::ops::Index;
24 use std::ops::Index;
25 use std::slice;
25 use std::slice;
26
26
#[derive(Debug, PartialEq)]
pub enum NodeMapError {
    /// Several distinct revisions match the queried node prefix:
    /// the lookup is ambiguous.
    MultipleResults,
    /// The given input could not be interpreted as a node prefix
    /// (e.g. invalid hexadecimal, see `NodePrefix::from_hex`).
    InvalidNodePrefix(NodeError),
    /// A `Revision` stored in the nodemap could not be found in the index
    RevisionNotInIndex(Revision),
}

impl From<NodeError> for NodeMapError {
    /// Wrap node-prefix parsing errors, so that `?` can be used on
    /// `NodePrefix::from_hex` results (see `find_hex` and
    /// `unique_prefix_len_hex`).
    fn from(err: NodeError) -> Self {
        NodeMapError::InvalidNodePrefix(err)
    }
}
40
40
/// Mapping system from Mercurial nodes to revision numbers.
///
/// ## `RevlogIndex` and `NodeMap`
///
/// One way to think about their relationship is that
/// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
/// carried by a [`RevlogIndex`].
///
/// Many of the methods in this trait take a `RevlogIndex` argument
/// which is used for validation of their results. This index must naturally
/// be the one the `NodeMap` is about, and it must be consistent.
///
/// Notably, the `NodeMap` must not store
/// information about more `Revision` values than there are in the index.
/// In these methods, if an encountered `Revision` is not in the index, a
/// [`RevisionNotInIndex`] error is returned.
///
/// In insert operations, the rule is thus that the `RevlogIndex` must always
/// be updated first, and the `NodeMap` second.
///
/// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
/// [`RevlogIndex`]: ../trait.RevlogIndex.html
pub trait NodeMap {
    /// Find the unique `Revision` having the given `Node`
    ///
    /// If no Revision matches the given `Node`, `Ok(None)` is returned.
    fn find_node(
        &self,
        index: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(index, node.into())
    }

    /// Find the unique Revision whose `Node` starts with a given binary prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a [`MultipleResults`]
    /// error is returned.
    fn find_bin<'a>(
        &self,
        idx: &impl RevlogIndex,
        prefix: NodePrefixRef<'a>,
    ) -> Result<Option<Revision>, NodeMapError>;

    /// Find the unique Revision whose `Node` hexadecimal string representation
    /// starts with a given prefix
    ///
    /// If no Revision matches the given prefix, `Ok(None)` is returned.
    ///
    /// If several Revisions match the given prefix, a [`MultipleResults`]
    /// error is returned.
    fn find_hex(
        &self,
        idx: &impl RevlogIndex,
        prefix: &str,
    ) -> Result<Option<Revision>, NodeMapError> {
        self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
    }

    /// Give the size of the shortest node prefix that determines
    /// the revision uniquely.
    ///
    /// From a binary node prefix, if it is matched in the node map, this
    /// returns the number of hexadecimal digits that would had sufficed
    /// to find the revision uniquely.
    ///
    /// Returns `None` if no `Revision` could be found for the prefix.
    ///
    /// If several Revisions match the given prefix, a [`MultipleResults`]
    /// error is returned.
    fn unique_prefix_len_bin<'a>(
        &self,
        idx: &impl RevlogIndex,
        node_prefix: NodePrefixRef<'a>,
    ) -> Result<Option<usize>, NodeMapError>;

    /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
    /// of the prefix as input.
    fn unique_prefix_len_hex(
        &self,
        idx: &impl RevlogIndex,
        prefix: &str,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
    }

    /// Same as `unique_prefix_len_bin`, with a full `Node` as input
    fn unique_prefix_len_node(
        &self,
        idx: &impl RevlogIndex,
        node: &Node,
    ) -> Result<Option<usize>, NodeMapError> {
        self.unique_prefix_len_bin(idx, node.into())
    }
}
139
139
/// A [`NodeMap`] that can also be updated in memory.
///
/// [`NodeMap`]: trait.NodeMap.html
pub trait MutableNodeMap: NodeMap {
    /// Record the `Node` for the given `Revision` of `index`.
    ///
    /// Per the consistency rule documented on [`NodeMap`], `index` must
    /// already contain `rev` when this is called.
    fn insert<I: RevlogIndex>(
        &mut self,
        index: &I,
        node: &Node,
        rev: Revision,
    ) -> Result<(), NodeMapError>;
}
148
148
149 /// Low level NodeTree [`Blocks`] elements
149 /// Low level NodeTree [`Blocks`] elements
150 ///
150 ///
151 /// These are exactly as for instance on persistent storage.
151 /// These are exactly as for instance on persistent storage.
152 type RawElement = i32;
152 type RawElement = i32;
153
153
154 /// High level representation of values in NodeTree
154 /// High level representation of values in NodeTree
155 /// [`Blocks`](struct.Block.html)
155 /// [`Blocks`](struct.Block.html)
156 ///
156 ///
157 /// This is the high level representation that most algorithms should
157 /// This is the high level representation that most algorithms should
158 /// use.
158 /// use.
159 #[derive(Clone, Debug, Eq, PartialEq)]
159 #[derive(Clone, Debug, Eq, PartialEq)]
160 enum Element {
160 enum Element {
161 Rev(Revision),
161 Rev(Revision),
162 Block(usize),
162 Block(usize),
163 None,
163 None,
164 }
164 }
165
165
166 impl From<RawElement> for Element {
166 impl From<RawElement> for Element {
167 /// Conversion from low level representation, after endianness conversion.
167 /// Conversion from low level representation, after endianness conversion.
168 ///
168 ///
169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 fn from(raw: RawElement) -> Element {
170 fn from(raw: RawElement) -> Element {
171 if raw >= 0 {
171 if raw >= 0 {
172 Element::Block(raw as usize)
172 Element::Block(raw as usize)
173 } else if raw == -1 {
173 } else if raw == -1 {
174 Element::None
174 Element::None
175 } else {
175 } else {
176 Element::Rev(-raw - 2)
176 Element::Rev(-raw - 2)
177 }
177 }
178 }
178 }
179 }
179 }
180
180
181 impl From<Element> for RawElement {
181 impl From<Element> for RawElement {
182 fn from(element: Element) -> RawElement {
182 fn from(element: Element) -> RawElement {
183 match element {
183 match element {
184 Element::None => 0,
184 Element::None => 0,
185 Element::Block(i) => i as RawElement,
185 Element::Block(i) => i as RawElement,
186 Element::Rev(rev) => -rev - 2,
186 Element::Rev(rev) => -rev - 2,
187 }
187 }
188 }
188 }
189 }
189 }
190
190
191 /// A logical block of the `NodeTree`, packed with a fixed size.
191 /// A logical block of the `NodeTree`, packed with a fixed size.
192 ///
192 ///
193 /// These are always used in container types implementing `Index<Block>`,
193 /// These are always used in container types implementing `Index<Block>`,
194 /// such as `&Block`
194 /// such as `&Block`
195 ///
195 ///
196 /// As an array of integers, its ith element encodes that the
196 /// As an array of integers, its ith element encodes that the
197 /// ith potential edge from the block, representing the ith hexadecimal digit
197 /// ith potential edge from the block, representing the ith hexadecimal digit
198 /// (nybble) `i` is either:
198 /// (nybble) `i` is either:
199 ///
199 ///
200 /// - absent (value -1)
200 /// - absent (value -1)
201 /// - another `Block` in the same indexable container (value ≥ 0)
201 /// - another `Block` in the same indexable container (value ≥ 0)
202 /// - a `Revision` leaf (value ≤ -2)
202 /// - a `Revision` leaf (value ≤ -2)
203 ///
203 ///
204 /// Endianness has to be fixed for consistency on shared storage across
204 /// Endianness has to be fixed for consistency on shared storage across
205 /// different architectures.
205 /// different architectures.
206 ///
206 ///
207 /// A key difference with the C `nodetree` is that we need to be
207 /// A key difference with the C `nodetree` is that we need to be
208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
210 ///
210 ///
211 /// Another related difference is that `NULL_REVISION` (-1) is not
211 /// Another related difference is that `NULL_REVISION` (-1) is not
212 /// represented at all, because we want an immutable empty nodetree
212 /// represented at all, because we want an immutable empty nodetree
213 /// to be valid.
213 /// to be valid.
214
214
#[derive(Copy, Clone)]
pub struct Block([u8; BLOCK_SIZE]);

/// Not derivable for arrays of length >32 until const generics are stable
impl PartialEq for Block {
    fn eq(&self, other: &Self) -> bool {
        // Element-wise comparison of the two fixed-size byte arrays.
        self.0.iter().eq(other.0.iter())
    }
}

pub const BLOCK_SIZE: usize = 64;
226
226
227 impl Block {
227 impl Block {
228 fn new() -> Self {
228 fn new() -> Self {
229 // -1 in 2's complement to create an absent node
229 // -1 in 2's complement to create an absent node
230 let byte: u8 = 255;
230 let byte: u8 = 255;
231 Block([byte; BLOCK_SIZE])
231 Block([byte; BLOCK_SIZE])
232 }
232 }
233
233
234 fn get(&self, nybble: u8) -> Element {
234 fn get(&self, nybble: u8) -> Element {
235 let index = nybble as usize * mem::size_of::<RawElement>();
235 let index = nybble as usize * mem::size_of::<RawElement>();
236 Element::from(RawElement::from_be_bytes([
236 Element::from(RawElement::from_be_bytes([
237 self.0[index],
237 self.0[index],
238 self.0[index + 1],
238 self.0[index + 1],
239 self.0[index + 2],
239 self.0[index + 2],
240 self.0[index + 3],
240 self.0[index + 3],
241 ]))
241 ]))
242 }
242 }
243
243
244 fn set(&mut self, nybble: u8, element: Element) {
244 fn set(&mut self, nybble: u8, element: Element) {
245 let values = RawElement::to_be_bytes(element.into());
245 let values = RawElement::to_be_bytes(element.into());
246 let index = nybble as usize * mem::size_of::<RawElement>();
246 let index = nybble as usize * mem::size_of::<RawElement>();
247 self.0[index] = values[0];
247 self.0[index] = values[0];
248 self.0[index + 1] = values[1];
248 self.0[index + 1] = values[1];
249 self.0[index + 2] = values[2];
249 self.0[index + 2] = values[2];
250 self.0[index + 3] = values[3];
250 self.0[index + 3] = values[3];
251 }
251 }
252 }
252 }
253
253
254 impl fmt::Debug for Block {
254 impl fmt::Debug for Block {
255 /// sparse representation for testing and debugging purposes
255 /// sparse representation for testing and debugging purposes
256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
257 f.debug_map()
257 f.debug_map()
258 .entries((0..16).filter_map(|i| match self.get(i) {
258 .entries((0..16).filter_map(|i| match self.get(i) {
259 Element::None => None,
259 Element::None => None,
260 element => Some((i, element)),
260 element => Some((i, element)),
261 }))
261 }))
262 .finish()
262 .finish()
263 }
263 }
264 }
264 }
265
265
/// A mutable 16-radix tree with the root block logically at the end
///
/// Because of the append only nature of our node trees, we need to
/// keep the original untouched and store new blocks separately.
///
/// The mutable root `Block` is kept apart so that we don't have to rebump
/// it on each insertion.
pub struct NodeTree {
    // Immutable, already-persisted blocks (may be backed by e.g. an mmap
    // or a PyBuffer, see `load_bytes`).
    readonly: Box<dyn Deref<Target = [Block]> + Send>,
    // Mutable blocks appended since instantiation, including copies of
    // readonly blocks that had to be modified (see `mutable_block`).
    growable: Vec<Block>,
    // The always-mutable root block, logically located right after
    // `growable` (see the `Index` impl).
    root: Block,
    // Count of `readonly` blocks that have been superseded by a mutable
    // copy in `growable`.
    masked_inner_blocks: usize,
}
279
279
280 impl Index<usize> for NodeTree {
280 impl Index<usize> for NodeTree {
281 type Output = Block;
281 type Output = Block;
282
282
283 fn index(&self, i: usize) -> &Block {
283 fn index(&self, i: usize) -> &Block {
284 let ro_len = self.readonly.len();
284 let ro_len = self.readonly.len();
285 if i < ro_len {
285 if i < ro_len {
286 &self.readonly[i]
286 &self.readonly[i]
287 } else if i == ro_len + self.growable.len() {
287 } else if i == ro_len + self.growable.len() {
288 &self.root
288 &self.root
289 } else {
289 } else {
290 &self.growable[i - ro_len]
290 &self.growable[i - ro_len]
291 }
291 }
292 }
292 }
293 }
293 }
294
294
295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
296 fn has_prefix_or_none(
296 fn has_prefix_or_none(
297 idx: &impl RevlogIndex,
297 idx: &impl RevlogIndex,
298 prefix: NodePrefixRef,
298 prefix: NodePrefixRef,
299 rev: Revision,
299 rev: Revision,
300 ) -> Result<Option<Revision>, NodeMapError> {
300 ) -> Result<Option<Revision>, NodeMapError> {
301 idx.node(rev)
301 idx.node(rev)
302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
303 .map(|node| {
303 .map(|node| {
304 if prefix.is_prefix_of(node) {
304 if prefix.is_prefix_of(node) {
305 Some(rev)
305 Some(rev)
306 } else {
306 } else {
307 None
307 None
308 }
308 }
309 })
309 })
310 }
310 }
311
311
/// validate that the candidate's node starts indeed with given prefix,
/// and treat ambiguities related to `NULL_REVISION`.
///
/// From the data in the NodeTree, one can only conclude that some
/// revision is the only one for a *subprefix* of the one being looked up.
fn validate_candidate(
    idx: &impl RevlogIndex,
    prefix: NodePrefixRef,
    candidate: (Option<Revision>, usize),
) -> Result<(Option<Revision>, usize), NodeMapError> {
    let (rev, steps) = candidate;
    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
        // The prefix has at least one non-zero nybble (at index
        // `nz_nybble`), so it cannot match NULL_NODE beyond that point:
        // at least `nz_nybble + 1` nybbles are needed to disambiguate
        // from NULL_REVISION, hence the `max` below.
        rev.map_or(Ok((None, steps)), |r| {
            has_prefix_or_none(idx, prefix, r)
                .map(|opt| (opt, max(steps, nz_nybble + 1)))
        })
    } else {
        // the prefix is only made of zeros; NULL_REVISION always matches it
        // and any other *valid* result is an ambiguity
        match rev {
            None => Ok((Some(NULL_REVISION), steps + 1)),
            // The candidate does not actually carry the prefix: only
            // NULL_REVISION matches after all.
            Some(r) => match has_prefix_or_none(idx, prefix, r)? {
                None => Ok((Some(NULL_REVISION), steps + 1)),
                _ => Err(NodeMapError::MultipleResults),
            },
        }
    }
}
340
340
341 impl NodeTree {
341 impl NodeTree {
342 /// Initiate a NodeTree from an immutable slice-like of `Block`
342 /// Initiate a NodeTree from an immutable slice-like of `Block`
343 ///
343 ///
344 /// We keep `readonly` and clone its root block if it isn't empty.
344 /// We keep `readonly` and clone its root block if it isn't empty.
345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
346 let root = readonly
346 let root = readonly.last().cloned().unwrap_or_else(Block::new);
347 .last()
348 .map(|b| b.clone())
349 .unwrap_or_else(|| Block::new());
350 NodeTree {
347 NodeTree {
351 readonly: readonly,
348 readonly,
352 growable: Vec::new(),
349 growable: Vec::new(),
353 root: root,
350 root,
354 masked_inner_blocks: 0,
351 masked_inner_blocks: 0,
355 }
352 }
356 }
353 }
357
354
358 /// Create from an opaque bunch of bytes
355 /// Create from an opaque bunch of bytes
359 ///
356 ///
360 /// The created `NodeTreeBytes` from `buffer`,
357 /// The created `NodeTreeBytes` from `buffer`,
361 /// of which exactly `amount` bytes are used.
358 /// of which exactly `amount` bytes are used.
362 ///
359 ///
363 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
360 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
364 /// - `offset` allows for the final file format to include fixed data
361 /// - `offset` allows for the final file format to include fixed data
365 /// (generation number, behavioural flags)
362 /// (generation number, behavioural flags)
366 /// - `amount` is expressed in bytes, and is not automatically derived from
363 /// - `amount` is expressed in bytes, and is not automatically derived from
367 /// `bytes`, so that a caller that manages them atomically can perform
364 /// `bytes`, so that a caller that manages them atomically can perform
368 /// temporary disk serializations and still rollback easily if needed.
365 /// temporary disk serializations and still rollback easily if needed.
369 /// First use-case for this would be to support Mercurial shell hooks.
366 /// First use-case for this would be to support Mercurial shell hooks.
370 ///
367 ///
371 /// panics if `buffer` is smaller than `amount`
368 /// panics if `buffer` is smaller than `amount`
372 pub fn load_bytes(
369 pub fn load_bytes(
373 bytes: Box<dyn Deref<Target = [u8]> + Send>,
370 bytes: Box<dyn Deref<Target = [u8]> + Send>,
374 amount: usize,
371 amount: usize,
375 ) -> Self {
372 ) -> Self {
376 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
373 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
377 }
374 }
378
375
379 /// Retrieve added `Block` and the original immutable data
376 /// Retrieve added `Block` and the original immutable data
380 pub fn into_readonly_and_added(
377 pub fn into_readonly_and_added(
381 self,
378 self,
382 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
379 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
383 let mut vec = self.growable;
380 let mut vec = self.growable;
384 let readonly = self.readonly;
381 let readonly = self.readonly;
385 if readonly.last() != Some(&self.root) {
382 if readonly.last() != Some(&self.root) {
386 vec.push(self.root);
383 vec.push(self.root);
387 }
384 }
388 (readonly, vec)
385 (readonly, vec)
389 }
386 }
390
387
    /// Retrieve added `Blocks` as bytes, ready to be written to persistent
    /// storage
    pub fn into_readonly_and_added_bytes(
        self,
    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
        let (readonly, vec) = self.into_readonly_and_added();
        // Prevent running `v`'s destructor so we are in complete control
        // of the allocation.
        let vec = mem::ManuallyDrop::new(vec);

        // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
        // bytes, so this is perfectly safe.
        let bytes = unsafe {
            // SAFETY: `Block` is a plain `[u8; BLOCK_SIZE]` wrapper
            // (no padding, alignment 1), so reinterpreting the buffer as
            // bytes is sound; `ManuallyDrop` above guarantees the original
            // allocation is not freed twice.
            // Assert that `Block` hasn't been changed and has no padding
            let _: [u8; 4 * BLOCK_SIZE] =
                std::mem::transmute([Block::new(); 4]);

            // /!\ Any use of `vec` after this is use-after-free.
            // TODO: use `into_raw_parts` once stabilized
            Vec::from_raw_parts(
                vec.as_ptr() as *mut u8,
                vec.len() * BLOCK_SIZE,
                vec.capacity() * BLOCK_SIZE,
            )
        };
        (readonly, bytes)
    }
418
415
    /// Total number of blocks
    ///
    /// Counts `readonly` and `growable` blocks, plus 1 for the
    /// always-present root.
    fn len(&self) -> usize {
        self.readonly.len() + self.growable.len() + 1
    }
423
420
    /// Implemented for completeness
    ///
    /// A `NodeTree` always has at least the mutable root block, hence
    /// this is constant (see also `len`, which is always >= 1).
    #[allow(dead_code)]
    fn is_empty(&self) -> bool {
        false
    }
431
428
432 /// Main working method for `NodeTree` searches
429 /// Main working method for `NodeTree` searches
433 ///
430 ///
434 /// The first returned value is the result of analysing `NodeTree` data
431 /// The first returned value is the result of analysing `NodeTree` data
435 /// *alone*: whereas `None` guarantees that the given prefix is absent
432 /// *alone*: whereas `None` guarantees that the given prefix is absent
436 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
433 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
437 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
434 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
438 /// that could match the prefix. Actually, all that can be inferred from
435 /// that could match the prefix. Actually, all that can be inferred from
439 /// the `NodeTree` data is that `rev` is the revision with the longest
436 /// the `NodeTree` data is that `rev` is the revision with the longest
440 /// common node prefix with the given prefix.
437 /// common node prefix with the given prefix.
441 ///
438 ///
442 /// The second returned value is the size of the smallest subprefix
439 /// The second returned value is the size of the smallest subprefix
443 /// of `prefix` that would give the same result, i.e. not the
440 /// of `prefix` that would give the same result, i.e. not the
444 /// `MultipleResults` error variant (again, using only the data of the
441 /// `MultipleResults` error variant (again, using only the data of the
445 /// `NodeTree`).
442 /// `NodeTree`).
446 fn lookup(
443 fn lookup(
447 &self,
444 &self,
448 prefix: NodePrefixRef,
445 prefix: NodePrefixRef,
449 ) -> Result<(Option<Revision>, usize), NodeMapError> {
446 ) -> Result<(Option<Revision>, usize), NodeMapError> {
450 for (i, visit_item) in self.visit(prefix).enumerate() {
447 for (i, visit_item) in self.visit(prefix).enumerate() {
451 if let Some(opt) = visit_item.final_revision() {
448 if let Some(opt) = visit_item.final_revision() {
452 return Ok((opt, i + 1));
449 return Ok((opt, i + 1));
453 }
450 }
454 }
451 }
455 Err(NodeMapError::MultipleResults)
452 Err(NodeMapError::MultipleResults)
456 }
453 }
457
454
    /// Create an iterator walking the tree along `prefix`, one nybble
    /// (hence one block) at a time, starting from the root block.
    fn visit<'n, 'p>(
        &'n self,
        prefix: NodePrefixRef<'p>,
    ) -> NodeTreeVisitor<'n, 'p> {
        NodeTreeVisitor {
            nt: self,
            prefix,
            // The root block is logically the last one (see the `Index`
            // impl), so the walk begins there.
            visit: self.len() - 1,
            nybble_idx: 0,
            done: false,
        }
    }
    /// Return a mutable reference for `Block` at index `idx`.
    ///
    /// If `idx` lies in the immutable area, then the reference is to
    /// a newly appended copy.
    ///
    /// Returns (new_idx, glen, mut_ref) where
    ///
    /// - `new_idx` is the index of the mutable `Block`
    /// - `mut_ref` is a mutable reference to the mutable Block.
    /// - `glen` is the new length of `self.growable`
    ///
    /// Note: the caller wouldn't be allowed to query `self.growable.len()`
    /// itself because of the mutable borrow taken with the returned `Block`
    fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
        let ro_blocks = &self.readonly;
        let ro_len = ro_blocks.len();
        let glen = self.growable.len();
        if idx < ro_len {
            // Immutable area: append a copy at the end of `growable`
            // (index `glen`, i.e. overall index `glen + ro_len`) and
            // account for the now-superseded readonly block.
            self.masked_inner_blocks += 1;
            self.growable.push(ro_blocks[idx]);
            (glen + ro_len, &mut self.growable[glen], glen + 1)
        } else if glen + ro_len == idx {
            // `idx` designates the root block, which is already mutable.
            (idx, &mut self.root, glen)
        } else {
            // Already in the growable (mutable) area.
            (idx, &mut self.growable[idx - ro_len], glen)
        }
    }
498
494
499 /// Main insertion method
495 /// Main insertion method
500 ///
496 ///
501 /// This will dive in the node tree to find the deepest `Block` for
497 /// This will dive in the node tree to find the deepest `Block` for
502 /// `node`, split it as much as needed and record `node` in there.
498 /// `node`, split it as much as needed and record `node` in there.
503 /// The method then backtracks, updating references in all the visited
499 /// The method then backtracks, updating references in all the visited
504 /// blocks from the root.
500 /// blocks from the root.
505 ///
501 ///
506 /// All the mutated `Block` are copied first to the growable part if
502 /// All the mutated `Block` are copied first to the growable part if
507 /// needed. That happens for those in the immutable part except the root.
503 /// needed. That happens for those in the immutable part except the root.
508 pub fn insert<I: RevlogIndex>(
504 pub fn insert<I: RevlogIndex>(
509 &mut self,
505 &mut self,
510 index: &I,
506 index: &I,
511 node: &Node,
507 node: &Node,
512 rev: Revision,
508 rev: Revision,
513 ) -> Result<(), NodeMapError> {
509 ) -> Result<(), NodeMapError> {
514 let ro_len = &self.readonly.len();
510 let ro_len = &self.readonly.len();
515
511
516 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
512 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
517 let read_nybbles = visit_steps.len();
513 let read_nybbles = visit_steps.len();
518 // visit_steps cannot be empty, since we always visit the root block
514 // visit_steps cannot be empty, since we always visit the root block
519 let deepest = visit_steps.pop().unwrap();
515 let deepest = visit_steps.pop().unwrap();
520
516
521 let (mut block_idx, mut block, mut glen) =
517 let (mut block_idx, mut block, mut glen) =
522 self.mutable_block(deepest.block_idx);
518 self.mutable_block(deepest.block_idx);
523
519
524 if let Element::Rev(old_rev) = deepest.element {
520 if let Element::Rev(old_rev) = deepest.element {
525 let old_node = index
521 let old_node = index
526 .node(old_rev)
522 .node(old_rev)
527 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
523 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
528 if old_node == node {
524 if old_node == node {
529 return Ok(()); // avoid creating lots of useless blocks
525 return Ok(()); // avoid creating lots of useless blocks
530 }
526 }
531
527
532 // Looping over the tail of nybbles in both nodes, creating
528 // Looping over the tail of nybbles in both nodes, creating
533 // new blocks until we find the difference
529 // new blocks until we find the difference
534 let mut new_block_idx = ro_len + glen;
530 let mut new_block_idx = ro_len + glen;
535 let mut nybble = deepest.nybble;
531 let mut nybble = deepest.nybble;
536 for nybble_pos in read_nybbles..node.nybbles_len() {
532 for nybble_pos in read_nybbles..node.nybbles_len() {
537 block.set(nybble, Element::Block(new_block_idx));
533 block.set(nybble, Element::Block(new_block_idx));
538
534
539 let new_nybble = node.get_nybble(nybble_pos);
535 let new_nybble = node.get_nybble(nybble_pos);
540 let old_nybble = old_node.get_nybble(nybble_pos);
536 let old_nybble = old_node.get_nybble(nybble_pos);
541
537
542 if old_nybble == new_nybble {
538 if old_nybble == new_nybble {
543 self.growable.push(Block::new());
539 self.growable.push(Block::new());
544 block = &mut self.growable[glen];
540 block = &mut self.growable[glen];
545 glen += 1;
541 glen += 1;
546 new_block_idx += 1;
542 new_block_idx += 1;
547 nybble = new_nybble;
543 nybble = new_nybble;
548 } else {
544 } else {
549 let mut new_block = Block::new();
545 let mut new_block = Block::new();
550 new_block.set(old_nybble, Element::Rev(old_rev));
546 new_block.set(old_nybble, Element::Rev(old_rev));
551 new_block.set(new_nybble, Element::Rev(rev));
547 new_block.set(new_nybble, Element::Rev(rev));
552 self.growable.push(new_block);
548 self.growable.push(new_block);
553 break;
549 break;
554 }
550 }
555 }
551 }
556 } else {
552 } else {
557 // Free slot in the deepest block: no splitting has to be done
553 // Free slot in the deepest block: no splitting has to be done
558 block.set(deepest.nybble, Element::Rev(rev));
554 block.set(deepest.nybble, Element::Rev(rev));
559 }
555 }
560
556
561 // Backtrack over visit steps to update references
557 // Backtrack over visit steps to update references
562 while let Some(visited) = visit_steps.pop() {
558 while let Some(visited) = visit_steps.pop() {
563 let to_write = Element::Block(block_idx);
559 let to_write = Element::Block(block_idx);
564 if visit_steps.is_empty() {
560 if visit_steps.is_empty() {
565 self.root.set(visited.nybble, to_write);
561 self.root.set(visited.nybble, to_write);
566 break;
562 break;
567 }
563 }
568 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
564 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
569 if block.get(visited.nybble) == to_write {
565 if block.get(visited.nybble) == to_write {
570 break;
566 break;
571 }
567 }
572 block.set(visited.nybble, to_write);
568 block.set(visited.nybble, to_write);
573 block_idx = new_idx;
569 block_idx = new_idx;
574 }
570 }
575 Ok(())
571 Ok(())
576 }
572 }
577
573
578 /// Make the whole `NodeTree` logically empty, without touching the
574 /// Make the whole `NodeTree` logically empty, without touching the
579 /// immutable part.
575 /// immutable part.
580 pub fn invalidate_all(&mut self) {
576 pub fn invalidate_all(&mut self) {
581 self.root = Block::new();
577 self.root = Block::new();
582 self.growable = Vec::new();
578 self.growable = Vec::new();
583 self.masked_inner_blocks = self.readonly.len();
579 self.masked_inner_blocks = self.readonly.len();
584 }
580 }
585
581
586 /// Return the number of blocks in the readonly part that are currently
582 /// Return the number of blocks in the readonly part that are currently
587 /// masked in the mutable part.
583 /// masked in the mutable part.
588 ///
584 ///
589 /// The `NodeTree` structure has no efficient way to know how many blocks
585 /// The `NodeTree` structure has no efficient way to know how many blocks
590 /// are already unreachable in the readonly part.
586 /// are already unreachable in the readonly part.
591 ///
587 ///
592 /// After a call to `invalidate_all()`, the returned number can be actually
588 /// After a call to `invalidate_all()`, the returned number can be actually
593 /// bigger than the whole readonly part, a conventional way to mean that
589 /// bigger than the whole readonly part, a conventional way to mean that
594 /// all the readonly blocks have been masked. This is what is really
590 /// all the readonly blocks have been masked. This is what is really
595 /// useful to the caller and does not require to know how many were
591 /// useful to the caller and does not require to know how many were
596 /// actually unreachable to begin with.
592 /// actually unreachable to begin with.
597 pub fn masked_readonly_blocks(&self) -> usize {
593 pub fn masked_readonly_blocks(&self) -> usize {
598 if let Some(readonly_root) = self.readonly.last() {
594 if let Some(readonly_root) = self.readonly.last() {
599 if readonly_root == &self.root {
595 if readonly_root == &self.root {
600 return 0;
596 return 0;
601 }
597 }
602 } else {
598 } else {
603 return 0;
599 return 0;
604 }
600 }
605 self.masked_inner_blocks + 1
601 self.masked_inner_blocks + 1
606 }
602 }
607 }
603 }
608
604
609 pub struct NodeTreeBytes {
605 pub struct NodeTreeBytes {
610 buffer: Box<dyn Deref<Target = [u8]> + Send>,
606 buffer: Box<dyn Deref<Target = [u8]> + Send>,
611 len_in_blocks: usize,
607 len_in_blocks: usize,
612 }
608 }
613
609
614 impl NodeTreeBytes {
610 impl NodeTreeBytes {
615 fn new(
611 fn new(
616 buffer: Box<dyn Deref<Target = [u8]> + Send>,
612 buffer: Box<dyn Deref<Target = [u8]> + Send>,
617 amount: usize,
613 amount: usize,
618 ) -> Self {
614 ) -> Self {
619 assert!(buffer.len() >= amount);
615 assert!(buffer.len() >= amount);
620 let len_in_blocks = amount / BLOCK_SIZE;
616 let len_in_blocks = amount / BLOCK_SIZE;
621 NodeTreeBytes {
617 NodeTreeBytes {
622 buffer,
618 buffer,
623 len_in_blocks,
619 len_in_blocks,
624 }
620 }
625 }
621 }
626 }
622 }
627
623
628 impl Deref for NodeTreeBytes {
624 impl Deref for NodeTreeBytes {
629 type Target = [Block];
625 type Target = [Block];
630
626
631 fn deref(&self) -> &[Block] {
627 fn deref(&self) -> &[Block] {
632 unsafe {
628 unsafe {
633 slice::from_raw_parts(
629 slice::from_raw_parts(
634 (&self.buffer).as_ptr() as *const Block,
630 (&self.buffer).as_ptr() as *const Block,
635 self.len_in_blocks,
631 self.len_in_blocks,
636 )
632 )
637 }
633 }
638 }
634 }
639 }
635 }
640
636
641 struct NodeTreeVisitor<'n, 'p> {
637 struct NodeTreeVisitor<'n, 'p> {
642 nt: &'n NodeTree,
638 nt: &'n NodeTree,
643 prefix: NodePrefixRef<'p>,
639 prefix: NodePrefixRef<'p>,
644 visit: usize,
640 visit: usize,
645 nybble_idx: usize,
641 nybble_idx: usize,
646 done: bool,
642 done: bool,
647 }
643 }
648
644
649 #[derive(Debug, PartialEq, Clone)]
645 #[derive(Debug, PartialEq, Clone)]
650 struct NodeTreeVisitItem {
646 struct NodeTreeVisitItem {
651 block_idx: usize,
647 block_idx: usize,
652 nybble: u8,
648 nybble: u8,
653 element: Element,
649 element: Element,
654 }
650 }
655
651
656 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
652 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
657 type Item = NodeTreeVisitItem;
653 type Item = NodeTreeVisitItem;
658
654
659 fn next(&mut self) -> Option<Self::Item> {
655 fn next(&mut self) -> Option<Self::Item> {
660 if self.done || self.nybble_idx >= self.prefix.len() {
656 if self.done || self.nybble_idx >= self.prefix.len() {
661 return None;
657 return None;
662 }
658 }
663
659
664 let nybble = self.prefix.get_nybble(self.nybble_idx);
660 let nybble = self.prefix.get_nybble(self.nybble_idx);
665 self.nybble_idx += 1;
661 self.nybble_idx += 1;
666
662
667 let visit = self.visit;
663 let visit = self.visit;
668 let element = self.nt[visit].get(nybble);
664 let element = self.nt[visit].get(nybble);
669 if let Element::Block(idx) = element {
665 if let Element::Block(idx) = element {
670 self.visit = idx;
666 self.visit = idx;
671 } else {
667 } else {
672 self.done = true;
668 self.done = true;
673 }
669 }
674
670
675 Some(NodeTreeVisitItem {
671 Some(NodeTreeVisitItem {
676 block_idx: visit,
672 block_idx: visit,
677 nybble: nybble,
673 nybble,
678 element: element,
674 element,
679 })
675 })
680 }
676 }
681 }
677 }
682
678
683 impl NodeTreeVisitItem {
679 impl NodeTreeVisitItem {
684 // Return `Some(opt)` if this item is final, with `opt` being the
680 // Return `Some(opt)` if this item is final, with `opt` being the
685 // `Revision` that it may represent.
681 // `Revision` that it may represent.
686 //
682 //
687 // If the item is not terminal, return `None`
683 // If the item is not terminal, return `None`
688 fn final_revision(&self) -> Option<Option<Revision>> {
684 fn final_revision(&self) -> Option<Option<Revision>> {
689 match self.element {
685 match self.element {
690 Element::Block(_) => None,
686 Element::Block(_) => None,
691 Element::Rev(r) => Some(Some(r)),
687 Element::Rev(r) => Some(Some(r)),
692 Element::None => Some(None),
688 Element::None => Some(None),
693 }
689 }
694 }
690 }
695 }
691 }
696
692
697 impl From<Vec<Block>> for NodeTree {
693 impl From<Vec<Block>> for NodeTree {
698 fn from(vec: Vec<Block>) -> Self {
694 fn from(vec: Vec<Block>) -> Self {
699 Self::new(Box::new(vec))
695 Self::new(Box::new(vec))
700 }
696 }
701 }
697 }
702
698
703 impl fmt::Debug for NodeTree {
699 impl fmt::Debug for NodeTree {
704 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
700 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
705 let readonly: &[Block] = &*self.readonly;
701 let readonly: &[Block] = &*self.readonly;
706 write!(
702 write!(
707 f,
703 f,
708 "readonly: {:?}, growable: {:?}, root: {:?}",
704 "readonly: {:?}, growable: {:?}, root: {:?}",
709 readonly, self.growable, self.root
705 readonly, self.growable, self.root
710 )
706 )
711 }
707 }
712 }
708 }
713
709
714 impl Default for NodeTree {
710 impl Default for NodeTree {
715 /// Create a fully mutable empty NodeTree
711 /// Create a fully mutable empty NodeTree
716 fn default() -> Self {
712 fn default() -> Self {
717 NodeTree::new(Box::new(Vec::new()))
713 NodeTree::new(Box::new(Vec::new()))
718 }
714 }
719 }
715 }
720
716
721 impl NodeMap for NodeTree {
717 impl NodeMap for NodeTree {
722 fn find_bin<'a>(
718 fn find_bin<'a>(
723 &self,
719 &self,
724 idx: &impl RevlogIndex,
720 idx: &impl RevlogIndex,
725 prefix: NodePrefixRef<'a>,
721 prefix: NodePrefixRef<'a>,
726 ) -> Result<Option<Revision>, NodeMapError> {
722 ) -> Result<Option<Revision>, NodeMapError> {
727 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
723 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
728 .map(|(opt, _shortest)| opt)
724 .map(|(opt, _shortest)| opt)
729 }
725 }
730
726
731 fn unique_prefix_len_bin<'a>(
727 fn unique_prefix_len_bin<'a>(
732 &self,
728 &self,
733 idx: &impl RevlogIndex,
729 idx: &impl RevlogIndex,
734 prefix: NodePrefixRef<'a>,
730 prefix: NodePrefixRef<'a>,
735 ) -> Result<Option<usize>, NodeMapError> {
731 ) -> Result<Option<usize>, NodeMapError> {
736 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
732 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
737 .map(|(opt, shortest)| opt.map(|_rev| shortest))
733 .map(|(opt, shortest)| opt.map(|_rev| shortest))
738 }
734 }
739 }
735 }
740
736
741 #[cfg(test)]
737 #[cfg(test)]
742 mod tests {
738 mod tests {
743 use super::NodeMapError::*;
739 use super::NodeMapError::*;
744 use super::*;
740 use super::*;
745 use crate::revlog::node::{hex_pad_right, Node};
741 use crate::revlog::node::{hex_pad_right, Node};
746 use std::collections::HashMap;
742 use std::collections::HashMap;
747
743
748 /// Creates a `Block` using a syntax close to the `Debug` output
744 /// Creates a `Block` using a syntax close to the `Debug` output
749 macro_rules! block {
745 macro_rules! block {
750 {$($nybble:tt : $variant:ident($val:tt)),*} => (
746 {$($nybble:tt : $variant:ident($val:tt)),*} => (
751 {
747 {
752 let mut block = Block::new();
748 let mut block = Block::new();
753 $(block.set($nybble, Element::$variant($val)));*;
749 $(block.set($nybble, Element::$variant($val)));*;
754 block
750 block
755 }
751 }
756 )
752 )
757 }
753 }
758
754
759 #[test]
755 #[test]
760 fn test_block_debug() {
756 fn test_block_debug() {
761 let mut block = Block::new();
757 let mut block = Block::new();
762 block.set(1, Element::Rev(3));
758 block.set(1, Element::Rev(3));
763 block.set(10, Element::Block(0));
759 block.set(10, Element::Block(0));
764 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
760 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
765 }
761 }
766
762
767 #[test]
763 #[test]
768 fn test_block_macro() {
764 fn test_block_macro() {
769 let block = block! {5: Block(2)};
765 let block = block! {5: Block(2)};
770 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
766 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
771
767
772 let block = block! {13: Rev(15), 5: Block(2)};
768 let block = block! {13: Rev(15), 5: Block(2)};
773 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
769 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
774 }
770 }
775
771
776 #[test]
772 #[test]
777 fn test_raw_block() {
773 fn test_raw_block() {
778 let mut raw = [255u8; 64];
774 let mut raw = [255u8; 64];
779
775
780 let mut counter = 0;
776 let mut counter = 0;
781 for val in [0, 15, -2, -1, -3].iter() {
777 for val in [0, 15, -2, -1, -3].iter() {
782 for byte in RawElement::to_be_bytes(*val).iter() {
778 for byte in RawElement::to_be_bytes(*val).iter() {
783 raw[counter] = *byte;
779 raw[counter] = *byte;
784 counter += 1;
780 counter += 1;
785 }
781 }
786 }
782 }
787 let block = Block(raw);
783 let block = Block(raw);
788 assert_eq!(block.get(0), Element::Block(0));
784 assert_eq!(block.get(0), Element::Block(0));
789 assert_eq!(block.get(1), Element::Block(15));
785 assert_eq!(block.get(1), Element::Block(15));
790 assert_eq!(block.get(3), Element::None);
786 assert_eq!(block.get(3), Element::None);
791 assert_eq!(block.get(2), Element::Rev(0));
787 assert_eq!(block.get(2), Element::Rev(0));
792 assert_eq!(block.get(4), Element::Rev(1));
788 assert_eq!(block.get(4), Element::Rev(1));
793 }
789 }
794
790
795 type TestIndex = HashMap<Revision, Node>;
791 type TestIndex = HashMap<Revision, Node>;
796
792
797 impl RevlogIndex for TestIndex {
793 impl RevlogIndex for TestIndex {
798 fn node(&self, rev: Revision) -> Option<&Node> {
794 fn node(&self, rev: Revision) -> Option<&Node> {
799 self.get(&rev)
795 self.get(&rev)
800 }
796 }
801
797
802 fn len(&self) -> usize {
798 fn len(&self) -> usize {
803 self.len()
799 self.len()
804 }
800 }
805 }
801 }
806
802
807 /// Pad hexadecimal Node prefix with zeros on the right
803 /// Pad hexadecimal Node prefix with zeros on the right
808 ///
804 ///
809 /// This avoids having to repeatedly write very long hexadecimal
805 /// This avoids having to repeatedly write very long hexadecimal
810 /// strings for test data, and brings actual hash size independency.
806 /// strings for test data, and brings actual hash size independency.
811 #[cfg(test)]
807 #[cfg(test)]
812 fn pad_node(hex: &str) -> Node {
808 fn pad_node(hex: &str) -> Node {
813 Node::from_hex(&hex_pad_right(hex)).unwrap()
809 Node::from_hex(&hex_pad_right(hex)).unwrap()
814 }
810 }
815
811
816 /// Pad hexadecimal Node prefix with zeros on the right, then insert
812 /// Pad hexadecimal Node prefix with zeros on the right, then insert
817 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
813 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
818 idx.insert(rev, pad_node(hex));
814 idx.insert(rev, pad_node(hex));
819 }
815 }
820
816
821 fn sample_nodetree() -> NodeTree {
817 fn sample_nodetree() -> NodeTree {
822 NodeTree::from(vec![
818 NodeTree::from(vec![
823 block![0: Rev(9)],
819 block![0: Rev(9)],
824 block![0: Rev(0), 1: Rev(9)],
820 block![0: Rev(0), 1: Rev(9)],
825 block![0: Block(1), 1:Rev(1)],
821 block![0: Block(1), 1:Rev(1)],
826 ])
822 ])
827 }
823 }
828
824
829 #[test]
825 #[test]
830 fn test_nt_debug() {
826 fn test_nt_debug() {
831 let nt = sample_nodetree();
827 let nt = sample_nodetree();
832 assert_eq!(
828 assert_eq!(
833 format!("{:?}", nt),
829 format!("{:?}", nt),
834 "readonly: \
830 "readonly: \
835 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
831 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
836 growable: [], \
832 growable: [], \
837 root: {0: Block(1), 1: Rev(1)}",
833 root: {0: Block(1), 1: Rev(1)}",
838 );
834 );
839 }
835 }
840
836
841 #[test]
837 #[test]
842 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
838 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
843 let mut idx: TestIndex = HashMap::new();
839 let mut idx: TestIndex = HashMap::new();
844 pad_insert(&mut idx, 1, "1234deadcafe");
840 pad_insert(&mut idx, 1, "1234deadcafe");
845
841
846 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
842 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
847 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
843 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
848 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
844 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
849 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
845 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
850 assert_eq!(nt.find_hex(&idx, "1a")?, None);
846 assert_eq!(nt.find_hex(&idx, "1a")?, None);
851 assert_eq!(nt.find_hex(&idx, "ab")?, None);
847 assert_eq!(nt.find_hex(&idx, "ab")?, None);
852
848
853 // and with full binary Nodes
849 // and with full binary Nodes
854 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
850 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
855 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
851 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
856 assert_eq!(nt.find_node(&idx, &unknown)?, None);
852 assert_eq!(nt.find_node(&idx, &unknown)?, None);
857 Ok(())
853 Ok(())
858 }
854 }
859
855
860 #[test]
856 #[test]
861 fn test_immutable_find_one_jump() {
857 fn test_immutable_find_one_jump() {
862 let mut idx = TestIndex::new();
858 let mut idx = TestIndex::new();
863 pad_insert(&mut idx, 9, "012");
859 pad_insert(&mut idx, 9, "012");
864 pad_insert(&mut idx, 0, "00a");
860 pad_insert(&mut idx, 0, "00a");
865
861
866 let nt = sample_nodetree();
862 let nt = sample_nodetree();
867
863
868 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
864 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
869 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
865 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
870 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
866 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
871 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
867 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
872 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
868 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
873 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
869 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
874 }
870 }
875
871
876 #[test]
872 #[test]
877 fn test_mutated_find() -> Result<(), NodeMapError> {
873 fn test_mutated_find() -> Result<(), NodeMapError> {
878 let mut idx = TestIndex::new();
874 let mut idx = TestIndex::new();
879 pad_insert(&mut idx, 9, "012");
875 pad_insert(&mut idx, 9, "012");
880 pad_insert(&mut idx, 0, "00a");
876 pad_insert(&mut idx, 0, "00a");
881 pad_insert(&mut idx, 2, "cafe");
877 pad_insert(&mut idx, 2, "cafe");
882 pad_insert(&mut idx, 3, "15");
878 pad_insert(&mut idx, 3, "15");
883 pad_insert(&mut idx, 1, "10");
879 pad_insert(&mut idx, 1, "10");
884
880
885 let nt = NodeTree {
881 let nt = NodeTree {
886 readonly: sample_nodetree().readonly,
882 readonly: sample_nodetree().readonly,
887 growable: vec![block![0: Rev(1), 5: Rev(3)]],
883 growable: vec![block![0: Rev(1), 5: Rev(3)]],
888 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
884 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
889 masked_inner_blocks: 1,
885 masked_inner_blocks: 1,
890 };
886 };
891 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
887 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
892 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
888 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
893 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
889 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
894 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
890 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
895 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
891 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
896 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
892 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
897 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
893 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
898 assert_eq!(nt.masked_readonly_blocks(), 2);
894 assert_eq!(nt.masked_readonly_blocks(), 2);
899 Ok(())
895 Ok(())
900 }
896 }
901
897
902 struct TestNtIndex {
898 struct TestNtIndex {
903 index: TestIndex,
899 index: TestIndex,
904 nt: NodeTree,
900 nt: NodeTree,
905 }
901 }
906
902
907 impl TestNtIndex {
903 impl TestNtIndex {
908 fn new() -> Self {
904 fn new() -> Self {
909 TestNtIndex {
905 TestNtIndex {
910 index: HashMap::new(),
906 index: HashMap::new(),
911 nt: NodeTree::default(),
907 nt: NodeTree::default(),
912 }
908 }
913 }
909 }
914
910
915 fn insert(
911 fn insert(
916 &mut self,
912 &mut self,
917 rev: Revision,
913 rev: Revision,
918 hex: &str,
914 hex: &str,
919 ) -> Result<(), NodeMapError> {
915 ) -> Result<(), NodeMapError> {
920 let node = pad_node(hex);
916 let node = pad_node(hex);
921 self.index.insert(rev, node.clone());
917 self.index.insert(rev, node.clone());
922 self.nt.insert(&self.index, &node, rev)?;
918 self.nt.insert(&self.index, &node, rev)?;
923 Ok(())
919 Ok(())
924 }
920 }
925
921
926 fn find_hex(
922 fn find_hex(
927 &self,
923 &self,
928 prefix: &str,
924 prefix: &str,
929 ) -> Result<Option<Revision>, NodeMapError> {
925 ) -> Result<Option<Revision>, NodeMapError> {
930 self.nt.find_hex(&self.index, prefix)
926 self.nt.find_hex(&self.index, prefix)
931 }
927 }
932
928
933 fn unique_prefix_len_hex(
929 fn unique_prefix_len_hex(
934 &self,
930 &self,
935 prefix: &str,
931 prefix: &str,
936 ) -> Result<Option<usize>, NodeMapError> {
932 ) -> Result<Option<usize>, NodeMapError> {
937 self.nt.unique_prefix_len_hex(&self.index, prefix)
933 self.nt.unique_prefix_len_hex(&self.index, prefix)
938 }
934 }
939
935
940 /// Drain `added` and restart a new one
936 /// Drain `added` and restart a new one
941 fn commit(self) -> Self {
937 fn commit(self) -> Self {
942 let mut as_vec: Vec<Block> =
938 let mut as_vec: Vec<Block> =
943 self.nt.readonly.iter().map(|block| block.clone()).collect();
939 self.nt.readonly.iter().map(|block| block.clone()).collect();
944 as_vec.extend(self.nt.growable);
940 as_vec.extend(self.nt.growable);
945 as_vec.push(self.nt.root);
941 as_vec.push(self.nt.root);
946
942
947 Self {
943 Self {
948 index: self.index,
944 index: self.index,
949 nt: NodeTree::from(as_vec).into(),
945 nt: NodeTree::from(as_vec).into(),
950 }
946 }
951 }
947 }
952 }
948 }
953
949
954 #[test]
950 #[test]
955 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
951 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
956 let mut idx = TestNtIndex::new();
952 let mut idx = TestNtIndex::new();
957 idx.insert(0, "1234")?;
953 idx.insert(0, "1234")?;
958 assert_eq!(idx.find_hex("1")?, Some(0));
954 assert_eq!(idx.find_hex("1")?, Some(0));
959 assert_eq!(idx.find_hex("12")?, Some(0));
955 assert_eq!(idx.find_hex("12")?, Some(0));
960
956
961 // let's trigger a simple split
957 // let's trigger a simple split
962 idx.insert(1, "1a34")?;
958 idx.insert(1, "1a34")?;
963 assert_eq!(idx.nt.growable.len(), 1);
959 assert_eq!(idx.nt.growable.len(), 1);
964 assert_eq!(idx.find_hex("12")?, Some(0));
960 assert_eq!(idx.find_hex("12")?, Some(0));
965 assert_eq!(idx.find_hex("1a")?, Some(1));
961 assert_eq!(idx.find_hex("1a")?, Some(1));
966
962
967 // reinserting is a no_op
963 // reinserting is a no_op
968 idx.insert(1, "1a34")?;
964 idx.insert(1, "1a34")?;
969 assert_eq!(idx.nt.growable.len(), 1);
965 assert_eq!(idx.nt.growable.len(), 1);
970 assert_eq!(idx.find_hex("12")?, Some(0));
966 assert_eq!(idx.find_hex("12")?, Some(0));
971 assert_eq!(idx.find_hex("1a")?, Some(1));
967 assert_eq!(idx.find_hex("1a")?, Some(1));
972
968
973 idx.insert(2, "1a01")?;
969 idx.insert(2, "1a01")?;
974 assert_eq!(idx.nt.growable.len(), 2);
970 assert_eq!(idx.nt.growable.len(), 2);
975 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
971 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
976 assert_eq!(idx.find_hex("12")?, Some(0));
972 assert_eq!(idx.find_hex("12")?, Some(0));
977 assert_eq!(idx.find_hex("1a3")?, Some(1));
973 assert_eq!(idx.find_hex("1a3")?, Some(1));
978 assert_eq!(idx.find_hex("1a0")?, Some(2));
974 assert_eq!(idx.find_hex("1a0")?, Some(2));
979 assert_eq!(idx.find_hex("1a12")?, None);
975 assert_eq!(idx.find_hex("1a12")?, None);
980
976
981 // now let's make it split and create more than one additional block
977 // now let's make it split and create more than one additional block
982 idx.insert(3, "1a345")?;
978 idx.insert(3, "1a345")?;
983 assert_eq!(idx.nt.growable.len(), 4);
979 assert_eq!(idx.nt.growable.len(), 4);
984 assert_eq!(idx.find_hex("1a340")?, Some(1));
980 assert_eq!(idx.find_hex("1a340")?, Some(1));
985 assert_eq!(idx.find_hex("1a345")?, Some(3));
981 assert_eq!(idx.find_hex("1a345")?, Some(3));
986 assert_eq!(idx.find_hex("1a341")?, None);
982 assert_eq!(idx.find_hex("1a341")?, None);
987
983
988 // there's no readonly block to mask
984 // there's no readonly block to mask
989 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
985 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
990 Ok(())
986 Ok(())
991 }
987 }
992
988
993 #[test]
989 #[test]
994 fn test_unique_prefix_len_zero_prefix() {
990 fn test_unique_prefix_len_zero_prefix() {
995 let mut idx = TestNtIndex::new();
991 let mut idx = TestNtIndex::new();
996 idx.insert(0, "00000abcd").unwrap();
992 idx.insert(0, "00000abcd").unwrap();
997
993
998 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
994 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
999 // in the nodetree proper, this will be found at the first nybble
995 // in the nodetree proper, this will be found at the first nybble
1000 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
996 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
1001 // but the first difference with `NULL_NODE`
997 // but the first difference with `NULL_NODE`
1002 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
998 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1003 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
999 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1004
1000
1005 // same with odd result
1001 // same with odd result
1006 idx.insert(1, "00123").unwrap();
1002 idx.insert(1, "00123").unwrap();
1007 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
1003 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
1008 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
1004 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
1009
1005
1010 // these are unchanged of course
1006 // these are unchanged of course
1011 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1007 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1012 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1008 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1013 }
1009 }
1014
1010
1015 #[test]
1011 #[test]
1016 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1012 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1017 // check that the splitting loop is long enough
1013 // check that the splitting loop is long enough
1018 let mut nt_idx = TestNtIndex::new();
1014 let mut nt_idx = TestNtIndex::new();
1019 let nt = &mut nt_idx.nt;
1015 let nt = &mut nt_idx.nt;
1020 let idx = &mut nt_idx.index;
1016 let idx = &mut nt_idx.index;
1021
1017
1022 let node0_hex = hex_pad_right("444444");
1018 let node0_hex = hex_pad_right("444444");
1023 let mut node1_hex = hex_pad_right("444444").clone();
1019 let mut node1_hex = hex_pad_right("444444").clone();
1024 node1_hex.pop();
1020 node1_hex.pop();
1025 node1_hex.push('5');
1021 node1_hex.push('5');
1026 let node0 = Node::from_hex(&node0_hex).unwrap();
1022 let node0 = Node::from_hex(&node0_hex).unwrap();
1027 let node1 = Node::from_hex(&node1_hex).unwrap();
1023 let node1 = Node::from_hex(&node1_hex).unwrap();
1028
1024
1029 idx.insert(0, node0.clone());
1025 idx.insert(0, node0.clone());
1030 nt.insert(idx, &node0, 0)?;
1026 nt.insert(idx, &node0, 0)?;
1031 idx.insert(1, node1.clone());
1027 idx.insert(1, node1.clone());
1032 nt.insert(idx, &node1, 1)?;
1028 nt.insert(idx, &node1, 1)?;
1033
1029
1034 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1030 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1035 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1031 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1036 Ok(())
1032 Ok(())
1037 }
1033 }
1038
1034
1039 #[test]
1035 #[test]
1040 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1036 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1041 let mut idx = TestNtIndex::new();
1037 let mut idx = TestNtIndex::new();
1042 idx.insert(0, "1234")?;
1038 idx.insert(0, "1234")?;
1043 idx.insert(1, "1235")?;
1039 idx.insert(1, "1235")?;
1044 idx.insert(2, "131")?;
1040 idx.insert(2, "131")?;
1045 idx.insert(3, "cafe")?;
1041 idx.insert(3, "cafe")?;
1046 let mut idx = idx.commit();
1042 let mut idx = idx.commit();
1047 assert_eq!(idx.find_hex("1234")?, Some(0));
1043 assert_eq!(idx.find_hex("1234")?, Some(0));
1048 assert_eq!(idx.find_hex("1235")?, Some(1));
1044 assert_eq!(idx.find_hex("1235")?, Some(1));
1049 assert_eq!(idx.find_hex("131")?, Some(2));
1045 assert_eq!(idx.find_hex("131")?, Some(2));
1050 assert_eq!(idx.find_hex("cafe")?, Some(3));
1046 assert_eq!(idx.find_hex("cafe")?, Some(3));
1051 // we did not add anything since init from readonly
1047 // we did not add anything since init from readonly
1052 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1048 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1053
1049
1054 idx.insert(4, "123A")?;
1050 idx.insert(4, "123A")?;
1055 assert_eq!(idx.find_hex("1234")?, Some(0));
1051 assert_eq!(idx.find_hex("1234")?, Some(0));
1056 assert_eq!(idx.find_hex("1235")?, Some(1));
1052 assert_eq!(idx.find_hex("1235")?, Some(1));
1057 assert_eq!(idx.find_hex("131")?, Some(2));
1053 assert_eq!(idx.find_hex("131")?, Some(2));
1058 assert_eq!(idx.find_hex("cafe")?, Some(3));
1054 assert_eq!(idx.find_hex("cafe")?, Some(3));
1059 assert_eq!(idx.find_hex("123A")?, Some(4));
1055 assert_eq!(idx.find_hex("123A")?, Some(4));
1060 // we masked blocks for all prefixes of "123", including the root
1056 // we masked blocks for all prefixes of "123", including the root
1061 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1062
1058
1063 eprintln!("{:?}", idx.nt);
1059 eprintln!("{:?}", idx.nt);
1064 idx.insert(5, "c0")?;
1060 idx.insert(5, "c0")?;
1065 assert_eq!(idx.find_hex("cafe")?, Some(3));
1061 assert_eq!(idx.find_hex("cafe")?, Some(3));
1066 assert_eq!(idx.find_hex("c0")?, Some(5));
1062 assert_eq!(idx.find_hex("c0")?, Some(5));
1067 assert_eq!(idx.find_hex("c1")?, None);
1063 assert_eq!(idx.find_hex("c1")?, None);
1068 assert_eq!(idx.find_hex("1234")?, Some(0));
1064 assert_eq!(idx.find_hex("1234")?, Some(0));
1069 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1065 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1070 // it doesn't mask anything
1066 // it doesn't mask anything
1071 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1067 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1072
1068
1073 Ok(())
1069 Ok(())
1074 }
1070 }
1075
1071
1076 #[test]
1072 #[test]
1077 fn test_invalidate_all() -> Result<(), NodeMapError> {
1073 fn test_invalidate_all() -> Result<(), NodeMapError> {
1078 let mut idx = TestNtIndex::new();
1074 let mut idx = TestNtIndex::new();
1079 idx.insert(0, "1234")?;
1075 idx.insert(0, "1234")?;
1080 idx.insert(1, "1235")?;
1076 idx.insert(1, "1235")?;
1081 idx.insert(2, "131")?;
1077 idx.insert(2, "131")?;
1082 idx.insert(3, "cafe")?;
1078 idx.insert(3, "cafe")?;
1083 let mut idx = idx.commit();
1079 let mut idx = idx.commit();
1084
1080
1085 idx.nt.invalidate_all();
1081 idx.nt.invalidate_all();
1086
1082
1087 assert_eq!(idx.find_hex("1234")?, None);
1083 assert_eq!(idx.find_hex("1234")?, None);
1088 assert_eq!(idx.find_hex("1235")?, None);
1084 assert_eq!(idx.find_hex("1235")?, None);
1089 assert_eq!(idx.find_hex("131")?, None);
1085 assert_eq!(idx.find_hex("131")?, None);
1090 assert_eq!(idx.find_hex("cafe")?, None);
1086 assert_eq!(idx.find_hex("cafe")?, None);
1091 // all the readonly blocks have been masked, this is the
1087 // all the readonly blocks have been masked, this is the
1092 // conventional expected response
1088 // conventional expected response
1093 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1089 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1094 Ok(())
1090 Ok(())
1095 }
1091 }
1096
1092
1097 #[test]
1093 #[test]
1098 fn test_into_added_empty() {
1094 fn test_into_added_empty() {
1099 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1095 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1100 assert!(sample_nodetree()
1096 assert!(sample_nodetree()
1101 .into_readonly_and_added_bytes()
1097 .into_readonly_and_added_bytes()
1102 .1
1098 .1
1103 .is_empty());
1099 .is_empty());
1104 }
1100 }
1105
1101
1106 #[test]
1102 #[test]
1107 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1103 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1108 let mut idx = TestNtIndex::new();
1104 let mut idx = TestNtIndex::new();
1109 idx.insert(0, "1234")?;
1105 idx.insert(0, "1234")?;
1110 let mut idx = idx.commit();
1106 let mut idx = idx.commit();
1111 idx.insert(4, "cafe")?;
1107 idx.insert(4, "cafe")?;
1112 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1108 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1113
1109
1114 // only the root block has been changed
1110 // only the root block has been changed
1115 assert_eq!(bytes.len(), BLOCK_SIZE);
1111 assert_eq!(bytes.len(), BLOCK_SIZE);
1116 // big endian for -2
1112 // big endian for -2
1117 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1113 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1118 // big endian for -6
1114 // big endian for -6
1119 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1115 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1120 Ok(())
1116 Ok(())
1121 }
1117 }
1122 }
1118 }
@@ -1,168 +1,169
1 // utils module
1 // utils module
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Contains useful functions, traits, structs, etc. for use in core.
8 //! Contains useful functions, traits, structs, etc. for use in core.
9
9
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
11 use std::{io::Write, ops::Deref};
11 use std::{io::Write, ops::Deref};
12
12
13 pub mod files;
13 pub mod files;
14 pub mod hg_path;
14 pub mod hg_path;
15 pub mod path_auditor;
15 pub mod path_auditor;
16
16
17 /// Useful until rust/issues/56345 is stable
17 /// Useful until rust/issues/56345 is stable
18 ///
18 ///
19 /// # Examples
19 /// # Examples
20 ///
20 ///
21 /// ```
21 /// ```
22 /// use crate::hg::utils::find_slice_in_slice;
22 /// use crate::hg::utils::find_slice_in_slice;
23 ///
23 ///
24 /// let haystack = b"This is the haystack".to_vec();
24 /// let haystack = b"This is the haystack".to_vec();
25 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
25 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
26 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
26 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
27 /// ```
27 /// ```
28 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
28 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
29 where
29 where
30 for<'a> &'a [T]: PartialEq,
30 for<'a> &'a [T]: PartialEq,
31 {
31 {
32 slice
32 slice
33 .windows(needle.len())
33 .windows(needle.len())
34 .position(|window| window == needle)
34 .position(|window| window == needle)
35 }
35 }
36
36
37 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
37 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
38 ///
38 ///
39 /// # Examples
39 /// # Examples
40 ///
40 ///
41 /// ```
41 /// ```
42 /// use crate::hg::utils::replace_slice;
42 /// use crate::hg::utils::replace_slice;
43 /// let mut line = b"I hate writing tests!".to_vec();
43 /// let mut line = b"I hate writing tests!".to_vec();
44 /// replace_slice(&mut line, b"hate", b"love");
44 /// replace_slice(&mut line, b"hate", b"love");
45 /// assert_eq!(
45 /// assert_eq!(
46 /// line,
46 /// line,
47 /// b"I love writing tests!".to_vec()
47 /// b"I love writing tests!".to_vec()
48 /// );
48 /// );
49 /// ```
49 /// ```
50 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
50 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
51 where
51 where
52 T: Clone + PartialEq,
52 T: Clone + PartialEq,
53 {
53 {
54 if buf.len() < from.len() || from.len() != to.len() {
54 if buf.len() < from.len() || from.len() != to.len() {
55 return;
55 return;
56 }
56 }
57 for i in 0..=buf.len() - from.len() {
57 for i in 0..=buf.len() - from.len() {
58 if buf[i..].starts_with(from) {
58 if buf[i..].starts_with(from) {
59 buf[i..(i + from.len())].clone_from_slice(to);
59 buf[i..(i + from.len())].clone_from_slice(to);
60 }
60 }
61 }
61 }
62 }
62 }
63
63
64 pub trait SliceExt {
64 pub trait SliceExt {
65 fn trim_end(&self) -> &Self;
65 fn trim_end(&self) -> &Self;
66 fn trim_start(&self) -> &Self;
66 fn trim_start(&self) -> &Self;
67 fn trim(&self) -> &Self;
67 fn trim(&self) -> &Self;
68 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
68 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
69 }
69 }
70
70
71 #[allow(clippy::trivially_copy_pass_by_ref)]
71 fn is_not_whitespace(c: &u8) -> bool {
72 fn is_not_whitespace(c: &u8) -> bool {
72 !(*c as char).is_whitespace()
73 !(*c as char).is_whitespace()
73 }
74 }
74
75
75 impl SliceExt for [u8] {
76 impl SliceExt for [u8] {
76 fn trim_end(&self) -> &[u8] {
77 fn trim_end(&self) -> &[u8] {
77 if let Some(last) = self.iter().rposition(is_not_whitespace) {
78 if let Some(last) = self.iter().rposition(is_not_whitespace) {
78 &self[..last + 1]
79 &self[..=last]
79 } else {
80 } else {
80 &[]
81 &[]
81 }
82 }
82 }
83 }
83 fn trim_start(&self) -> &[u8] {
84 fn trim_start(&self) -> &[u8] {
84 if let Some(first) = self.iter().position(is_not_whitespace) {
85 if let Some(first) = self.iter().position(is_not_whitespace) {
85 &self[first..]
86 &self[first..]
86 } else {
87 } else {
87 &[]
88 &[]
88 }
89 }
89 }
90 }
90
91
91 /// ```
92 /// ```
92 /// use hg::utils::SliceExt;
93 /// use hg::utils::SliceExt;
93 /// assert_eq!(
94 /// assert_eq!(
94 /// b" to trim ".trim(),
95 /// b" to trim ".trim(),
95 /// b"to trim"
96 /// b"to trim"
96 /// );
97 /// );
97 /// assert_eq!(
98 /// assert_eq!(
98 /// b"to trim ".trim(),
99 /// b"to trim ".trim(),
99 /// b"to trim"
100 /// b"to trim"
100 /// );
101 /// );
101 /// assert_eq!(
102 /// assert_eq!(
102 /// b" to trim".trim(),
103 /// b" to trim".trim(),
103 /// b"to trim"
104 /// b"to trim"
104 /// );
105 /// );
105 /// ```
106 /// ```
106 fn trim(&self) -> &[u8] {
107 fn trim(&self) -> &[u8] {
107 self.trim_start().trim_end()
108 self.trim_start().trim_end()
108 }
109 }
109
110
110 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
111 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
111 if self.starts_with(needle) {
112 if self.starts_with(needle) {
112 Some(&self[needle.len()..])
113 Some(&self[needle.len()..])
113 } else {
114 } else {
114 None
115 None
115 }
116 }
116 }
117 }
117 }
118 }
118
119
119 pub trait Escaped {
120 pub trait Escaped {
120 /// Return bytes escaped for display to the user
121 /// Return bytes escaped for display to the user
121 fn escaped_bytes(&self) -> Vec<u8>;
122 fn escaped_bytes(&self) -> Vec<u8>;
122 }
123 }
123
124
124 impl Escaped for u8 {
125 impl Escaped for u8 {
125 fn escaped_bytes(&self) -> Vec<u8> {
126 fn escaped_bytes(&self) -> Vec<u8> {
126 let mut acc = vec![];
127 let mut acc = vec![];
127 match self {
128 match self {
128 c @ b'\'' | c @ b'\\' => {
129 c @ b'\'' | c @ b'\\' => {
129 acc.push(b'\\');
130 acc.push(b'\\');
130 acc.push(*c);
131 acc.push(*c);
131 }
132 }
132 b'\t' => {
133 b'\t' => {
133 acc.extend(br"\\t");
134 acc.extend(br"\\t");
134 }
135 }
135 b'\n' => {
136 b'\n' => {
136 acc.extend(br"\\n");
137 acc.extend(br"\\n");
137 }
138 }
138 b'\r' => {
139 b'\r' => {
139 acc.extend(br"\\r");
140 acc.extend(br"\\r");
140 }
141 }
141 c if (*c < b' ' || *c >= 127) => {
142 c if (*c < b' ' || *c >= 127) => {
142 write!(acc, "\\x{:x}", self).unwrap();
143 write!(acc, "\\x{:x}", self).unwrap();
143 }
144 }
144 c => {
145 c => {
145 acc.push(*c);
146 acc.push(*c);
146 }
147 }
147 }
148 }
148 acc
149 acc
149 }
150 }
150 }
151 }
151
152
152 impl<'a, T: Escaped> Escaped for &'a [T] {
153 impl<'a, T: Escaped> Escaped for &'a [T] {
153 fn escaped_bytes(&self) -> Vec<u8> {
154 fn escaped_bytes(&self) -> Vec<u8> {
154 self.iter().flat_map(|item| item.escaped_bytes()).collect()
155 self.iter().flat_map(Escaped::escaped_bytes).collect()
155 }
156 }
156 }
157 }
157
158
158 impl<T: Escaped> Escaped for Vec<T> {
159 impl<T: Escaped> Escaped for Vec<T> {
159 fn escaped_bytes(&self) -> Vec<u8> {
160 fn escaped_bytes(&self) -> Vec<u8> {
160 self.deref().escaped_bytes()
161 self.deref().escaped_bytes()
161 }
162 }
162 }
163 }
163
164
164 impl<'a> Escaped for &'a HgPath {
165 impl<'a> Escaped for &'a HgPath {
165 fn escaped_bytes(&self) -> Vec<u8> {
166 fn escaped_bytes(&self) -> Vec<u8> {
166 self.as_bytes().escaped_bytes()
167 self.as_bytes().escaped_bytes()
167 }
168 }
168 }
169 }
@@ -1,384 +1,382
1 // files.rs
1 // files.rs
2 //
2 //
3 // Copyright 2019
3 // Copyright 2019
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 // Yuya Nishihara <yuya@tcha.org>
5 // Yuya Nishihara <yuya@tcha.org>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 //! Functions for fiddling with files.
10 //! Functions for fiddling with files.
11
11
12 use crate::utils::{
12 use crate::utils::{
13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 path_auditor::PathAuditor,
14 path_auditor::PathAuditor,
15 replace_slice,
15 replace_slice,
16 };
16 };
17 use lazy_static::lazy_static;
17 use lazy_static::lazy_static;
18 use same_file::is_same_file;
18 use same_file::is_same_file;
19 use std::borrow::ToOwned;
19 use std::borrow::ToOwned;
20 use std::fs::Metadata;
20 use std::fs::Metadata;
21 use std::iter::FusedIterator;
21 use std::iter::FusedIterator;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24
24
25 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
25 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
26 let os_str;
26 let os_str;
27 #[cfg(unix)]
27 #[cfg(unix)]
28 {
28 {
29 use std::os::unix::ffi::OsStrExt;
29 use std::os::unix::ffi::OsStrExt;
30 os_str = std::ffi::OsStr::from_bytes(bytes);
30 os_str = std::ffi::OsStr::from_bytes(bytes);
31 }
31 }
32 // TODO Handle other platforms
32 // TODO Handle other platforms
33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
34 // Perhaps, the return type would have to be Result<PathBuf>.
34 // Perhaps, the return type would have to be Result<PathBuf>.
35
35
36 Path::new(os_str)
36 Path::new(os_str)
37 }
37 }
38
38
39 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
39 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
40 // that's why Vec<u8> is returned.
40 // that's why Vec<u8> is returned.
41 #[cfg(unix)]
41 #[cfg(unix)]
42 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
42 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
43 use std::os::unix::ffi::OsStrExt;
43 use std::os::unix::ffi::OsStrExt;
44 path.as_ref().as_os_str().as_bytes().to_vec()
44 path.as_ref().as_os_str().as_bytes().to_vec()
45 }
45 }
46
46
47 /// An iterator over repository path yielding itself and its ancestors.
47 /// An iterator over repository path yielding itself and its ancestors.
48 #[derive(Copy, Clone, Debug)]
48 #[derive(Copy, Clone, Debug)]
49 pub struct Ancestors<'a> {
49 pub struct Ancestors<'a> {
50 next: Option<&'a HgPath>,
50 next: Option<&'a HgPath>,
51 }
51 }
52
52
53 impl<'a> Iterator for Ancestors<'a> {
53 impl<'a> Iterator for Ancestors<'a> {
54 type Item = &'a HgPath;
54 type Item = &'a HgPath;
55
55
56 fn next(&mut self) -> Option<Self::Item> {
56 fn next(&mut self) -> Option<Self::Item> {
57 let next = self.next;
57 let next = self.next;
58 self.next = match self.next {
58 self.next = match self.next {
59 Some(s) if s.is_empty() => None,
59 Some(s) if s.is_empty() => None,
60 Some(s) => {
60 Some(s) => {
61 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
61 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
62 Some(HgPath::new(&s.as_bytes()[..p]))
62 Some(HgPath::new(&s.as_bytes()[..p]))
63 }
63 }
64 None => None,
64 None => None,
65 };
65 };
66 next
66 next
67 }
67 }
68 }
68 }
69
69
70 impl<'a> FusedIterator for Ancestors<'a> {}
70 impl<'a> FusedIterator for Ancestors<'a> {}
71
71
72 /// An iterator over repository path yielding itself and its ancestors.
72 /// An iterator over repository path yielding itself and its ancestors.
73 #[derive(Copy, Clone, Debug)]
73 #[derive(Copy, Clone, Debug)]
74 pub(crate) struct AncestorsWithBase<'a> {
74 pub(crate) struct AncestorsWithBase<'a> {
75 next: Option<(&'a HgPath, &'a HgPath)>,
75 next: Option<(&'a HgPath, &'a HgPath)>,
76 }
76 }
77
77
78 impl<'a> Iterator for AncestorsWithBase<'a> {
78 impl<'a> Iterator for AncestorsWithBase<'a> {
79 type Item = (&'a HgPath, &'a HgPath);
79 type Item = (&'a HgPath, &'a HgPath);
80
80
81 fn next(&mut self) -> Option<Self::Item> {
81 fn next(&mut self) -> Option<Self::Item> {
82 let next = self.next;
82 let next = self.next;
83 self.next = match self.next {
83 self.next = match self.next {
84 Some((s, _)) if s.is_empty() => None,
84 Some((s, _)) if s.is_empty() => None,
85 Some((s, _)) => Some(s.split_filename()),
85 Some((s, _)) => Some(s.split_filename()),
86 None => None,
86 None => None,
87 };
87 };
88 next
88 next
89 }
89 }
90 }
90 }
91
91
92 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
92 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
93
93
94 /// Returns an iterator yielding ancestor directories of the given repository
94 /// Returns an iterator yielding ancestor directories of the given repository
95 /// path.
95 /// path.
96 ///
96 ///
97 /// The path is separated by '/', and must not start with '/'.
97 /// The path is separated by '/', and must not start with '/'.
98 ///
98 ///
99 /// The path itself isn't included unless it is b"" (meaning the root
99 /// The path itself isn't included unless it is b"" (meaning the root
100 /// directory.)
100 /// directory.)
101 pub fn find_dirs<'a>(path: &'a HgPath) -> Ancestors<'a> {
101 pub fn find_dirs(path: &HgPath) -> Ancestors {
102 let mut dirs = Ancestors { next: Some(path) };
102 let mut dirs = Ancestors { next: Some(path) };
103 if !path.is_empty() {
103 if !path.is_empty() {
104 dirs.next(); // skip itself
104 dirs.next(); // skip itself
105 }
105 }
106 dirs
106 dirs
107 }
107 }
108
108
109 /// Returns an iterator yielding ancestor directories of the given repository
109 /// Returns an iterator yielding ancestor directories of the given repository
110 /// path.
110 /// path.
111 ///
111 ///
112 /// The path is separated by '/', and must not start with '/'.
112 /// The path is separated by '/', and must not start with '/'.
113 ///
113 ///
114 /// The path itself isn't included unless it is b"" (meaning the root
114 /// The path itself isn't included unless it is b"" (meaning the root
115 /// directory.)
115 /// directory.)
116 pub(crate) fn find_dirs_with_base<'a>(
116 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
117 path: &'a HgPath,
118 ) -> AncestorsWithBase<'a> {
119 let mut dirs = AncestorsWithBase {
117 let mut dirs = AncestorsWithBase {
120 next: Some((path, HgPath::new(b""))),
118 next: Some((path, HgPath::new(b""))),
121 };
119 };
122 if !path.is_empty() {
120 if !path.is_empty() {
123 dirs.next(); // skip itself
121 dirs.next(); // skip itself
124 }
122 }
125 dirs
123 dirs
126 }
124 }
127
125
128 /// TODO more than ASCII?
126 /// TODO more than ASCII?
129 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
127 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
130 #[cfg(windows)] // NTFS compares via upper()
128 #[cfg(windows)] // NTFS compares via upper()
131 return path.to_ascii_uppercase();
129 return path.to_ascii_uppercase();
132 #[cfg(unix)]
130 #[cfg(unix)]
133 path.to_ascii_lowercase()
131 path.to_ascii_lowercase()
134 }
132 }
135
133
136 lazy_static! {
134 lazy_static! {
137 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
135 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
138 [
136 [
139 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
137 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
140 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
138 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
141 ]
139 ]
142 .iter()
140 .iter()
143 .map(|code| {
141 .map(|code| {
144 std::char::from_u32(*code)
142 std::char::from_u32(*code)
145 .unwrap()
143 .unwrap()
146 .encode_utf8(&mut [0; 3])
144 .encode_utf8(&mut [0; 3])
147 .bytes()
145 .bytes()
148 .collect()
146 .collect()
149 })
147 })
150 .collect()
148 .collect()
151 };
149 };
152 }
150 }
153
151
154 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
152 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
155 let mut buf = bytes.to_owned();
153 let mut buf = bytes.to_owned();
156 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
154 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
157 if needs_escaping {
155 if needs_escaping {
158 for forbidden in IGNORED_CHARS.iter() {
156 for forbidden in IGNORED_CHARS.iter() {
159 replace_slice(&mut buf, forbidden, &[])
157 replace_slice(&mut buf, forbidden, &[])
160 }
158 }
161 buf
159 buf
162 } else {
160 } else {
163 buf
161 buf
164 }
162 }
165 }
163 }
166
164
167 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
165 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
168 hfs_ignore_clean(&bytes.to_ascii_lowercase())
166 hfs_ignore_clean(&bytes.to_ascii_lowercase())
169 }
167 }
170
168
171 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
169 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
172 pub struct HgMetadata {
170 pub struct HgMetadata {
173 pub st_dev: u64,
171 pub st_dev: u64,
174 pub st_mode: u32,
172 pub st_mode: u32,
175 pub st_nlink: u64,
173 pub st_nlink: u64,
176 pub st_size: u64,
174 pub st_size: u64,
177 pub st_mtime: i64,
175 pub st_mtime: i64,
178 pub st_ctime: i64,
176 pub st_ctime: i64,
179 }
177 }
180
178
181 // TODO support other plaforms
179 // TODO support other plaforms
182 #[cfg(unix)]
180 #[cfg(unix)]
183 impl HgMetadata {
181 impl HgMetadata {
184 pub fn from_metadata(metadata: Metadata) -> Self {
182 pub fn from_metadata(metadata: Metadata) -> Self {
185 use std::os::unix::fs::MetadataExt;
183 use std::os::unix::fs::MetadataExt;
186 Self {
184 Self {
187 st_dev: metadata.dev(),
185 st_dev: metadata.dev(),
188 st_mode: metadata.mode(),
186 st_mode: metadata.mode(),
189 st_nlink: metadata.nlink(),
187 st_nlink: metadata.nlink(),
190 st_size: metadata.size(),
188 st_size: metadata.size(),
191 st_mtime: metadata.mtime(),
189 st_mtime: metadata.mtime(),
192 st_ctime: metadata.ctime(),
190 st_ctime: metadata.ctime(),
193 }
191 }
194 }
192 }
195 }
193 }
196
194
197 /// Returns the canonical path of `name`, given `cwd` and `root`
195 /// Returns the canonical path of `name`, given `cwd` and `root`
198 pub fn canonical_path(
196 pub fn canonical_path(
199 root: impl AsRef<Path>,
197 root: impl AsRef<Path>,
200 cwd: impl AsRef<Path>,
198 cwd: impl AsRef<Path>,
201 name: impl AsRef<Path>,
199 name: impl AsRef<Path>,
202 ) -> Result<PathBuf, HgPathError> {
200 ) -> Result<PathBuf, HgPathError> {
203 // TODO add missing normalization for other platforms
201 // TODO add missing normalization for other platforms
204 let root = root.as_ref();
202 let root = root.as_ref();
205 let cwd = cwd.as_ref();
203 let cwd = cwd.as_ref();
206 let name = name.as_ref();
204 let name = name.as_ref();
207
205
208 let name = if !name.is_absolute() {
206 let name = if !name.is_absolute() {
209 root.join(&cwd).join(&name)
207 root.join(&cwd).join(&name)
210 } else {
208 } else {
211 name.to_owned()
209 name.to_owned()
212 };
210 };
213 let auditor = PathAuditor::new(&root);
211 let auditor = PathAuditor::new(&root);
214 if name != root && name.starts_with(&root) {
212 if name != root && name.starts_with(&root) {
215 let name = name.strip_prefix(&root).unwrap();
213 let name = name.strip_prefix(&root).unwrap();
216 auditor.audit_path(path_to_hg_path_buf(name)?)?;
214 auditor.audit_path(path_to_hg_path_buf(name)?)?;
217 return Ok(name.to_owned());
215 Ok(name.to_owned())
218 } else if name == root {
216 } else if name == root {
219 return Ok("".into());
217 Ok("".into())
220 } else {
218 } else {
221 // Determine whether `name' is in the hierarchy at or beneath `root',
219 // Determine whether `name' is in the hierarchy at or beneath `root',
222 // by iterating name=name.parent() until it returns `None` (can't
220 // by iterating name=name.parent() until it returns `None` (can't
223 // check name == '/', because that doesn't work on windows).
221 // check name == '/', because that doesn't work on windows).
224 let mut name = name.deref();
222 let mut name = name.deref();
225 let original_name = name.to_owned();
223 let original_name = name.to_owned();
226 loop {
224 loop {
227 let same = is_same_file(&name, &root).unwrap_or(false);
225 let same = is_same_file(&name, &root).unwrap_or(false);
228 if same {
226 if same {
229 if name == original_name {
227 if name == original_name {
230 // `name` was actually the same as root (maybe a symlink)
228 // `name` was actually the same as root (maybe a symlink)
231 return Ok("".into());
229 return Ok("".into());
232 }
230 }
233 // `name` is a symlink to root, so `original_name` is under
231 // `name` is a symlink to root, so `original_name` is under
234 // root
232 // root
235 let rel_path = original_name.strip_prefix(&name).unwrap();
233 let rel_path = original_name.strip_prefix(&name).unwrap();
236 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
234 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
237 return Ok(rel_path.to_owned());
235 return Ok(rel_path.to_owned());
238 }
236 }
239 name = match name.parent() {
237 name = match name.parent() {
240 None => break,
238 None => break,
241 Some(p) => p,
239 Some(p) => p,
242 };
240 };
243 }
241 }
244 // TODO hint to the user about using --cwd
242 // TODO hint to the user about using --cwd
245 // Bubble up the responsibility to Python for now
243 // Bubble up the responsibility to Python for now
246 Err(HgPathError::NotUnderRoot {
244 Err(HgPathError::NotUnderRoot {
247 path: original_name.to_owned(),
245 path: original_name.to_owned(),
248 root: root.to_owned(),
246 root: root.to_owned(),
249 })
247 })
250 }
248 }
251 }
249 }
252
250
253 #[cfg(test)]
251 #[cfg(test)]
254 mod tests {
252 mod tests {
255 use super::*;
253 use super::*;
256 use pretty_assertions::assert_eq;
254 use pretty_assertions::assert_eq;
257
255
258 #[test]
256 #[test]
259 fn find_dirs_some() {
257 fn find_dirs_some() {
260 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
258 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
261 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
259 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
262 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
260 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
263 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
261 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
264 assert_eq!(dirs.next(), None);
262 assert_eq!(dirs.next(), None);
265 assert_eq!(dirs.next(), None);
263 assert_eq!(dirs.next(), None);
266 }
264 }
267
265
268 #[test]
266 #[test]
269 fn find_dirs_empty() {
267 fn find_dirs_empty() {
270 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
268 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
271 let mut dirs = super::find_dirs(HgPath::new(b""));
269 let mut dirs = super::find_dirs(HgPath::new(b""));
272 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
270 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
273 assert_eq!(dirs.next(), None);
271 assert_eq!(dirs.next(), None);
274 assert_eq!(dirs.next(), None);
272 assert_eq!(dirs.next(), None);
275 }
273 }
276
274
277 #[test]
275 #[test]
278 fn test_find_dirs_with_base_some() {
276 fn test_find_dirs_with_base_some() {
279 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
277 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
280 assert_eq!(
278 assert_eq!(
281 dirs.next(),
279 dirs.next(),
282 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
280 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
283 );
281 );
284 assert_eq!(
282 assert_eq!(
285 dirs.next(),
283 dirs.next(),
286 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
284 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
287 );
285 );
288 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
286 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
289 assert_eq!(dirs.next(), None);
287 assert_eq!(dirs.next(), None);
290 assert_eq!(dirs.next(), None);
288 assert_eq!(dirs.next(), None);
291 }
289 }
292
290
293 #[test]
291 #[test]
294 fn test_find_dirs_with_base_empty() {
292 fn test_find_dirs_with_base_empty() {
295 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
293 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
296 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
294 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
297 assert_eq!(dirs.next(), None);
295 assert_eq!(dirs.next(), None);
298 assert_eq!(dirs.next(), None);
296 assert_eq!(dirs.next(), None);
299 }
297 }
300
298
301 #[test]
299 #[test]
302 fn test_canonical_path() {
300 fn test_canonical_path() {
303 let root = Path::new("/repo");
301 let root = Path::new("/repo");
304 let cwd = Path::new("/dir");
302 let cwd = Path::new("/dir");
305 let name = Path::new("filename");
303 let name = Path::new("filename");
306 assert_eq!(
304 assert_eq!(
307 canonical_path(root, cwd, name),
305 canonical_path(root, cwd, name),
308 Err(HgPathError::NotUnderRoot {
306 Err(HgPathError::NotUnderRoot {
309 path: PathBuf::from("/dir/filename"),
307 path: PathBuf::from("/dir/filename"),
310 root: root.to_path_buf()
308 root: root.to_path_buf()
311 })
309 })
312 );
310 );
313
311
314 let root = Path::new("/repo");
312 let root = Path::new("/repo");
315 let cwd = Path::new("/");
313 let cwd = Path::new("/");
316 let name = Path::new("filename");
314 let name = Path::new("filename");
317 assert_eq!(
315 assert_eq!(
318 canonical_path(root, cwd, name),
316 canonical_path(root, cwd, name),
319 Err(HgPathError::NotUnderRoot {
317 Err(HgPathError::NotUnderRoot {
320 path: PathBuf::from("/filename"),
318 path: PathBuf::from("/filename"),
321 root: root.to_path_buf()
319 root: root.to_path_buf()
322 })
320 })
323 );
321 );
324
322
325 let root = Path::new("/repo");
323 let root = Path::new("/repo");
326 let cwd = Path::new("/");
324 let cwd = Path::new("/");
327 let name = Path::new("repo/filename");
325 let name = Path::new("repo/filename");
328 assert_eq!(
326 assert_eq!(
329 canonical_path(root, cwd, name),
327 canonical_path(root, cwd, name),
330 Ok(PathBuf::from("filename"))
328 Ok(PathBuf::from("filename"))
331 );
329 );
332
330
333 let root = Path::new("/repo");
331 let root = Path::new("/repo");
334 let cwd = Path::new("/repo");
332 let cwd = Path::new("/repo");
335 let name = Path::new("filename");
333 let name = Path::new("filename");
336 assert_eq!(
334 assert_eq!(
337 canonical_path(root, cwd, name),
335 canonical_path(root, cwd, name),
338 Ok(PathBuf::from("filename"))
336 Ok(PathBuf::from("filename"))
339 );
337 );
340
338
341 let root = Path::new("/repo");
339 let root = Path::new("/repo");
342 let cwd = Path::new("/repo/subdir");
340 let cwd = Path::new("/repo/subdir");
343 let name = Path::new("filename");
341 let name = Path::new("filename");
344 assert_eq!(
342 assert_eq!(
345 canonical_path(root, cwd, name),
343 canonical_path(root, cwd, name),
346 Ok(PathBuf::from("subdir/filename"))
344 Ok(PathBuf::from("subdir/filename"))
347 );
345 );
348 }
346 }
349
347
350 #[test]
348 #[test]
351 fn test_canonical_path_not_rooted() {
349 fn test_canonical_path_not_rooted() {
352 use std::fs::create_dir;
350 use std::fs::create_dir;
353 use tempfile::tempdir;
351 use tempfile::tempdir;
354
352
355 let base_dir = tempdir().unwrap();
353 let base_dir = tempdir().unwrap();
356 let base_dir_path = base_dir.path();
354 let base_dir_path = base_dir.path();
357 let beneath_repo = base_dir_path.join("a");
355 let beneath_repo = base_dir_path.join("a");
358 let root = base_dir_path.join("a/b");
356 let root = base_dir_path.join("a/b");
359 let out_of_repo = base_dir_path.join("c");
357 let out_of_repo = base_dir_path.join("c");
360 let under_repo_symlink = out_of_repo.join("d");
358 let under_repo_symlink = out_of_repo.join("d");
361
359
362 create_dir(&beneath_repo).unwrap();
360 create_dir(&beneath_repo).unwrap();
363 create_dir(&root).unwrap();
361 create_dir(&root).unwrap();
364
362
365 // TODO make portable
363 // TODO make portable
366 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
364 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
367
365
368 assert_eq!(
366 assert_eq!(
369 canonical_path(&root, Path::new(""), out_of_repo),
367 canonical_path(&root, Path::new(""), out_of_repo),
370 Ok(PathBuf::from(""))
368 Ok(PathBuf::from(""))
371 );
369 );
372 assert_eq!(
370 assert_eq!(
373 canonical_path(&root, Path::new(""), &beneath_repo),
371 canonical_path(&root, Path::new(""), &beneath_repo),
374 Err(HgPathError::NotUnderRoot {
372 Err(HgPathError::NotUnderRoot {
375 path: beneath_repo.to_owned(),
373 path: beneath_repo.to_owned(),
376 root: root.to_owned()
374 root: root.to_owned()
377 })
375 })
378 );
376 );
379 assert_eq!(
377 assert_eq!(
380 canonical_path(&root, Path::new(""), &under_repo_symlink),
378 canonical_path(&root, Path::new(""), &under_repo_symlink),
381 Ok(PathBuf::from("d"))
379 Ok(PathBuf::from("d"))
382 );
380 );
383 }
381 }
384 }
382 }
@@ -1,768 +1,765
1 // hg_path.rs
1 // hg_path.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use std::borrow::Borrow;
8 use std::borrow::Borrow;
9 use std::ffi::{OsStr, OsString};
9 use std::ffi::{OsStr, OsString};
10 use std::fmt;
10 use std::fmt;
11 use std::ops::Deref;
11 use std::ops::Deref;
12 use std::path::{Path, PathBuf};
12 use std::path::{Path, PathBuf};
13
13
14 #[derive(Debug, Eq, PartialEq)]
14 #[derive(Debug, Eq, PartialEq)]
15 pub enum HgPathError {
15 pub enum HgPathError {
16 /// Bytes from the invalid `HgPath`
16 /// Bytes from the invalid `HgPath`
17 LeadingSlash(Vec<u8>),
17 LeadingSlash(Vec<u8>),
18 ConsecutiveSlashes {
18 ConsecutiveSlashes {
19 bytes: Vec<u8>,
19 bytes: Vec<u8>,
20 second_slash_index: usize,
20 second_slash_index: usize,
21 },
21 },
22 ContainsNullByte {
22 ContainsNullByte {
23 bytes: Vec<u8>,
23 bytes: Vec<u8>,
24 null_byte_index: usize,
24 null_byte_index: usize,
25 },
25 },
26 /// Bytes
26 /// Bytes
27 DecodeError(Vec<u8>),
27 DecodeError(Vec<u8>),
28 /// The rest come from audit errors
28 /// The rest come from audit errors
29 EndsWithSlash(HgPathBuf),
29 EndsWithSlash(HgPathBuf),
30 ContainsIllegalComponent(HgPathBuf),
30 ContainsIllegalComponent(HgPathBuf),
31 /// Path is inside the `.hg` folder
31 /// Path is inside the `.hg` folder
32 InsideDotHg(HgPathBuf),
32 InsideDotHg(HgPathBuf),
33 IsInsideNestedRepo {
33 IsInsideNestedRepo {
34 path: HgPathBuf,
34 path: HgPathBuf,
35 nested_repo: HgPathBuf,
35 nested_repo: HgPathBuf,
36 },
36 },
37 TraversesSymbolicLink {
37 TraversesSymbolicLink {
38 path: HgPathBuf,
38 path: HgPathBuf,
39 symlink: HgPathBuf,
39 symlink: HgPathBuf,
40 },
40 },
41 NotFsCompliant(HgPathBuf),
41 NotFsCompliant(HgPathBuf),
42 /// `path` is the smallest invalid path
42 /// `path` is the smallest invalid path
43 NotUnderRoot {
43 NotUnderRoot {
44 path: PathBuf,
44 path: PathBuf,
45 root: PathBuf,
45 root: PathBuf,
46 },
46 },
47 }
47 }
48
48
49 impl ToString for HgPathError {
49 impl ToString for HgPathError {
50 fn to_string(&self) -> String {
50 fn to_string(&self) -> String {
51 match self {
51 match self {
52 HgPathError::LeadingSlash(bytes) => {
52 HgPathError::LeadingSlash(bytes) => {
53 format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
53 format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
54 }
54 }
55 HgPathError::ConsecutiveSlashes {
55 HgPathError::ConsecutiveSlashes {
56 bytes,
56 bytes,
57 second_slash_index: pos,
57 second_slash_index: pos,
58 } => format!(
58 } => format!(
59 "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
59 "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
60 bytes, pos
60 bytes, pos
61 ),
61 ),
62 HgPathError::ContainsNullByte {
62 HgPathError::ContainsNullByte {
63 bytes,
63 bytes,
64 null_byte_index: pos,
64 null_byte_index: pos,
65 } => format!(
65 } => format!(
66 "Invalid HgPath '{:?}': contains null byte at pos {}.",
66 "Invalid HgPath '{:?}': contains null byte at pos {}.",
67 bytes, pos
67 bytes, pos
68 ),
68 ),
69 HgPathError::DecodeError(bytes) => {
69 HgPathError::DecodeError(bytes) => {
70 format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
70 format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
71 }
71 }
72 HgPathError::EndsWithSlash(path) => {
72 HgPathError::EndsWithSlash(path) => {
73 format!("Audit failed for '{}': ends with a slash.", path)
73 format!("Audit failed for '{}': ends with a slash.", path)
74 }
74 }
75 HgPathError::ContainsIllegalComponent(path) => format!(
75 HgPathError::ContainsIllegalComponent(path) => format!(
76 "Audit failed for '{}': contains an illegal component.",
76 "Audit failed for '{}': contains an illegal component.",
77 path
77 path
78 ),
78 ),
79 HgPathError::InsideDotHg(path) => format!(
79 HgPathError::InsideDotHg(path) => format!(
80 "Audit failed for '{}': is inside the '.hg' folder.",
80 "Audit failed for '{}': is inside the '.hg' folder.",
81 path
81 path
82 ),
82 ),
83 HgPathError::IsInsideNestedRepo {
83 HgPathError::IsInsideNestedRepo {
84 path,
84 path,
85 nested_repo: nested,
85 nested_repo: nested,
86 } => format!(
86 } => format!(
87 "Audit failed for '{}': is inside a nested repository '{}'.",
87 "Audit failed for '{}': is inside a nested repository '{}'.",
88 path, nested
88 path, nested
89 ),
89 ),
90 HgPathError::TraversesSymbolicLink { path, symlink } => format!(
90 HgPathError::TraversesSymbolicLink { path, symlink } => format!(
91 "Audit failed for '{}': traverses symbolic link '{}'.",
91 "Audit failed for '{}': traverses symbolic link '{}'.",
92 path, symlink
92 path, symlink
93 ),
93 ),
94 HgPathError::NotFsCompliant(path) => format!(
94 HgPathError::NotFsCompliant(path) => format!(
95 "Audit failed for '{}': cannot be turned into a \
95 "Audit failed for '{}': cannot be turned into a \
96 filesystem path.",
96 filesystem path.",
97 path
97 path
98 ),
98 ),
99 HgPathError::NotUnderRoot { path, root } => format!(
99 HgPathError::NotUnderRoot { path, root } => format!(
100 "Audit failed for '{}': not under root {}.",
100 "Audit failed for '{}': not under root {}.",
101 path.display(),
101 path.display(),
102 root.display()
102 root.display()
103 ),
103 ),
104 }
104 }
105 }
105 }
106 }
106 }
107
107
108 impl From<HgPathError> for std::io::Error {
108 impl From<HgPathError> for std::io::Error {
109 fn from(e: HgPathError) -> Self {
109 fn from(e: HgPathError) -> Self {
110 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
110 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
111 }
111 }
112 }
112 }
113
113
114 /// This is a repository-relative path (or canonical path):
114 /// This is a repository-relative path (or canonical path):
115 /// - no null characters
115 /// - no null characters
116 /// - `/` separates directories
116 /// - `/` separates directories
117 /// - no consecutive slashes
117 /// - no consecutive slashes
118 /// - no leading slash,
118 /// - no leading slash,
119 /// - no `.` nor `..` of special meaning
119 /// - no `.` nor `..` of special meaning
120 /// - stored in repository and shared across platforms
120 /// - stored in repository and shared across platforms
121 ///
121 ///
122 /// Note: there is no guarantee of any `HgPath` being well-formed at any point
122 /// Note: there is no guarantee of any `HgPath` being well-formed at any point
123 /// in its lifetime for performance reasons and to ease ergonomics. It is
123 /// in its lifetime for performance reasons and to ease ergonomics. It is
124 /// however checked using the `check_state` method before any file-system
124 /// however checked using the `check_state` method before any file-system
125 /// operation.
125 /// operation.
126 ///
126 ///
127 /// This allows us to be encoding-transparent as much as possible, until really
127 /// This allows us to be encoding-transparent as much as possible, until really
128 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
128 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
129 /// or `Path`) whenever more complex operations are needed:
129 /// or `Path`) whenever more complex operations are needed:
130 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
130 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
131 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
131 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
132 /// character encoding will be determined on a per-repository basis.
132 /// character encoding will be determined on a per-repository basis.
133 //
133 //
134 // FIXME: (adapted from a comment in the stdlib)
134 // FIXME: (adapted from a comment in the stdlib)
135 // `HgPath::new()` current implementation relies on `Slice` being
135 // `HgPath::new()` current implementation relies on `Slice` being
136 // layout-compatible with `[u8]`.
136 // layout-compatible with `[u8]`.
137 // When attribute privacy is implemented, `Slice` should be annotated as
137 // When attribute privacy is implemented, `Slice` should be annotated as
138 // `#[repr(transparent)]`.
138 // `#[repr(transparent)]`.
139 // Anyway, `Slice` representation and layout are considered implementation
139 // Anyway, `Slice` representation and layout are considered implementation
140 // detail, are not documented and must not be relied upon.
140 // detail, are not documented and must not be relied upon.
141 #[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
141 #[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
142 pub struct HgPath {
142 pub struct HgPath {
143 inner: [u8],
143 inner: [u8],
144 }
144 }
145
145
146 impl HgPath {
146 impl HgPath {
147 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
147 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
148 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
148 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
149 }
149 }
150 pub fn is_empty(&self) -> bool {
150 pub fn is_empty(&self) -> bool {
151 self.inner.is_empty()
151 self.inner.is_empty()
152 }
152 }
153 pub fn len(&self) -> usize {
153 pub fn len(&self) -> usize {
154 self.inner.len()
154 self.inner.len()
155 }
155 }
156 fn to_hg_path_buf(&self) -> HgPathBuf {
156 fn to_hg_path_buf(&self) -> HgPathBuf {
157 HgPathBuf {
157 HgPathBuf {
158 inner: self.inner.to_owned(),
158 inner: self.inner.to_owned(),
159 }
159 }
160 }
160 }
161 pub fn bytes(&self) -> std::slice::Iter<u8> {
161 pub fn bytes(&self) -> std::slice::Iter<u8> {
162 self.inner.iter()
162 self.inner.iter()
163 }
163 }
164 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
164 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
165 HgPathBuf::from(self.inner.to_ascii_uppercase())
165 HgPathBuf::from(self.inner.to_ascii_uppercase())
166 }
166 }
167 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
167 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
168 HgPathBuf::from(self.inner.to_ascii_lowercase())
168 HgPathBuf::from(self.inner.to_ascii_lowercase())
169 }
169 }
170 pub fn as_bytes(&self) -> &[u8] {
170 pub fn as_bytes(&self) -> &[u8] {
171 &self.inner
171 &self.inner
172 }
172 }
173 pub fn contains(&self, other: u8) -> bool {
173 pub fn contains(&self, other: u8) -> bool {
174 self.inner.contains(&other)
174 self.inner.contains(&other)
175 }
175 }
176 pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
176 pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
177 self.inner.starts_with(needle.as_ref().as_bytes())
177 self.inner.starts_with(needle.as_ref().as_bytes())
178 }
178 }
179 pub fn trim_trailing_slash(&self) -> &Self {
179 pub fn trim_trailing_slash(&self) -> &Self {
180 Self::new(if self.inner.last() == Some(&b'/') {
180 Self::new(if self.inner.last() == Some(&b'/') {
181 &self.inner[..self.inner.len() - 1]
181 &self.inner[..self.inner.len() - 1]
182 } else {
182 } else {
183 &self.inner[..]
183 &self.inner[..]
184 })
184 })
185 }
185 }
186 /// Returns a tuple of slices `(base, filename)` resulting from the split
186 /// Returns a tuple of slices `(base, filename)` resulting from the split
187 /// at the rightmost `/`, if any.
187 /// at the rightmost `/`, if any.
188 ///
188 ///
189 /// # Examples:
189 /// # Examples:
190 ///
190 ///
191 /// ```
191 /// ```
192 /// use hg::utils::hg_path::HgPath;
192 /// use hg::utils::hg_path::HgPath;
193 ///
193 ///
194 /// let path = HgPath::new(b"cool/hg/path").split_filename();
194 /// let path = HgPath::new(b"cool/hg/path").split_filename();
195 /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
195 /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
196 ///
196 ///
197 /// let path = HgPath::new(b"pathwithoutsep").split_filename();
197 /// let path = HgPath::new(b"pathwithoutsep").split_filename();
198 /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
198 /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
199 /// ```
199 /// ```
200 pub fn split_filename(&self) -> (&Self, &Self) {
200 pub fn split_filename(&self) -> (&Self, &Self) {
201 match &self.inner.iter().rposition(|c| *c == b'/') {
201 match &self.inner.iter().rposition(|c| *c == b'/') {
202 None => (HgPath::new(""), &self),
202 None => (HgPath::new(""), &self),
203 Some(size) => (
203 Some(size) => (
204 HgPath::new(&self.inner[..*size]),
204 HgPath::new(&self.inner[..*size]),
205 HgPath::new(&self.inner[*size + 1..]),
205 HgPath::new(&self.inner[*size + 1..]),
206 ),
206 ),
207 }
207 }
208 }
208 }
209 pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
209 pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
210 let mut inner = self.inner.to_owned();
210 let mut inner = self.inner.to_owned();
211 if inner.len() != 0 && inner.last() != Some(&b'/') {
211 if !inner.is_empty() && inner.last() != Some(&b'/') {
212 inner.push(b'/');
212 inner.push(b'/');
213 }
213 }
214 inner.extend(other.as_ref().bytes());
214 inner.extend(other.as_ref().bytes());
215 HgPathBuf::from_bytes(&inner)
215 HgPathBuf::from_bytes(&inner)
216 }
216 }
217 pub fn parent(&self) -> &Self {
217 pub fn parent(&self) -> &Self {
218 let inner = self.as_bytes();
218 let inner = self.as_bytes();
219 HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
219 HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
220 Some(pos) => &inner[..pos],
220 Some(pos) => &inner[..pos],
221 None => &[],
221 None => &[],
222 })
222 })
223 }
223 }
224 /// Given a base directory, returns the slice of `self` relative to the
224 /// Given a base directory, returns the slice of `self` relative to the
225 /// base directory. If `base` is not a directory (does not end with a
225 /// base directory. If `base` is not a directory (does not end with a
226 /// `b'/'`), returns `None`.
226 /// `b'/'`), returns `None`.
227 pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
227 pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
228 let base = base.as_ref();
228 let base = base.as_ref();
229 if base.is_empty() {
229 if base.is_empty() {
230 return Some(self);
230 return Some(self);
231 }
231 }
232 let is_dir = base.as_bytes().ends_with(b"/");
232 let is_dir = base.as_bytes().ends_with(b"/");
233 if is_dir && self.starts_with(base) {
233 if is_dir && self.starts_with(base) {
234 Some(Self::new(&self.inner[base.len()..]))
234 Some(Self::new(&self.inner[base.len()..]))
235 } else {
235 } else {
236 None
236 None
237 }
237 }
238 }
238 }
239
239
240 #[cfg(windows)]
240 #[cfg(windows)]
241 /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
241 /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
242 ///
242 ///
243 /// Split a pathname into drive/UNC sharepoint and relative path
243 /// Split a pathname into drive/UNC sharepoint and relative path
244 /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
244 /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
245 /// be empty.
245 /// be empty.
246 ///
246 ///
247 /// If you assign
247 /// If you assign
248 /// result = split_drive(p)
248 /// result = split_drive(p)
249 /// It is always true that:
249 /// It is always true that:
250 /// result[0] + result[1] == p
250 /// result[0] + result[1] == p
251 ///
251 ///
252 /// If the path contained a drive letter, drive_or_unc will contain
252 /// If the path contained a drive letter, drive_or_unc will contain
253 /// everything up to and including the colon.
253 /// everything up to and including the colon.
254 /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
254 /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
255 ///
255 ///
256 /// If the path contained a UNC path, the drive_or_unc will contain the
256 /// If the path contained a UNC path, the drive_or_unc will contain the
257 /// host name and share up to but not including the fourth directory
257 /// host name and share up to but not including the fourth directory
258 /// separator character.
258 /// separator character.
259 /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
259 /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
260 /// "/dir")
260 /// "/dir")
261 ///
261 ///
262 /// Paths cannot contain both a drive letter and a UNC path.
262 /// Paths cannot contain both a drive letter and a UNC path.
263 pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
263 pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
264 let bytes = self.as_bytes();
264 let bytes = self.as_bytes();
265 let is_sep = |b| std::path::is_separator(b as char);
265 let is_sep = |b| std::path::is_separator(b as char);
266
266
267 if self.len() < 2 {
267 if self.len() < 2 {
268 (HgPath::new(b""), &self)
268 (HgPath::new(b""), &self)
269 } else if is_sep(bytes[0])
269 } else if is_sep(bytes[0])
270 && is_sep(bytes[1])
270 && is_sep(bytes[1])
271 && (self.len() == 2 || !is_sep(bytes[2]))
271 && (self.len() == 2 || !is_sep(bytes[2]))
272 {
272 {
273 // Is a UNC path:
273 // Is a UNC path:
274 // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
274 // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
275 // \\machine\mountpoint\directory\etc\...
275 // \\machine\mountpoint\directory\etc\...
276 // directory ^^^^^^^^^^^^^^^
276 // directory ^^^^^^^^^^^^^^^
277
277
278 let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
278 let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
279 let mountpoint_start_index = if let Some(i) = machine_end_index {
279 let mountpoint_start_index = if let Some(i) = machine_end_index {
280 i + 2
280 i + 2
281 } else {
281 } else {
282 return (HgPath::new(b""), &self);
282 return (HgPath::new(b""), &self);
283 };
283 };
284
284
285 match bytes[mountpoint_start_index + 1..]
285 match bytes[mountpoint_start_index + 1..]
286 .iter()
286 .iter()
287 .position(|b| is_sep(*b))
287 .position(|b| is_sep(*b))
288 {
288 {
289 // A UNC path can't have two slashes in a row
289 // A UNC path can't have two slashes in a row
290 // (after the initial two)
290 // (after the initial two)
291 Some(0) => (HgPath::new(b""), &self),
291 Some(0) => (HgPath::new(b""), &self),
292 Some(i) => {
292 Some(i) => {
293 let (a, b) =
293 let (a, b) =
294 bytes.split_at(mountpoint_start_index + 1 + i);
294 bytes.split_at(mountpoint_start_index + 1 + i);
295 (HgPath::new(a), HgPath::new(b))
295 (HgPath::new(a), HgPath::new(b))
296 }
296 }
297 None => (&self, HgPath::new(b"")),
297 None => (&self, HgPath::new(b"")),
298 }
298 }
299 } else if bytes[1] == b':' {
299 } else if bytes[1] == b':' {
300 // Drive path c:\directory
300 // Drive path c:\directory
301 let (a, b) = bytes.split_at(2);
301 let (a, b) = bytes.split_at(2);
302 (HgPath::new(a), HgPath::new(b))
302 (HgPath::new(a), HgPath::new(b))
303 } else {
303 } else {
304 (HgPath::new(b""), &self)
304 (HgPath::new(b""), &self)
305 }
305 }
306 }
306 }
307
307
308 #[cfg(unix)]
308 #[cfg(unix)]
309 /// Split a pathname into drive and path. On Posix, drive is always empty.
309 /// Split a pathname into drive and path. On Posix, drive is always empty.
310 pub fn split_drive(&self) -> (&HgPath, &HgPath) {
310 pub fn split_drive(&self) -> (&HgPath, &HgPath) {
311 (HgPath::new(b""), &self)
311 (HgPath::new(b""), &self)
312 }
312 }
313
313
314 /// Checks for errors in the path, short-circuiting at the first one.
314 /// Checks for errors in the path, short-circuiting at the first one.
315 /// This generates fine-grained errors useful for debugging.
315 /// This generates fine-grained errors useful for debugging.
316 /// To simply check if the path is valid during tests, use `is_valid`.
316 /// To simply check if the path is valid during tests, use `is_valid`.
317 pub fn check_state(&self) -> Result<(), HgPathError> {
317 pub fn check_state(&self) -> Result<(), HgPathError> {
318 if self.len() == 0 {
318 if self.is_empty() {
319 return Ok(());
319 return Ok(());
320 }
320 }
321 let bytes = self.as_bytes();
321 let bytes = self.as_bytes();
322 let mut previous_byte = None;
322 let mut previous_byte = None;
323
323
324 if bytes[0] == b'/' {
324 if bytes[0] == b'/' {
325 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
325 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
326 }
326 }
327 for (index, byte) in bytes.iter().enumerate() {
327 for (index, byte) in bytes.iter().enumerate() {
328 match byte {
328 match byte {
329 0 => {
329 0 => {
330 return Err(HgPathError::ContainsNullByte {
330 return Err(HgPathError::ContainsNullByte {
331 bytes: bytes.to_vec(),
331 bytes: bytes.to_vec(),
332 null_byte_index: index,
332 null_byte_index: index,
333 })
333 })
334 }
334 }
335 b'/' => {
335 b'/' => {
336 if previous_byte.is_some() && previous_byte == Some(b'/') {
336 if previous_byte.is_some() && previous_byte == Some(b'/') {
337 return Err(HgPathError::ConsecutiveSlashes {
337 return Err(HgPathError::ConsecutiveSlashes {
338 bytes: bytes.to_vec(),
338 bytes: bytes.to_vec(),
339 second_slash_index: index,
339 second_slash_index: index,
340 });
340 });
341 }
341 }
342 }
342 }
343 _ => (),
343 _ => (),
344 };
344 };
345 previous_byte = Some(*byte);
345 previous_byte = Some(*byte);
346 }
346 }
347 Ok(())
347 Ok(())
348 }
348 }
349
349
350 #[cfg(test)]
350 #[cfg(test)]
351 /// Only usable during tests to force developers to handle invalid states
351 /// Only usable during tests to force developers to handle invalid states
352 fn is_valid(&self) -> bool {
352 fn is_valid(&self) -> bool {
353 self.check_state().is_ok()
353 self.check_state().is_ok()
354 }
354 }
355 }
355 }
356
356
357 impl fmt::Debug for HgPath {
357 impl fmt::Debug for HgPath {
358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
359 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
359 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
360 }
360 }
361 }
361 }
362
362
363 impl fmt::Display for HgPath {
363 impl fmt::Display for HgPath {
364 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
364 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
365 write!(f, "{}", String::from_utf8_lossy(&self.inner))
365 write!(f, "{}", String::from_utf8_lossy(&self.inner))
366 }
366 }
367 }
367 }
368
368
369 #[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
369 #[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
370 pub struct HgPathBuf {
370 pub struct HgPathBuf {
371 inner: Vec<u8>,
371 inner: Vec<u8>,
372 }
372 }
373
373
374 impl HgPathBuf {
374 impl HgPathBuf {
375 pub fn new() -> Self {
375 pub fn new() -> Self {
376 Self { inner: Vec::new() }
376 Default::default()
377 }
377 }
378 pub fn push(&mut self, byte: u8) {
378 pub fn push(&mut self, byte: u8) {
379 self.inner.push(byte);
379 self.inner.push(byte);
380 }
380 }
381 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
381 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
382 HgPath::new(s).to_owned()
382 HgPath::new(s).to_owned()
383 }
383 }
384 pub fn into_vec(self) -> Vec<u8> {
384 pub fn into_vec(self) -> Vec<u8> {
385 self.inner
385 self.inner
386 }
386 }
387 pub fn as_ref(&self) -> &[u8] {
388 self.inner.as_ref()
389 }
390 }
387 }
391
388
392 impl fmt::Debug for HgPathBuf {
389 impl fmt::Debug for HgPathBuf {
393 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
390 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
394 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
391 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
395 }
392 }
396 }
393 }
397
394
398 impl fmt::Display for HgPathBuf {
395 impl fmt::Display for HgPathBuf {
399 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
396 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400 write!(f, "{}", String::from_utf8_lossy(&self.inner))
397 write!(f, "{}", String::from_utf8_lossy(&self.inner))
401 }
398 }
402 }
399 }
403
400
404 impl Deref for HgPathBuf {
401 impl Deref for HgPathBuf {
405 type Target = HgPath;
402 type Target = HgPath;
406
403
407 #[inline]
404 #[inline]
408 fn deref(&self) -> &HgPath {
405 fn deref(&self) -> &HgPath {
409 &HgPath::new(&self.inner)
406 &HgPath::new(&self.inner)
410 }
407 }
411 }
408 }
412
409
413 impl From<Vec<u8>> for HgPathBuf {
410 impl From<Vec<u8>> for HgPathBuf {
414 fn from(vec: Vec<u8>) -> Self {
411 fn from(vec: Vec<u8>) -> Self {
415 Self { inner: vec }
412 Self { inner: vec }
416 }
413 }
417 }
414 }
418
415
419 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
416 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
420 fn from(s: &T) -> HgPathBuf {
417 fn from(s: &T) -> HgPathBuf {
421 s.as_ref().to_owned()
418 s.as_ref().to_owned()
422 }
419 }
423 }
420 }
424
421
425 impl Into<Vec<u8>> for HgPathBuf {
422 impl Into<Vec<u8>> for HgPathBuf {
426 fn into(self) -> Vec<u8> {
423 fn into(self) -> Vec<u8> {
427 self.inner
424 self.inner
428 }
425 }
429 }
426 }
430
427
431 impl Borrow<HgPath> for HgPathBuf {
428 impl Borrow<HgPath> for HgPathBuf {
432 fn borrow(&self) -> &HgPath {
429 fn borrow(&self) -> &HgPath {
433 &HgPath::new(self.as_bytes())
430 &HgPath::new(self.as_bytes())
434 }
431 }
435 }
432 }
436
433
437 impl ToOwned for HgPath {
434 impl ToOwned for HgPath {
438 type Owned = HgPathBuf;
435 type Owned = HgPathBuf;
439
436
440 fn to_owned(&self) -> HgPathBuf {
437 fn to_owned(&self) -> HgPathBuf {
441 self.to_hg_path_buf()
438 self.to_hg_path_buf()
442 }
439 }
443 }
440 }
444
441
445 impl AsRef<HgPath> for HgPath {
442 impl AsRef<HgPath> for HgPath {
446 fn as_ref(&self) -> &HgPath {
443 fn as_ref(&self) -> &HgPath {
447 self
444 self
448 }
445 }
449 }
446 }
450
447
451 impl AsRef<HgPath> for HgPathBuf {
448 impl AsRef<HgPath> for HgPathBuf {
452 fn as_ref(&self) -> &HgPath {
449 fn as_ref(&self) -> &HgPath {
453 self
450 self
454 }
451 }
455 }
452 }
456
453
457 impl Extend<u8> for HgPathBuf {
454 impl Extend<u8> for HgPathBuf {
458 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
455 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
459 self.inner.extend(iter);
456 self.inner.extend(iter);
460 }
457 }
461 }
458 }
462
459
463 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
460 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
464 /// implemented, these conversion utils will have to work differently depending
461 /// implemented, these conversion utils will have to work differently depending
465 /// on the repository encoding: either `UTF-8` or `MBCS`.
462 /// on the repository encoding: either `UTF-8` or `MBCS`.
466
463
467 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
464 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
468 hg_path: P,
465 hg_path: P,
469 ) -> Result<OsString, HgPathError> {
466 ) -> Result<OsString, HgPathError> {
470 hg_path.as_ref().check_state()?;
467 hg_path.as_ref().check_state()?;
471 let os_str;
468 let os_str;
472 #[cfg(unix)]
469 #[cfg(unix)]
473 {
470 {
474 use std::os::unix::ffi::OsStrExt;
471 use std::os::unix::ffi::OsStrExt;
475 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
472 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
476 }
473 }
477 // TODO Handle other platforms
474 // TODO Handle other platforms
478 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
475 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
479 Ok(os_str.to_os_string())
476 Ok(os_str.to_os_string())
480 }
477 }
481
478
482 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
479 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
483 hg_path: P,
480 hg_path: P,
484 ) -> Result<PathBuf, HgPathError> {
481 ) -> Result<PathBuf, HgPathError> {
485 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
482 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
486 }
483 }
487
484
488 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
485 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
489 os_string: S,
486 os_string: S,
490 ) -> Result<HgPathBuf, HgPathError> {
487 ) -> Result<HgPathBuf, HgPathError> {
491 let buf;
488 let buf;
492 #[cfg(unix)]
489 #[cfg(unix)]
493 {
490 {
494 use std::os::unix::ffi::OsStrExt;
491 use std::os::unix::ffi::OsStrExt;
495 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
492 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
496 }
493 }
497 // TODO Handle other platforms
494 // TODO Handle other platforms
498 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
495 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
499
496
500 buf.check_state()?;
497 buf.check_state()?;
501 Ok(buf)
498 Ok(buf)
502 }
499 }
503
500
504 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
501 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
505 path: P,
502 path: P,
506 ) -> Result<HgPathBuf, HgPathError> {
503 ) -> Result<HgPathBuf, HgPathError> {
507 let buf;
504 let buf;
508 let os_str = path.as_ref().as_os_str();
505 let os_str = path.as_ref().as_os_str();
509 #[cfg(unix)]
506 #[cfg(unix)]
510 {
507 {
511 use std::os::unix::ffi::OsStrExt;
508 use std::os::unix::ffi::OsStrExt;
512 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
509 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
513 }
510 }
514 // TODO Handle other platforms
511 // TODO Handle other platforms
515 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
512 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
516
513
517 buf.check_state()?;
514 buf.check_state()?;
518 Ok(buf)
515 Ok(buf)
519 }
516 }
520
517
521 #[cfg(test)]
518 #[cfg(test)]
522 mod tests {
519 mod tests {
523 use super::*;
520 use super::*;
524 use pretty_assertions::assert_eq;
521 use pretty_assertions::assert_eq;
525
522
526 #[test]
523 #[test]
527 fn test_path_states() {
524 fn test_path_states() {
528 assert_eq!(
525 assert_eq!(
529 Err(HgPathError::LeadingSlash(b"/".to_vec())),
526 Err(HgPathError::LeadingSlash(b"/".to_vec())),
530 HgPath::new(b"/").check_state()
527 HgPath::new(b"/").check_state()
531 );
528 );
532 assert_eq!(
529 assert_eq!(
533 Err(HgPathError::ConsecutiveSlashes {
530 Err(HgPathError::ConsecutiveSlashes {
534 bytes: b"a/b//c".to_vec(),
531 bytes: b"a/b//c".to_vec(),
535 second_slash_index: 4
532 second_slash_index: 4
536 }),
533 }),
537 HgPath::new(b"a/b//c").check_state()
534 HgPath::new(b"a/b//c").check_state()
538 );
535 );
539 assert_eq!(
536 assert_eq!(
540 Err(HgPathError::ContainsNullByte {
537 Err(HgPathError::ContainsNullByte {
541 bytes: b"a/b/\0c".to_vec(),
538 bytes: b"a/b/\0c".to_vec(),
542 null_byte_index: 4
539 null_byte_index: 4
543 }),
540 }),
544 HgPath::new(b"a/b/\0c").check_state()
541 HgPath::new(b"a/b/\0c").check_state()
545 );
542 );
546 // TODO test HgPathError::DecodeError for the Windows implementation.
543 // TODO test HgPathError::DecodeError for the Windows implementation.
547 assert_eq!(true, HgPath::new(b"").is_valid());
544 assert_eq!(true, HgPath::new(b"").is_valid());
548 assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
545 assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
549 // Backslashes in paths are not significant, but allowed
546 // Backslashes in paths are not significant, but allowed
550 assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
547 assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
551 // Dots in paths are not significant, but allowed
548 // Dots in paths are not significant, but allowed
552 assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
549 assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
553 assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
550 assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
554 }
551 }
555
552
556 #[test]
553 #[test]
557 fn test_iter() {
554 fn test_iter() {
558 let path = HgPath::new(b"a");
555 let path = HgPath::new(b"a");
559 let mut iter = path.bytes();
556 let mut iter = path.bytes();
560 assert_eq!(Some(&b'a'), iter.next());
557 assert_eq!(Some(&b'a'), iter.next());
561 assert_eq!(None, iter.next_back());
558 assert_eq!(None, iter.next_back());
562 assert_eq!(None, iter.next());
559 assert_eq!(None, iter.next());
563
560
564 let path = HgPath::new(b"a");
561 let path = HgPath::new(b"a");
565 let mut iter = path.bytes();
562 let mut iter = path.bytes();
566 assert_eq!(Some(&b'a'), iter.next_back());
563 assert_eq!(Some(&b'a'), iter.next_back());
567 assert_eq!(None, iter.next_back());
564 assert_eq!(None, iter.next_back());
568 assert_eq!(None, iter.next());
565 assert_eq!(None, iter.next());
569
566
570 let path = HgPath::new(b"abc");
567 let path = HgPath::new(b"abc");
571 let mut iter = path.bytes();
568 let mut iter = path.bytes();
572 assert_eq!(Some(&b'a'), iter.next());
569 assert_eq!(Some(&b'a'), iter.next());
573 assert_eq!(Some(&b'c'), iter.next_back());
570 assert_eq!(Some(&b'c'), iter.next_back());
574 assert_eq!(Some(&b'b'), iter.next_back());
571 assert_eq!(Some(&b'b'), iter.next_back());
575 assert_eq!(None, iter.next_back());
572 assert_eq!(None, iter.next_back());
576 assert_eq!(None, iter.next());
573 assert_eq!(None, iter.next());
577
574
578 let path = HgPath::new(b"abc");
575 let path = HgPath::new(b"abc");
579 let mut iter = path.bytes();
576 let mut iter = path.bytes();
580 assert_eq!(Some(&b'a'), iter.next());
577 assert_eq!(Some(&b'a'), iter.next());
581 assert_eq!(Some(&b'b'), iter.next());
578 assert_eq!(Some(&b'b'), iter.next());
582 assert_eq!(Some(&b'c'), iter.next());
579 assert_eq!(Some(&b'c'), iter.next());
583 assert_eq!(None, iter.next_back());
580 assert_eq!(None, iter.next_back());
584 assert_eq!(None, iter.next());
581 assert_eq!(None, iter.next());
585
582
586 let path = HgPath::new(b"abc");
583 let path = HgPath::new(b"abc");
587 let iter = path.bytes();
584 let iter = path.bytes();
588 let mut vec = Vec::new();
585 let mut vec = Vec::new();
589 vec.extend(iter);
586 vec.extend(iter);
590 assert_eq!(vec![b'a', b'b', b'c'], vec);
587 assert_eq!(vec![b'a', b'b', b'c'], vec);
591
588
592 let path = HgPath::new(b"abc");
589 let path = HgPath::new(b"abc");
593 let mut iter = path.bytes();
590 let mut iter = path.bytes();
594 assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));
591 assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));
595
592
596 let path = HgPath::new(b"abc");
593 let path = HgPath::new(b"abc");
597 let mut iter = path.bytes();
594 let mut iter = path.bytes();
598 assert_eq!(None, iter.rposition(|c| *c == b'd'));
595 assert_eq!(None, iter.rposition(|c| *c == b'd'));
599 }
596 }
600
597
601 #[test]
598 #[test]
602 fn test_join() {
599 fn test_join() {
603 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
600 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
604 assert_eq!(b"a/b", path.as_bytes());
601 assert_eq!(b"a/b", path.as_bytes());
605
602
606 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
603 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
607 assert_eq!(b"a/b/c", path.as_bytes());
604 assert_eq!(b"a/b/c", path.as_bytes());
608
605
609 // No leading slash if empty before join
606 // No leading slash if empty before join
610 let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
607 let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
611 assert_eq!(b"b/c", path.as_bytes());
608 assert_eq!(b"b/c", path.as_bytes());
612
609
613 // The leading slash is an invalid representation of an `HgPath`, but
610 // The leading slash is an invalid representation of an `HgPath`, but
614 // it can happen. This creates another invalid representation of
611 // it can happen. This creates another invalid representation of
615 // consecutive bytes.
612 // consecutive bytes.
616 // TODO What should be done in this case? Should we silently remove
613 // TODO What should be done in this case? Should we silently remove
617 // the extra slash? Should we change the signature to a problematic
614 // the extra slash? Should we change the signature to a problematic
618 // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
615 // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
619 // let the error happen upon filesystem interaction?
616 // let the error happen upon filesystem interaction?
620 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
617 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
621 assert_eq!(b"a//b", path.as_bytes());
618 assert_eq!(b"a//b", path.as_bytes());
622 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
619 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
623 assert_eq!(b"a//b", path.as_bytes());
620 assert_eq!(b"a//b", path.as_bytes());
624 }
621 }
625
622
626 #[test]
623 #[test]
627 fn test_relative_to() {
624 fn test_relative_to() {
628 let path = HgPath::new(b"");
625 let path = HgPath::new(b"");
629 let base = HgPath::new(b"");
626 let base = HgPath::new(b"");
630 assert_eq!(Some(path), path.relative_to(base));
627 assert_eq!(Some(path), path.relative_to(base));
631
628
632 let path = HgPath::new(b"path");
629 let path = HgPath::new(b"path");
633 let base = HgPath::new(b"");
630 let base = HgPath::new(b"");
634 assert_eq!(Some(path), path.relative_to(base));
631 assert_eq!(Some(path), path.relative_to(base));
635
632
636 let path = HgPath::new(b"a");
633 let path = HgPath::new(b"a");
637 let base = HgPath::new(b"b");
634 let base = HgPath::new(b"b");
638 assert_eq!(None, path.relative_to(base));
635 assert_eq!(None, path.relative_to(base));
639
636
640 let path = HgPath::new(b"a/b");
637 let path = HgPath::new(b"a/b");
641 let base = HgPath::new(b"a");
638 let base = HgPath::new(b"a");
642 assert_eq!(None, path.relative_to(base));
639 assert_eq!(None, path.relative_to(base));
643
640
644 let path = HgPath::new(b"a/b");
641 let path = HgPath::new(b"a/b");
645 let base = HgPath::new(b"a/");
642 let base = HgPath::new(b"a/");
646 assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));
643 assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));
647
644
648 let path = HgPath::new(b"nested/path/to/b");
645 let path = HgPath::new(b"nested/path/to/b");
649 let base = HgPath::new(b"nested/path/");
646 let base = HgPath::new(b"nested/path/");
650 assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));
647 assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));
651
648
652 let path = HgPath::new(b"ends/with/dir/");
649 let path = HgPath::new(b"ends/with/dir/");
653 let base = HgPath::new(b"ends/");
650 let base = HgPath::new(b"ends/");
654 assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
651 assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
655 }
652 }
656
653
657 #[test]
654 #[test]
658 #[cfg(unix)]
655 #[cfg(unix)]
659 fn test_split_drive() {
656 fn test_split_drive() {
660 // Taken from the Python stdlib's tests
657 // Taken from the Python stdlib's tests
661 assert_eq!(
658 assert_eq!(
662 HgPath::new(br"/foo/bar").split_drive(),
659 HgPath::new(br"/foo/bar").split_drive(),
663 (HgPath::new(b""), HgPath::new(br"/foo/bar"))
660 (HgPath::new(b""), HgPath::new(br"/foo/bar"))
664 );
661 );
665 assert_eq!(
662 assert_eq!(
666 HgPath::new(br"foo:bar").split_drive(),
663 HgPath::new(br"foo:bar").split_drive(),
667 (HgPath::new(b""), HgPath::new(br"foo:bar"))
664 (HgPath::new(b""), HgPath::new(br"foo:bar"))
668 );
665 );
669 assert_eq!(
666 assert_eq!(
670 HgPath::new(br":foo:bar").split_drive(),
667 HgPath::new(br":foo:bar").split_drive(),
671 (HgPath::new(b""), HgPath::new(br":foo:bar"))
668 (HgPath::new(b""), HgPath::new(br":foo:bar"))
672 );
669 );
673 // Also try NT paths; should not split them
670 // Also try NT paths; should not split them
674 assert_eq!(
671 assert_eq!(
675 HgPath::new(br"c:\foo\bar").split_drive(),
672 HgPath::new(br"c:\foo\bar").split_drive(),
676 (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
673 (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
677 );
674 );
678 assert_eq!(
675 assert_eq!(
679 HgPath::new(b"c:/foo/bar").split_drive(),
676 HgPath::new(b"c:/foo/bar").split_drive(),
680 (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
677 (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
681 );
678 );
682 assert_eq!(
679 assert_eq!(
683 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
680 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
684 (
681 (
685 HgPath::new(b""),
682 HgPath::new(b""),
686 HgPath::new(br"\\conky\mountpoint\foo\bar")
683 HgPath::new(br"\\conky\mountpoint\foo\bar")
687 )
684 )
688 );
685 );
689 }
686 }
690
687
691 #[test]
688 #[test]
692 #[cfg(windows)]
689 #[cfg(windows)]
693 fn test_split_drive() {
690 fn test_split_drive() {
694 assert_eq!(
691 assert_eq!(
695 HgPath::new(br"c:\foo\bar").split_drive(),
692 HgPath::new(br"c:\foo\bar").split_drive(),
696 (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
693 (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
697 );
694 );
698 assert_eq!(
695 assert_eq!(
699 HgPath::new(b"c:/foo/bar").split_drive(),
696 HgPath::new(b"c:/foo/bar").split_drive(),
700 (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
697 (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
701 );
698 );
702 assert_eq!(
699 assert_eq!(
703 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
700 HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
704 (
701 (
705 HgPath::new(br"\\conky\mountpoint"),
702 HgPath::new(br"\\conky\mountpoint"),
706 HgPath::new(br"\foo\bar")
703 HgPath::new(br"\foo\bar")
707 )
704 )
708 );
705 );
709 assert_eq!(
706 assert_eq!(
710 HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
707 HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
711 (
708 (
712 HgPath::new(br"//conky/mountpoint"),
709 HgPath::new(br"//conky/mountpoint"),
713 HgPath::new(br"/foo/bar")
710 HgPath::new(br"/foo/bar")
714 )
711 )
715 );
712 );
716 assert_eq!(
713 assert_eq!(
717 HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
714 HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
718 (
715 (
719 HgPath::new(br""),
716 HgPath::new(br""),
720 HgPath::new(br"\\\conky\mountpoint\foo\bar")
717 HgPath::new(br"\\\conky\mountpoint\foo\bar")
721 )
718 )
722 );
719 );
723 assert_eq!(
720 assert_eq!(
724 HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
721 HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
725 (
722 (
726 HgPath::new(br""),
723 HgPath::new(br""),
727 HgPath::new(br"///conky/mountpoint/foo/bar")
724 HgPath::new(br"///conky/mountpoint/foo/bar")
728 )
725 )
729 );
726 );
730 assert_eq!(
727 assert_eq!(
731 HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
728 HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
732 (
729 (
733 HgPath::new(br""),
730 HgPath::new(br""),
734 HgPath::new(br"\\conky\\mountpoint\foo\bar")
731 HgPath::new(br"\\conky\\mountpoint\foo\bar")
735 )
732 )
736 );
733 );
737 assert_eq!(
734 assert_eq!(
738 HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
735 HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
739 (
736 (
740 HgPath::new(br""),
737 HgPath::new(br""),
741 HgPath::new(br"//conky//mountpoint/foo/bar")
738 HgPath::new(br"//conky//mountpoint/foo/bar")
742 )
739 )
743 );
740 );
744 // UNC part containing U+0130
741 // UNC part containing U+0130
745 assert_eq!(
742 assert_eq!(
746 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
743 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
747 (
744 (
748 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
745 HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
749 HgPath::new(br"/foo/bar")
746 HgPath::new(br"/foo/bar")
750 )
747 )
751 );
748 );
752 }
749 }
753
750
754 #[test]
751 #[test]
755 fn test_parent() {
752 fn test_parent() {
756 let path = HgPath::new(b"");
753 let path = HgPath::new(b"");
757 assert_eq!(path.parent(), path);
754 assert_eq!(path.parent(), path);
758
755
759 let path = HgPath::new(b"a");
756 let path = HgPath::new(b"a");
760 assert_eq!(path.parent(), HgPath::new(b""));
757 assert_eq!(path.parent(), HgPath::new(b""));
761
758
762 let path = HgPath::new(b"a/b");
759 let path = HgPath::new(b"a/b");
763 assert_eq!(path.parent(), HgPath::new(b"a"));
760 assert_eq!(path.parent(), HgPath::new(b"a"));
764
761
765 let path = HgPath::new(b"a/other/b");
762 let path = HgPath::new(b"a/other/b");
766 assert_eq!(path.parent(), HgPath::new(b"a/other"));
763 assert_eq!(path.parent(), HgPath::new(b"a/other"));
767 }
764 }
768 }
765 }
@@ -1,232 +1,232
1 // path_auditor.rs
1 // path_auditor.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 //
5 //
6 // This software may be used and distributed according to the terms of the
6 // This software may be used and distributed according to the terms of the
7 // GNU General Public License version 2 or any later version.
7 // GNU General Public License version 2 or any later version.
8
8
9 use crate::utils::{
9 use crate::utils::{
10 files::lower_clean,
10 files::lower_clean,
11 find_slice_in_slice,
11 find_slice_in_slice,
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 };
13 };
14 use std::collections::HashSet;
14 use std::collections::HashSet;
15 use std::path::{Path, PathBuf};
15 use std::path::{Path, PathBuf};
16 use std::sync::{Mutex, RwLock};
16 use std::sync::{Mutex, RwLock};
17
17
18 /// Ensures that a path is valid for use in the repository i.e. does not use
18 /// Ensures that a path is valid for use in the repository i.e. does not use
19 /// any banned components, does not traverse a symlink, etc.
19 /// any banned components, does not traverse a symlink, etc.
20 #[derive(Debug, Default)]
20 #[derive(Debug, Default)]
21 pub struct PathAuditor {
21 pub struct PathAuditor {
22 audited: Mutex<HashSet<HgPathBuf>>,
22 audited: Mutex<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
24 root: PathBuf,
24 root: PathBuf,
25 }
25 }
26
26
27 impl PathAuditor {
27 impl PathAuditor {
28 pub fn new(root: impl AsRef<Path>) -> Self {
28 pub fn new(root: impl AsRef<Path>) -> Self {
29 Self {
29 Self {
30 root: root.as_ref().to_owned(),
30 root: root.as_ref().to_owned(),
31 ..Default::default()
31 ..Default::default()
32 }
32 }
33 }
33 }
34 pub fn audit_path(
34 pub fn audit_path(
35 &self,
35 &self,
36 path: impl AsRef<HgPath>,
36 path: impl AsRef<HgPath>,
37 ) -> Result<(), HgPathError> {
37 ) -> Result<(), HgPathError> {
38 // TODO windows "localpath" normalization
38 // TODO windows "localpath" normalization
39 let path = path.as_ref();
39 let path = path.as_ref();
40 if path.is_empty() {
40 if path.is_empty() {
41 return Ok(());
41 return Ok(());
42 }
42 }
43 // TODO case normalization
43 // TODO case normalization
44 if self.audited.lock().unwrap().contains(path) {
44 if self.audited.lock().unwrap().contains(path) {
45 return Ok(());
45 return Ok(());
46 }
46 }
47 // AIX ignores "/" at end of path, others raise EISDIR.
47 // AIX ignores "/" at end of path, others raise EISDIR.
48 let last_byte = path.as_bytes()[path.len() - 1];
48 let last_byte = path.as_bytes()[path.len() - 1];
49 if last_byte == b'/' || last_byte == b'\\' {
49 if last_byte == b'/' || last_byte == b'\\' {
50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
51 }
51 }
52 let parts: Vec<_> = path
52 let parts: Vec<_> = path
53 .as_bytes()
53 .as_bytes()
54 .split(|b| std::path::is_separator(*b as char))
54 .split(|b| std::path::is_separator(*b as char))
55 .collect();
55 .collect();
56
56
57 let first_component = lower_clean(parts[0]);
57 let first_component = lower_clean(parts[0]);
58 let first_component = first_component.as_slice();
58 let first_component = first_component.as_slice();
59 if !path.split_drive().0.is_empty()
59 if !path.split_drive().0.is_empty()
60 || (first_component == b".hg"
60 || (first_component == b".hg"
61 || first_component == b".hg."
61 || first_component == b".hg."
62 || first_component == b"")
62 || first_component == b"")
63 || parts.iter().any(|c| c == b"..")
63 || parts.iter().any(|c| c == b"..")
64 {
64 {
65 return Err(HgPathError::InsideDotHg(path.to_owned()));
65 return Err(HgPathError::InsideDotHg(path.to_owned()));
66 }
66 }
67
67
68 // Windows shortname aliases
68 // Windows shortname aliases
69 for part in parts.iter() {
69 for part in parts.iter() {
70 if part.contains(&b'~') {
70 if part.contains(&b'~') {
71 let mut split = part.splitn(2, |b| *b == b'~');
71 let mut split = part.splitn(2, |b| *b == b'~');
72 let first =
72 let first =
73 split.next().unwrap().to_owned().to_ascii_uppercase();
73 split.next().unwrap().to_owned().to_ascii_uppercase();
74 let last = split.next().unwrap();
74 let last = split.next().unwrap();
75 if last.iter().all(u8::is_ascii_digit)
75 if last.iter().all(u8::is_ascii_digit)
76 && (first == b"HG" || first == b"HG8B6C")
76 && (first == b"HG" || first == b"HG8B6C")
77 {
77 {
78 return Err(HgPathError::ContainsIllegalComponent(
78 return Err(HgPathError::ContainsIllegalComponent(
79 path.to_owned(),
79 path.to_owned(),
80 ));
80 ));
81 }
81 }
82 }
82 }
83 }
83 }
84 let lower_path = lower_clean(path.as_bytes());
84 let lower_path = lower_clean(path.as_bytes());
85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
86 let lower_parts: Vec<_> = path
86 let lower_parts: Vec<_> = path
87 .as_bytes()
87 .as_bytes()
88 .split(|b| std::path::is_separator(*b as char))
88 .split(|b| std::path::is_separator(*b as char))
89 .collect();
89 .collect();
90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
91 if let Some(pos) = lower_parts[1..]
91 if let Some(pos) = lower_parts[1..]
92 .iter()
92 .iter()
93 .position(|part| part == &pattern.as_slice())
93 .position(|part| part == &pattern.as_slice())
94 {
94 {
95 let base = lower_parts[..=pos]
95 let base = lower_parts[..=pos]
96 .iter()
96 .iter()
97 .fold(HgPathBuf::new(), |acc, p| {
97 .fold(HgPathBuf::new(), |acc, p| {
98 acc.join(HgPath::new(p))
98 acc.join(HgPath::new(p))
99 });
99 });
100 return Err(HgPathError::IsInsideNestedRepo {
100 return Err(HgPathError::IsInsideNestedRepo {
101 path: path.to_owned(),
101 path: path.to_owned(),
102 nested_repo: base,
102 nested_repo: base,
103 });
103 });
104 }
104 }
105 }
105 }
106 }
106 }
107
107
108 let parts = &parts[..parts.len().saturating_sub(1)];
108 let parts = &parts[..parts.len().saturating_sub(1)];
109
109
110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
111 // if there's a "foo/.hg" directory. This also means we won't
111 // if there's a "foo/.hg" directory. This also means we won't
112 // accidentally traverse a symlink into some other filesystem (which
112 // accidentally traverse a symlink into some other filesystem (which
113 // is potentially expensive to access).
113 // is potentially expensive to access).
114 for index in 0..parts.len() {
114 for index in 0..parts.len() {
115 let prefix = &parts[..index + 1].join(&b'/');
115 let prefix = &parts[..=index].join(&b'/');
116 let prefix = HgPath::new(prefix);
116 let prefix = HgPath::new(prefix);
117 if self.audited_dirs.read().unwrap().contains(prefix) {
117 if self.audited_dirs.read().unwrap().contains(prefix) {
118 continue;
118 continue;
119 }
119 }
120 self.check_filesystem(&prefix, &path)?;
120 self.check_filesystem(&prefix, &path)?;
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
122 }
122 }
123
123
124 self.audited.lock().unwrap().insert(path.to_owned());
124 self.audited.lock().unwrap().insert(path.to_owned());
125
125
126 Ok(())
126 Ok(())
127 }
127 }
128
128
129 pub fn check_filesystem(
129 pub fn check_filesystem(
130 &self,
130 &self,
131 prefix: impl AsRef<HgPath>,
131 prefix: impl AsRef<HgPath>,
132 path: impl AsRef<HgPath>,
132 path: impl AsRef<HgPath>,
133 ) -> Result<(), HgPathError> {
133 ) -> Result<(), HgPathError> {
134 let prefix = prefix.as_ref();
134 let prefix = prefix.as_ref();
135 let path = path.as_ref();
135 let path = path.as_ref();
136 let current_path = self.root.join(
136 let current_path = self.root.join(
137 hg_path_to_path_buf(prefix)
137 hg_path_to_path_buf(prefix)
138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
139 );
139 );
140 match std::fs::symlink_metadata(&current_path) {
140 match std::fs::symlink_metadata(&current_path) {
141 Err(e) => {
141 Err(e) => {
142 // EINVAL can be raised as invalid path syntax under win32.
142 // EINVAL can be raised as invalid path syntax under win32.
143 if e.kind() != std::io::ErrorKind::NotFound
143 if e.kind() != std::io::ErrorKind::NotFound
144 && e.kind() != std::io::ErrorKind::InvalidInput
144 && e.kind() != std::io::ErrorKind::InvalidInput
145 && e.raw_os_error() != Some(20)
145 && e.raw_os_error() != Some(20)
146 {
146 {
147 // Rust does not yet have an `ErrorKind` for
147 // Rust does not yet have an `ErrorKind` for
148 // `NotADirectory` (errno 20)
148 // `NotADirectory` (errno 20)
149 // It happens if the dirstate contains `foo/bar` and
149 // It happens if the dirstate contains `foo/bar` and
150 // foo is not a directory
150 // foo is not a directory
151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
152 }
152 }
153 }
153 }
154 Ok(meta) => {
154 Ok(meta) => {
155 if meta.file_type().is_symlink() {
155 if meta.file_type().is_symlink() {
156 return Err(HgPathError::TraversesSymbolicLink {
156 return Err(HgPathError::TraversesSymbolicLink {
157 path: path.to_owned(),
157 path: path.to_owned(),
158 symlink: prefix.to_owned(),
158 symlink: prefix.to_owned(),
159 });
159 });
160 }
160 }
161 if meta.file_type().is_dir()
161 if meta.file_type().is_dir()
162 && current_path.join(".hg").is_dir()
162 && current_path.join(".hg").is_dir()
163 {
163 {
164 return Err(HgPathError::IsInsideNestedRepo {
164 return Err(HgPathError::IsInsideNestedRepo {
165 path: path.to_owned(),
165 path: path.to_owned(),
166 nested_repo: prefix.to_owned(),
166 nested_repo: prefix.to_owned(),
167 });
167 });
168 }
168 }
169 }
169 }
170 };
170 };
171
171
172 Ok(())
172 Ok(())
173 }
173 }
174
174
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
176 self.audit_path(path).is_ok()
176 self.audit_path(path).is_ok()
177 }
177 }
178 }
178 }
179
179
180 #[cfg(test)]
180 #[cfg(test)]
181 mod tests {
181 mod tests {
182 use super::*;
182 use super::*;
183 use crate::utils::files::get_path_from_bytes;
183 use crate::utils::files::get_path_from_bytes;
184 use crate::utils::hg_path::path_to_hg_path_buf;
184 use crate::utils::hg_path::path_to_hg_path_buf;
185
185
186 #[test]
186 #[test]
187 fn test_path_auditor() {
187 fn test_path_auditor() {
188 let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));
188 let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));
189
189
190 let path = HgPath::new(b".hg/00changelog.i");
190 let path = HgPath::new(b".hg/00changelog.i");
191 assert_eq!(
191 assert_eq!(
192 auditor.audit_path(path),
192 auditor.audit_path(path),
193 Err(HgPathError::InsideDotHg(path.to_owned()))
193 Err(HgPathError::InsideDotHg(path.to_owned()))
194 );
194 );
195 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
195 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
196 assert_eq!(
196 assert_eq!(
197 auditor.audit_path(path),
197 auditor.audit_path(path),
198 Err(HgPathError::IsInsideNestedRepo {
198 Err(HgPathError::IsInsideNestedRepo {
199 path: path.to_owned(),
199 path: path.to_owned(),
200 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
200 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
201 })
201 })
202 );
202 );
203
203
204 use std::fs::{create_dir, File};
204 use std::fs::{create_dir, File};
205 use tempfile::tempdir;
205 use tempfile::tempdir;
206
206
207 let base_dir = tempdir().unwrap();
207 let base_dir = tempdir().unwrap();
208 let base_dir_path = base_dir.path();
208 let base_dir_path = base_dir.path();
209 let a = base_dir_path.join("a");
209 let a = base_dir_path.join("a");
210 let b = base_dir_path.join("b");
210 let b = base_dir_path.join("b");
211 create_dir(&a).unwrap();
211 create_dir(&a).unwrap();
212 let in_a_path = a.join("in_a");
212 let in_a_path = a.join("in_a");
213 File::create(in_a_path).unwrap();
213 File::create(in_a_path).unwrap();
214
214
215 // TODO make portable
215 // TODO make portable
216 std::os::unix::fs::symlink(&a, &b).unwrap();
216 std::os::unix::fs::symlink(&a, &b).unwrap();
217
217
218 let buf = b.join("in_a").components().skip(2).collect::<PathBuf>();
218 let buf = b.join("in_a").components().skip(2).collect::<PathBuf>();
219 eprintln!("buf: {}", buf.display());
219 eprintln!("buf: {}", buf.display());
220 let path = path_to_hg_path_buf(buf).unwrap();
220 let path = path_to_hg_path_buf(buf).unwrap();
221 assert_eq!(
221 assert_eq!(
222 auditor.audit_path(&path),
222 auditor.audit_path(&path),
223 Err(HgPathError::TraversesSymbolicLink {
223 Err(HgPathError::TraversesSymbolicLink {
224 path: path,
224 path: path,
225 symlink: path_to_hg_path_buf(
225 symlink: path_to_hg_path_buf(
226 b.components().skip(2).collect::<PathBuf>()
226 b.components().skip(2).collect::<PathBuf>()
227 )
227 )
228 .unwrap()
228 .unwrap()
229 })
229 })
230 );
230 );
231 }
231 }
232 }
232 }
@@ -1,179 +1,176
1 // cindex.rs
1 // cindex.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings to use the Index defined by the parsers C extension
8 //! Bindings to use the Index defined by the parsers C extension
9 //!
9 //!
10 //! Ideally, we should use an Index entirely implemented in Rust,
10 //! Ideally, we should use an Index entirely implemented in Rust,
11 //! but this will take some time to get there.
11 //! but this will take some time to get there.
12
12
13 use cpython::{
13 use cpython::{
14 exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult,
14 exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult,
15 PyTuple, Python, PythonObject,
15 PyTuple, Python, PythonObject,
16 };
16 };
17 use hg::revlog::{Node, RevlogIndex};
17 use hg::revlog::{Node, RevlogIndex};
18 use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION};
18 use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION};
19 use libc::c_int;
19 use libc::c_int;
20
20
21 const REVLOG_CABI_VERSION: c_int = 2;
21 const REVLOG_CABI_VERSION: c_int = 2;
22
22
23 #[repr(C)]
23 #[repr(C)]
24 pub struct Revlog_CAPI {
24 pub struct Revlog_CAPI {
25 abi_version: c_int,
25 abi_version: c_int,
26 index_length:
26 index_length:
27 unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> c_int,
27 unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> c_int,
28 index_node: unsafe extern "C" fn(
28 index_node: unsafe extern "C" fn(
29 index: *mut revlog_capi::RawPyObject,
29 index: *mut revlog_capi::RawPyObject,
30 rev: c_int,
30 rev: c_int,
31 ) -> *const Node,
31 ) -> *const Node,
32 index_parents: unsafe extern "C" fn(
32 index_parents: unsafe extern "C" fn(
33 index: *mut revlog_capi::RawPyObject,
33 index: *mut revlog_capi::RawPyObject,
34 rev: c_int,
34 rev: c_int,
35 ps: *mut [c_int; 2],
35 ps: *mut [c_int; 2],
36 ) -> c_int,
36 ) -> c_int,
37 }
37 }
38
38
39 py_capsule!(
39 py_capsule!(
40 from mercurial.cext.parsers import revlog_CAPI
40 from mercurial.cext.parsers import revlog_CAPI
41 as revlog_capi for Revlog_CAPI);
41 as revlog_capi for Revlog_CAPI);
42
42
43 /// A `Graph` backed up by objects and functions from revlog.c
43 /// A `Graph` backed up by objects and functions from revlog.c
44 ///
44 ///
45 /// This implementation of the `Graph` trait, relies on (pointers to)
45 /// This implementation of the `Graph` trait, relies on (pointers to)
46 /// - the C index object (`index` member)
46 /// - the C index object (`index` member)
47 /// - the `index_get_parents()` function (`parents` member)
47 /// - the `index_get_parents()` function (`parents` member)
48 ///
48 ///
49 /// # Safety
49 /// # Safety
50 ///
50 ///
51 /// The C index itself is mutable, and this Rust exposition is **not
51 /// The C index itself is mutable, and this Rust exposition is **not
52 /// protected by the GIL**, meaning that this construct isn't safe with respect
52 /// protected by the GIL**, meaning that this construct isn't safe with respect
53 /// to Python threads.
53 /// to Python threads.
54 ///
54 ///
55 /// All callers of this `Index` must acquire the GIL and must not release it
55 /// All callers of this `Index` must acquire the GIL and must not release it
56 /// while working.
56 /// while working.
57 ///
57 ///
58 /// # TODO find a solution to make it GIL safe again.
58 /// # TODO find a solution to make it GIL safe again.
59 ///
59 ///
60 /// This is non trivial, and can wait until we have a clearer picture with
60 /// This is non trivial, and can wait until we have a clearer picture with
61 /// more Rust Mercurial constructs.
61 /// more Rust Mercurial constructs.
62 ///
62 ///
63 /// One possibility would be to a `GILProtectedIndex` wrapper enclosing
63 /// One possibility would be to a `GILProtectedIndex` wrapper enclosing
64 /// a `Python<'p>` marker and have it be the one implementing the
64 /// a `Python<'p>` marker and have it be the one implementing the
65 /// `Graph` trait, but this would mean the `Graph` implementor would become
65 /// `Graph` trait, but this would mean the `Graph` implementor would become
66 /// likely to change between subsequent method invocations of the `hg-core`
66 /// likely to change between subsequent method invocations of the `hg-core`
67 /// objects (a serious change of the `hg-core` API):
67 /// objects (a serious change of the `hg-core` API):
68 /// either exposing ways to mutate the `Graph`, or making it a non persistent
68 /// either exposing ways to mutate the `Graph`, or making it a non persistent
69 /// parameter in the relevant methods that need one.
69 /// parameter in the relevant methods that need one.
70 ///
70 ///
71 /// Another possibility would be to introduce an abstract lock handle into
71 /// Another possibility would be to introduce an abstract lock handle into
72 /// the core API, that would be tied to `GILGuard` / `Python<'p>`
72 /// the core API, that would be tied to `GILGuard` / `Python<'p>`
73 /// in the case of the `cpython` crate bindings yet could leave room for other
73 /// in the case of the `cpython` crate bindings yet could leave room for other
74 /// mechanisms in other contexts.
74 /// mechanisms in other contexts.
75 pub struct Index {
75 pub struct Index {
76 index: PyObject,
76 index: PyObject,
77 capi: &'static Revlog_CAPI,
77 capi: &'static Revlog_CAPI,
78 }
78 }
79
79
80 impl Index {
80 impl Index {
81 pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
81 pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
82 let capi = unsafe { revlog_capi::retrieve(py)? };
82 let capi = unsafe { revlog_capi::retrieve(py)? };
83 if capi.abi_version != REVLOG_CABI_VERSION {
83 if capi.abi_version != REVLOG_CABI_VERSION {
84 return Err(PyErr::new::<ImportError, _>(
84 return Err(PyErr::new::<ImportError, _>(
85 py,
85 py,
86 format!(
86 format!(
87 "ABI version mismatch: the C ABI revlog version {} \
87 "ABI version mismatch: the C ABI revlog version {} \
88 does not match the {} expected by Rust hg-cpython",
88 does not match the {} expected by Rust hg-cpython",
89 capi.abi_version, REVLOG_CABI_VERSION
89 capi.abi_version, REVLOG_CABI_VERSION
90 ),
90 ),
91 ));
91 ));
92 }
92 }
93 Ok(Index {
93 Ok(Index { index, capi })
94 index: index,
95 capi: capi,
96 })
97 }
94 }
98
95
99 /// return a reference to the CPython Index object in this Struct
96 /// return a reference to the CPython Index object in this Struct
100 pub fn inner(&self) -> &PyObject {
97 pub fn inner(&self) -> &PyObject {
101 &self.index
98 &self.index
102 }
99 }
103
100
104 pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> {
101 pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> {
105 self.index.call_method(
102 self.index.call_method(
106 py,
103 py,
107 "append",
104 "append",
108 PyTuple::new(py, &[tup.into_object()]),
105 PyTuple::new(py, &[tup.into_object()]),
109 None,
106 None,
110 )
107 )
111 }
108 }
112 }
109 }
113
110
114 impl Clone for Index {
111 impl Clone for Index {
115 fn clone(&self) -> Self {
112 fn clone(&self) -> Self {
116 let guard = Python::acquire_gil();
113 let guard = Python::acquire_gil();
117 Index {
114 Index {
118 index: self.index.clone_ref(guard.python()),
115 index: self.index.clone_ref(guard.python()),
119 capi: self.capi,
116 capi: self.capi,
120 }
117 }
121 }
118 }
122 }
119 }
123
120
124 impl PyClone for Index {
121 impl PyClone for Index {
125 fn clone_ref(&self, py: Python) -> Self {
122 fn clone_ref(&self, py: Python) -> Self {
126 Index {
123 Index {
127 index: self.index.clone_ref(py),
124 index: self.index.clone_ref(py),
128 capi: self.capi,
125 capi: self.capi,
129 }
126 }
130 }
127 }
131 }
128 }
132
129
133 impl Graph for Index {
130 impl Graph for Index {
134 /// wrap a call to the C extern parents function
131 /// wrap a call to the C extern parents function
135 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
132 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
136 if rev == WORKING_DIRECTORY_REVISION {
133 if rev == WORKING_DIRECTORY_REVISION {
137 return Err(GraphError::WorkingDirectoryUnsupported);
134 return Err(GraphError::WorkingDirectoryUnsupported);
138 }
135 }
139 let mut res: [c_int; 2] = [0; 2];
136 let mut res: [c_int; 2] = [0; 2];
140 let code = unsafe {
137 let code = unsafe {
141 (self.capi.index_parents)(
138 (self.capi.index_parents)(
142 self.index.as_ptr(),
139 self.index.as_ptr(),
143 rev as c_int,
140 rev as c_int,
144 &mut res as *mut [c_int; 2],
141 &mut res as *mut [c_int; 2],
145 )
142 )
146 };
143 };
147 match code {
144 match code {
148 0 => Ok(res),
145 0 => Ok(res),
149 _ => Err(GraphError::ParentOutOfRange(rev)),
146 _ => Err(GraphError::ParentOutOfRange(rev)),
150 }
147 }
151 }
148 }
152 }
149 }
153
150
154 impl RevlogIndex for Index {
151 impl RevlogIndex for Index {
155 /// Note C return type is Py_ssize_t (hence signed), but we shall
152 /// Note C return type is Py_ssize_t (hence signed), but we shall
156 /// force it to unsigned, because it's a length
153 /// force it to unsigned, because it's a length
157 fn len(&self) -> usize {
154 fn len(&self) -> usize {
158 unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize }
155 unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize }
159 }
156 }
160
157
161 fn node<'a>(&'a self, rev: Revision) -> Option<&'a Node> {
158 fn node(&self, rev: Revision) -> Option<&Node> {
162 let raw = unsafe {
159 let raw = unsafe {
163 (self.capi.index_node)(self.index.as_ptr(), rev as c_int)
160 (self.capi.index_node)(self.index.as_ptr(), rev as c_int)
164 };
161 };
165 if raw.is_null() {
162 if raw.is_null() {
166 None
163 None
167 } else {
164 } else {
168 // TODO it would be much better for the C layer to give us
165 // TODO it would be much better for the C layer to give us
169 // a length, since the hash length will change in the near
166 // a length, since the hash length will change in the near
170 // future, but that's probably out of scope for the nodemap
167 // future, but that's probably out of scope for the nodemap
171 // patch series.
168 // patch series.
172 //
169 //
173 // The root of that unsafety relies in the signature of
170 // The root of that unsafety relies in the signature of
174 // `capi.index_node()` itself: returning a `Node` pointer
171 // `capi.index_node()` itself: returning a `Node` pointer
175 // whereas it's a `char *` in the C counterpart.
172 // whereas it's a `char *` in the C counterpart.
176 Some(unsafe { &*raw })
173 Some(unsafe { &*raw })
177 }
174 }
178 }
175 }
179 }
176 }
@@ -1,118 +1,118
1 // copymap.rs
1 // copymap.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
8 //! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10
10
11 use cpython::{
11 use cpython::{
12 PyBytes, PyClone, PyDict, PyObject, PyResult, Python, UnsafePyLeaked,
12 PyBytes, PyClone, PyDict, PyObject, PyResult, Python, UnsafePyLeaked,
13 };
13 };
14 use std::cell::RefCell;
14 use std::cell::RefCell;
15
15
16 use crate::dirstate::dirstate_map::DirstateMap;
16 use crate::dirstate::dirstate_map::DirstateMap;
17 use hg::{utils::hg_path::HgPathBuf, CopyMapIter};
17 use hg::{utils::hg_path::HgPathBuf, CopyMapIter};
18
18
19 py_class!(pub class CopyMap |py| {
19 py_class!(pub class CopyMap |py| {
20 data dirstate_map: DirstateMap;
20 data dirstate_map: DirstateMap;
21
21
22 def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
22 def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
23 (*self.dirstate_map(py)).copymapgetitem(py, key)
23 (*self.dirstate_map(py)).copymapgetitem(py, key)
24 }
24 }
25
25
26 def __len__(&self) -> PyResult<usize> {
26 def __len__(&self) -> PyResult<usize> {
27 self.dirstate_map(py).copymaplen(py)
27 self.dirstate_map(py).copymaplen(py)
28 }
28 }
29
29
30 def __contains__(&self, key: PyObject) -> PyResult<bool> {
30 def __contains__(&self, key: PyObject) -> PyResult<bool> {
31 self.dirstate_map(py).copymapcontains(py, key)
31 self.dirstate_map(py).copymapcontains(py, key)
32 }
32 }
33
33
34 def get(
34 def get(
35 &self,
35 &self,
36 key: PyObject,
36 key: PyObject,
37 default: Option<PyObject> = None
37 default: Option<PyObject> = None
38 ) -> PyResult<Option<PyObject>> {
38 ) -> PyResult<Option<PyObject>> {
39 self.dirstate_map(py).copymapget(py, key, default)
39 self.dirstate_map(py).copymapget(py, key, default)
40 }
40 }
41
41
42 def pop(
42 def pop(
43 &self,
43 &self,
44 key: PyObject,
44 key: PyObject,
45 default: Option<PyObject> = None
45 default: Option<PyObject> = None
46 ) -> PyResult<Option<PyObject>> {
46 ) -> PyResult<Option<PyObject>> {
47 self.dirstate_map(py).copymappop(py, key, default)
47 self.dirstate_map(py).copymappop(py, key, default)
48 }
48 }
49
49
50 def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
50 def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
51 self.dirstate_map(py).copymapiter(py)
51 self.dirstate_map(py).copymapiter(py)
52 }
52 }
53
53
54 // Python's `dict()` builtin works with either a subclass of dict
54 // Python's `dict()` builtin works with either a subclass of dict
55 // or an abstract mapping. Said mapping needs to implement `__getitem__`
55 // or an abstract mapping. Said mapping needs to implement `__getitem__`
56 // and `keys`.
56 // and `keys`.
57 def keys(&self) -> PyResult<CopyMapKeysIterator> {
57 def keys(&self) -> PyResult<CopyMapKeysIterator> {
58 self.dirstate_map(py).copymapiter(py)
58 self.dirstate_map(py).copymapiter(py)
59 }
59 }
60
60
61 def items(&self) -> PyResult<CopyMapItemsIterator> {
61 def items(&self) -> PyResult<CopyMapItemsIterator> {
62 self.dirstate_map(py).copymapitemsiter(py)
62 self.dirstate_map(py).copymapitemsiter(py)
63 }
63 }
64
64
65 def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
65 def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
66 self.dirstate_map(py).copymapitemsiter(py)
66 self.dirstate_map(py).copymapitemsiter(py)
67 }
67 }
68
68
69 def __setitem__(
69 def __setitem__(
70 &self,
70 &self,
71 key: PyObject,
71 key: PyObject,
72 item: PyObject
72 item: PyObject
73 ) -> PyResult<()> {
73 ) -> PyResult<()> {
74 self.dirstate_map(py).copymapsetitem(py, key, item)?;
74 self.dirstate_map(py).copymapsetitem(py, key, item)?;
75 Ok(())
75 Ok(())
76 }
76 }
77
77
78 def copy(&self) -> PyResult<PyDict> {
78 def copy(&self) -> PyResult<PyDict> {
79 self.dirstate_map(py).copymapcopy(py)
79 self.dirstate_map(py).copymapcopy(py)
80 }
80 }
81
81
82 });
82 });
83
83
84 impl CopyMap {
84 impl CopyMap {
85 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
85 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
86 Self::create_instance(py, dm)
86 Self::create_instance(py, dm)
87 }
87 }
88 fn translate_key(
88 fn translate_key(
89 py: Python,
89 py: Python,
90 res: (&HgPathBuf, &HgPathBuf),
90 res: (&HgPathBuf, &HgPathBuf),
91 ) -> PyResult<Option<PyBytes>> {
91 ) -> PyResult<Option<PyBytes>> {
92 Ok(Some(PyBytes::new(py, res.0.as_ref())))
92 Ok(Some(PyBytes::new(py, res.0.as_bytes())))
93 }
93 }
94 fn translate_key_value(
94 fn translate_key_value(
95 py: Python,
95 py: Python,
96 res: (&HgPathBuf, &HgPathBuf),
96 res: (&HgPathBuf, &HgPathBuf),
97 ) -> PyResult<Option<(PyBytes, PyBytes)>> {
97 ) -> PyResult<Option<(PyBytes, PyBytes)>> {
98 let (k, v) = res;
98 let (k, v) = res;
99 Ok(Some((
99 Ok(Some((
100 PyBytes::new(py, k.as_ref()),
100 PyBytes::new(py, k.as_bytes()),
101 PyBytes::new(py, v.as_ref()),
101 PyBytes::new(py, v.as_bytes()),
102 )))
102 )))
103 }
103 }
104 }
104 }
105
105
106 py_shared_iterator!(
106 py_shared_iterator!(
107 CopyMapKeysIterator,
107 CopyMapKeysIterator,
108 UnsafePyLeaked<CopyMapIter<'static>>,
108 UnsafePyLeaked<CopyMapIter<'static>>,
109 CopyMap::translate_key,
109 CopyMap::translate_key,
110 Option<PyBytes>
110 Option<PyBytes>
111 );
111 );
112
112
113 py_shared_iterator!(
113 py_shared_iterator!(
114 CopyMapItemsIterator,
114 CopyMapItemsIterator,
115 UnsafePyLeaked<CopyMapIter<'static>>,
115 UnsafePyLeaked<CopyMapIter<'static>>,
116 CopyMap::translate_key_value,
116 CopyMap::translate_key_value,
117 Option<(PyBytes, PyBytes)>
117 Option<(PyBytes, PyBytes)>
118 );
118 );
@@ -1,140 +1,140
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
8 //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10
10
11 use std::cell::RefCell;
11 use std::cell::RefCell;
12 use std::convert::TryInto;
12 use std::convert::TryInto;
13
13
14 use cpython::{
14 use cpython::{
15 exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
15 exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
16 Python, UnsafePyLeaked,
16 Python, UnsafePyLeaked,
17 };
17 };
18
18
19 use crate::dirstate::extract_dirstate;
19 use crate::dirstate::extract_dirstate;
20 use hg::{
20 use hg::{
21 utils::hg_path::{HgPath, HgPathBuf},
21 utils::hg_path::{HgPath, HgPathBuf},
22 DirsMultiset, DirsMultisetIter, DirstateMapError, DirstateParseError,
22 DirsMultiset, DirsMultisetIter, DirstateMapError, DirstateParseError,
23 EntryState,
23 EntryState,
24 };
24 };
25
25
26 py_class!(pub class Dirs |py| {
26 py_class!(pub class Dirs |py| {
27 @shared data inner: DirsMultiset;
27 @shared data inner: DirsMultiset;
28
28
29 // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
29 // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
30 // a `list`)
30 // a `list`)
31 def __new__(
31 def __new__(
32 _cls,
32 _cls,
33 map: PyObject,
33 map: PyObject,
34 skip: Option<PyObject> = None
34 skip: Option<PyObject> = None
35 ) -> PyResult<Self> {
35 ) -> PyResult<Self> {
36 let mut skip_state: Option<EntryState> = None;
36 let mut skip_state: Option<EntryState> = None;
37 if let Some(skip) = skip {
37 if let Some(skip) = skip {
38 skip_state = Some(
38 skip_state = Some(
39 skip.extract::<PyBytes>(py)?.data(py)[0]
39 skip.extract::<PyBytes>(py)?.data(py)[0]
40 .try_into()
40 .try_into()
41 .map_err(|e: DirstateParseError| {
41 .map_err(|e: DirstateParseError| {
42 PyErr::new::<exc::ValueError, _>(py, e.to_string())
42 PyErr::new::<exc::ValueError, _>(py, e.to_string())
43 })?,
43 })?,
44 );
44 );
45 }
45 }
46 let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
46 let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
47 let dirstate = extract_dirstate(py, &map)?;
47 let dirstate = extract_dirstate(py, &map)?;
48 DirsMultiset::from_dirstate(&dirstate, skip_state)
48 DirsMultiset::from_dirstate(&dirstate, skip_state)
49 .map_err(|e| {
49 .map_err(|e| {
50 PyErr::new::<exc::ValueError, _>(py, e.to_string())
50 PyErr::new::<exc::ValueError, _>(py, e.to_string())
51 })?
51 })?
52 } else {
52 } else {
53 let map: Result<Vec<HgPathBuf>, PyErr> = map
53 let map: Result<Vec<HgPathBuf>, PyErr> = map
54 .iter(py)?
54 .iter(py)?
55 .map(|o| {
55 .map(|o| {
56 Ok(HgPathBuf::from_bytes(
56 Ok(HgPathBuf::from_bytes(
57 o?.extract::<PyBytes>(py)?.data(py),
57 o?.extract::<PyBytes>(py)?.data(py),
58 ))
58 ))
59 })
59 })
60 .collect();
60 .collect();
61 DirsMultiset::from_manifest(&map?)
61 DirsMultiset::from_manifest(&map?)
62 .map_err(|e| {
62 .map_err(|e| {
63 PyErr::new::<exc::ValueError, _>(py, e.to_string())
63 PyErr::new::<exc::ValueError, _>(py, e.to_string())
64 })?
64 })?
65 };
65 };
66
66
67 Self::create_instance(py, inner)
67 Self::create_instance(py, inner)
68 }
68 }
69
69
70 def addpath(&self, path: PyObject) -> PyResult<PyObject> {
70 def addpath(&self, path: PyObject) -> PyResult<PyObject> {
71 self.inner(py).borrow_mut().add_path(
71 self.inner(py).borrow_mut().add_path(
72 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
72 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
73 ).and(Ok(py.None())).or_else(|e| {
73 ).and(Ok(py.None())).or_else(|e| {
74 match e {
74 match e {
75 DirstateMapError::EmptyPath => {
75 DirstateMapError::EmptyPath => {
76 Ok(py.None())
76 Ok(py.None())
77 },
77 },
78 e => {
78 e => {
79 Err(PyErr::new::<exc::ValueError, _>(
79 Err(PyErr::new::<exc::ValueError, _>(
80 py,
80 py,
81 e.to_string(),
81 e.to_string(),
82 ))
82 ))
83 }
83 }
84 }
84 }
85 })
85 })
86 }
86 }
87
87
88 def delpath(&self, path: PyObject) -> PyResult<PyObject> {
88 def delpath(&self, path: PyObject) -> PyResult<PyObject> {
89 self.inner(py).borrow_mut().delete_path(
89 self.inner(py).borrow_mut().delete_path(
90 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
90 HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
91 )
91 )
92 .and(Ok(py.None()))
92 .and(Ok(py.None()))
93 .or_else(|e| {
93 .or_else(|e| {
94 match e {
94 match e {
95 DirstateMapError::EmptyPath => {
95 DirstateMapError::EmptyPath => {
96 Ok(py.None())
96 Ok(py.None())
97 },
97 },
98 e => {
98 e => {
99 Err(PyErr::new::<exc::ValueError, _>(
99 Err(PyErr::new::<exc::ValueError, _>(
100 py,
100 py,
101 e.to_string(),
101 e.to_string(),
102 ))
102 ))
103 }
103 }
104 }
104 }
105 })
105 })
106 }
106 }
107 def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
107 def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
108 let leaked_ref = self.inner(py).leak_immutable();
108 let leaked_ref = self.inner(py).leak_immutable();
109 DirsMultisetKeysIterator::from_inner(
109 DirsMultisetKeysIterator::from_inner(
110 py,
110 py,
111 unsafe { leaked_ref.map(py, |o| o.iter()) },
111 unsafe { leaked_ref.map(py, |o| o.iter()) },
112 )
112 )
113 }
113 }
114
114
115 def __contains__(&self, item: PyObject) -> PyResult<bool> {
115 def __contains__(&self, item: PyObject) -> PyResult<bool> {
116 Ok(self.inner(py).borrow().contains(HgPath::new(
116 Ok(self.inner(py).borrow().contains(HgPath::new(
117 item.extract::<PyBytes>(py)?.data(py).as_ref(),
117 item.extract::<PyBytes>(py)?.data(py).as_ref(),
118 )))
118 )))
119 }
119 }
120 });
120 });
121
121
122 impl Dirs {
122 impl Dirs {
123 pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
123 pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
124 Self::create_instance(py, d)
124 Self::create_instance(py, d)
125 }
125 }
126
126
127 fn translate_key(
127 fn translate_key(
128 py: Python,
128 py: Python,
129 res: &HgPathBuf,
129 res: &HgPathBuf,
130 ) -> PyResult<Option<PyBytes>> {
130 ) -> PyResult<Option<PyBytes>> {
131 Ok(Some(PyBytes::new(py, res.as_ref())))
131 Ok(Some(PyBytes::new(py, res.as_bytes())))
132 }
132 }
133 }
133 }
134
134
135 py_shared_iterator!(
135 py_shared_iterator!(
136 DirsMultisetKeysIterator,
136 DirsMultisetKeysIterator,
137 UnsafePyLeaked<DirsMultisetIter<'static>>,
137 UnsafePyLeaked<DirsMultisetIter<'static>>,
138 Dirs::translate_key,
138 Dirs::translate_key,
139 Option<PyBytes>
139 Option<PyBytes>
140 );
140 );
@@ -1,586 +1,590
1 // dirstate_map.rs
1 // dirstate_map.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10
10
11 use std::cell::{Ref, RefCell};
11 use std::cell::{Ref, RefCell};
12 use std::convert::TryInto;
12 use std::convert::TryInto;
13 use std::time::Duration;
13 use std::time::Duration;
14
14
15 use cpython::{
15 use cpython::{
16 exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList,
16 exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 UnsafePyLeaked,
18 UnsafePyLeaked,
19 };
19 };
20
20
21 use crate::{
21 use crate::{
22 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
22 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
23 dirstate::non_normal_entries::{
23 dirstate::non_normal_entries::{
24 NonNormalEntries, NonNormalEntriesIterator,
24 NonNormalEntries, NonNormalEntriesIterator,
25 },
25 },
26 dirstate::{dirs_multiset::Dirs, make_dirstate_tuple},
26 dirstate::{dirs_multiset::Dirs, make_dirstate_tuple},
27 };
27 };
28 use hg::{
28 use hg::{
29 utils::hg_path::{HgPath, HgPathBuf},
29 utils::hg_path::{HgPath, HgPathBuf},
30 DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
30 DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
31 DirstateMapError, DirstateParents, DirstateParseError, EntryState,
31 DirstateMapError, DirstateParents, DirstateParseError, EntryState,
32 StateMapIter, PARENT_SIZE,
32 StateMapIter, PARENT_SIZE,
33 };
33 };
34
34
35 // TODO
35 // TODO
36 // This object needs to share references to multiple members of its Rust
36 // This object needs to share references to multiple members of its Rust
37 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
37 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
38 // Right now `CopyMap` is done, but it needs to have an explicit reference
38 // Right now `CopyMap` is done, but it needs to have an explicit reference
39 // to `RustDirstateMap` which itself needs to have an encapsulation for
39 // to `RustDirstateMap` which itself needs to have an encapsulation for
40 // every method in `CopyMap` (copymapcopy, etc.).
40 // every method in `CopyMap` (copymapcopy, etc.).
41 // This is ugly and hard to maintain.
41 // This is ugly and hard to maintain.
42 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
42 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
43 // `py_class!` is already implemented and does not mention
43 // `py_class!` is already implemented and does not mention
44 // `RustDirstateMap`, rightfully so.
44 // `RustDirstateMap`, rightfully so.
45 // All attributes also have to have a separate refcount data attribute for
45 // All attributes also have to have a separate refcount data attribute for
46 // leaks, with all methods that go along for reference sharing.
46 // leaks, with all methods that go along for reference sharing.
47 py_class!(pub class DirstateMap |py| {
47 py_class!(pub class DirstateMap |py| {
48 @shared data inner: RustDirstateMap;
48 @shared data inner: RustDirstateMap;
49
49
50 def __new__(_cls, _root: PyObject) -> PyResult<Self> {
50 def __new__(_cls, _root: PyObject) -> PyResult<Self> {
51 let inner = RustDirstateMap::default();
51 let inner = RustDirstateMap::default();
52 Self::create_instance(py, inner)
52 Self::create_instance(py, inner)
53 }
53 }
54
54
55 def clear(&self) -> PyResult<PyObject> {
55 def clear(&self) -> PyResult<PyObject> {
56 self.inner(py).borrow_mut().clear();
56 self.inner(py).borrow_mut().clear();
57 Ok(py.None())
57 Ok(py.None())
58 }
58 }
59
59
60 def get(
60 def get(
61 &self,
61 &self,
62 key: PyObject,
62 key: PyObject,
63 default: Option<PyObject> = None
63 default: Option<PyObject> = None
64 ) -> PyResult<Option<PyObject>> {
64 ) -> PyResult<Option<PyObject>> {
65 let key = key.extract::<PyBytes>(py)?;
65 let key = key.extract::<PyBytes>(py)?;
66 match self.inner(py).borrow().get(HgPath::new(key.data(py))) {
66 match self.inner(py).borrow().get(HgPath::new(key.data(py))) {
67 Some(entry) => {
67 Some(entry) => {
68 Ok(Some(make_dirstate_tuple(py, entry)?))
68 Ok(Some(make_dirstate_tuple(py, entry)?))
69 },
69 },
70 None => Ok(default)
70 None => Ok(default)
71 }
71 }
72 }
72 }
73
73
74 def addfile(
74 def addfile(
75 &self,
75 &self,
76 f: PyObject,
76 f: PyObject,
77 oldstate: PyObject,
77 oldstate: PyObject,
78 state: PyObject,
78 state: PyObject,
79 mode: PyObject,
79 mode: PyObject,
80 size: PyObject,
80 size: PyObject,
81 mtime: PyObject
81 mtime: PyObject
82 ) -> PyResult<PyObject> {
82 ) -> PyResult<PyObject> {
83 self.inner(py).borrow_mut().add_file(
83 self.inner(py).borrow_mut().add_file(
84 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
84 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
85 oldstate.extract::<PyBytes>(py)?.data(py)[0]
85 oldstate.extract::<PyBytes>(py)?.data(py)[0]
86 .try_into()
86 .try_into()
87 .map_err(|e: DirstateParseError| {
87 .map_err(|e: DirstateParseError| {
88 PyErr::new::<exc::ValueError, _>(py, e.to_string())
88 PyErr::new::<exc::ValueError, _>(py, e.to_string())
89 })?,
89 })?,
90 DirstateEntry {
90 DirstateEntry {
91 state: state.extract::<PyBytes>(py)?.data(py)[0]
91 state: state.extract::<PyBytes>(py)?.data(py)[0]
92 .try_into()
92 .try_into()
93 .map_err(|e: DirstateParseError| {
93 .map_err(|e: DirstateParseError| {
94 PyErr::new::<exc::ValueError, _>(py, e.to_string())
94 PyErr::new::<exc::ValueError, _>(py, e.to_string())
95 })?,
95 })?,
96 mode: mode.extract(py)?,
96 mode: mode.extract(py)?,
97 size: size.extract(py)?,
97 size: size.extract(py)?,
98 mtime: mtime.extract(py)?,
98 mtime: mtime.extract(py)?,
99 },
99 },
100 ).and(Ok(py.None())).or_else(|e: DirstateMapError| {
100 ).and(Ok(py.None())).or_else(|e: DirstateMapError| {
101 Err(PyErr::new::<exc::ValueError, _>(py, e.to_string()))
101 Err(PyErr::new::<exc::ValueError, _>(py, e.to_string()))
102 })
102 })
103 }
103 }
104
104
105 def removefile(
105 def removefile(
106 &self,
106 &self,
107 f: PyObject,
107 f: PyObject,
108 oldstate: PyObject,
108 oldstate: PyObject,
109 size: PyObject
109 size: PyObject
110 ) -> PyResult<PyObject> {
110 ) -> PyResult<PyObject> {
111 self.inner(py).borrow_mut()
111 self.inner(py).borrow_mut()
112 .remove_file(
112 .remove_file(
113 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
113 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
114 oldstate.extract::<PyBytes>(py)?.data(py)[0]
114 oldstate.extract::<PyBytes>(py)?.data(py)[0]
115 .try_into()
115 .try_into()
116 .map_err(|e: DirstateParseError| {
116 .map_err(|e: DirstateParseError| {
117 PyErr::new::<exc::ValueError, _>(py, e.to_string())
117 PyErr::new::<exc::ValueError, _>(py, e.to_string())
118 })?,
118 })?,
119 size.extract(py)?,
119 size.extract(py)?,
120 )
120 )
121 .or_else(|_| {
121 .or_else(|_| {
122 Err(PyErr::new::<exc::OSError, _>(
122 Err(PyErr::new::<exc::OSError, _>(
123 py,
123 py,
124 "Dirstate error".to_string(),
124 "Dirstate error".to_string(),
125 ))
125 ))
126 })?;
126 })?;
127 Ok(py.None())
127 Ok(py.None())
128 }
128 }
129
129
130 def dropfile(
130 def dropfile(
131 &self,
131 &self,
132 f: PyObject,
132 f: PyObject,
133 oldstate: PyObject
133 oldstate: PyObject
134 ) -> PyResult<PyBool> {
134 ) -> PyResult<PyBool> {
135 self.inner(py).borrow_mut()
135 self.inner(py).borrow_mut()
136 .drop_file(
136 .drop_file(
137 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
137 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
138 oldstate.extract::<PyBytes>(py)?.data(py)[0]
138 oldstate.extract::<PyBytes>(py)?.data(py)[0]
139 .try_into()
139 .try_into()
140 .map_err(|e: DirstateParseError| {
140 .map_err(|e: DirstateParseError| {
141 PyErr::new::<exc::ValueError, _>(py, e.to_string())
141 PyErr::new::<exc::ValueError, _>(py, e.to_string())
142 })?,
142 })?,
143 )
143 )
144 .and_then(|b| Ok(b.to_py_object(py)))
144 .and_then(|b| Ok(b.to_py_object(py)))
145 .or_else(|_| {
145 .or_else(|_| {
146 Err(PyErr::new::<exc::OSError, _>(
146 Err(PyErr::new::<exc::OSError, _>(
147 py,
147 py,
148 "Dirstate error".to_string(),
148 "Dirstate error".to_string(),
149 ))
149 ))
150 })
150 })
151 }
151 }
152
152
153 def clearambiguoustimes(
153 def clearambiguoustimes(
154 &self,
154 &self,
155 files: PyObject,
155 files: PyObject,
156 now: PyObject
156 now: PyObject
157 ) -> PyResult<PyObject> {
157 ) -> PyResult<PyObject> {
158 let files: PyResult<Vec<HgPathBuf>> = files
158 let files: PyResult<Vec<HgPathBuf>> = files
159 .iter(py)?
159 .iter(py)?
160 .map(|filename| {
160 .map(|filename| {
161 Ok(HgPathBuf::from_bytes(
161 Ok(HgPathBuf::from_bytes(
162 filename?.extract::<PyBytes>(py)?.data(py),
162 filename?.extract::<PyBytes>(py)?.data(py),
163 ))
163 ))
164 })
164 })
165 .collect();
165 .collect();
166 self.inner(py).borrow_mut()
166 self.inner(py).borrow_mut()
167 .clear_ambiguous_times(files?, now.extract(py)?);
167 .clear_ambiguous_times(files?, now.extract(py)?);
168 Ok(py.None())
168 Ok(py.None())
169 }
169 }
170
170
171 def other_parent_entries(&self) -> PyResult<PyObject> {
171 def other_parent_entries(&self) -> PyResult<PyObject> {
172 let mut inner_shared = self.inner(py).borrow_mut();
172 let mut inner_shared = self.inner(py).borrow_mut();
173 let (_, other_parent) =
173 let (_, other_parent) =
174 inner_shared.get_non_normal_other_parent_entries();
174 inner_shared.get_non_normal_other_parent_entries();
175
175
176 let locals = PyDict::new(py);
176 let locals = PyDict::new(py);
177 locals.set_item(
177 locals.set_item(
178 py,
178 py,
179 "other_parent",
179 "other_parent",
180 other_parent
180 other_parent
181 .iter()
181 .iter()
182 .map(|v| PyBytes::new(py, v.as_ref()))
182 .map(|v| PyBytes::new(py, v.as_bytes()))
183 .collect::<Vec<PyBytes>>()
183 .collect::<Vec<PyBytes>>()
184 .to_py_object(py),
184 .to_py_object(py),
185 )?;
185 )?;
186
186
187 py.eval("set(other_parent)", None, Some(&locals))
187 py.eval("set(other_parent)", None, Some(&locals))
188 }
188 }
189
189
190 def non_normal_entries(&self) -> PyResult<NonNormalEntries> {
190 def non_normal_entries(&self) -> PyResult<NonNormalEntries> {
191 NonNormalEntries::from_inner(py, self.clone_ref(py))
191 NonNormalEntries::from_inner(py, self.clone_ref(py))
192 }
192 }
193
193
194 def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> {
194 def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> {
195 let key = key.extract::<PyBytes>(py)?;
195 let key = key.extract::<PyBytes>(py)?;
196 Ok(self
196 Ok(self
197 .inner(py)
197 .inner(py)
198 .borrow_mut()
198 .borrow_mut()
199 .get_non_normal_other_parent_entries().0
199 .get_non_normal_other_parent_entries().0
200 .contains(HgPath::new(key.data(py))))
200 .contains(HgPath::new(key.data(py))))
201 }
201 }
202
202
203 def non_normal_entries_display(&self) -> PyResult<PyString> {
203 def non_normal_entries_display(&self) -> PyResult<PyString> {
204 Ok(
204 Ok(
205 PyString::new(
205 PyString::new(
206 py,
206 py,
207 &format!(
207 &format!(
208 "NonNormalEntries: {:?}",
208 "NonNormalEntries: {:?}",
209 self
209 self
210 .inner(py)
210 .inner(py)
211 .borrow_mut()
211 .borrow_mut()
212 .get_non_normal_other_parent_entries().0
212 .get_non_normal_other_parent_entries().0
213 .iter().map(|o| o))
213 .iter().map(|o| o))
214 )
214 )
215 )
215 )
216 }
216 }
217
217
218 def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
218 def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
219 let key = key.extract::<PyBytes>(py)?;
219 let key = key.extract::<PyBytes>(py)?;
220 self
220 self
221 .inner(py)
221 .inner(py)
222 .borrow_mut()
222 .borrow_mut()
223 .non_normal_entries_remove(HgPath::new(key.data(py)));
223 .non_normal_entries_remove(HgPath::new(key.data(py)));
224 Ok(py.None())
224 Ok(py.None())
225 }
225 }
226
226
227 def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> {
227 def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> {
228 let other: PyResult<_> = other.iter(py)?
228 let other: PyResult<_> = other.iter(py)?
229 .map(|f| {
229 .map(|f| {
230 Ok(HgPathBuf::from_bytes(
230 Ok(HgPathBuf::from_bytes(
231 f?.extract::<PyBytes>(py)?.data(py),
231 f?.extract::<PyBytes>(py)?.data(py),
232 ))
232 ))
233 })
233 })
234 .collect();
234 .collect();
235
235
236 let res = self
236 let res = self
237 .inner(py)
237 .inner(py)
238 .borrow_mut()
238 .borrow_mut()
239 .non_normal_entries_union(other?);
239 .non_normal_entries_union(other?);
240
240
241 let ret = PyList::new(py, &[]);
241 let ret = PyList::new(py, &[]);
242 for filename in res.iter() {
242 for filename in res.iter() {
243 let as_pystring = PyBytes::new(py, filename.as_bytes());
243 let as_pystring = PyBytes::new(py, filename.as_bytes());
244 ret.append(py, as_pystring.into_object());
244 ret.append(py, as_pystring.into_object());
245 }
245 }
246 Ok(ret)
246 Ok(ret)
247 }
247 }
248
248
249 def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> {
249 def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> {
250 // Make sure the sets are defined before we no longer have a mutable
250 // Make sure the sets are defined before we no longer have a mutable
251 // reference to the dmap.
251 // reference to the dmap.
252 self.inner(py)
252 self.inner(py)
253 .borrow_mut()
253 .borrow_mut()
254 .set_non_normal_other_parent_entries(false);
254 .set_non_normal_other_parent_entries(false);
255
255
256 let leaked_ref = self.inner(py).leak_immutable();
256 let leaked_ref = self.inner(py).leak_immutable();
257
257
258 NonNormalEntriesIterator::from_inner(py, unsafe {
258 NonNormalEntriesIterator::from_inner(py, unsafe {
259 leaked_ref.map(py, |o| {
259 leaked_ref.map(py, |o| {
260 o.get_non_normal_other_parent_entries_panic().0.iter()
260 o.get_non_normal_other_parent_entries_panic().0.iter()
261 })
261 })
262 })
262 })
263 }
263 }
264
264
265 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
265 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
266 let d = d.extract::<PyBytes>(py)?;
266 let d = d.extract::<PyBytes>(py)?;
267 Ok(self.inner(py).borrow_mut()
267 Ok(self.inner(py).borrow_mut()
268 .has_tracked_dir(HgPath::new(d.data(py)))
268 .has_tracked_dir(HgPath::new(d.data(py)))
269 .map_err(|e| {
269 .map_err(|e| {
270 PyErr::new::<exc::ValueError, _>(py, e.to_string())
270 PyErr::new::<exc::ValueError, _>(py, e.to_string())
271 })?
271 })?
272 .to_py_object(py))
272 .to_py_object(py))
273 }
273 }
274
274
275 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
275 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
276 let d = d.extract::<PyBytes>(py)?;
276 let d = d.extract::<PyBytes>(py)?;
277 Ok(self.inner(py).borrow_mut()
277 Ok(self.inner(py).borrow_mut()
278 .has_dir(HgPath::new(d.data(py)))
278 .has_dir(HgPath::new(d.data(py)))
279 .map_err(|e| {
279 .map_err(|e| {
280 PyErr::new::<exc::ValueError, _>(py, e.to_string())
280 PyErr::new::<exc::ValueError, _>(py, e.to_string())
281 })?
281 })?
282 .to_py_object(py))
282 .to_py_object(py))
283 }
283 }
284
284
285 def parents(&self, st: PyObject) -> PyResult<PyTuple> {
285 def parents(&self, st: PyObject) -> PyResult<PyTuple> {
286 self.inner(py).borrow_mut()
286 self.inner(py).borrow_mut()
287 .parents(st.extract::<PyBytes>(py)?.data(py))
287 .parents(st.extract::<PyBytes>(py)?.data(py))
288 .and_then(|d| {
288 .and_then(|d| {
289 Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
289 Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
290 .to_py_object(py))
290 .to_py_object(py))
291 })
291 })
292 .or_else(|_| {
292 .or_else(|_| {
293 Err(PyErr::new::<exc::OSError, _>(
293 Err(PyErr::new::<exc::OSError, _>(
294 py,
294 py,
295 "Dirstate error".to_string(),
295 "Dirstate error".to_string(),
296 ))
296 ))
297 })
297 })
298 }
298 }
299
299
300 def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
300 def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
301 let p1 = extract_node_id(py, &p1)?;
301 let p1 = extract_node_id(py, &p1)?;
302 let p2 = extract_node_id(py, &p2)?;
302 let p2 = extract_node_id(py, &p2)?;
303
303
304 self.inner(py).borrow_mut()
304 self.inner(py).borrow_mut()
305 .set_parents(&DirstateParents { p1, p2 });
305 .set_parents(&DirstateParents { p1, p2 });
306 Ok(py.None())
306 Ok(py.None())
307 }
307 }
308
308
309 def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
309 def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
310 match self.inner(py).borrow_mut()
310 match self.inner(py).borrow_mut()
311 .read(st.extract::<PyBytes>(py)?.data(py))
311 .read(st.extract::<PyBytes>(py)?.data(py))
312 {
312 {
313 Ok(Some(parents)) => Ok(Some(
313 Ok(Some(parents)) => Ok(Some(
314 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
314 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
315 .to_py_object(py)
315 .to_py_object(py)
316 .into_object(),
316 .into_object(),
317 )),
317 )),
318 Ok(None) => Ok(Some(py.None())),
318 Ok(None) => Ok(Some(py.None())),
319 Err(_) => Err(PyErr::new::<exc::OSError, _>(
319 Err(_) => Err(PyErr::new::<exc::OSError, _>(
320 py,
320 py,
321 "Dirstate error".to_string(),
321 "Dirstate error".to_string(),
322 )),
322 )),
323 }
323 }
324 }
324 }
325 def write(
325 def write(
326 &self,
326 &self,
327 p1: PyObject,
327 p1: PyObject,
328 p2: PyObject,
328 p2: PyObject,
329 now: PyObject
329 now: PyObject
330 ) -> PyResult<PyBytes> {
330 ) -> PyResult<PyBytes> {
331 let now = Duration::new(now.extract(py)?, 0);
331 let now = Duration::new(now.extract(py)?, 0);
332 let parents = DirstateParents {
332 let parents = DirstateParents {
333 p1: extract_node_id(py, &p1)?,
333 p1: extract_node_id(py, &p1)?,
334 p2: extract_node_id(py, &p2)?,
334 p2: extract_node_id(py, &p2)?,
335 };
335 };
336
336
337 match self.inner(py).borrow_mut().pack(parents, now) {
337 match self.inner(py).borrow_mut().pack(parents, now) {
338 Ok(packed) => Ok(PyBytes::new(py, &packed)),
338 Ok(packed) => Ok(PyBytes::new(py, &packed)),
339 Err(_) => Err(PyErr::new::<exc::OSError, _>(
339 Err(_) => Err(PyErr::new::<exc::OSError, _>(
340 py,
340 py,
341 "Dirstate error".to_string(),
341 "Dirstate error".to_string(),
342 )),
342 )),
343 }
343 }
344 }
344 }
345
345
346 def filefoldmapasdict(&self) -> PyResult<PyDict> {
346 def filefoldmapasdict(&self) -> PyResult<PyDict> {
347 let dict = PyDict::new(py);
347 let dict = PyDict::new(py);
348 for (key, value) in
348 for (key, value) in
349 self.inner(py).borrow_mut().build_file_fold_map().iter()
349 self.inner(py).borrow_mut().build_file_fold_map().iter()
350 {
350 {
351 dict.set_item(py, key.as_ref().to_vec(), value.as_ref().to_vec())?;
351 dict.set_item(
352 py,
353 key.as_bytes().to_vec(),
354 value.as_bytes().to_vec(),
355 )?;
352 }
356 }
353 Ok(dict)
357 Ok(dict)
354 }
358 }
355
359
356 def __len__(&self) -> PyResult<usize> {
360 def __len__(&self) -> PyResult<usize> {
357 Ok(self.inner(py).borrow().len())
361 Ok(self.inner(py).borrow().len())
358 }
362 }
359
363
360 def __contains__(&self, key: PyObject) -> PyResult<bool> {
364 def __contains__(&self, key: PyObject) -> PyResult<bool> {
361 let key = key.extract::<PyBytes>(py)?;
365 let key = key.extract::<PyBytes>(py)?;
362 Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py))))
366 Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py))))
363 }
367 }
364
368
365 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
369 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
366 let key = key.extract::<PyBytes>(py)?;
370 let key = key.extract::<PyBytes>(py)?;
367 let key = HgPath::new(key.data(py));
371 let key = HgPath::new(key.data(py));
368 match self.inner(py).borrow().get(key) {
372 match self.inner(py).borrow().get(key) {
369 Some(entry) => {
373 Some(entry) => {
370 Ok(make_dirstate_tuple(py, entry)?)
374 Ok(make_dirstate_tuple(py, entry)?)
371 },
375 },
372 None => Err(PyErr::new::<exc::KeyError, _>(
376 None => Err(PyErr::new::<exc::KeyError, _>(
373 py,
377 py,
374 String::from_utf8_lossy(key.as_bytes()),
378 String::from_utf8_lossy(key.as_bytes()),
375 )),
379 )),
376 }
380 }
377 }
381 }
378
382
379 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
383 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
380 let leaked_ref = self.inner(py).leak_immutable();
384 let leaked_ref = self.inner(py).leak_immutable();
381 DirstateMapKeysIterator::from_inner(
385 DirstateMapKeysIterator::from_inner(
382 py,
386 py,
383 unsafe { leaked_ref.map(py, |o| o.iter()) },
387 unsafe { leaked_ref.map(py, |o| o.iter()) },
384 )
388 )
385 }
389 }
386
390
387 def items(&self) -> PyResult<DirstateMapItemsIterator> {
391 def items(&self) -> PyResult<DirstateMapItemsIterator> {
388 let leaked_ref = self.inner(py).leak_immutable();
392 let leaked_ref = self.inner(py).leak_immutable();
389 DirstateMapItemsIterator::from_inner(
393 DirstateMapItemsIterator::from_inner(
390 py,
394 py,
391 unsafe { leaked_ref.map(py, |o| o.iter()) },
395 unsafe { leaked_ref.map(py, |o| o.iter()) },
392 )
396 )
393 }
397 }
394
398
395 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
399 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
396 let leaked_ref = self.inner(py).leak_immutable();
400 let leaked_ref = self.inner(py).leak_immutable();
397 DirstateMapKeysIterator::from_inner(
401 DirstateMapKeysIterator::from_inner(
398 py,
402 py,
399 unsafe { leaked_ref.map(py, |o| o.iter()) },
403 unsafe { leaked_ref.map(py, |o| o.iter()) },
400 )
404 )
401 }
405 }
402
406
403 def getdirs(&self) -> PyResult<Dirs> {
407 def getdirs(&self) -> PyResult<Dirs> {
404 // TODO don't copy, share the reference
408 // TODO don't copy, share the reference
405 self.inner(py).borrow_mut().set_dirs()
409 self.inner(py).borrow_mut().set_dirs()
406 .map_err(|e| {
410 .map_err(|e| {
407 PyErr::new::<exc::ValueError, _>(py, e.to_string())
411 PyErr::new::<exc::ValueError, _>(py, e.to_string())
408 })?;
412 })?;
409 Dirs::from_inner(
413 Dirs::from_inner(
410 py,
414 py,
411 DirsMultiset::from_dirstate(
415 DirsMultiset::from_dirstate(
412 &self.inner(py).borrow(),
416 &self.inner(py).borrow(),
413 Some(EntryState::Removed),
417 Some(EntryState::Removed),
414 )
418 )
415 .map_err(|e| {
419 .map_err(|e| {
416 PyErr::new::<exc::ValueError, _>(py, e.to_string())
420 PyErr::new::<exc::ValueError, _>(py, e.to_string())
417 })?,
421 })?,
418 )
422 )
419 }
423 }
420 def getalldirs(&self) -> PyResult<Dirs> {
424 def getalldirs(&self) -> PyResult<Dirs> {
421 // TODO don't copy, share the reference
425 // TODO don't copy, share the reference
422 self.inner(py).borrow_mut().set_all_dirs()
426 self.inner(py).borrow_mut().set_all_dirs()
423 .map_err(|e| {
427 .map_err(|e| {
424 PyErr::new::<exc::ValueError, _>(py, e.to_string())
428 PyErr::new::<exc::ValueError, _>(py, e.to_string())
425 })?;
429 })?;
426 Dirs::from_inner(
430 Dirs::from_inner(
427 py,
431 py,
428 DirsMultiset::from_dirstate(
432 DirsMultiset::from_dirstate(
429 &self.inner(py).borrow(),
433 &self.inner(py).borrow(),
430 None,
434 None,
431 ).map_err(|e| {
435 ).map_err(|e| {
432 PyErr::new::<exc::ValueError, _>(py, e.to_string())
436 PyErr::new::<exc::ValueError, _>(py, e.to_string())
433 })?,
437 })?,
434 )
438 )
435 }
439 }
436
440
437 // TODO all copymap* methods, see docstring above
441 // TODO all copymap* methods, see docstring above
438 def copymapcopy(&self) -> PyResult<PyDict> {
442 def copymapcopy(&self) -> PyResult<PyDict> {
439 let dict = PyDict::new(py);
443 let dict = PyDict::new(py);
440 for (key, value) in self.inner(py).borrow().copy_map.iter() {
444 for (key, value) in self.inner(py).borrow().copy_map.iter() {
441 dict.set_item(
445 dict.set_item(
442 py,
446 py,
443 PyBytes::new(py, key.as_ref()),
447 PyBytes::new(py, key.as_bytes()),
444 PyBytes::new(py, value.as_ref()),
448 PyBytes::new(py, value.as_bytes()),
445 )?;
449 )?;
446 }
450 }
447 Ok(dict)
451 Ok(dict)
448 }
452 }
449
453
450 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
454 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
451 let key = key.extract::<PyBytes>(py)?;
455 let key = key.extract::<PyBytes>(py)?;
452 match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) {
456 match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) {
453 Some(copy) => Ok(PyBytes::new(py, copy.as_ref())),
457 Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())),
454 None => Err(PyErr::new::<exc::KeyError, _>(
458 None => Err(PyErr::new::<exc::KeyError, _>(
455 py,
459 py,
456 String::from_utf8_lossy(key.data(py)),
460 String::from_utf8_lossy(key.data(py)),
457 )),
461 )),
458 }
462 }
459 }
463 }
460 def copymap(&self) -> PyResult<CopyMap> {
464 def copymap(&self) -> PyResult<CopyMap> {
461 CopyMap::from_inner(py, self.clone_ref(py))
465 CopyMap::from_inner(py, self.clone_ref(py))
462 }
466 }
463
467
464 def copymaplen(&self) -> PyResult<usize> {
468 def copymaplen(&self) -> PyResult<usize> {
465 Ok(self.inner(py).borrow().copy_map.len())
469 Ok(self.inner(py).borrow().copy_map.len())
466 }
470 }
467 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
471 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
468 let key = key.extract::<PyBytes>(py)?;
472 let key = key.extract::<PyBytes>(py)?;
469 Ok(self
473 Ok(self
470 .inner(py)
474 .inner(py)
471 .borrow()
475 .borrow()
472 .copy_map
476 .copy_map
473 .contains_key(HgPath::new(key.data(py))))
477 .contains_key(HgPath::new(key.data(py))))
474 }
478 }
475 def copymapget(
479 def copymapget(
476 &self,
480 &self,
477 key: PyObject,
481 key: PyObject,
478 default: Option<PyObject>
482 default: Option<PyObject>
479 ) -> PyResult<Option<PyObject>> {
483 ) -> PyResult<Option<PyObject>> {
480 let key = key.extract::<PyBytes>(py)?;
484 let key = key.extract::<PyBytes>(py)?;
481 match self
485 match self
482 .inner(py)
486 .inner(py)
483 .borrow()
487 .borrow()
484 .copy_map
488 .copy_map
485 .get(HgPath::new(key.data(py)))
489 .get(HgPath::new(key.data(py)))
486 {
490 {
487 Some(copy) => Ok(Some(
491 Some(copy) => Ok(Some(
488 PyBytes::new(py, copy.as_ref()).into_object(),
492 PyBytes::new(py, copy.as_bytes()).into_object(),
489 )),
493 )),
490 None => Ok(default),
494 None => Ok(default),
491 }
495 }
492 }
496 }
493 def copymapsetitem(
497 def copymapsetitem(
494 &self,
498 &self,
495 key: PyObject,
499 key: PyObject,
496 value: PyObject
500 value: PyObject
497 ) -> PyResult<PyObject> {
501 ) -> PyResult<PyObject> {
498 let key = key.extract::<PyBytes>(py)?;
502 let key = key.extract::<PyBytes>(py)?;
499 let value = value.extract::<PyBytes>(py)?;
503 let value = value.extract::<PyBytes>(py)?;
500 self.inner(py).borrow_mut().copy_map.insert(
504 self.inner(py).borrow_mut().copy_map.insert(
501 HgPathBuf::from_bytes(key.data(py)),
505 HgPathBuf::from_bytes(key.data(py)),
502 HgPathBuf::from_bytes(value.data(py)),
506 HgPathBuf::from_bytes(value.data(py)),
503 );
507 );
504 Ok(py.None())
508 Ok(py.None())
505 }
509 }
506 def copymappop(
510 def copymappop(
507 &self,
511 &self,
508 key: PyObject,
512 key: PyObject,
509 default: Option<PyObject>
513 default: Option<PyObject>
510 ) -> PyResult<Option<PyObject>> {
514 ) -> PyResult<Option<PyObject>> {
511 let key = key.extract::<PyBytes>(py)?;
515 let key = key.extract::<PyBytes>(py)?;
512 match self
516 match self
513 .inner(py)
517 .inner(py)
514 .borrow_mut()
518 .borrow_mut()
515 .copy_map
519 .copy_map
516 .remove(HgPath::new(key.data(py)))
520 .remove(HgPath::new(key.data(py)))
517 {
521 {
518 Some(_) => Ok(None),
522 Some(_) => Ok(None),
519 None => Ok(default),
523 None => Ok(default),
520 }
524 }
521 }
525 }
522
526
523 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
527 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
524 let leaked_ref = self.inner(py).leak_immutable();
528 let leaked_ref = self.inner(py).leak_immutable();
525 CopyMapKeysIterator::from_inner(
529 CopyMapKeysIterator::from_inner(
526 py,
530 py,
527 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
531 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
528 )
532 )
529 }
533 }
530
534
531 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
535 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
532 let leaked_ref = self.inner(py).leak_immutable();
536 let leaked_ref = self.inner(py).leak_immutable();
533 CopyMapItemsIterator::from_inner(
537 CopyMapItemsIterator::from_inner(
534 py,
538 py,
535 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
539 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
536 )
540 )
537 }
541 }
538
542
539 });
543 });
540
544
541 impl DirstateMap {
545 impl DirstateMap {
542 pub fn get_inner<'a>(
546 pub fn get_inner<'a>(
543 &'a self,
547 &'a self,
544 py: Python<'a>,
548 py: Python<'a>,
545 ) -> Ref<'a, RustDirstateMap> {
549 ) -> Ref<'a, RustDirstateMap> {
546 self.inner(py).borrow()
550 self.inner(py).borrow()
547 }
551 }
548 fn translate_key(
552 fn translate_key(
549 py: Python,
553 py: Python,
550 res: (&HgPathBuf, &DirstateEntry),
554 res: (&HgPathBuf, &DirstateEntry),
551 ) -> PyResult<Option<PyBytes>> {
555 ) -> PyResult<Option<PyBytes>> {
552 Ok(Some(PyBytes::new(py, res.0.as_ref())))
556 Ok(Some(PyBytes::new(py, res.0.as_bytes())))
553 }
557 }
554 fn translate_key_value(
558 fn translate_key_value(
555 py: Python,
559 py: Python,
556 res: (&HgPathBuf, &DirstateEntry),
560 res: (&HgPathBuf, &DirstateEntry),
557 ) -> PyResult<Option<(PyBytes, PyObject)>> {
561 ) -> PyResult<Option<(PyBytes, PyObject)>> {
558 let (f, entry) = res;
562 let (f, entry) = res;
559 Ok(Some((
563 Ok(Some((
560 PyBytes::new(py, f.as_ref()),
564 PyBytes::new(py, f.as_bytes()),
561 make_dirstate_tuple(py, entry)?,
565 make_dirstate_tuple(py, entry)?,
562 )))
566 )))
563 }
567 }
564 }
568 }
565
569
566 py_shared_iterator!(
570 py_shared_iterator!(
567 DirstateMapKeysIterator,
571 DirstateMapKeysIterator,
568 UnsafePyLeaked<StateMapIter<'static>>,
572 UnsafePyLeaked<StateMapIter<'static>>,
569 DirstateMap::translate_key,
573 DirstateMap::translate_key,
570 Option<PyBytes>
574 Option<PyBytes>
571 );
575 );
572
576
573 py_shared_iterator!(
577 py_shared_iterator!(
574 DirstateMapItemsIterator,
578 DirstateMapItemsIterator,
575 UnsafePyLeaked<StateMapIter<'static>>,
579 UnsafePyLeaked<StateMapIter<'static>>,
576 DirstateMap::translate_key_value,
580 DirstateMap::translate_key_value,
577 Option<(PyBytes, PyObject)>
581 Option<(PyBytes, PyObject)>
578 );
582 );
579
583
580 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<[u8; PARENT_SIZE]> {
584 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<[u8; PARENT_SIZE]> {
581 let bytes = obj.extract::<PyBytes>(py)?;
585 let bytes = obj.extract::<PyBytes>(py)?;
582 match bytes.data(py).try_into() {
586 match bytes.data(py).try_into() {
583 Ok(s) => Ok(s),
587 Ok(s) => Ok(s),
584 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
588 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
585 }
589 }
586 }
590 }
@@ -1,76 +1,76
1 // non_normal_other_parent_entries.rs
1 // non_normal_other_parent_entries.rs
2 //
2 //
3 // Copyright 2020 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2020 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use cpython::{
8 use cpython::{
9 exc::NotImplementedError, CompareOp, ObjectProtocol, PyBytes, PyClone,
9 exc::NotImplementedError, CompareOp, ObjectProtocol, PyBytes, PyClone,
10 PyErr, PyList, PyObject, PyResult, PyString, Python, PythonObject,
10 PyErr, PyList, PyObject, PyResult, PyString, Python, PythonObject,
11 ToPyObject, UnsafePyLeaked,
11 ToPyObject, UnsafePyLeaked,
12 };
12 };
13
13
14 use crate::dirstate::DirstateMap;
14 use crate::dirstate::DirstateMap;
15 use hg::utils::hg_path::HgPathBuf;
15 use hg::utils::hg_path::HgPathBuf;
16 use std::cell::RefCell;
16 use std::cell::RefCell;
17 use std::collections::hash_set;
17 use std::collections::hash_set;
18
18
19 py_class!(pub class NonNormalEntries |py| {
19 py_class!(pub class NonNormalEntries |py| {
20 data dmap: DirstateMap;
20 data dmap: DirstateMap;
21
21
22 def __contains__(&self, key: PyObject) -> PyResult<bool> {
22 def __contains__(&self, key: PyObject) -> PyResult<bool> {
23 self.dmap(py).non_normal_entries_contains(py, key)
23 self.dmap(py).non_normal_entries_contains(py, key)
24 }
24 }
25 def remove(&self, key: PyObject) -> PyResult<PyObject> {
25 def remove(&self, key: PyObject) -> PyResult<PyObject> {
26 self.dmap(py).non_normal_entries_remove(py, key)
26 self.dmap(py).non_normal_entries_remove(py, key)
27 }
27 }
28 def union(&self, other: PyObject) -> PyResult<PyList> {
28 def union(&self, other: PyObject) -> PyResult<PyList> {
29 self.dmap(py).non_normal_entries_union(py, other)
29 self.dmap(py).non_normal_entries_union(py, other)
30 }
30 }
31 def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> {
31 def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> {
32 match op {
32 match op {
33 CompareOp::Eq => self.is_equal_to(py, other),
33 CompareOp::Eq => self.is_equal_to(py, other),
34 CompareOp::Ne => Ok(!self.is_equal_to(py, other)?),
34 CompareOp::Ne => Ok(!self.is_equal_to(py, other)?),
35 _ => Err(PyErr::new::<NotImplementedError, _>(py, ""))
35 _ => Err(PyErr::new::<NotImplementedError, _>(py, ""))
36 }
36 }
37 }
37 }
38 def __repr__(&self) -> PyResult<PyString> {
38 def __repr__(&self) -> PyResult<PyString> {
39 self.dmap(py).non_normal_entries_display(py)
39 self.dmap(py).non_normal_entries_display(py)
40 }
40 }
41
41
42 def __iter__(&self) -> PyResult<NonNormalEntriesIterator> {
42 def __iter__(&self) -> PyResult<NonNormalEntriesIterator> {
43 self.dmap(py).non_normal_entries_iter(py)
43 self.dmap(py).non_normal_entries_iter(py)
44 }
44 }
45 });
45 });
46
46
47 impl NonNormalEntries {
47 impl NonNormalEntries {
48 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
48 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
49 Self::create_instance(py, dm)
49 Self::create_instance(py, dm)
50 }
50 }
51
51
52 fn is_equal_to(&self, py: Python, other: PyObject) -> PyResult<bool> {
52 fn is_equal_to(&self, py: Python, other: PyObject) -> PyResult<bool> {
53 for item in other.iter(py)? {
53 for item in other.iter(py)? {
54 if !self.dmap(py).non_normal_entries_contains(py, item?)? {
54 if !self.dmap(py).non_normal_entries_contains(py, item?)? {
55 return Ok(false);
55 return Ok(false);
56 }
56 }
57 }
57 }
58 Ok(true)
58 Ok(true)
59 }
59 }
60
60
61 fn translate_key(
61 fn translate_key(
62 py: Python,
62 py: Python,
63 key: &HgPathBuf,
63 key: &HgPathBuf,
64 ) -> PyResult<Option<PyBytes>> {
64 ) -> PyResult<Option<PyBytes>> {
65 Ok(Some(PyBytes::new(py, key.as_ref())))
65 Ok(Some(PyBytes::new(py, key.as_bytes())))
66 }
66 }
67 }
67 }
68
68
69 type NonNormalEntriesIter<'a> = hash_set::Iter<'a, HgPathBuf>;
69 type NonNormalEntriesIter<'a> = hash_set::Iter<'a, HgPathBuf>;
70
70
71 py_shared_iterator!(
71 py_shared_iterator!(
72 NonNormalEntriesIterator,
72 NonNormalEntriesIterator,
73 UnsafePyLeaked<NonNormalEntriesIter<'static>>,
73 UnsafePyLeaked<NonNormalEntriesIter<'static>>,
74 NonNormalEntries::translate_key,
74 NonNormalEntries::translate_key,
75 Option<PyBytes>
75 Option<PyBytes>
76 );
76 );
@@ -1,303 +1,301
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::status` module provided by the
8 //! Bindings for the `hg::status` module provided by the
9 //! `hg-core` crate. From Python, this will be seen as
9 //! `hg-core` crate. From Python, this will be seen as
10 //! `rustext.dirstate.status`.
10 //! `rustext.dirstate.status`.
11
11
12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
13 use cpython::{
13 use cpython::{
14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
16 };
16 };
17 use hg::{
17 use hg::{
18 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
18 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
19 parse_pattern_syntax, status,
19 parse_pattern_syntax, status,
20 utils::{
20 utils::{
21 files::{get_bytes_from_path, get_path_from_bytes},
21 files::{get_bytes_from_path, get_path_from_bytes},
22 hg_path::{HgPath, HgPathBuf},
22 hg_path::{HgPath, HgPathBuf},
23 },
23 },
24 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
24 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
25 StatusOptions,
25 StatusOptions,
26 };
26 };
27 use std::borrow::{Borrow, Cow};
27 use std::borrow::{Borrow, Cow};
28
28
29 /// This will be useless once trait impls for collection are added to `PyBytes`
29 /// This will be useless once trait impls for collection are added to `PyBytes`
30 /// upstream.
30 /// upstream.
31 fn collect_pybytes_list(
31 fn collect_pybytes_list(
32 py: Python,
32 py: Python,
33 collection: &[impl AsRef<HgPath>],
33 collection: &[impl AsRef<HgPath>],
34 ) -> PyList {
34 ) -> PyList {
35 let list = PyList::new(py, &[]);
35 let list = PyList::new(py, &[]);
36
36
37 for path in collection.iter() {
37 for path in collection.iter() {
38 list.append(
38 list.append(
39 py,
39 py,
40 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
40 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
41 )
41 )
42 }
42 }
43
43
44 list
44 list
45 }
45 }
46
46
47 fn collect_bad_matches(
47 fn collect_bad_matches(
48 py: Python,
48 py: Python,
49 collection: &[(impl AsRef<HgPath>, BadMatch)],
49 collection: &[(impl AsRef<HgPath>, BadMatch)],
50 ) -> PyResult<PyList> {
50 ) -> PyResult<PyList> {
51 let list = PyList::new(py, &[]);
51 let list = PyList::new(py, &[]);
52
52
53 let os = py.import("os")?;
53 let os = py.import("os")?;
54 let get_error_message = |code: i32| -> PyResult<_> {
54 let get_error_message = |code: i32| -> PyResult<_> {
55 os.call(
55 os.call(
56 py,
56 py,
57 "strerror",
57 "strerror",
58 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
58 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
59 None,
59 None,
60 )
60 )
61 };
61 };
62
62
63 for (path, bad_match) in collection.iter() {
63 for (path, bad_match) in collection.iter() {
64 let message = match bad_match {
64 let message = match bad_match {
65 BadMatch::OsError(code) => get_error_message(*code)?,
65 BadMatch::OsError(code) => get_error_message(*code)?,
66 BadMatch::BadType(bad_type) => format!(
66 BadMatch::BadType(bad_type) => format!(
67 "unsupported file type (type is {})",
67 "unsupported file type (type is {})",
68 bad_type.to_string()
68 bad_type.to_string()
69 )
69 )
70 .to_py_object(py)
70 .to_py_object(py)
71 .into_object(),
71 .into_object(),
72 };
72 };
73 list.append(
73 list.append(
74 py,
74 py,
75 (PyBytes::new(py, path.as_ref().as_bytes()), message)
75 (PyBytes::new(py, path.as_ref().as_bytes()), message)
76 .to_py_object(py)
76 .to_py_object(py)
77 .into_object(),
77 .into_object(),
78 )
78 )
79 }
79 }
80
80
81 Ok(list)
81 Ok(list)
82 }
82 }
83
83
84 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
84 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
85 match err {
85 match err {
86 StatusError::Pattern(e) => {
86 StatusError::Pattern(e) => {
87 let as_string = e.to_string();
87 let as_string = e.to_string();
88 log::trace!("Rust status fallback: `{}`", &as_string);
88 log::trace!("Rust status fallback: `{}`", &as_string);
89
89
90 PyErr::new::<FallbackError, _>(py, &as_string)
90 PyErr::new::<FallbackError, _>(py, &as_string)
91 }
91 }
92 e => PyErr::new::<ValueError, _>(py, e.to_string()),
92 e => PyErr::new::<ValueError, _>(py, e.to_string()),
93 }
93 }
94 }
94 }
95
95
96 pub fn status_wrapper(
96 pub fn status_wrapper(
97 py: Python,
97 py: Python,
98 dmap: DirstateMap,
98 dmap: DirstateMap,
99 matcher: PyObject,
99 matcher: PyObject,
100 root_dir: PyObject,
100 root_dir: PyObject,
101 ignore_files: PyList,
101 ignore_files: PyList,
102 check_exec: bool,
102 check_exec: bool,
103 last_normal_time: i64,
103 last_normal_time: i64,
104 list_clean: bool,
104 list_clean: bool,
105 list_ignored: bool,
105 list_ignored: bool,
106 list_unknown: bool,
106 list_unknown: bool,
107 collect_traversed_dirs: bool,
107 collect_traversed_dirs: bool,
108 ) -> PyResult<PyTuple> {
108 ) -> PyResult<PyTuple> {
109 let bytes = root_dir.extract::<PyBytes>(py)?;
109 let bytes = root_dir.extract::<PyBytes>(py)?;
110 let root_dir = get_path_from_bytes(bytes.data(py));
110 let root_dir = get_path_from_bytes(bytes.data(py));
111
111
112 let dmap: DirstateMap = dmap.to_py_object(py);
112 let dmap: DirstateMap = dmap.to_py_object(py);
113 let dmap = dmap.get_inner(py);
113 let dmap = dmap.get_inner(py);
114
114
115 let ignore_files: PyResult<Vec<_>> = ignore_files
115 let ignore_files: PyResult<Vec<_>> = ignore_files
116 .iter(py)
116 .iter(py)
117 .map(|b| {
117 .map(|b| {
118 let file = b.extract::<PyBytes>(py)?;
118 let file = b.extract::<PyBytes>(py)?;
119 Ok(get_path_from_bytes(file.data(py)).to_owned())
119 Ok(get_path_from_bytes(file.data(py)).to_owned())
120 })
120 })
121 .collect();
121 .collect();
122 let ignore_files = ignore_files?;
122 let ignore_files = ignore_files?;
123
123
124 match matcher.get_type(py).name(py).borrow() {
124 match matcher.get_type(py).name(py).borrow() {
125 "alwaysmatcher" => {
125 "alwaysmatcher" => {
126 let matcher = AlwaysMatcher;
126 let matcher = AlwaysMatcher;
127 let ((lookup, status_res), warnings) = status(
127 let ((lookup, status_res), warnings) = status(
128 &dmap,
128 &dmap,
129 &matcher,
129 &matcher,
130 &root_dir,
130 &root_dir,
131 ignore_files,
131 ignore_files,
132 StatusOptions {
132 StatusOptions {
133 check_exec,
133 check_exec,
134 last_normal_time,
134 last_normal_time,
135 list_clean,
135 list_clean,
136 list_ignored,
136 list_ignored,
137 list_unknown,
137 list_unknown,
138 collect_traversed_dirs,
138 collect_traversed_dirs,
139 },
139 },
140 )
140 )
141 .map_err(|e| handle_fallback(py, e))?;
141 .map_err(|e| handle_fallback(py, e))?;
142 build_response(py, lookup, status_res, warnings)
142 build_response(py, lookup, status_res, warnings)
143 }
143 }
144 "exactmatcher" => {
144 "exactmatcher" => {
145 let files = matcher.call_method(
145 let files = matcher.call_method(
146 py,
146 py,
147 "files",
147 "files",
148 PyTuple::new(py, &[]),
148 PyTuple::new(py, &[]),
149 None,
149 None,
150 )?;
150 )?;
151 let files: PyList = files.cast_into(py)?;
151 let files: PyList = files.cast_into(py)?;
152 let files: PyResult<Vec<HgPathBuf>> = files
152 let files: PyResult<Vec<HgPathBuf>> = files
153 .iter(py)
153 .iter(py)
154 .map(|f| {
154 .map(|f| {
155 Ok(HgPathBuf::from_bytes(
155 Ok(HgPathBuf::from_bytes(
156 f.extract::<PyBytes>(py)?.data(py),
156 f.extract::<PyBytes>(py)?.data(py),
157 ))
157 ))
158 })
158 })
159 .collect();
159 .collect();
160
160
161 let files = files?;
161 let files = files?;
162 let matcher = FileMatcher::new(&files)
162 let matcher = FileMatcher::new(&files)
163 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
163 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
164 let ((lookup, status_res), warnings) = status(
164 let ((lookup, status_res), warnings) = status(
165 &dmap,
165 &dmap,
166 &matcher,
166 &matcher,
167 &root_dir,
167 &root_dir,
168 ignore_files,
168 ignore_files,
169 StatusOptions {
169 StatusOptions {
170 check_exec,
170 check_exec,
171 last_normal_time,
171 last_normal_time,
172 list_clean,
172 list_clean,
173 list_ignored,
173 list_ignored,
174 list_unknown,
174 list_unknown,
175 collect_traversed_dirs,
175 collect_traversed_dirs,
176 },
176 },
177 )
177 )
178 .map_err(|e| handle_fallback(py, e))?;
178 .map_err(|e| handle_fallback(py, e))?;
179 build_response(py, lookup, status_res, warnings)
179 build_response(py, lookup, status_res, warnings)
180 }
180 }
181 "includematcher" => {
181 "includematcher" => {
182 // Get the patterns from Python even though most of them are
182 // Get the patterns from Python even though most of them are
183 // redundant with those we will parse later on, as they include
183 // redundant with those we will parse later on, as they include
184 // those passed from the command line.
184 // those passed from the command line.
185 let ignore_patterns: PyResult<Vec<_>> = matcher
185 let ignore_patterns: PyResult<Vec<_>> = matcher
186 .getattr(py, "_kindpats")?
186 .getattr(py, "_kindpats")?
187 .iter(py)?
187 .iter(py)?
188 .map(|k| {
188 .map(|k| {
189 let k = k?;
189 let k = k?;
190 let syntax = parse_pattern_syntax(
190 let syntax = parse_pattern_syntax(
191 &[
191 &[
192 k.get_item(py, 0)?
192 k.get_item(py, 0)?
193 .extract::<PyBytes>(py)?
193 .extract::<PyBytes>(py)?
194 .data(py),
194 .data(py),
195 &b":"[..],
195 &b":"[..],
196 ]
196 ]
197 .concat(),
197 .concat(),
198 )
198 )
199 .map_err(|e| {
199 .map_err(|e| {
200 handle_fallback(py, StatusError::Pattern(e))
200 handle_fallback(py, StatusError::Pattern(e))
201 })?;
201 })?;
202 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
202 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
203 let pattern = pattern.data(py);
203 let pattern = pattern.data(py);
204 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
204 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
205 let source = get_path_from_bytes(source.data(py));
205 let source = get_path_from_bytes(source.data(py));
206 let new = IgnorePattern::new(syntax, pattern, source);
206 let new = IgnorePattern::new(syntax, pattern, source);
207 Ok(new)
207 Ok(new)
208 })
208 })
209 .collect();
209 .collect();
210
210
211 let ignore_patterns = ignore_patterns?;
211 let ignore_patterns = ignore_patterns?;
212 let mut all_warnings = vec![];
212 let mut all_warnings = vec![];
213
213
214 let (matcher, warnings) =
214 let (matcher, warnings) =
215 IncludeMatcher::new(ignore_patterns, &root_dir)
215 IncludeMatcher::new(ignore_patterns, &root_dir)
216 .map_err(|e| handle_fallback(py, e.into()))?;
216 .map_err(|e| handle_fallback(py, e.into()))?;
217 all_warnings.extend(warnings);
217 all_warnings.extend(warnings);
218
218
219 let ((lookup, status_res), warnings) = status(
219 let ((lookup, status_res), warnings) = status(
220 &dmap,
220 &dmap,
221 &matcher,
221 &matcher,
222 &root_dir,
222 &root_dir,
223 ignore_files,
223 ignore_files,
224 StatusOptions {
224 StatusOptions {
225 check_exec,
225 check_exec,
226 last_normal_time,
226 last_normal_time,
227 list_clean,
227 list_clean,
228 list_ignored,
228 list_ignored,
229 list_unknown,
229 list_unknown,
230 collect_traversed_dirs,
230 collect_traversed_dirs,
231 },
231 },
232 )
232 )
233 .map_err(|e| handle_fallback(py, e))?;
233 .map_err(|e| handle_fallback(py, e))?;
234
234
235 all_warnings.extend(warnings);
235 all_warnings.extend(warnings);
236
236
237 build_response(py, lookup, status_res, all_warnings)
237 build_response(py, lookup, status_res, all_warnings)
238 }
238 }
239 e => {
239 e => Err(PyErr::new::<ValueError, _>(
240 return Err(PyErr::new::<ValueError, _>(
240 py,
241 py,
241 format!("Unsupported matcher {}", e),
242 format!("Unsupported matcher {}", e),
242 )),
243 ));
244 }
245 }
243 }
246 }
244 }
247
245
248 fn build_response(
246 fn build_response(
249 py: Python,
247 py: Python,
250 lookup: Vec<Cow<HgPath>>,
248 lookup: Vec<Cow<HgPath>>,
251 status_res: DirstateStatus,
249 status_res: DirstateStatus,
252 warnings: Vec<PatternFileWarning>,
250 warnings: Vec<PatternFileWarning>,
253 ) -> PyResult<PyTuple> {
251 ) -> PyResult<PyTuple> {
254 let modified = collect_pybytes_list(py, status_res.modified.as_ref());
252 let modified = collect_pybytes_list(py, status_res.modified.as_ref());
255 let added = collect_pybytes_list(py, status_res.added.as_ref());
253 let added = collect_pybytes_list(py, status_res.added.as_ref());
256 let removed = collect_pybytes_list(py, status_res.removed.as_ref());
254 let removed = collect_pybytes_list(py, status_res.removed.as_ref());
257 let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
255 let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
258 let clean = collect_pybytes_list(py, status_res.clean.as_ref());
256 let clean = collect_pybytes_list(py, status_res.clean.as_ref());
259 let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
257 let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
260 let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
258 let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
261 let lookup = collect_pybytes_list(py, lookup.as_ref());
259 let lookup = collect_pybytes_list(py, lookup.as_ref());
262 let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
260 let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
263 let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
261 let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
264 let py_warnings = PyList::new(py, &[]);
262 let py_warnings = PyList::new(py, &[]);
265 for warning in warnings.iter() {
263 for warning in warnings.iter() {
266 // We use duck-typing on the Python side for dispatch, good enough for
264 // We use duck-typing on the Python side for dispatch, good enough for
267 // now.
265 // now.
268 match warning {
266 match warning {
269 PatternFileWarning::InvalidSyntax(file, syn) => {
267 PatternFileWarning::InvalidSyntax(file, syn) => {
270 py_warnings.append(
268 py_warnings.append(
271 py,
269 py,
272 (
270 (
273 PyBytes::new(py, &get_bytes_from_path(&file)),
271 PyBytes::new(py, &get_bytes_from_path(&file)),
274 PyBytes::new(py, syn),
272 PyBytes::new(py, syn),
275 )
273 )
276 .to_py_object(py)
274 .to_py_object(py)
277 .into_object(),
275 .into_object(),
278 );
276 );
279 }
277 }
280 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
278 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
281 py,
279 py,
282 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
280 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
283 ),
281 ),
284 }
282 }
285 }
283 }
286
284
287 Ok(PyTuple::new(
285 Ok(PyTuple::new(
288 py,
286 py,
289 &[
287 &[
290 lookup.into_object(),
288 lookup.into_object(),
291 modified.into_object(),
289 modified.into_object(),
292 added.into_object(),
290 added.into_object(),
293 removed.into_object(),
291 removed.into_object(),
294 deleted.into_object(),
292 deleted.into_object(),
295 clean.into_object(),
293 clean.into_object(),
296 ignored.into_object(),
294 ignored.into_object(),
297 unknown.into_object(),
295 unknown.into_object(),
298 py_warnings.into_object(),
296 py_warnings.into_object(),
299 bad.into_object(),
297 bad.into_object(),
300 traversed.into_object(),
298 traversed.into_object(),
301 ][..],
299 ][..],
302 ))
300 ))
303 }
301 }
@@ -1,175 +1,175
1 // parsers.rs
1 // parsers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for the `hg::dirstate::parsers` module provided by the
8 //! Bindings for the `hg::dirstate::parsers` module provided by the
9 //! `hg-core` package.
9 //! `hg-core` package.
10 //!
10 //!
11 //! From Python, this will be seen as `mercurial.rustext.parsers`
11 //! From Python, this will be seen as `mercurial.rustext.parsers`
12 use cpython::{
12 use cpython::{
13 exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
13 exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
14 PythonObject, ToPyObject,
14 PythonObject, ToPyObject,
15 };
15 };
16 use hg::{
16 use hg::{
17 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
17 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
18 DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
18 DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
19 PARENT_SIZE,
19 PARENT_SIZE,
20 };
20 };
21 use std::convert::TryInto;
21 use std::convert::TryInto;
22
22
23 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
23 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
24 use std::time::Duration;
24 use std::time::Duration;
25
25
26 fn parse_dirstate_wrapper(
26 fn parse_dirstate_wrapper(
27 py: Python,
27 py: Python,
28 dmap: PyDict,
28 dmap: PyDict,
29 copymap: PyDict,
29 copymap: PyDict,
30 st: PyBytes,
30 st: PyBytes,
31 ) -> PyResult<PyTuple> {
31 ) -> PyResult<PyTuple> {
32 let mut dirstate_map = FastHashMap::default();
32 let mut dirstate_map = FastHashMap::default();
33 let mut copies = FastHashMap::default();
33 let mut copies = FastHashMap::default();
34
34
35 match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
35 match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
36 Ok(parents) => {
36 Ok(parents) => {
37 for (filename, entry) in &dirstate_map {
37 for (filename, entry) in &dirstate_map {
38 dmap.set_item(
38 dmap.set_item(
39 py,
39 py,
40 PyBytes::new(py, filename.as_ref()),
40 PyBytes::new(py, filename.as_bytes()),
41 make_dirstate_tuple(py, entry)?,
41 make_dirstate_tuple(py, entry)?,
42 )?;
42 )?;
43 }
43 }
44 for (path, copy_path) in copies {
44 for (path, copy_path) in copies {
45 copymap.set_item(
45 copymap.set_item(
46 py,
46 py,
47 PyBytes::new(py, path.as_ref()),
47 PyBytes::new(py, path.as_bytes()),
48 PyBytes::new(py, copy_path.as_ref()),
48 PyBytes::new(py, copy_path.as_bytes()),
49 )?;
49 )?;
50 }
50 }
51 Ok(
51 Ok(
52 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
52 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
53 .to_py_object(py),
53 .to_py_object(py),
54 )
54 )
55 }
55 }
56 Err(e) => Err(PyErr::new::<exc::ValueError, _>(
56 Err(e) => Err(PyErr::new::<exc::ValueError, _>(
57 py,
57 py,
58 match e {
58 match e {
59 DirstateParseError::TooLittleData => {
59 DirstateParseError::TooLittleData => {
60 "too little data for parents".to_string()
60 "too little data for parents".to_string()
61 }
61 }
62 DirstateParseError::Overflow => {
62 DirstateParseError::Overflow => {
63 "overflow in dirstate".to_string()
63 "overflow in dirstate".to_string()
64 }
64 }
65 DirstateParseError::CorruptedEntry(e) => e,
65 DirstateParseError::CorruptedEntry(e) => e,
66 DirstateParseError::Damaged => {
66 DirstateParseError::Damaged => {
67 "dirstate appears to be damaged".to_string()
67 "dirstate appears to be damaged".to_string()
68 }
68 }
69 },
69 },
70 )),
70 )),
71 }
71 }
72 }
72 }
73
73
74 fn pack_dirstate_wrapper(
74 fn pack_dirstate_wrapper(
75 py: Python,
75 py: Python,
76 dmap: PyDict,
76 dmap: PyDict,
77 copymap: PyDict,
77 copymap: PyDict,
78 pl: PyTuple,
78 pl: PyTuple,
79 now: PyInt,
79 now: PyInt,
80 ) -> PyResult<PyBytes> {
80 ) -> PyResult<PyBytes> {
81 let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
81 let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
82 let p1: &[u8] = p1.data(py);
82 let p1: &[u8] = p1.data(py);
83 let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
83 let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
84 let p2: &[u8] = p2.data(py);
84 let p2: &[u8] = p2.data(py);
85
85
86 let mut dirstate_map = extract_dirstate(py, &dmap)?;
86 let mut dirstate_map = extract_dirstate(py, &dmap)?;
87
87
88 let copies: Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr> = copymap
88 let copies: Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr> = copymap
89 .items(py)
89 .items(py)
90 .iter()
90 .iter()
91 .map(|(key, value)| {
91 .map(|(key, value)| {
92 Ok((
92 Ok((
93 HgPathBuf::from_bytes(key.extract::<PyBytes>(py)?.data(py)),
93 HgPathBuf::from_bytes(key.extract::<PyBytes>(py)?.data(py)),
94 HgPathBuf::from_bytes(value.extract::<PyBytes>(py)?.data(py)),
94 HgPathBuf::from_bytes(value.extract::<PyBytes>(py)?.data(py)),
95 ))
95 ))
96 })
96 })
97 .collect();
97 .collect();
98
98
99 if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE {
99 if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE {
100 return Err(PyErr::new::<exc::ValueError, _>(
100 return Err(PyErr::new::<exc::ValueError, _>(
101 py,
101 py,
102 "expected a 20-byte hash".to_string(),
102 "expected a 20-byte hash".to_string(),
103 ));
103 ));
104 }
104 }
105
105
106 match pack_dirstate(
106 match pack_dirstate(
107 &mut dirstate_map,
107 &mut dirstate_map,
108 &copies?,
108 &copies?,
109 DirstateParents {
109 DirstateParents {
110 p1: p1.try_into().unwrap(),
110 p1: p1.try_into().unwrap(),
111 p2: p2.try_into().unwrap(),
111 p2: p2.try_into().unwrap(),
112 },
112 },
113 Duration::from_secs(now.as_object().extract::<u64>(py)?),
113 Duration::from_secs(now.as_object().extract::<u64>(py)?),
114 ) {
114 ) {
115 Ok(packed) => {
115 Ok(packed) => {
116 for (filename, entry) in &dirstate_map {
116 for (filename, entry) in &dirstate_map {
117 dmap.set_item(
117 dmap.set_item(
118 py,
118 py,
119 PyBytes::new(py, filename.as_ref()),
119 PyBytes::new(py, filename.as_bytes()),
120 make_dirstate_tuple(py, entry)?,
120 make_dirstate_tuple(py, entry)?,
121 )?;
121 )?;
122 }
122 }
123 Ok(PyBytes::new(py, &packed))
123 Ok(PyBytes::new(py, &packed))
124 }
124 }
125 Err(error) => Err(PyErr::new::<exc::ValueError, _>(
125 Err(error) => Err(PyErr::new::<exc::ValueError, _>(
126 py,
126 py,
127 match error {
127 match error {
128 DirstatePackError::CorruptedParent => {
128 DirstatePackError::CorruptedParent => {
129 "expected a 20-byte hash".to_string()
129 "expected a 20-byte hash".to_string()
130 }
130 }
131 DirstatePackError::CorruptedEntry(e) => e,
131 DirstatePackError::CorruptedEntry(e) => e,
132 DirstatePackError::BadSize(expected, actual) => {
132 DirstatePackError::BadSize(expected, actual) => {
133 format!("bad dirstate size: {} != {}", actual, expected)
133 format!("bad dirstate size: {} != {}", actual, expected)
134 }
134 }
135 },
135 },
136 )),
136 )),
137 }
137 }
138 }
138 }
139
139
140 /// Create the module, with `__package__` given from parent
140 /// Create the module, with `__package__` given from parent
141 pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
141 pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
142 let dotted_name = &format!("{}.parsers", package);
142 let dotted_name = &format!("{}.parsers", package);
143 let m = PyModule::new(py, dotted_name)?;
143 let m = PyModule::new(py, dotted_name)?;
144
144
145 m.add(py, "__package__", package)?;
145 m.add(py, "__package__", package)?;
146 m.add(py, "__doc__", "Parsers - Rust implementation")?;
146 m.add(py, "__doc__", "Parsers - Rust implementation")?;
147
147
148 m.add(
148 m.add(
149 py,
149 py,
150 "parse_dirstate",
150 "parse_dirstate",
151 py_fn!(
151 py_fn!(
152 py,
152 py,
153 parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
153 parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
154 ),
154 ),
155 )?;
155 )?;
156 m.add(
156 m.add(
157 py,
157 py,
158 "pack_dirstate",
158 "pack_dirstate",
159 py_fn!(
159 py_fn!(
160 py,
160 py,
161 pack_dirstate_wrapper(
161 pack_dirstate_wrapper(
162 dmap: PyDict,
162 dmap: PyDict,
163 copymap: PyDict,
163 copymap: PyDict,
164 pl: PyTuple,
164 pl: PyTuple,
165 now: PyInt
165 now: PyInt
166 )
166 )
167 ),
167 ),
168 )?;
168 )?;
169
169
170 let sys = PyModule::import(py, "sys")?;
170 let sys = PyModule::import(py, "sys")?;
171 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
171 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
172 sys_modules.set_item(py, dotted_name, &m)?;
172 sys_modules.set_item(py, dotted_name, &m)?;
173
173
174 Ok(m)
174 Ok(m)
175 }
175 }
@@ -1,47 +1,44
1 use cpython::exc::ValueError;
1 use cpython::exc::ValueError;
2 use cpython::{PyBytes, PyDict, PyErr, PyObject, PyResult, PyTuple, Python};
2 use cpython::{PyBytes, PyDict, PyErr, PyObject, PyResult, PyTuple, Python};
3 use hg::revlog::Node;
3 use hg::revlog::Node;
4 use std::convert::TryFrom;
4 use std::convert::TryFrom;
5
5
6 #[allow(unused)]
6 #[allow(unused)]
7 pub fn print_python_trace(py: Python) -> PyResult<PyObject> {
7 pub fn print_python_trace(py: Python) -> PyResult<PyObject> {
8 eprintln!("===============================");
8 eprintln!("===============================");
9 eprintln!("Printing Python stack from Rust");
9 eprintln!("Printing Python stack from Rust");
10 eprintln!("===============================");
10 eprintln!("===============================");
11 let traceback = py.import("traceback")?;
11 let traceback = py.import("traceback")?;
12 let sys = py.import("sys")?;
12 let sys = py.import("sys")?;
13 let kwargs = PyDict::new(py);
13 let kwargs = PyDict::new(py);
14 kwargs.set_item(py, "file", sys.get(py, "stderr")?)?;
14 kwargs.set_item(py, "file", sys.get(py, "stderr")?)?;
15 traceback.call(py, "print_stack", PyTuple::new(py, &[]), Some(&kwargs))
15 traceback.call(py, "print_stack", PyTuple::new(py, &[]), Some(&kwargs))
16 }
16 }
17
17
18 // Necessary evil for the time being, could maybe be moved to
18 // Necessary evil for the time being, could maybe be moved to
19 // a TryFrom in Node itself
19 // a TryFrom in Node itself
20 const NODE_BYTES_LENGTH: usize = 20;
20 const NODE_BYTES_LENGTH: usize = 20;
21 type NodeData = [u8; NODE_BYTES_LENGTH];
21 type NodeData = [u8; NODE_BYTES_LENGTH];
22
22
23 /// Copy incoming Python bytes given as `PyObject` into `Node`,
23 /// Copy incoming Python bytes given as `PyObject` into `Node`,
24 /// doing the necessary checks
24 /// doing the necessary checks
25 pub fn node_from_py_object<'a>(
25 pub fn node_from_py_object<'a>(
26 py: Python,
26 py: Python,
27 bytes: &'a PyObject,
27 bytes: &'a PyObject,
28 ) -> PyResult<Node> {
28 ) -> PyResult<Node> {
29 let as_py_bytes: &'a PyBytes = bytes.extract(py)?;
29 let as_py_bytes: &'a PyBytes = bytes.extract(py)?;
30 node_from_py_bytes(py, as_py_bytes)
30 node_from_py_bytes(py, as_py_bytes)
31 }
31 }
32
32
33 /// Clone incoming Python bytes given as `PyBytes` as a `Node`,
33 /// Clone incoming Python bytes given as `PyBytes` as a `Node`,
34 /// doing the necessary checks.
34 /// doing the necessary checks.
35 pub fn node_from_py_bytes<'a>(
35 pub fn node_from_py_bytes(py: Python, bytes: &PyBytes) -> PyResult<Node> {
36 py: Python,
37 bytes: &'a PyBytes,
38 ) -> PyResult<Node> {
39 <NodeData>::try_from(bytes.data(py))
36 <NodeData>::try_from(bytes.data(py))
40 .map_err(|_| {
37 .map_err(|_| {
41 PyErr::new::<ValueError, _>(
38 PyErr::new::<ValueError, _>(
42 py,
39 py,
43 format!("{}-byte hash required", NODE_BYTES_LENGTH),
40 format!("{}-byte hash required", NODE_BYTES_LENGTH),
44 )
41 )
45 })
42 })
46 .map(|n| n.into())
43 .map(Into::into)
47 }
44 }
General Comments 0
You need to be logged in to leave comments. Login now