@@ -1,787 +1,787 b'' | |||
|
1 | 1 | // ancestors.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2018 Georges Racinet <gracinet@anybox.fr> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Rust versions of generic DAG ancestors algorithms for Mercurial |
|
9 | 9 | |
|
10 | 10 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
11 | 11 | use crate::dagops; |
|
12 | 12 | use std::cmp::max; |
|
13 | 13 | use std::collections::{BinaryHeap, HashSet}; |
|
14 | 14 | |
|
15 | 15 | /// Iterator over the ancestors of a given list of revisions |
|
16 | 16 | /// This is a generic type, defined and implemented for any Graph, so that |
|
17 | 17 | /// it's easy to |
|
18 | 18 | /// |
|
19 | 19 | /// - unit test in pure Rust |
|
20 | 20 | /// - bind to main Mercurial code, potentially in several ways and have these |
|
21 | 21 | /// bindings evolve over time |
|
22 | 22 | pub struct AncestorsIterator<G: Graph> { |
|
23 | 23 | graph: G, |
|
24 | 24 | visit: BinaryHeap<Revision>, |
|
25 | 25 | seen: HashSet<Revision>, |
|
26 | 26 | stoprev: Revision, |
|
27 | 27 | } |
|
28 | 28 | |
|
29 | 29 | /// Lazy ancestors set, backed by AncestorsIterator |
|
30 | 30 | pub struct LazyAncestors<G: Graph + Clone> { |
|
31 | 31 | graph: G, |
|
32 | 32 | containsiter: AncestorsIterator<G>, |
|
33 | 33 | initrevs: Vec<Revision>, |
|
34 | 34 | stoprev: Revision, |
|
35 | 35 | inclusive: bool, |
|
36 | 36 | } |
|
37 | 37 | |
|
38 | 38 | pub struct MissingAncestors<G: Graph> { |
|
39 | 39 | graph: G, |
|
40 | 40 | bases: HashSet<Revision>, |
|
41 | 41 | max_base: Revision, |
|
42 | 42 | } |
|
43 | 43 | |
|
44 | 44 | impl<G: Graph> AncestorsIterator<G> { |
|
45 | 45 | /// Constructor. |
|
46 | 46 | /// |
|
 47 | 47 | /// If `inclusive` is true, then the init revisions themselves are | 
 |
 48 | 48 | /// emitted; otherwise, iteration starts from their parents. | 
|
49 | 49 | pub fn new( |
|
50 | 50 | graph: G, |
|
51 | 51 | initrevs: impl IntoIterator<Item = Revision>, |
|
52 | 52 | stoprev: Revision, |
|
53 | 53 | inclusive: bool, |
|
54 | 54 | ) -> Result<Self, GraphError> { |
|
55 | 55 | let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev); |
|
56 | 56 | if inclusive { |
|
57 | 57 | let visit: BinaryHeap<Revision> = filtered_initrevs.collect(); |
|
 58 | let seen = visit.iter(). |
 |
58 | let seen = visit.iter().cloned().collect(); | |
|
59 | 59 | return Ok(AncestorsIterator { |
|
 60 | visit |
 |
 61 | seen |
 |
 62 | stoprev |
 |
 63 | graph |
 |
60 | visit, | |
|
61 | seen, | |
|
62 | stoprev, | |
|
63 | graph, | |
|
64 | 64 | }); |
|
65 | 65 | } |
|
66 | 66 | let mut this = AncestorsIterator { |
|
67 | 67 | visit: BinaryHeap::new(), |
|
68 | 68 | seen: HashSet::new(), |
|
 69 | stoprev |
 |
 70 | graph |
 |
69 | stoprev, | |
|
70 | graph, | |
|
71 | 71 | }; |
|
72 | 72 | this.seen.insert(NULL_REVISION); |
|
73 | 73 | for rev in filtered_initrevs { |
|
74 | 74 | for parent in this.graph.parents(rev)?.iter().cloned() { |
|
75 | 75 | this.conditionally_push_rev(parent); |
|
76 | 76 | } |
|
77 | 77 | } |
|
78 | 78 | Ok(this) |
|
79 | 79 | } |
|
80 | 80 | |
|
81 | 81 | #[inline] |
|
82 | 82 | fn conditionally_push_rev(&mut self, rev: Revision) { |
|
83 | 83 | if self.stoprev <= rev && self.seen.insert(rev) { |
|
84 | 84 | self.visit.push(rev); |
|
85 | 85 | } |
|
86 | 86 | } |
|
87 | 87 | |
|
 88 | 88 | /// Partially consumes the iterator to tell whether the given target | 
|
89 | 89 | /// revision |
|
90 | 90 | /// is in the ancestors it emits. |
|
91 | 91 | /// This is meant for iterators actually dedicated to that kind of |
|
92 | 92 | /// purpose |
|
93 | 93 | pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> { |
|
94 | 94 | if self.seen.contains(&target) && target != NULL_REVISION { |
|
95 | 95 | return Ok(true); |
|
96 | 96 | } |
|
97 | 97 | for item in self { |
|
98 | 98 | let rev = item?; |
|
99 | 99 | if rev == target { |
|
100 | 100 | return Ok(true); |
|
101 | 101 | } |
|
102 | 102 | if rev < target { |
|
103 | 103 | return Ok(false); |
|
104 | 104 | } |
|
105 | 105 | } |
|
106 | 106 | Ok(false) |
|
107 | 107 | } |
|
108 | 108 | |
|
109 | 109 | pub fn peek(&self) -> Option<Revision> { |
|
 110 | self.visit.peek(). |
 |
110 | self.visit.peek().cloned() | |
|
111 | 111 | } |
|
112 | 112 | |
|
 113 | 113 | /// Tell whether the iterator is over an empty set | 
|
114 | 114 | /// |
|
 115 | 115 | /// The result does not depend on whether the iterator has been consumed | 
|
116 | 116 | /// or not. |
|
117 | 117 | /// This is mostly meant for iterators backing a lazy ancestors set |
|
118 | 118 | pub fn is_empty(&self) -> bool { |
|
119 | 119 | if self.visit.len() > 0 { |
|
120 | 120 | return false; |
|
121 | 121 | } |
|
122 | 122 | if self.seen.len() > 1 { |
|
123 | 123 | return false; |
|
124 | 124 | } |
|
125 | 125 | // at this point, the seen set is at most a singleton. |
|
126 | 126 | // If not `self.inclusive`, it's still possible that it has only |
|
127 | 127 | // the null revision |
|
128 | 128 | self.seen.is_empty() || self.seen.contains(&NULL_REVISION) |
|
129 | 129 | } |
|
130 | 130 | } |
|
131 | 131 | |
|
132 | 132 | /// Main implementation for the iterator |
|
133 | 133 | /// |
|
134 | 134 | /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` |
|
 135 | 135 | /// with a few non-crucial differences: | 
|
136 | 136 | /// |
|
137 | 137 | /// - there's no filtering of invalid parent revisions. Actually, it should be |
|
138 | 138 | /// consistent and more efficient to filter them from the end caller. |
|
139 | 139 | /// - we don't have the optimization for adjacent revisions (i.e., the case |
|
 140 | 140 | /// where `p1 == rev - 1`), because it amounts to updating the first element of | 
|
141 | 141 | /// the heap without sifting, which Rust's BinaryHeap doesn't let us do. |
|
142 | 142 | /// - we save a few pushes by comparing with `stoprev` before pushing |
|
143 | 143 | impl<G: Graph> Iterator for AncestorsIterator<G> { |
|
144 | 144 | type Item = Result<Revision, GraphError>; |
|
145 | 145 | |
|
146 | 146 | fn next(&mut self) -> Option<Self::Item> { |
|
147 | 147 | let current = match self.visit.peek() { |
|
148 | 148 | None => { |
|
149 | 149 | return None; |
|
150 | 150 | } |
|
151 | 151 | Some(c) => *c, |
|
152 | 152 | }; |
|
153 | 153 | let [p1, p2] = match self.graph.parents(current) { |
|
154 | 154 | Ok(ps) => ps, |
|
155 | 155 | Err(e) => return Some(Err(e)), |
|
156 | 156 | }; |
|
157 | 157 | if p1 < self.stoprev || !self.seen.insert(p1) { |
|
158 | 158 | self.visit.pop(); |
|
159 | 159 | } else { |
|
160 | 160 | *(self.visit.peek_mut().unwrap()) = p1; |
|
161 | 161 | }; |
|
162 | 162 | |
|
163 | 163 | self.conditionally_push_rev(p2); |
|
164 | 164 | Some(Ok(current)) |
|
165 | 165 | } |
|
166 | 166 | } |
|
167 | 167 | |
|
168 | 168 | impl<G: Graph + Clone> LazyAncestors<G> { |
|
169 | 169 | pub fn new( |
|
170 | 170 | graph: G, |
|
171 | 171 | initrevs: impl IntoIterator<Item = Revision>, |
|
172 | 172 | stoprev: Revision, |
|
173 | 173 | inclusive: bool, |
|
174 | 174 | ) -> Result<Self, GraphError> { |
|
175 | 175 | let v: Vec<Revision> = initrevs.into_iter().collect(); |
|
176 | 176 | Ok(LazyAncestors { |
|
177 | 177 | graph: graph.clone(), |
|
178 | 178 | containsiter: AncestorsIterator::new( |
|
179 | 179 | graph, |
|
180 | 180 | v.iter().cloned(), |
|
181 | 181 | stoprev, |
|
182 | 182 | inclusive, |
|
183 | 183 | )?, |
|
184 | 184 | initrevs: v, |
|
 185 | stoprev |
 |
 186 | inclusive |
 |
185 | stoprev, | |
|
186 | inclusive, | |
|
187 | 187 | }) |
|
188 | 188 | } |
|
189 | 189 | |
|
190 | 190 | pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> { |
|
191 | 191 | self.containsiter.contains(rev) |
|
192 | 192 | } |
|
193 | 193 | |
|
194 | 194 | pub fn is_empty(&self) -> bool { |
|
195 | 195 | self.containsiter.is_empty() |
|
196 | 196 | } |
|
197 | 197 | |
|
198 | 198 | pub fn iter(&self) -> AncestorsIterator<G> { |
|
199 | 199 | // the arguments being the same as for self.containsiter, we know |
|
 200 | 200 | // for sure that the AncestorsIterator constructor can't fail | 
|
201 | 201 | AncestorsIterator::new( |
|
202 | 202 | self.graph.clone(), |
|
203 | 203 | self.initrevs.iter().cloned(), |
|
204 | 204 | self.stoprev, |
|
205 | 205 | self.inclusive, |
|
206 | 206 | ) |
|
207 | 207 | .unwrap() |
|
208 | 208 | } |
|
209 | 209 | } |
|
210 | 210 | |
|
211 | 211 | impl<G: Graph> MissingAncestors<G> { |
|
212 | 212 | pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self { |
|
213 | 213 | let mut created = MissingAncestors { |
|
 214 | graph |
 |
214 | graph, | |
|
215 | 215 | bases: HashSet::new(), |
|
216 | 216 | max_base: NULL_REVISION, |
|
217 | 217 | }; |
|
218 | 218 | created.add_bases(bases); |
|
219 | 219 | created |
|
220 | 220 | } |
|
221 | 221 | |
|
222 | 222 | pub fn has_bases(&self) -> bool { |
|
223 | 223 | !self.bases.is_empty() |
|
224 | 224 | } |
|
225 | 225 | |
|
226 | 226 | /// Return a reference to current bases. |
|
227 | 227 | /// |
|
 228 | 228 | /// This is useful in unit tests, but setdiscovery.py also reads | 
 |
 229 | 229 | /// the bases attribute of an ancestor.missingancestors instance. | 
|
230 | 230 | pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> { |
|
231 | 231 | &self.bases |
|
232 | 232 | } |
|
233 | 233 | |
|
234 | 234 | /// Computes the relative heads of current bases. |
|
235 | 235 | /// |
|
236 | 236 | /// The object is still usable after this. |
|
237 | 237 | pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> { |
|
238 | 238 | dagops::heads(&self.graph, self.bases.iter()) |
|
239 | 239 | } |
|
240 | 240 | |
|
241 | 241 | /// Consumes the object and returns the relative heads of its bases. |
|
242 | 242 | pub fn into_bases_heads( |
|
243 | 243 | mut self, |
|
244 | 244 | ) -> Result<HashSet<Revision>, GraphError> { |
|
245 | 245 | dagops::retain_heads(&self.graph, &mut self.bases)?; |
|
246 | 246 | Ok(self.bases) |
|
247 | 247 | } |
|
248 | 248 | |
|
249 | 249 | /// Add some revisions to `self.bases` |
|
250 | 250 | /// |
|
251 | 251 | /// Takes care of keeping `self.max_base` up to date. |
|
252 | 252 | pub fn add_bases( |
|
253 | 253 | &mut self, |
|
254 | 254 | new_bases: impl IntoIterator<Item = Revision>, |
|
255 | 255 | ) { |
|
256 | 256 | let mut max_base = self.max_base; |
|
257 | 257 | self.bases.extend( |
|
258 | 258 | new_bases |
|
259 | 259 | .into_iter() |
|
260 | 260 | .filter(|&rev| rev != NULL_REVISION) |
|
261 | 261 | .map(|r| { |
|
262 | 262 | if r > max_base { |
|
263 | 263 | max_base = r; |
|
264 | 264 | } |
|
265 | 265 | r |
|
266 | 266 | }), |
|
267 | 267 | ); |
|
268 | 268 | self.max_base = max_base; |
|
269 | 269 | } |
|
270 | 270 | |
|
271 | 271 | /// Remove all ancestors of self.bases from the revs set (in place) |
|
272 | 272 | pub fn remove_ancestors_from( |
|
273 | 273 | &mut self, |
|
274 | 274 | revs: &mut HashSet<Revision>, |
|
275 | 275 | ) -> Result<(), GraphError> { |
|
276 | 276 | revs.retain(|r| !self.bases.contains(r)); |
|
277 | 277 | // the null revision is always an ancestor. Logically speaking |
|
278 | 278 | // it's debatable in case bases is empty, but the Python |
|
279 | 279 | // implementation always adds NULL_REVISION to bases, making it |
|
 280 | 280 | // unconditionally true. | 
|
281 | 281 | revs.remove(&NULL_REVISION); |
|
282 | 282 | if revs.is_empty() { |
|
283 | 283 | return Ok(()); |
|
284 | 284 | } |
|
285 | 285 | // anything in revs > start is definitely not an ancestor of bases |
|
286 | 286 | // revs <= start need to be investigated |
|
287 | 287 | if self.max_base == NULL_REVISION { |
|
288 | 288 | return Ok(()); |
|
289 | 289 | } |
|
290 | 290 | |
|
291 | 291 | // whatever happens, we'll keep at least keepcount of them |
|
 292 | 292 | // knowing this gives us an earlier stop condition than | 
|
293 | 293 | // going all the way to the root |
|
294 | 294 | let keepcount = revs.iter().filter(|r| **r > self.max_base).count(); |
|
295 | 295 | |
|
296 | 296 | let mut curr = self.max_base; |
|
297 | 297 | while curr != NULL_REVISION && revs.len() > keepcount { |
|
298 | 298 | if self.bases.contains(&curr) { |
|
299 | 299 | revs.remove(&curr); |
|
300 | 300 | self.add_parents(curr)?; |
|
301 | 301 | } |
|
302 | 302 | curr -= 1; |
|
303 | 303 | } |
|
304 | 304 | Ok(()) |
|
305 | 305 | } |
|
306 | 306 | |
|
307 | 307 | /// Add the parents of `rev` to `self.bases` |
|
308 | 308 | /// |
|
309 | 309 | /// This has no effect on `self.max_base` |
|
310 | 310 | #[inline] |
|
311 | 311 | fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { |
|
312 | 312 | if rev == NULL_REVISION { |
|
313 | 313 | return Ok(()); |
|
314 | 314 | } |
|
315 | 315 | for p in self.graph.parents(rev)?.iter().cloned() { |
|
316 | 316 | // No need to bother the set with inserting NULL_REVISION over and |
|
317 | 317 | // over |
|
318 | 318 | if p != NULL_REVISION { |
|
319 | 319 | self.bases.insert(p); |
|
320 | 320 | } |
|
321 | 321 | } |
|
322 | 322 | Ok(()) |
|
323 | 323 | } |
|
324 | 324 | |
|
325 | 325 | /// Return all the ancestors of revs that are not ancestors of self.bases |
|
326 | 326 | /// |
|
327 | 327 | /// This may include elements from revs. |
|
328 | 328 | /// |
|
329 | 329 | /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in |
|
330 | 330 | /// revision number order, which is a topological order. |
|
331 | 331 | pub fn missing_ancestors( |
|
332 | 332 | &mut self, |
|
333 | 333 | revs: impl IntoIterator<Item = Revision>, |
|
334 | 334 | ) -> Result<Vec<Revision>, GraphError> { |
|
335 | 335 | // just for convenience and comparison with Python version |
|
336 | 336 | let bases_visit = &mut self.bases; |
|
337 | 337 | let mut revs: HashSet<Revision> = revs |
|
338 | 338 | .into_iter() |
|
339 | 339 | .filter(|r| !bases_visit.contains(r)) |
|
340 | 340 | .collect(); |
|
341 | 341 | let revs_visit = &mut revs; |
|
342 | 342 | let mut both_visit: HashSet<Revision> = |
|
343 | 343 | revs_visit.intersection(&bases_visit).cloned().collect(); |
|
344 | 344 | if revs_visit.is_empty() { |
|
345 | 345 | return Ok(Vec::new()); |
|
346 | 346 | } |
|
347 | 347 | let max_revs = revs_visit.iter().cloned().max().unwrap(); |
|
348 | 348 | let start = max(self.max_base, max_revs); |
|
349 | 349 | |
|
350 | 350 | // TODO heuristics for with_capacity()? |
|
351 | 351 | let mut missing: Vec<Revision> = Vec::new(); |
|
352 | 352 | for curr in (0..=start).rev() { |
|
353 | 353 | if revs_visit.is_empty() { |
|
354 | 354 | break; |
|
355 | 355 | } |
|
356 | 356 | if both_visit.remove(&curr) { |
|
357 | 357 | // curr's parents might have made it into revs_visit through |
|
358 | 358 | // another path |
|
359 | 359 | for p in self.graph.parents(curr)?.iter().cloned() { |
|
360 | 360 | if p == NULL_REVISION { |
|
361 | 361 | continue; |
|
362 | 362 | } |
|
363 | 363 | revs_visit.remove(&p); |
|
364 | 364 | bases_visit.insert(p); |
|
365 | 365 | both_visit.insert(p); |
|
366 | 366 | } |
|
367 | 367 | } else if revs_visit.remove(&curr) { |
|
368 | 368 | missing.push(curr); |
|
369 | 369 | for p in self.graph.parents(curr)?.iter().cloned() { |
|
370 | 370 | if p == NULL_REVISION { |
|
371 | 371 | continue; |
|
372 | 372 | } |
|
373 | 373 | if bases_visit.contains(&p) { |
|
374 | 374 | // p is already known to be an ancestor of revs_visit |
|
375 | 375 | revs_visit.remove(&p); |
|
376 | 376 | both_visit.insert(p); |
|
377 | 377 | } else if both_visit.contains(&p) { |
|
378 | 378 | // p should have been in bases_visit |
|
379 | 379 | revs_visit.remove(&p); |
|
380 | 380 | bases_visit.insert(p); |
|
381 | 381 | } else { |
|
382 | 382 | // visit later |
|
383 | 383 | revs_visit.insert(p); |
|
384 | 384 | } |
|
385 | 385 | } |
|
386 | 386 | } else if bases_visit.contains(&curr) { |
|
387 | 387 | for p in self.graph.parents(curr)?.iter().cloned() { |
|
388 | 388 | if p == NULL_REVISION { |
|
389 | 389 | continue; |
|
390 | 390 | } |
|
391 | 391 | if revs_visit.remove(&p) || both_visit.contains(&p) { |
|
392 | 392 | // p is an ancestor of bases_visit, and is implicitly |
|
393 | 393 | // in revs_visit, which means p is ::revs & ::bases. |
|
394 | 394 | bases_visit.insert(p); |
|
395 | 395 | both_visit.insert(p); |
|
396 | 396 | } else { |
|
397 | 397 | bases_visit.insert(p); |
|
398 | 398 | } |
|
399 | 399 | } |
|
400 | 400 | } |
|
401 | 401 | } |
|
402 | 402 | missing.reverse(); |
|
403 | 403 | Ok(missing) |
|
404 | 404 | } |
|
405 | 405 | } |
|
406 | 406 | |
|
407 | 407 | #[cfg(test)] |
|
408 | 408 | mod tests { |
|
409 | 409 | |
|
410 | 410 | use super::*; |
|
411 | 411 | use crate::testing::{SampleGraph, VecGraph}; |
|
412 | 412 | use std::iter::FromIterator; |
|
413 | 413 | |
|
414 | 414 | fn list_ancestors<G: Graph>( |
|
415 | 415 | graph: G, |
|
416 | 416 | initrevs: Vec<Revision>, |
|
417 | 417 | stoprev: Revision, |
|
418 | 418 | inclusive: bool, |
|
419 | 419 | ) -> Vec<Revision> { |
|
420 | 420 | AncestorsIterator::new(graph, initrevs, stoprev, inclusive) |
|
421 | 421 | .unwrap() |
|
422 | 422 | .map(|res| res.unwrap()) |
|
423 | 423 | .collect() |
|
424 | 424 | } |
|
425 | 425 | |
|
426 | 426 | #[test] |
|
427 | 427 | /// Same tests as test-ancestor.py, without membership |
|
428 | 428 | /// (see also test-ancestor.py.out) |
|
429 | 429 | fn test_list_ancestor() { |
|
430 | 430 | assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]); |
|
431 | 431 | assert_eq!( |
|
432 | 432 | list_ancestors(SampleGraph, vec![11, 13], 0, false), |
|
433 | 433 | vec![8, 7, 4, 3, 2, 1, 0] |
|
434 | 434 | ); |
|
435 | 435 | assert_eq!( |
|
436 | 436 | list_ancestors(SampleGraph, vec![1, 3], 0, false), |
|
437 | 437 | vec![1, 0] |
|
438 | 438 | ); |
|
439 | 439 | assert_eq!( |
|
440 | 440 | list_ancestors(SampleGraph, vec![11, 13], 0, true), |
|
441 | 441 | vec![13, 11, 8, 7, 4, 3, 2, 1, 0] |
|
442 | 442 | ); |
|
443 | 443 | assert_eq!( |
|
444 | 444 | list_ancestors(SampleGraph, vec![11, 13], 6, false), |
|
445 | 445 | vec![8, 7] |
|
446 | 446 | ); |
|
447 | 447 | assert_eq!( |
|
448 | 448 | list_ancestors(SampleGraph, vec![11, 13], 6, true), |
|
449 | 449 | vec![13, 11, 8, 7] |
|
450 | 450 | ); |
|
451 | 451 | assert_eq!( |
|
452 | 452 | list_ancestors(SampleGraph, vec![11, 13], 11, true), |
|
453 | 453 | vec![13, 11] |
|
454 | 454 | ); |
|
455 | 455 | assert_eq!( |
|
456 | 456 | list_ancestors(SampleGraph, vec![11, 13], 12, true), |
|
457 | 457 | vec![13] |
|
458 | 458 | ); |
|
459 | 459 | assert_eq!( |
|
460 | 460 | list_ancestors(SampleGraph, vec![10, 1], 0, true), |
|
461 | 461 | vec![10, 5, 4, 2, 1, 0] |
|
462 | 462 | ); |
|
463 | 463 | } |
|
464 | 464 | |
|
465 | 465 | #[test] |
|
466 | 466 | /// Corner case that's not directly in test-ancestors.py, but |
|
467 | 467 | /// that happens quite often, as demonstrated by running the whole |
|
468 | 468 | /// suite. |
|
469 | 469 | /// For instance, run tests/test-obsolete-checkheads.t |
|
470 | 470 | fn test_nullrev_input() { |
|
471 | 471 | let mut iter = |
|
472 | 472 | AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap(); |
|
473 | 473 | assert_eq!(iter.next(), None) |
|
474 | 474 | } |
|
475 | 475 | |
|
476 | 476 | #[test] |
|
477 | 477 | fn test_contains() { |
|
478 | 478 | let mut lazy = |
|
479 | 479 | AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap(); |
|
480 | 480 | assert!(lazy.contains(1).unwrap()); |
|
481 | 481 | assert!(!lazy.contains(3).unwrap()); |
|
482 | 482 | |
|
483 | 483 | let mut lazy = |
|
484 | 484 | AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap(); |
|
485 | 485 | assert!(!lazy.contains(NULL_REVISION).unwrap()); |
|
486 | 486 | } |
|
487 | 487 | |
|
488 | 488 | #[test] |
|
489 | 489 | fn test_peek() { |
|
490 | 490 | let mut iter = |
|
491 | 491 | AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap(); |
|
492 | 492 | // peek() gives us the next value |
|
493 | 493 | assert_eq!(iter.peek(), Some(10)); |
|
494 | 494 | // but it's not been consumed |
|
495 | 495 | assert_eq!(iter.next(), Some(Ok(10))); |
|
496 | 496 | // and iteration resumes normally |
|
497 | 497 | assert_eq!(iter.next(), Some(Ok(5))); |
|
498 | 498 | |
|
499 | 499 | // let's drain the iterator to test peek() at the end |
|
500 | 500 | while iter.next().is_some() {} |
|
501 | 501 | assert_eq!(iter.peek(), None); |
|
502 | 502 | } |
|
503 | 503 | |
|
504 | 504 | #[test] |
|
505 | 505 | fn test_empty() { |
|
506 | 506 | let mut iter = |
|
507 | 507 | AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap(); |
|
508 | 508 | assert!(!iter.is_empty()); |
|
509 | 509 | while iter.next().is_some() {} |
|
510 | 510 | assert!(!iter.is_empty()); |
|
511 | 511 | |
|
512 | 512 | let iter = |
|
513 | 513 | AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap(); |
|
514 | 514 | assert!(iter.is_empty()); |
|
515 | 515 | |
|
516 | 516 | // case where iter.seen == {NULL_REVISION} |
|
517 | 517 | let iter = |
|
518 | 518 | AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap(); |
|
519 | 519 | assert!(iter.is_empty()); |
|
520 | 520 | } |
|
521 | 521 | |
|
522 | 522 | /// A corrupted Graph, supporting error handling tests |
|
523 | 523 | #[derive(Clone, Debug)] |
|
524 | 524 | struct Corrupted; |
|
525 | 525 | |
|
526 | 526 | impl Graph for Corrupted { |
|
527 | 527 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
528 | 528 | match rev { |
|
529 | 529 | 1 => Ok([0, -1]), |
|
530 | 530 | r => Err(GraphError::ParentOutOfRange(r)), |
|
531 | 531 | } |
|
532 | 532 | } |
|
533 | 533 | } |
|
534 | 534 | |
|
535 | 535 | #[test] |
|
536 | 536 | fn test_initrev_out_of_range() { |
|
537 | 537 | // inclusive=false looks up initrev's parents right away |
|
538 | 538 | match AncestorsIterator::new(SampleGraph, vec![25], 0, false) { |
|
539 | 539 | Ok(_) => panic!("Should have been ParentOutOfRange"), |
|
540 | 540 | Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)), |
|
541 | 541 | } |
|
542 | 542 | } |
|
543 | 543 | |
|
544 | 544 | #[test] |
|
545 | 545 | fn test_next_out_of_range() { |
|
546 | 546 | // inclusive=false looks up initrev's parents right away |
|
547 | 547 | let mut iter = |
|
548 | 548 | AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap(); |
|
549 | 549 | assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0)))); |
|
550 | 550 | } |
|
551 | 551 | |
|
552 | 552 | #[test] |
|
553 | 553 | fn test_lazy_iter_contains() { |
|
554 | 554 | let mut lazy = |
|
555 | 555 | LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); |
|
556 | 556 | |
|
557 | 557 | let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect(); |
|
558 | 558 | // compare with iterator tests on the same initial revisions |
|
559 | 559 | assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]); |
|
560 | 560 | |
|
561 | 561 | // contains() results are correct, unaffected by the fact that |
|
562 | 562 | // we consumed entirely an iterator out of lazy |
|
563 | 563 | assert_eq!(lazy.contains(2), Ok(true)); |
|
564 | 564 | assert_eq!(lazy.contains(9), Ok(false)); |
|
565 | 565 | } |
|
566 | 566 | |
|
567 | 567 | #[test] |
|
568 | 568 | fn test_lazy_contains_iter() { |
|
569 | 569 | let mut lazy = |
|
570 | 570 | LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0] |
|
571 | 571 | |
|
572 | 572 | assert_eq!(lazy.contains(2), Ok(true)); |
|
573 | 573 | assert_eq!(lazy.contains(6), Ok(false)); |
|
574 | 574 | |
|
575 | 575 | // after consumption of 2 by the inner iterator, results stay |
|
576 | 576 | // consistent |
|
577 | 577 | assert_eq!(lazy.contains(2), Ok(true)); |
|
578 | 578 | assert_eq!(lazy.contains(5), Ok(false)); |
|
579 | 579 | |
|
580 | 580 | // iter() still gives us a fresh iterator |
|
581 | 581 | let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect(); |
|
582 | 582 | assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]); |
|
583 | 583 | } |
|
584 | 584 | |
|
585 | 585 | #[test] |
|
586 | 586 | /// Test constructor, add/get bases and heads |
|
587 | 587 | fn test_missing_bases() -> Result<(), GraphError> { |
|
588 | 588 | let mut missing_ancestors = |
|
589 | 589 | MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned()); |
|
590 | 590 | let mut as_vec: Vec<Revision> = |
|
591 | 591 | missing_ancestors.get_bases().iter().cloned().collect(); |
|
592 | 592 | as_vec.sort(); |
|
593 | 593 | assert_eq!(as_vec, [1, 3, 5]); |
|
594 | 594 | assert_eq!(missing_ancestors.max_base, 5); |
|
595 | 595 | |
|
596 | 596 | missing_ancestors.add_bases([3, 7, 8].iter().cloned()); |
|
597 | 597 | as_vec = missing_ancestors.get_bases().iter().cloned().collect(); |
|
598 | 598 | as_vec.sort(); |
|
599 | 599 | assert_eq!(as_vec, [1, 3, 5, 7, 8]); |
|
600 | 600 | assert_eq!(missing_ancestors.max_base, 8); |
|
601 | 601 | |
|
602 | 602 | as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect(); |
|
603 | 603 | as_vec.sort(); |
|
604 | 604 | assert_eq!(as_vec, [3, 5, 7, 8]); |
|
605 | 605 | Ok(()) |
|
606 | 606 | } |
|
607 | 607 | |
|
608 | 608 | fn assert_missing_remove( |
|
609 | 609 | bases: &[Revision], |
|
610 | 610 | revs: &[Revision], |
|
611 | 611 | expected: &[Revision], |
|
612 | 612 | ) { |
|
613 | 613 | let mut missing_ancestors = |
|
614 | 614 | MissingAncestors::new(SampleGraph, bases.iter().cloned()); |
|
615 | 615 | let mut revset: HashSet<Revision> = revs.iter().cloned().collect(); |
|
616 | 616 | missing_ancestors |
|
617 | 617 | .remove_ancestors_from(&mut revset) |
|
618 | 618 | .unwrap(); |
|
619 | 619 | let mut as_vec: Vec<Revision> = revset.into_iter().collect(); |
|
620 | 620 | as_vec.sort(); |
|
621 | 621 | assert_eq!(as_vec.as_slice(), expected); |
|
622 | 622 | } |
|
623 | 623 | |
|
624 | 624 | #[test] |
|
625 | 625 | fn test_missing_remove() { |
|
626 | 626 | assert_missing_remove( |
|
627 | 627 | &[1, 2, 3, 4, 7], |
|
628 | 628 | Vec::from_iter(1..10).as_slice(), |
|
629 | 629 | &[5, 6, 8, 9], |
|
630 | 630 | ); |
|
631 | 631 | assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]); |
|
632 | 632 | assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]); |
|
633 | 633 | } |
|
634 | 634 | |
|
635 | 635 | fn assert_missing_ancestors( |
|
636 | 636 | bases: &[Revision], |
|
637 | 637 | revs: &[Revision], |
|
638 | 638 | expected: &[Revision], |
|
639 | 639 | ) { |
|
640 | 640 | let mut missing_ancestors = |
|
641 | 641 | MissingAncestors::new(SampleGraph, bases.iter().cloned()); |
|
642 | 642 | let missing = missing_ancestors |
|
643 | 643 | .missing_ancestors(revs.iter().cloned()) |
|
644 | 644 | .unwrap(); |
|
645 | 645 | assert_eq!(missing.as_slice(), expected); |
|
646 | 646 | } |
|
647 | 647 | |
|
648 | 648 | #[test] |
|
649 | 649 | fn test_missing_ancestors() { |
|
650 | 650 | // examples taken from test-ancestors.py by having it run |
|
651 | 651 | // on the same graph (both naive and fast Python algs) |
|
652 | 652 | assert_missing_ancestors(&[10], &[11], &[3, 7, 11]); |
|
653 | 653 | assert_missing_ancestors(&[11], &[10], &[5, 10]); |
|
654 | 654 | assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]); |
|
655 | 655 | } |
|
656 | 656 | |
|
657 | 657 | /// An interesting case found by a random generator similar to |
|
658 | 658 | /// the one in test-ancestor.py. An early version of Rust MissingAncestors |
|
659 | 659 | /// failed this, yet none of the integration tests of the whole suite |
|
 660 | 660 | /// caught it. | 
|
661 | 661 | #[test] |
|
662 | 662 | fn test_remove_ancestors_from_case1() { |
|
663 | 663 | let graph: VecGraph = vec![ |
|
664 | 664 | [NULL_REVISION, NULL_REVISION], |
|
665 | 665 | [0, NULL_REVISION], |
|
666 | 666 | [1, 0], |
|
667 | 667 | [2, 1], |
|
668 | 668 | [3, NULL_REVISION], |
|
669 | 669 | [4, NULL_REVISION], |
|
670 | 670 | [5, 1], |
|
671 | 671 | [2, NULL_REVISION], |
|
672 | 672 | [7, NULL_REVISION], |
|
673 | 673 | [8, NULL_REVISION], |
|
674 | 674 | [9, NULL_REVISION], |
|
675 | 675 | [10, 1], |
|
676 | 676 | [3, NULL_REVISION], |
|
677 | 677 | [12, NULL_REVISION], |
|
678 | 678 | [13, NULL_REVISION], |
|
679 | 679 | [14, NULL_REVISION], |
|
680 | 680 | [4, NULL_REVISION], |
|
681 | 681 | [16, NULL_REVISION], |
|
682 | 682 | [17, NULL_REVISION], |
|
683 | 683 | [18, NULL_REVISION], |
|
684 | 684 | [19, 11], |
|
685 | 685 | [20, NULL_REVISION], |
|
686 | 686 | [21, NULL_REVISION], |
|
687 | 687 | [22, NULL_REVISION], |
|
688 | 688 | [23, NULL_REVISION], |
|
689 | 689 | [2, NULL_REVISION], |
|
690 | 690 | [3, NULL_REVISION], |
|
691 | 691 | [26, 24], |
|
692 | 692 | [27, NULL_REVISION], |
|
693 | 693 | [28, NULL_REVISION], |
|
694 | 694 | [12, NULL_REVISION], |
|
695 | 695 | [1, NULL_REVISION], |
|
696 | 696 | [1, 9], |
|
697 | 697 | [32, NULL_REVISION], |
|
698 | 698 | [33, NULL_REVISION], |
|
699 | 699 | [34, 31], |
|
700 | 700 | [35, NULL_REVISION], |
|
701 | 701 | [36, 26], |
|
702 | 702 | [37, NULL_REVISION], |
|
703 | 703 | [38, NULL_REVISION], |
|
704 | 704 | [39, NULL_REVISION], |
|
705 | 705 | [40, NULL_REVISION], |
|
706 | 706 | [41, NULL_REVISION], |
|
707 | 707 | [42, 26], |
|
708 | 708 | [0, NULL_REVISION], |
|
709 | 709 | [44, NULL_REVISION], |
|
710 | 710 | [45, 4], |
|
711 | 711 | [40, NULL_REVISION], |
|
712 | 712 | [47, NULL_REVISION], |
|
713 | 713 | [36, 0], |
|
714 | 714 | [49, NULL_REVISION], |
|
715 | 715 | [NULL_REVISION, NULL_REVISION], |
|
716 | 716 | [51, NULL_REVISION], |
|
717 | 717 | [52, NULL_REVISION], |
|
718 | 718 | [53, NULL_REVISION], |
|
719 | 719 | [14, NULL_REVISION], |
|
720 | 720 | [55, NULL_REVISION], |
|
721 | 721 | [15, NULL_REVISION], |
|
722 | 722 | [23, NULL_REVISION], |
|
723 | 723 | [58, NULL_REVISION], |
|
724 | 724 | [59, NULL_REVISION], |
|
725 | 725 | [2, NULL_REVISION], |
|
726 | 726 | [61, 59], |
|
727 | 727 | [62, NULL_REVISION], |
|
728 | 728 | [63, NULL_REVISION], |
|
729 | 729 | [NULL_REVISION, NULL_REVISION], |
|
730 | 730 | [65, NULL_REVISION], |
|
731 | 731 | [66, NULL_REVISION], |
|
732 | 732 | [67, NULL_REVISION], |
|
733 | 733 | [68, NULL_REVISION], |
|
734 | 734 | [37, 28], |
|
735 | 735 | [69, 25], |
|
736 | 736 | [71, NULL_REVISION], |
|
737 | 737 | [72, NULL_REVISION], |
|
738 | 738 | [50, 2], |
|
739 | 739 | [74, NULL_REVISION], |
|
740 | 740 | [12, NULL_REVISION], |
|
741 | 741 | [18, NULL_REVISION], |
|
742 | 742 | [77, NULL_REVISION], |
|
743 | 743 | [78, NULL_REVISION], |
|
744 | 744 | [79, NULL_REVISION], |
|
745 | 745 | [43, 33], |
|
746 | 746 | [81, NULL_REVISION], |
|
747 | 747 | [82, NULL_REVISION], |
|
748 | 748 | [83, NULL_REVISION], |
|
749 | 749 | [84, 45], |
|
750 | 750 | [85, NULL_REVISION], |
|
751 | 751 | [86, NULL_REVISION], |
|
752 | 752 | [NULL_REVISION, NULL_REVISION], |
|
753 | 753 | [88, NULL_REVISION], |
|
754 | 754 | [NULL_REVISION, NULL_REVISION], |
|
755 | 755 | [76, 83], |
|
756 | 756 | [44, NULL_REVISION], |
|
757 | 757 | [92, NULL_REVISION], |
|
758 | 758 | [93, NULL_REVISION], |
|
759 | 759 | [9, NULL_REVISION], |
|
760 | 760 | [95, 67], |
|
761 | 761 | [96, NULL_REVISION], |
|
762 | 762 | [97, NULL_REVISION], |
|
763 | 763 | [NULL_REVISION, NULL_REVISION], |
|
764 | 764 | ]; |
|
765 | 765 | let problem_rev = 28 as Revision; |
|
766 | 766 | let problem_base = 70 as Revision; |
|
767 | 767 | // making the problem obvious: problem_rev is a parent of problem_base |
|
768 | 768 | assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev); |
|
769 | 769 | |
|
770 | 770 | let mut missing_ancestors: MissingAncestors<VecGraph> = |
|
771 | 771 | MissingAncestors::new( |
|
772 | 772 | graph, |
|
773 | 773 | [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6] |
|
774 | 774 | .iter() |
|
775 | 775 | .cloned(), |
|
776 | 776 | ); |
|
777 | 777 | assert!(missing_ancestors.bases.contains(&problem_base)); |
|
778 | 778 | |
|
779 | 779 | let mut revs: HashSet<Revision> = |
|
780 | 780 | [4, 12, 41, 28, 68, 38, 1, 30, 56, 44] |
|
781 | 781 | .iter() |
|
782 | 782 | .cloned() |
|
783 | 783 | .collect(); |
|
784 | 784 | missing_ancestors.remove_ancestors_from(&mut revs).unwrap(); |
|
785 | 785 | assert!(!revs.contains(&problem_rev)); |
|
786 | 786 | } |
|
787 | 787 | } |
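A brief usage sketch may help while reviewing this file. It is not part of the changeset: the `hg` crate name, the `hg::ancestors` module path and the `TinyGraph` helper are assumptions for illustration; only the `Graph` trait, `Revision`, `NULL_REVISION` and the constructors visible above are taken from the diff.

// Hypothetical usage sketch (not part of the changeset); crate/module paths
// and TinyGraph are assumptions, and Revision is assumed to be a signed integer.
use hg::ancestors::{AncestorsIterator, MissingAncestors};
use hg::{Graph, GraphError, Revision, NULL_REVISION};

/// A tiny in-memory DAG: index = revision, value = its two parents.
#[derive(Clone)]
struct TinyGraph(Vec<[Revision; 2]>);

impl Graph for TinyGraph {
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.0
            .get(rev as usize)
            .cloned()
            .ok_or(GraphError::ParentOutOfRange(rev))
    }
}

fn main() -> Result<(), GraphError> {
    // 0 <- 1 <- 2 (linear history) and 0 <- 3 (a second head)
    let graph = TinyGraph(vec![
        [NULL_REVISION, NULL_REVISION],
        [0, NULL_REVISION],
        [1, NULL_REVISION],
        [0, NULL_REVISION],
    ]);

    // Inclusive iteration from revision 2 yields 2, 1, 0, in decreasing order.
    let ancs: Vec<Revision> = AncestorsIterator::new(graph.clone(), vec![2], 0, true)?
        .collect::<Result<_, _>>()?;
    assert_eq!(ancs, vec![2, 1, 0]);

    // ::3 - ::2, i.e. ancestors of 3 that are not ancestors of 2.
    let mut missing = MissingAncestors::new(graph, vec![2]);
    assert_eq!(missing.missing_ancestors(vec![3])?, vec![3]);
    Ok(())
}

The iterator yields `Result` items so that graph corruption (`ParentOutOfRange`) surfaces lazily, as exercised by `test_next_out_of_range` above.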
@@ -1,275 +1,276 b'' | |||
|
1 | 1 | // dagops.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Miscellaneous DAG operations |
|
9 | 9 | //! |
|
10 | 10 | //! # Terminology |
|
11 | 11 | //! - By *relative heads* of a collection of revision numbers (`Revision`), we |
|
12 | 12 | //! mean those revisions that have no children among the collection. |
|
13 | 13 | //! - Similarly *relative roots* of a collection of `Revision`, we mean those |
|
14 | 14 | //! whose parents, if any, don't belong to the collection. |
|
15 | 15 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
16 | 16 | use crate::ancestors::AncestorsIterator; |
|
17 | 17 | use std::collections::{BTreeSet, HashSet}; |
|
18 | 18 | |
|
19 | fn remove_parents( | |
|
19 | fn remove_parents<S: std::hash::BuildHasher>( | |
|
20 | 20 | graph: &impl Graph, |
|
21 | 21 | rev: Revision, |
|
22 | set: &mut HashSet<Revision>, | |
|
22 | set: &mut HashSet<Revision, S>, | |
|
23 | 23 | ) -> Result<(), GraphError> { |
|
24 | 24 | for parent in graph.parents(rev)?.iter() { |
|
25 | 25 | if *parent != NULL_REVISION { |
|
26 | 26 | set.remove(parent); |
|
27 | 27 | } |
|
28 | 28 | } |
|
29 | 29 | Ok(()) |
|
30 | 30 | } |
|
31 | 31 | |
|
32 | 32 | /// Relative heads out of some revisions, passed as an iterator. |
|
33 | 33 | /// |
|
34 | 34 | /// These heads are defined as those revisions that have no children |
|
35 | 35 | /// among those emitted by the iterator. |
|
36 | 36 | /// |
|
37 | 37 | /// # Performance notes |
|
38 | 38 | /// Internally, this clones the iterator, and builds a `HashSet` out of it. |
|
39 | 39 | /// |
|
40 | 40 | /// This function takes an `Iterator` instead of `impl IntoIterator` to |
|
41 | 41 | /// guarantee that cloning the iterator doesn't result in cloning the full |
|
42 | 42 | /// construct it comes from. |
|
43 | 43 | pub fn heads<'a>( |
|
44 | 44 | graph: &impl Graph, |
|
45 | 45 | iter_revs: impl Clone + Iterator<Item = &'a Revision>, |
|
46 | 46 | ) -> Result<HashSet<Revision>, GraphError> { |
|
47 | 47 | let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect(); |
|
48 | 48 | heads.remove(&NULL_REVISION); |
|
49 | 49 | for rev in iter_revs { |
|
50 | 50 | if *rev != NULL_REVISION { |
|
51 | 51 | remove_parents(graph, *rev, &mut heads)?; |
|
52 | 52 | } |
|
53 | 53 | } |
|
54 | 54 | Ok(heads) |
|
55 | 55 | } |
|
56 | 56 | |
|
57 | 57 | /// Retain in `revs` only its relative heads. |
|
58 | 58 | /// |
|
59 | 59 | /// This is an in-place operation, so that control of the incoming |
|
60 | 60 | /// set is left to the caller. |
|
61 | 61 | /// - a direct Python binding would probably need to build its own `HashSet` |
|
62 | 62 | /// from an incoming iterable, even if its sole purpose is to extract the |
|
63 | 63 | /// heads. |
|
64 | 64 | /// - a Rust caller can decide whether cloning beforehand is appropriate |
|
65 | 65 | /// |
|
66 | 66 | /// # Performance notes |
|
67 | 67 | /// Internally, this function will store a full copy of `revs` in a `Vec`. |
|
68 | pub fn retain_heads( | |
|
68 | pub fn retain_heads<S: std::hash::BuildHasher>( | |
|
69 | 69 | graph: &impl Graph, |
|
70 | revs: &mut HashSet<Revision>, | |
|
70 | revs: &mut HashSet<Revision, S>, | |
|
71 | 71 | ) -> Result<(), GraphError> { |
|
72 | 72 | revs.remove(&NULL_REVISION); |
|
 73 | 73 | // we need to construct an iterable copy of revs to avoid iterating while | 
|
74 | 74 | // mutating |
|
75 | 75 | let as_vec: Vec<Revision> = revs.iter().cloned().collect(); |
|
76 | 76 | for rev in as_vec { |
|
77 | 77 | if rev != NULL_REVISION { |
|
78 | 78 | remove_parents(graph, rev, revs)?; |
|
79 | 79 | } |
|
80 | 80 | } |
|
81 | 81 | Ok(()) |
|
82 | 82 | } |
|
83 | 83 | |
|
84 | 84 | /// Roots of `revs`, passed as a `HashSet` |
|
85 | 85 | /// |
|
86 | 86 | /// They are returned in arbitrary order |
|
87 | pub fn roots<G: Graph>( | |
|
87 | pub fn roots<G: Graph, S: std::hash::BuildHasher>( | |
|
88 | 88 | graph: &G, |
|
89 | revs: &HashSet<Revision>, | |
|
89 | revs: &HashSet<Revision, S>, | |
|
90 | 90 | ) -> Result<Vec<Revision>, GraphError> { |
|
91 | 91 | let mut roots: Vec<Revision> = Vec::new(); |
|
92 | 92 | for rev in revs { |
|
93 | 93 | if graph |
|
94 | 94 | .parents(*rev)? |
|
95 | 95 | .iter() |
|
96 | 96 | .filter(|p| **p != NULL_REVISION) |
|
97 | 97 | .all(|p| !revs.contains(p)) |
|
98 | 98 | { |
|
99 | 99 | roots.push(*rev); |
|
100 | 100 | } |
|
101 | 101 | } |
|
102 | 102 | Ok(roots) |
|
103 | 103 | } |
|
104 | 104 | |
|
105 | 105 | /// Compute the topological range between two collections of revisions |
|
106 | 106 | /// |
|
107 | 107 | /// This is equivalent to the revset `<roots>::<heads>`. |
|
108 | 108 | /// |
|
109 | 109 | /// Currently, the given `Graph` has to implement `Clone`, which means |
|
110 | 110 | /// actually cloning just a reference-counted Python pointer if |
|
111 | 111 | /// it's passed over through `rust-cpython`. This is due to the internal |
|
112 | 112 | /// use of `AncestorsIterator` |
|
113 | 113 | /// |
|
114 | 114 | /// # Algorithmic details |
|
115 | 115 | /// |
|
 116 | 116 | /// This is a two-pass sweep inspired by what `reachableroots2` from | 
|
117 | 117 | /// `mercurial.cext.parsers` does to obtain the same results. |
|
118 | 118 | /// |
|
119 | 119 | /// - first, we climb up the DAG from `heads` in topological order, keeping |
|
120 | 120 | /// them in the vector `heads_ancestors` vector, and adding any element of |
|
121 | 121 | /// `roots` we find among them to the resulting range. |
|
122 | 122 | /// - Then, we iterate on that recorded vector so that a revision is always |
|
123 | 123 | /// emitted after its parents and add all revisions whose parents are already |
|
124 | 124 | /// in the range to the results. |
|
125 | 125 | /// |
|
126 | 126 | /// # Performance notes |
|
127 | 127 | /// |
|
128 | 128 | /// The main difference with the C implementation is that |
|
129 | 129 | /// the latter uses a flat array with bit flags, instead of complex structures |
|
130 | 130 | /// like `HashSet`, making it faster in most scenarios. In theory, it's |
|
131 | 131 | /// possible that the present implementation could be more memory efficient |
|
132 | 132 | /// for very large repositories with many branches. |
|
133 | 133 | pub fn range( |
|
134 | 134 | graph: &(impl Graph + Clone), |
|
135 | 135 | roots: impl IntoIterator<Item = Revision>, |
|
136 | 136 | heads: impl IntoIterator<Item = Revision>, |
|
137 | 137 | ) -> Result<BTreeSet<Revision>, GraphError> { |
|
138 | 138 | let mut range = BTreeSet::new(); |
|
139 | 139 | let roots: HashSet<Revision> = roots.into_iter().collect(); |
|
140 | 140 | let min_root: Revision = match roots.iter().cloned().min() { |
|
141 | 141 | None => { |
|
142 | 142 | return Ok(range); |
|
143 | 143 | } |
|
144 | 144 | Some(r) => r, |
|
145 | 145 | }; |
|
146 | 146 | |
|
147 | 147 | // Internally, AncestorsIterator currently maintains a `HashSet` |
|
148 | 148 | // of all seen revision, which is also what we record, albeit in an ordered |
|
149 | 149 | // way. There's room for improvement on this duplication. |
|
150 | 150 | let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?; |
|
151 | 151 | let mut heads_ancestors: Vec<Revision> = Vec::new(); |
|
152 | 152 | for revres in ait { |
|
153 | 153 | let rev = revres?; |
|
154 | 154 | if roots.contains(&rev) { |
|
155 | 155 | range.insert(rev); |
|
156 | 156 | } |
|
157 | 157 | heads_ancestors.push(rev); |
|
158 | 158 | } |
|
159 | 159 | |
|
160 | 160 | for rev in heads_ancestors.into_iter().rev() { |
|
161 | 161 | for parent in graph.parents(rev)?.iter() { |
|
162 | 162 | if *parent != NULL_REVISION && range.contains(parent) { |
|
163 | 163 | range.insert(rev); |
|
164 | 164 | } |
|
165 | 165 | } |
|
166 | 166 | } |
|
167 | 167 | Ok(range) |
|
168 | 168 | } |
|
169 | 169 | |
|
170 | 170 | #[cfg(test)] |
|
171 | 171 | mod tests { |
|
172 | 172 | |
|
173 | 173 | use super::*; |
|
174 | 174 | use crate::testing::SampleGraph; |
|
175 | 175 | |
|
176 | 176 | /// Apply `retain_heads()` to the given slice and return as a sorted `Vec` |
|
177 | 177 | fn retain_heads_sorted( |
|
178 | 178 | graph: &impl Graph, |
|
179 | 179 | revs: &[Revision], |
|
180 | 180 | ) -> Result<Vec<Revision>, GraphError> { |
|
181 | 181 | let mut revs: HashSet<Revision> = revs.iter().cloned().collect(); |
|
182 | 182 | retain_heads(graph, &mut revs)?; |
|
183 | 183 | let mut as_vec: Vec<Revision> = revs.iter().cloned().collect(); |
|
184 | 184 | as_vec.sort(); |
|
185 | 185 | Ok(as_vec) |
|
186 | 186 | } |
|
187 | 187 | |
|
188 | 188 | #[test] |
|
189 | 189 | fn test_retain_heads() -> Result<(), GraphError> { |
|
190 | 190 | assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]); |
|
191 | 191 | assert_eq!( |
|
192 | 192 | retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?, |
|
193 | 193 | vec![1, 6, 12] |
|
194 | 194 | ); |
|
195 | 195 | assert_eq!( |
|
196 | 196 | retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?, |
|
197 | 197 | vec![3, 5, 8, 9] |
|
198 | 198 | ); |
|
199 | 199 | Ok(()) |
|
200 | 200 | } |
|
201 | 201 | |
|
202 | 202 | /// Apply `heads()` to the given slice and return as a sorted `Vec` |
|
203 | 203 | fn heads_sorted( |
|
204 | 204 | graph: &impl Graph, |
|
205 | 205 | revs: &[Revision], |
|
206 | 206 | ) -> Result<Vec<Revision>, GraphError> { |
|
207 | 207 | let heads = heads(graph, revs.iter())?; |
|
208 | 208 | let mut as_vec: Vec<Revision> = heads.iter().cloned().collect(); |
|
209 | 209 | as_vec.sort(); |
|
210 | 210 | Ok(as_vec) |
|
211 | 211 | } |
|
212 | 212 | |
|
213 | 213 | #[test] |
|
214 | 214 | fn test_heads() -> Result<(), GraphError> { |
|
215 | 215 | assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]); |
|
216 | 216 | assert_eq!( |
|
217 | 217 | heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?, |
|
218 | 218 | vec![1, 6, 12] |
|
219 | 219 | ); |
|
220 | 220 | assert_eq!( |
|
221 | 221 | heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?, |
|
222 | 222 | vec![3, 5, 8, 9] |
|
223 | 223 | ); |
|
224 | 224 | Ok(()) |
|
225 | 225 | } |
|
226 | 226 | |
|
227 | 227 | /// Apply `roots()` and sort the result for easier comparison |
|
228 | 228 | fn roots_sorted( |
|
229 | 229 | graph: &impl Graph, |
|
230 | 230 | revs: &[Revision], |
|
231 | 231 | ) -> Result<Vec<Revision>, GraphError> { |
|
 232 | let |
 |
232 | let set: HashSet<_> = revs.iter().cloned().collect(); | |
|
233 | let mut as_vec = roots(graph, &set)?; | |
|
233 | 234 | as_vec.sort(); |
|
234 | 235 | Ok(as_vec) |
|
235 | 236 | } |
|
236 | 237 | |
|
237 | 238 | #[test] |
|
238 | 239 | fn test_roots() -> Result<(), GraphError> { |
|
239 | 240 | assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]); |
|
240 | 241 | assert_eq!( |
|
241 | 242 | roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?, |
|
242 | 243 | vec![0, 4, 12] |
|
243 | 244 | ); |
|
244 | 245 | assert_eq!( |
|
245 | 246 | roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?, |
|
246 | 247 | vec![1, 8] |
|
247 | 248 | ); |
|
248 | 249 | Ok(()) |
|
249 | 250 | } |
|
250 | 251 | |
|
251 | 252 | /// Apply `range()` and convert the result into a Vec for easier comparison |
|
252 | 253 | fn range_vec( |
|
253 | 254 | graph: impl Graph + Clone, |
|
254 | 255 | roots: &[Revision], |
|
255 | 256 | heads: &[Revision], |
|
256 | 257 | ) -> Result<Vec<Revision>, GraphError> { |
|
257 | 258 | range(&graph, roots.iter().cloned(), heads.iter().cloned()) |
|
258 | 259 | .map(|bs| bs.into_iter().collect()) |
|
259 | 260 | } |
|
260 | 261 | |
|
261 | 262 | #[test] |
|
262 | 263 | fn test_range() -> Result<(), GraphError> { |
|
263 | 264 | assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]); |
|
264 | 265 | assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]); |
|
265 | 266 | assert_eq!( |
|
266 | 267 | range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?, |
|
267 | 268 | vec![5, 10] |
|
268 | 269 | ); |
|
269 | 270 | assert_eq!( |
|
270 | 271 | range_vec(SampleGraph, &[5, 6], &[10, 12])?, |
|
271 | 272 | vec![5, 6, 9, 10, 12] |
|
272 | 273 | ); |
|
273 | 274 | Ok(()) |
|
274 | 275 | } |
|
275 | 276 | } |
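As with the previous file, a short sketch of the `dagops` helpers may be useful; it is not part of the changeset, the `hg::dagops` module path and the `TinyGraph` type are assumptions, and only the signatures visible above are relied upon.

// Hypothetical usage sketch (not part of the changeset); paths and TinyGraph
// are assumptions, the dagops signatures are those shown in the diff above.
use std::collections::HashSet;

use hg::dagops::{heads, range, retain_heads, roots};
use hg::{Graph, GraphError, Revision, NULL_REVISION};

#[derive(Clone)]
struct TinyGraph(Vec<[Revision; 2]>);

impl Graph for TinyGraph {
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.0
            .get(rev as usize)
            .cloned()
            .ok_or(GraphError::ParentOutOfRange(rev))
    }
}

fn main() -> Result<(), GraphError> {
    // 0 <- 1 <- 2 and 1 <- 3: revisions 2 and 3 are both children of 1.
    let graph = TinyGraph(vec![
        [NULL_REVISION, NULL_REVISION],
        [0, NULL_REVISION],
        [1, NULL_REVISION],
        [1, NULL_REVISION],
    ]);
    let revs: HashSet<Revision> = [1, 2, 3].iter().cloned().collect();

    // Relative heads: members of `revs` with no children inside `revs`.
    let h = heads(&graph, revs.iter())?;
    let expected: HashSet<Revision> = [2, 3].iter().cloned().collect();
    assert_eq!(h, expected);

    // Relative roots: members of `revs` whose parents are outside `revs`.
    let mut r = roots(&graph, &revs)?;
    r.sort();
    assert_eq!(r, vec![1]);

    // Topological range `0::2`, returned as an ordered set.
    let between = range(&graph, vec![0], vec![2])?;
    assert_eq!(between.into_iter().collect::<Vec<_>>(), vec![0, 1, 2]);

    // In-place variant of `heads`, mutating the caller's set.
    let mut in_place = revs.clone();
    retain_heads(&graph, &mut in_place)?;
    assert_eq!(in_place, h);
    Ok(())
}

Note that `heads()` deliberately takes a cloneable iterator of references, matching the performance note in the file, while `retain_heads()` leaves control of the incoming set to the caller.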
@@ -1,418 +1,422 b'' | |||
|
1 | 1 | // dirs_multiset.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! A multiset of directory names. |
|
9 | 9 | //! |
|
 10 | 10 | //! Used to count the references to directories in a manifest or dirstate. | 
|
11 | 11 | use crate::{ |
|
12 | 12 | dirstate::EntryState, |
|
13 | 13 | utils::{ |
|
14 | 14 | files, |
|
15 | 15 | hg_path::{HgPath, HgPathBuf, HgPathError}, |
|
16 | 16 | }, |
|
17 | 17 | DirstateEntry, DirstateMapError, FastHashMap, |
|
18 | 18 | }; |
|
19 | 19 | use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet}; |
|
20 | 20 | |
|
 21 | 21 | // could be encapsulated if we care about API stability more seriously | 
|
22 | 22 | pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>; |
|
23 | 23 | |
|
24 | 24 | #[derive(PartialEq, Debug)] |
|
25 | 25 | pub struct DirsMultiset { |
|
26 | 26 | inner: FastHashMap<HgPathBuf, u32>, |
|
27 | 27 | } |
|
28 | 28 | |
|
29 | 29 | impl DirsMultiset { |
|
30 | 30 | /// Initializes the multiset from a dirstate. |
|
31 | 31 | /// |
|
32 | 32 | /// If `skip_state` is provided, skips dirstate entries with equal state. |
|
33 | 33 | pub fn from_dirstate( |
|
34 | 34 | dirstate: &FastHashMap<HgPathBuf, DirstateEntry>, |
|
35 | 35 | skip_state: Option<EntryState>, |
|
36 | 36 | ) -> Result<Self, DirstateMapError> { |
|
37 | 37 | let mut multiset = DirsMultiset { |
|
38 | 38 | inner: FastHashMap::default(), |
|
39 | 39 | }; |
|
40 | 40 | |
|
41 | 41 | for (filename, DirstateEntry { state, .. }) in dirstate { |
|
42 | 42 | // This `if` is optimized out of the loop |
|
43 | 43 | if let Some(skip) = skip_state { |
|
44 | 44 | if skip != *state { |
|
45 | 45 | multiset.add_path(filename)?; |
|
46 | 46 | } |
|
47 | 47 | } else { |
|
48 | 48 | multiset.add_path(filename)?; |
|
49 | 49 | } |
|
50 | 50 | } |
|
51 | 51 | |
|
52 | 52 | Ok(multiset) |
|
53 | 53 | } |
|
54 | 54 | |
|
55 | 55 | /// Initializes the multiset from a manifest. |
|
56 | 56 | pub fn from_manifest( |
|
57 | 57 | manifest: &[impl AsRef<HgPath>], |
|
58 | 58 | ) -> Result<Self, DirstateMapError> { |
|
59 | 59 | let mut multiset = DirsMultiset { |
|
60 | 60 | inner: FastHashMap::default(), |
|
61 | 61 | }; |
|
62 | 62 | |
|
63 | 63 | for filename in manifest { |
|
64 | 64 | multiset.add_path(filename.as_ref())?; |
|
65 | 65 | } |
|
66 | 66 | |
|
67 | 67 | Ok(multiset) |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | /// Increases the count of deepest directory contained in the path. |
|
71 | 71 | /// |
|
72 | 72 | /// If the directory is not yet in the map, adds its parents. |
|
73 | 73 | pub fn add_path( |
|
74 | 74 | &mut self, |
|
75 | 75 | path: impl AsRef<HgPath>, |
|
76 | 76 | ) -> Result<(), DirstateMapError> { |
|
77 | 77 | for subpath in files::find_dirs(path.as_ref()) { |
|
78 | 78 | if subpath.as_bytes().last() == Some(&b'/') { |
|
79 | 79 | // TODO Remove this once PathAuditor is certified |
|
80 | 80 | // as the only entrypoint for path data |
|
81 | 81 | let second_slash_index = subpath.len() - 1; |
|
82 | 82 | |
|
83 | 83 | return Err(DirstateMapError::InvalidPath( |
|
84 | 84 | HgPathError::ConsecutiveSlashes { |
|
85 | 85 | bytes: path.as_ref().as_bytes().to_owned(), |
|
86 | 86 | second_slash_index, |
|
87 | 87 | }, |
|
88 | 88 | )); |
|
89 | 89 | } |
|
90 | 90 | if let Some(val) = self.inner.get_mut(subpath) { |
|
91 | 91 | *val += 1; |
|
92 | 92 | break; |
|
93 | 93 | } |
|
94 | 94 | self.inner.insert(subpath.to_owned(), 1); |
|
95 | 95 | } |
|
96 | 96 | Ok(()) |
|
97 | 97 | } |
|
98 | 98 | |
|
99 | 99 | /// Decreases the count of deepest directory contained in the path. |
|
100 | 100 | /// |
|
101 | 101 | /// If it is the only reference, decreases all parents until one is |
|
102 | 102 | /// removed. |
|
103 | 103 | /// If the directory is not in the map, something horrible has happened. |
|
104 | 104 | pub fn delete_path( |
|
105 | 105 | &mut self, |
|
106 | 106 | path: impl AsRef<HgPath>, |
|
107 | 107 | ) -> Result<(), DirstateMapError> { |
|
108 | 108 | for subpath in files::find_dirs(path.as_ref()) { |
|
109 | 109 | match self.inner.entry(subpath.to_owned()) { |
|
110 | 110 | Entry::Occupied(mut entry) => { |
|
 111 | let val = entry.get() |
 |
111 | let val = *entry.get(); | |
|
112 | 112 | if val > 1 { |
|
113 | 113 | entry.insert(val - 1); |
|
114 | 114 | break; |
|
115 | 115 | } |
|
116 | 116 | entry.remove(); |
|
117 | 117 | } |
|
118 | 118 | Entry::Vacant(_) => { |
|
119 | 119 | return Err(DirstateMapError::PathNotFound( |
|
120 | 120 | path.as_ref().to_owned(), |
|
121 | 121 | )) |
|
122 | 122 | } |
|
123 | 123 | }; |
|
124 | 124 | } |
|
125 | 125 | |
|
126 | 126 | Ok(()) |
|
127 | 127 | } |
|
128 | 128 | |
|
129 | 129 | pub fn contains(&self, key: impl AsRef<HgPath>) -> bool { |
|
130 | 130 | self.inner.contains_key(key.as_ref()) |
|
131 | 131 | } |
|
132 | 132 | |
|
133 | 133 | pub fn iter(&self) -> DirsMultisetIter { |
|
134 | 134 | self.inner.keys() |
|
135 | 135 | } |
|
136 | 136 | |
|
137 | 137 | pub fn len(&self) -> usize { |
|
138 | 138 | self.inner.len() |
|
139 | 139 | } |
|
140 | ||
|
141 | pub fn is_empty(&self) -> bool { | |
|
142 | self.len() == 0 | |
|
143 | } | |
|
140 | 144 | } |
|
141 | 145 | |
|
142 | 146 | /// This is basically a reimplementation of `DirsMultiset` that stores the |
|
143 | 147 | /// children instead of just a count of them, plus a small optional |
|
144 | 148 | /// optimization to avoid some directories we don't need. |
|
145 | 149 | #[derive(PartialEq, Debug)] |
|
146 | 150 | pub struct DirsChildrenMultiset<'a> { |
|
147 | 151 | inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>, |
|
148 | 152 | only_include: Option<HashSet<&'a HgPath>>, |
|
149 | 153 | } |
|
150 | 154 | |
|
151 | 155 | impl<'a> DirsChildrenMultiset<'a> { |
|
152 | 156 | pub fn new( |
|
153 | 157 | paths: impl Iterator<Item = &'a HgPathBuf>, |
|
154 | 158 | only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>, |
|
155 | 159 | ) -> Self { |
|
156 | 160 | let mut new = Self { |
|
157 | 161 | inner: HashMap::default(), |
|
158 | 162 | only_include: only_include |
|
 159 | .map(|s| s.iter().map( |
 |
163 | .map(|s| s.iter().map(AsRef::as_ref).collect()), | |
|
160 | 164 | }; |
|
161 | 165 | |
|
162 | 166 | for path in paths { |
|
163 | 167 | new.add_path(path) |
|
164 | 168 | } |
|
165 | 169 | |
|
166 | 170 | new |
|
167 | 171 | } |
|
168 | 172 | fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) { |
|
169 | 173 | if path.as_ref().is_empty() { |
|
170 | 174 | return; |
|
171 | 175 | } |
|
172 | 176 | for (directory, basename) in files::find_dirs_with_base(path.as_ref()) |
|
173 | 177 | { |
|
174 | 178 | if !self.is_dir_included(directory) { |
|
175 | 179 | continue; |
|
176 | 180 | } |
|
177 | 181 | self.inner |
|
178 | 182 | .entry(directory) |
|
179 | 183 | .and_modify(|e| { |
|
180 | 184 | e.insert(basename); |
|
181 | 185 | }) |
|
182 | 186 | .or_insert_with(|| { |
|
183 | 187 | let mut set = HashSet::new(); |
|
184 | 188 | set.insert(basename); |
|
185 | 189 | set |
|
186 | 190 | }); |
|
187 | 191 | } |
|
188 | 192 | } |
|
189 | 193 | fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool { |
|
190 | 194 | match &self.only_include { |
|
191 | 195 | None => false, |
|
192 | 196 | Some(i) => i.contains(dir.as_ref()), |
|
193 | 197 | } |
|
194 | 198 | } |
|
195 | 199 | |
|
196 | 200 | pub fn get( |
|
197 | 201 | &self, |
|
198 | 202 | path: impl AsRef<HgPath>, |
|
199 | 203 | ) -> Option<&HashSet<&'a HgPath>> { |
|
200 | 204 | self.inner.get(path.as_ref()) |
|
201 | 205 | } |
|
202 | 206 | } |
|
203 | 207 | |
|
204 | 208 | #[cfg(test)] |
|
205 | 209 | mod tests { |
|
206 | 210 | use super::*; |
|
207 | 211 | |
|
208 | 212 | #[test] |
|
209 | 213 | fn test_delete_path_path_not_found() { |
|
210 | 214 | let manifest: Vec<HgPathBuf> = vec![]; |
|
211 | 215 | let mut map = DirsMultiset::from_manifest(&manifest).unwrap(); |
|
212 | 216 | let path = HgPathBuf::from_bytes(b"doesnotexist/"); |
|
213 | 217 | assert_eq!( |
|
214 | 218 | Err(DirstateMapError::PathNotFound(path.to_owned())), |
|
215 | 219 | map.delete_path(&path) |
|
216 | 220 | ); |
|
217 | 221 | } |
|
218 | 222 | |
|
219 | 223 | #[test] |
|
220 | 224 | fn test_delete_path_empty_path() { |
|
221 | 225 | let mut map = |
|
222 | 226 | DirsMultiset::from_manifest(&vec![HgPathBuf::new()]).unwrap(); |
|
223 | 227 | let path = HgPath::new(b""); |
|
224 | 228 | assert_eq!(Ok(()), map.delete_path(path)); |
|
225 | 229 | assert_eq!( |
|
226 | 230 | Err(DirstateMapError::PathNotFound(path.to_owned())), |
|
227 | 231 | map.delete_path(path) |
|
228 | 232 | ); |
|
229 | 233 | } |
|
230 | 234 | |
|
231 | 235 | #[test] |
|
232 | 236 | fn test_delete_path_successful() { |
|
233 | 237 | let mut map = DirsMultiset { |
|
234 | 238 | inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)] |
|
235 | 239 | .iter() |
|
236 | 240 | .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) |
|
237 | 241 | .collect(), |
|
238 | 242 | }; |
|
239 | 243 | |
|
240 | 244 | assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/"))); |
|
241 | 245 | eprintln!("{:?}", map); |
|
242 | 246 | assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/"))); |
|
243 | 247 | eprintln!("{:?}", map); |
|
244 | 248 | assert_eq!( |
|
245 | 249 | Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes( |
|
246 | 250 | b"a/b/" |
|
247 | 251 | ))), |
|
248 | 252 | map.delete_path(HgPath::new(b"a/b/")) |
|
249 | 253 | ); |
|
250 | 254 | |
|
251 | 255 | assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
252 | 256 | assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap()); |
|
253 | 257 | eprintln!("{:?}", map); |
|
254 | 258 | assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/"))); |
|
255 | 259 | eprintln!("{:?}", map); |
|
256 | 260 | |
|
257 | 261 | assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/c/"))); |
|
258 | 262 | assert_eq!( |
|
259 | 263 | Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes( |
|
260 | 264 | b"a/c/" |
|
261 | 265 | ))), |
|
262 | 266 | map.delete_path(HgPath::new(b"a/c/")) |
|
263 | 267 | ); |
|
264 | 268 | } |
|
265 | 269 | |
|
266 | 270 | #[test] |
|
267 | 271 | fn test_add_path_empty_path() { |
|
268 | 272 | let manifest: Vec<HgPathBuf> = vec![]; |
|
269 | 273 | let mut map = DirsMultiset::from_manifest(&manifest).unwrap(); |
|
270 | 274 | let path = HgPath::new(b""); |
|
271 | 275 | map.add_path(path).unwrap(); |
|
272 | 276 | |
|
273 | 277 | assert_eq!(1, map.len()); |
|
274 | 278 | } |
|
275 | 279 | |
|
276 | 280 | #[test] |
|
277 | 281 | fn test_add_path_successful() { |
|
278 | 282 | let manifest: Vec<HgPathBuf> = vec![]; |
|
279 | 283 | let mut map = DirsMultiset::from_manifest(&manifest).unwrap(); |
|
280 | 284 | |
|
281 | 285 | map.add_path(HgPath::new(b"a/")).unwrap(); |
|
282 | 286 | assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
283 | 287 | assert_eq!(1, *map.inner.get(HgPath::new(b"")).unwrap()); |
|
284 | 288 | assert_eq!(2, map.len()); |
|
285 | 289 | |
|
286 | 290 | // Non directory should be ignored |
|
287 | 291 | map.add_path(HgPath::new(b"a")).unwrap(); |
|
288 | 292 | assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
289 | 293 | assert_eq!(2, map.len()); |
|
290 | 294 | |
|
291 | 295 | // Non directory will still add its base |
|
292 | 296 | map.add_path(HgPath::new(b"a/b")).unwrap(); |
|
293 | 297 | assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
294 | 298 | assert_eq!(2, map.len()); |
|
295 | 299 | |
|
296 | 300 | // Duplicate path works |
|
297 | 301 | map.add_path(HgPath::new(b"a/")).unwrap(); |
|
298 | 302 | assert_eq!(3, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
299 | 303 | |
|
300 | 304 | // Nested dir adds to its base |
|
301 | 305 | map.add_path(HgPath::new(b"a/b/")).unwrap(); |
|
302 | 306 | assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
303 | 307 | assert_eq!(1, *map.inner.get(HgPath::new(b"a/b")).unwrap()); |
|
304 | 308 | |
|
305 | 309 | // but not its base's base, because it already existed |
|
306 | 310 | map.add_path(HgPath::new(b"a/b/c/")).unwrap(); |
|
307 | 311 | assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap()); |
|
308 | 312 | assert_eq!(2, *map.inner.get(HgPath::new(b"a/b")).unwrap()); |
|
309 | 313 | |
|
310 | 314 | map.add_path(HgPath::new(b"a/c/")).unwrap(); |
|
311 | 315 | assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap()); |
|
312 | 316 | |
|
313 | 317 | let expected = DirsMultiset { |
|
314 | 318 | inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)] |
|
315 | 319 | .iter() |
|
316 | 320 | .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) |
|
317 | 321 | .collect(), |
|
318 | 322 | }; |
|
319 | 323 | assert_eq!(map, expected); |
|
320 | 324 | } |
|
321 | 325 | |
|
322 | 326 | #[test] |
|
323 | 327 | fn test_dirsmultiset_new_empty() { |
|
324 | 328 | let manifest: Vec<HgPathBuf> = vec![]; |
|
325 | 329 | let new = DirsMultiset::from_manifest(&manifest).unwrap(); |
|
326 | 330 | let expected = DirsMultiset { |
|
327 | 331 | inner: FastHashMap::default(), |
|
328 | 332 | }; |
|
329 | 333 | assert_eq!(expected, new); |
|
330 | 334 | |
|
331 | 335 | let new = DirsMultiset::from_dirstate(&FastHashMap::default(), None) |
|
332 | 336 | .unwrap(); |
|
333 | 337 | let expected = DirsMultiset { |
|
334 | 338 | inner: FastHashMap::default(), |
|
335 | 339 | }; |
|
336 | 340 | assert_eq!(expected, new); |
|
337 | 341 | } |
|
338 | 342 | |
|
339 | 343 | #[test] |
|
340 | 344 | fn test_dirsmultiset_new_no_skip() { |
|
341 | 345 | let input_vec: Vec<HgPathBuf> = ["a/", "b/", "a/c", "a/d/"] |
|
342 | 346 | .iter() |
|
343 | 347 | .map(|e| HgPathBuf::from_bytes(e.as_bytes())) |
|
344 | 348 | .collect(); |
|
345 | 349 | let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)] |
|
346 | 350 | .iter() |
|
347 | 351 | .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) |
|
348 | 352 | .collect(); |
|
349 | 353 | |
|
350 | 354 | let new = DirsMultiset::from_manifest(&input_vec).unwrap(); |
|
351 | 355 | let expected = DirsMultiset { |
|
352 | 356 | inner: expected_inner, |
|
353 | 357 | }; |
|
354 | 358 | assert_eq!(expected, new); |
|
355 | 359 | |
|
356 | 360 | let input_map = ["a/", "b/", "a/c", "a/d/"] |
|
357 | 361 | .iter() |
|
358 | 362 | .map(|f| { |
|
359 | 363 | ( |
|
360 | 364 | HgPathBuf::from_bytes(f.as_bytes()), |
|
361 | 365 | DirstateEntry { |
|
362 | 366 | state: EntryState::Normal, |
|
363 | 367 | mode: 0, |
|
364 | 368 | mtime: 0, |
|
365 | 369 | size: 0, |
|
366 | 370 | }, |
|
367 | 371 | ) |
|
368 | 372 | }) |
|
369 | 373 | .collect(); |
|
370 | 374 | let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)] |
|
371 | 375 | .iter() |
|
372 | 376 | .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) |
|
373 | 377 | .collect(); |
|
374 | 378 | |
|
375 | 379 | let new = DirsMultiset::from_dirstate(&input_map, None).unwrap(); |
|
376 | 380 | let expected = DirsMultiset { |
|
377 | 381 | inner: expected_inner, |
|
378 | 382 | }; |
|
379 | 383 | assert_eq!(expected, new); |
|
380 | 384 | } |
|
381 | 385 | |
|
382 | 386 | #[test] |
|
383 | 387 | fn test_dirsmultiset_new_skip() { |
|
384 | 388 | let input_map = [ |
|
385 | 389 | ("a/", EntryState::Normal), |
|
386 | 390 | ("a/b/", EntryState::Normal), |
|
387 | 391 | ("a/c", EntryState::Removed), |
|
388 | 392 | ("a/d/", EntryState::Merged), |
|
389 | 393 | ] |
|
390 | 394 | .iter() |
|
391 | 395 | .map(|(f, state)| { |
|
392 | 396 | ( |
|
393 | 397 | HgPathBuf::from_bytes(f.as_bytes()), |
|
394 | 398 | DirstateEntry { |
|
395 | 399 | state: *state, |
|
396 | 400 | mode: 0, |
|
397 | 401 | mtime: 0, |
|
398 | 402 | size: 0, |
|
399 | 403 | }, |
|
400 | 404 | ) |
|
401 | 405 | }) |
|
402 | 406 | .collect(); |
|
403 | 407 | |
|
404 | 408 | // "a" incremented with "a/c" and "a/d/" |
|
405 | 409 | let expected_inner = [("", 1), ("a", 2), ("a/d", 1)] |
|
406 | 410 | .iter() |
|
407 | 411 | .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v)) |
|
408 | 412 | .collect(); |
|
409 | 413 | |
|
410 | 414 | let new = |
|
411 | 415 | DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal)) |
|
412 | 416 | .unwrap(); |
|
413 | 417 | let expected = DirsMultiset { |
|
414 | 418 | inner: expected_inner, |
|
415 | 419 | }; |
|
416 | 420 | assert_eq!(expected, new); |
|
417 | 421 | } |
|
418 | 422 | } |
@@ -1,497 +1,497 b'' | |||
|
1 | 1 | // dirstate_map.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use crate::{ |
|
9 | 9 | dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT}, |
|
10 | 10 | pack_dirstate, parse_dirstate, |
|
11 | 11 | utils::{ |
|
12 | 12 | files::normalize_case, |
|
13 | 13 | hg_path::{HgPath, HgPathBuf}, |
|
14 | 14 | }, |
|
15 | 15 | CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError, |
|
16 | 16 | DirstateParents, DirstateParseError, FastHashMap, StateMap, |
|
17 | 17 | }; |
|
18 | 18 | use core::borrow::Borrow; |
|
19 | 19 | use std::collections::HashSet; |
|
20 | 20 | use std::convert::TryInto; |
|
21 | 21 | use std::iter::FromIterator; |
|
22 | 22 | use std::ops::Deref; |
|
23 | 23 | use std::time::Duration; |
|
24 | 24 | |
|
25 | 25 | pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>; |
|
26 | 26 | |
|
27 | 27 | const NULL_ID: [u8; 20] = [0; 20]; |
|
28 | 28 | const MTIME_UNSET: i32 = -1; |
|
29 | 29 | |
|
30 | 30 | #[derive(Default)] |
|
31 | 31 | pub struct DirstateMap { |
|
32 | 32 | state_map: StateMap, |
|
33 | 33 | pub copy_map: CopyMap, |
|
34 | 34 | file_fold_map: Option<FileFoldMap>, |
|
35 | 35 | pub dirs: Option<DirsMultiset>, |
|
36 | 36 | pub all_dirs: Option<DirsMultiset>, |
|
37 | 37 | non_normal_set: Option<HashSet<HgPathBuf>>, |
|
38 | 38 | other_parent_set: Option<HashSet<HgPathBuf>>, |
|
39 | 39 | parents: Option<DirstateParents>, |
|
40 | 40 | dirty_parents: bool, |
|
41 | 41 | } |
|
42 | 42 | |
|
43 | 43 | /// Should only really be used in python interface code, for clarity |
|
44 | 44 | impl Deref for DirstateMap { |
|
45 | 45 | type Target = StateMap; |
|
46 | 46 | |
|
47 | 47 | fn deref(&self) -> &Self::Target { |
|
48 | 48 | &self.state_map |
|
49 | 49 | } |
|
50 | 50 | } |
|
51 | 51 | |
|
52 | 52 | impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap { |
|
53 | 53 | fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>( |
|
54 | 54 | iter: I, |
|
55 | 55 | ) -> Self { |
|
56 | 56 | Self { |
|
57 | 57 | state_map: iter.into_iter().collect(), |
|
58 | 58 | ..Self::default() |
|
59 | 59 | } |
|
60 | 60 | } |
|
61 | 61 | } |
|
62 | 62 | |
|
63 | 63 | impl DirstateMap { |
|
64 | 64 | pub fn new() -> Self { |
|
65 | 65 | Self::default() |
|
66 | 66 | } |
|
67 | 67 | |
|
68 | 68 | pub fn clear(&mut self) { |
|
69 | 69 | self.state_map.clear(); |
|
70 | 70 | self.copy_map.clear(); |
|
71 | 71 | self.file_fold_map = None; |
|
72 | 72 | self.non_normal_set = None; |
|
73 | 73 | self.other_parent_set = None; |
|
74 | 74 | self.set_parents(&DirstateParents { |
|
75 | 75 | p1: NULL_ID, |
|
76 | 76 | p2: NULL_ID, |
|
77 | 77 | }) |
|
78 | 78 | } |
|
79 | 79 | |
|
80 | 80 | /// Add a tracked file to the dirstate |
|
81 | 81 | pub fn add_file( |
|
82 | 82 | &mut self, |
|
83 | 83 | filename: &HgPath, |
|
84 | 84 | old_state: EntryState, |
|
85 | 85 | entry: DirstateEntry, |
|
86 | 86 | ) -> Result<(), DirstateMapError> { |
|
87 | 87 | if old_state == EntryState::Unknown || old_state == EntryState::Removed |
|
88 | 88 | { |
|
89 | 89 | if let Some(ref mut dirs) = self.dirs { |
|
90 | 90 | dirs.add_path(filename)?; |
|
91 | 91 | } |
|
92 | 92 | } |
|
93 | 93 | if old_state == EntryState::Unknown { |
|
94 | 94 | if let Some(ref mut all_dirs) = self.all_dirs { |
|
95 | 95 | all_dirs.add_path(filename)?; |
|
96 | 96 | } |
|
97 | 97 | } |
|
98 | 98 | self.state_map.insert(filename.to_owned(), entry.to_owned()); |
|
99 | 99 | |
|
100 | 100 | if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET { |
|
101 | 101 | self.get_non_normal_other_parent_entries() |
|
102 | 102 | .0 |
|
103 | 103 | .insert(filename.to_owned()); |
|
104 | 104 | } |
|
105 | 105 | |
|
106 | 106 | if entry.size == SIZE_FROM_OTHER_PARENT { |
|
107 | 107 | self.get_non_normal_other_parent_entries() |
|
108 | 108 | .1 |
|
109 | 109 | .insert(filename.to_owned()); |
|
110 | 110 | } |
|
111 | 111 | Ok(()) |
|
112 | 112 | } |
|
113 | 113 | |
|
114 | 114 | /// Mark a file as removed in the dirstate. |
|
115 | 115 | /// |
|
116 | 116 | /// The `size` parameter is used to store sentinel values that indicate |
|
117 | 117 | /// the file's previous state. In the future, we should refactor this |
|
118 | 118 | /// to be more explicit about what that state is. |
|
119 | 119 | pub fn remove_file( |
|
120 | 120 | &mut self, |
|
121 | 121 | filename: &HgPath, |
|
122 | 122 | old_state: EntryState, |
|
123 | 123 | size: i32, |
|
124 | 124 | ) -> Result<(), DirstateMapError> { |
|
125 | 125 | if old_state != EntryState::Unknown && old_state != EntryState::Removed |
|
126 | 126 | { |
|
127 | 127 | if let Some(ref mut dirs) = self.dirs { |
|
128 | 128 | dirs.delete_path(filename)?; |
|
129 | 129 | } |
|
130 | 130 | } |
|
131 | 131 | if old_state == EntryState::Unknown { |
|
132 | 132 | if let Some(ref mut all_dirs) = self.all_dirs { |
|
133 | 133 | all_dirs.add_path(filename)?; |
|
134 | 134 | } |
|
135 | 135 | } |
|
136 | 136 | |
|
137 | 137 | if let Some(ref mut file_fold_map) = self.file_fold_map { |
|
138 | 138 | file_fold_map.remove(&normalize_case(filename)); |
|
139 | 139 | } |
|
140 | 140 | self.state_map.insert( |
|
141 | 141 | filename.to_owned(), |
|
142 | 142 | DirstateEntry { |
|
143 | 143 | state: EntryState::Removed, |
|
144 | 144 | mode: 0, |
|
145 | 145 | size, |
|
146 | 146 | mtime: 0, |
|
147 | 147 | }, |
|
148 | 148 | ); |
|
149 | 149 | self.get_non_normal_other_parent_entries() |
|
150 | 150 | .0 |
|
151 | 151 | .insert(filename.to_owned()); |
|
152 | 152 | Ok(()) |
|
153 | 153 | } |
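
A hypothetical in-crate caller, to make the sentinel use of `size` concrete. The constant and the signature are the ones above; the path and the old state passed here are made up for illustration:

    use crate::{
        dirstate::SIZE_FROM_OTHER_PARENT, utils::hg_path::HgPath, DirstateMap,
        DirstateMapError, EntryState,
    };

    // Record the removal of a file that only existed in the second merge
    // parent; `size` carries that fact so a later read of the dirstate can
    // tell this removal apart from an ordinary one.
    fn mark_removed_from_other_parent(
        map: &mut DirstateMap,
    ) -> Result<(), DirstateMapError> {
        map.remove_file(
            HgPath::new(b"some/file"), // made-up path
            EntryState::Normal,        // assumed previous state
            SIZE_FROM_OTHER_PARENT,
        )
    }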
|
154 | 154 | |
|
155 | 155 | /// Remove a file from the dirstate. |
|
156 | 156 | /// Returns `true` if the file was previously recorded. |
|
157 | 157 | pub fn drop_file( |
|
158 | 158 | &mut self, |
|
159 | 159 | filename: &HgPath, |
|
160 | 160 | old_state: EntryState, |
|
161 | 161 | ) -> Result<bool, DirstateMapError> { |
|
162 | 162 | let exists = self.state_map.remove(filename).is_some(); |
|
163 | 163 | |
|
164 | 164 | if exists { |
|
165 | 165 | if old_state != EntryState::Removed { |
|
166 | 166 | if let Some(ref mut dirs) = self.dirs { |
|
167 | 167 | dirs.delete_path(filename)?; |
|
168 | 168 | } |
|
169 | 169 | } |
|
170 | 170 | if let Some(ref mut all_dirs) = self.all_dirs { |
|
171 | 171 | all_dirs.delete_path(filename)?; |
|
172 | 172 | } |
|
173 | 173 | } |
|
174 | 174 | if let Some(ref mut file_fold_map) = self.file_fold_map { |
|
175 | 175 | file_fold_map.remove(&normalize_case(filename)); |
|
176 | 176 | } |
|
177 | 177 | self.get_non_normal_other_parent_entries() |
|
178 | 178 | .0 |
|
179 | 179 | .remove(filename); |
|
180 | 180 | |
|
181 | 181 | Ok(exists) |
|
182 | 182 | } |
|
183 | 183 | |
|
184 | 184 | pub fn clear_ambiguous_times( |
|
185 | 185 | &mut self, |
|
186 | 186 | filenames: Vec<HgPathBuf>, |
|
187 | 187 | now: i32, |
|
188 | 188 | ) { |
|
189 | 189 | for filename in filenames { |
|
190 | 190 | let mut changed = false; |
|
191 | 191 | self.state_map |
|
192 | 192 | .entry(filename.to_owned()) |
|
193 | 193 | .and_modify(|entry| { |
|
194 | 194 | if entry.state == EntryState::Normal && entry.mtime == now |
|
195 | 195 | { |
|
196 | 196 | changed = true; |
|
197 | 197 | *entry = DirstateEntry { |
|
198 | 198 | mtime: MTIME_UNSET, |
|
199 | 199 | ..*entry |
|
200 | 200 | }; |
|
201 | 201 | } |
|
202 | 202 | }); |
|
203 | 203 | if changed { |
|
204 | 204 | self.get_non_normal_other_parent_entries() |
|
205 | 205 | .0 |
|
206 | 206 | .insert(filename.to_owned()); |
|
207 | 207 | } |
|
208 | 208 | } |
|
209 | 209 | } |
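
The rule above exists because of timestamp granularity: an entry whose recorded mtime equals the second in which the dirstate is being written could still be modified again within that same second without its size changing, so its mtime is invalidated and the next status has to compare contents. A minimal standalone restatement of the condition (the real code also requires the state to be Normal, as shown above):

    const MTIME_UNSET: i32 = -1;

    // Invalidate timestamps that fall in the ambiguous "same second" window.
    fn clear_if_ambiguous(entry_mtime: i32, now: i32) -> i32 {
        if entry_mtime == now { MTIME_UNSET } else { entry_mtime }
    }

    fn main() {
        assert_eq!(clear_if_ambiguous(1_581_000_000, 1_581_000_000), MTIME_UNSET);
        assert_eq!(clear_if_ambiguous(1_580_999_999, 1_581_000_000), 1_580_999_999);
    }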
|
210 | 210 | |
|
211 | 211 | pub fn non_normal_entries_remove( |
|
212 | 212 | &mut self, |
|
213 | 213 | key: impl AsRef<HgPath>, |
|
214 | 214 | ) -> bool { |
|
215 | 215 | self.get_non_normal_other_parent_entries() |
|
216 | 216 | .0 |
|
217 | 217 | .remove(key.as_ref()) |
|
218 | 218 | } |
|
219 | 219 | pub fn non_normal_entries_union( |
|
220 | 220 | &mut self, |
|
221 | 221 | other: HashSet<HgPathBuf>, |
|
222 | 222 | ) -> Vec<HgPathBuf> { |
|
223 | 223 | self.get_non_normal_other_parent_entries() |
|
224 | 224 | .0 |
|
225 | 225 | .union(&other) |

226 | .map( |

226 | .map(ToOwned::to_owned) | |

227 | 227 | .collect() |
|
228 | 228 | } |
|
229 | 229 | |
|
230 | 230 | pub fn get_non_normal_other_parent_entries( |
|
231 | 231 | &mut self, |
|
232 | 232 | ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) { |
|
233 | 233 | self.set_non_normal_other_parent_entries(false); |
|
234 | 234 | ( |
|
235 | 235 | self.non_normal_set.as_mut().unwrap(), |
|
236 | 236 | self.other_parent_set.as_mut().unwrap(), |
|
237 | 237 | ) |
|
238 | 238 | } |
|
239 | 239 | |
|
240 | 240 | /// Useful to get immutable references to those sets in contexts where |
|
241 | 241 | /// you only have an immutable reference to the `DirstateMap`, like when |
|
242 | 242 | /// sharing references with Python. |
|
243 | 243 | /// |
|
244 | 244 | /// TODO, get rid of this along with the other "setter/getter" stuff when |
|
245 | 245 | /// a nice typestate plan is defined. |
|
246 | 246 | /// |
|
247 | 247 | /// # Panics |
|
248 | 248 | /// |
|
249 | 249 | /// Will panic if either set is `None`. |
|
250 | 250 | pub fn get_non_normal_other_parent_entries_panic( |
|
251 | 251 | &self, |
|
252 | 252 | ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) { |
|
253 | 253 | ( |
|
254 | 254 | self.non_normal_set.as_ref().unwrap(), |
|
255 | 255 | self.other_parent_set.as_ref().unwrap(), |
|
256 | 256 | ) |
|
257 | 257 | } |
|
258 | 258 | |
|
259 | 259 | pub fn set_non_normal_other_parent_entries(&mut self, force: bool) { |
|
260 | 260 | if !force |
|
261 | 261 | && self.non_normal_set.is_some() |
|
262 | 262 | && self.other_parent_set.is_some() |
|
263 | 263 | { |
|
264 | 264 | return; |
|
265 | 265 | } |
|
266 | 266 | let mut non_normal = HashSet::new(); |
|
267 | 267 | let mut other_parent = HashSet::new(); |
|
268 | 268 | |
|
269 | 269 | for ( |
|
270 | 270 | filename, |
|
271 | 271 | DirstateEntry { |
|
272 | 272 | state, size, mtime, .. |
|
273 | 273 | }, |
|
274 | 274 | ) in self.state_map.iter() |
|
275 | 275 | { |
|
276 | 276 | if *state != EntryState::Normal || *mtime == MTIME_UNSET { |
|
277 | 277 | non_normal.insert(filename.to_owned()); |
|
278 | 278 | } |
|
279 | 279 | if *state == EntryState::Normal && *size == SIZE_FROM_OTHER_PARENT |
|
280 | 280 | { |
|
281 | 281 | other_parent.insert(filename.to_owned()); |
|
282 | 282 | } |
|
283 | 283 | } |
|
284 | 284 | self.non_normal_set = Some(non_normal); |
|
285 | 285 | self.other_parent_set = Some(other_parent); |
|
286 | 286 | } |
|
287 | 287 | |
|
288 | 288 | /// Both of these setters and their uses appear to be the simplest way to |
|
289 | 289 | /// emulate a Python lazy property, but it is ugly and unidiomatic. |
|
290 | 290 | /// TODO One day, rewriting this struct using the typestate might be a |
|
291 | 291 | /// good idea. |
|
292 | 292 | pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { |
|
293 | 293 | if self.all_dirs.is_none() { |
|
294 | 294 | self.all_dirs = |
|
295 | 295 | Some(DirsMultiset::from_dirstate(&self.state_map, None)?); |
|
296 | 296 | } |
|
297 | 297 | Ok(()) |
|
298 | 298 | } |
|
299 | 299 | |
|
300 | 300 | pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> { |
|
301 | 301 | if self.dirs.is_none() { |
|
302 | 302 | self.dirs = Some(DirsMultiset::from_dirstate( |
|
303 | 303 | &self.state_map, |
|
304 | 304 | Some(EntryState::Removed), |
|
305 | 305 | )?); |
|
306 | 306 | } |
|
307 | 307 | Ok(()) |
|
308 | 308 | } |
|
309 | 309 | |
|
310 | 310 | pub fn has_tracked_dir( |
|
311 | 311 | &mut self, |
|
312 | 312 | directory: &HgPath, |
|
313 | 313 | ) -> Result<bool, DirstateMapError> { |
|
314 | 314 | self.set_dirs()?; |
|
315 | 315 | Ok(self.dirs.as_ref().unwrap().contains(directory)) |
|
316 | 316 | } |
|
317 | 317 | |
|
318 | 318 | pub fn has_dir( |
|
319 | 319 | &mut self, |
|
320 | 320 | directory: &HgPath, |
|
321 | 321 | ) -> Result<bool, DirstateMapError> { |
|
322 | 322 | self.set_all_dirs()?; |
|
323 | 323 | Ok(self.all_dirs.as_ref().unwrap().contains(directory)) |
|
324 | 324 | } |
|
325 | 325 | |
|
326 | 326 | pub fn parents( |
|
327 | 327 | &mut self, |
|
328 | 328 | file_contents: &[u8], |
|
329 | 329 | ) -> Result<&DirstateParents, DirstateError> { |
|
330 | 330 | if let Some(ref parents) = self.parents { |
|
331 | 331 | return Ok(parents); |
|
332 | 332 | } |
|
333 | 333 | let parents; |
|
334 | 334 | if file_contents.len() == PARENT_SIZE * 2 { |
|
335 | 335 | parents = DirstateParents { |
|
336 | 336 | p1: file_contents[..PARENT_SIZE].try_into().unwrap(), |
|
337 | 337 | p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2] |
|
338 | 338 | .try_into() |
|
339 | 339 | .unwrap(), |
|
340 | 340 | }; |
|
341 | 341 | } else if file_contents.is_empty() { |
|
342 | 342 | parents = DirstateParents { |
|
343 | 343 | p1: NULL_ID, |
|
344 | 344 | p2: NULL_ID, |
|
345 | 345 | }; |
|
346 | 346 | } else { |
|
347 | 347 | return Err(DirstateError::Parse(DirstateParseError::Damaged)); |
|
348 | 348 | } |
|
349 | 349 | |
|
350 | 350 | self.parents = Some(parents); |
|
351 | 351 | Ok(self.parents.as_ref().unwrap()) |
|
352 | 352 | } |
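
A standalone sketch of the header handling above: the first 40 bytes of a dirstate are two 20-byte parent nodeids, an empty file means null parents, and any other length is reported as damaged (DirstateParseError::Damaged above).

    // Not hg-core code; same branch structure as `parents()`, minus the
    // surrounding types.
    fn split_parents(data: &[u8]) -> Result<([u8; 20], [u8; 20]), &'static str> {
        use std::convert::TryInto;
        const PARENT_SIZE: usize = 20;
        match data.len() {
            0 => Ok(([0; 20], [0; 20])), // no parents recorded yet
            l if l == PARENT_SIZE * 2 => Ok((
                data[..PARENT_SIZE].try_into().unwrap(),
                data[PARENT_SIZE..].try_into().unwrap(),
            )),
            _ => Err("damaged dirstate header"),
        }
    }

    fn main() {
        let header = b"1234567891011121314100000000000000000000";
        let (p1, p2) = split_parents(header).unwrap();
        assert_eq!(&p1, b"12345678910111213141");
        assert_eq!(&p2, b"00000000000000000000");
    }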
|
353 | 353 | |
|
354 | 354 | pub fn set_parents(&mut self, parents: &DirstateParents) { |
|
355 | 355 | self.parents = Some(parents.clone()); |
|
356 | 356 | self.dirty_parents = true; |
|
357 | 357 | } |
|
358 | 358 | |
|
359 | 359 | pub fn read( |
|
360 | 360 | &mut self, |
|
361 | 361 | file_contents: &[u8], |
|
362 | 362 | ) -> Result<Option<DirstateParents>, DirstateError> { |
|
363 | 363 | if file_contents.is_empty() { |
|
364 | 364 | return Ok(None); |
|
365 | 365 | } |
|
366 | 366 | |
|
367 | 367 | let parents = parse_dirstate( |
|
368 | 368 | &mut self.state_map, |
|
369 | 369 | &mut self.copy_map, |
|
370 | 370 | file_contents, |
|
371 | 371 | )?; |
|
372 | 372 | |
|
373 | 373 | if !self.dirty_parents { |
|
374 | 374 | self.set_parents(&parents); |
|
375 | 375 | } |
|
376 | 376 | |
|
377 | 377 | Ok(Some(parents)) |
|
378 | 378 | } |
|
379 | 379 | |
|
380 | 380 | pub fn pack( |
|
381 | 381 | &mut self, |
|
382 | 382 | parents: DirstateParents, |
|
383 | 383 | now: Duration, |
|
384 | 384 | ) -> Result<Vec<u8>, DirstateError> { |
|
385 | 385 | let packed = |
|
386 | 386 | pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?; |
|
387 | 387 | |
|
388 | 388 | self.dirty_parents = false; |
|
389 | 389 | |
|
390 | 390 | self.set_non_normal_other_parent_entries(true); |
|
391 | 391 | Ok(packed) |
|
392 | 392 | } |
|
393 | 393 | |
|
394 | 394 | pub fn build_file_fold_map(&mut self) -> &FileFoldMap { |
|
395 | 395 | if let Some(ref file_fold_map) = self.file_fold_map { |
|
396 | 396 | return file_fold_map; |
|
397 | 397 | } |
|
398 | 398 | let mut new_file_fold_map = FileFoldMap::default(); |
|
399 | 399 | for (filename, DirstateEntry { state, .. }) in self.state_map.borrow() |
|
400 | 400 | { |
|
401 | 401 | if *state == EntryState::Removed { |
|
402 | 402 | new_file_fold_map |
|
403 | 403 | .insert(normalize_case(filename), filename.to_owned()); |
|
404 | 404 | } |
|
405 | 405 | } |
|
406 | 406 | self.file_fold_map = Some(new_file_fold_map); |
|
407 | 407 | self.file_fold_map.as_ref().unwrap() |
|
408 | 408 | } |
|
409 | 409 | } |
|
410 | 410 | |
|
411 | 411 | #[cfg(test)] |
|
412 | 412 | mod tests { |
|
413 | 413 | use super::*; |
|
414 | 414 | |
|
415 | 415 | #[test] |
|
416 | 416 | fn test_dirs_multiset() { |
|
417 | 417 | let mut map = DirstateMap::new(); |
|
418 | 418 | assert!(map.dirs.is_none()); |
|
419 | 419 | assert!(map.all_dirs.is_none()); |
|
420 | 420 | |
|
421 | 421 | assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false); |
|
422 | 422 | assert!(map.all_dirs.is_some()); |
|
423 | 423 | assert!(map.dirs.is_none()); |
|
424 | 424 | |
|
425 | 425 | assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false); |
|
426 | 426 | assert!(map.dirs.is_some()); |
|
427 | 427 | } |
|
428 | 428 | |
|
429 | 429 | #[test] |
|
430 | 430 | fn test_add_file() { |
|
431 | 431 | let mut map = DirstateMap::new(); |
|
432 | 432 | |
|
433 | 433 | assert_eq!(0, map.len()); |
|
434 | 434 | |
|
435 | 435 | map.add_file( |
|
436 | 436 | HgPath::new(b"meh"), |
|
437 | 437 | EntryState::Normal, |
|
438 | 438 | DirstateEntry { |
|
439 | 439 | state: EntryState::Normal, |
|
440 | 440 | mode: 1337, |
|
441 | 441 | mtime: 1337, |
|
442 | 442 | size: 1337, |
|
443 | 443 | }, |
|
444 | 444 | ) |
|
445 | 445 | .unwrap(); |
|
446 | 446 | |
|
447 | 447 | assert_eq!(1, map.len()); |
|
448 | 448 | assert_eq!(0, map.get_non_normal_other_parent_entries().0.len()); |
|
449 | 449 | assert_eq!(0, map.get_non_normal_other_parent_entries().1.len()); |
|
450 | 450 | } |
|
451 | 451 | |
|
452 | 452 | #[test] |
|
453 | 453 | fn test_non_normal_other_parent_entries() { |
|
454 | 454 | let mut map: DirstateMap = [ |
|
455 | 455 | (b"f1", (EntryState::Removed, 1337, 1337, 1337)), |
|
456 | 456 | (b"f2", (EntryState::Normal, 1337, 1337, -1)), |
|
457 | 457 | (b"f3", (EntryState::Normal, 1337, 1337, 1337)), |
|
458 | 458 | (b"f4", (EntryState::Normal, 1337, -2, 1337)), |
|
459 | 459 | (b"f5", (EntryState::Added, 1337, 1337, 1337)), |
|
460 | 460 | (b"f6", (EntryState::Added, 1337, 1337, -1)), |
|
461 | 461 | (b"f7", (EntryState::Merged, 1337, 1337, -1)), |
|
462 | 462 | (b"f8", (EntryState::Merged, 1337, 1337, 1337)), |
|
463 | 463 | (b"f9", (EntryState::Merged, 1337, -2, 1337)), |
|
464 | 464 | (b"fa", (EntryState::Added, 1337, -2, 1337)), |
|
465 | 465 | (b"fb", (EntryState::Removed, 1337, -2, 1337)), |
|
466 | 466 | ] |
|
467 | 467 | .iter() |
|
468 | 468 | .map(|(fname, (state, mode, size, mtime))| { |
|
469 | 469 | ( |
|
470 | 470 | HgPathBuf::from_bytes(fname.as_ref()), |
|
471 | 471 | DirstateEntry { |
|
472 | 472 | state: *state, |
|
473 | 473 | mode: *mode, |
|
474 | 474 | size: *size, |
|
475 | 475 | mtime: *mtime, |
|
476 | 476 | }, |
|
477 | 477 | ) |
|
478 | 478 | }) |
|
479 | 479 | .collect(); |
|
480 | 480 | |
|
481 | 481 | let mut non_normal = [ |
|
482 | 482 | b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb", |
|
483 | 483 | ] |
|
484 | 484 | .iter() |
|
485 | 485 | .map(|x| HgPathBuf::from_bytes(x.as_ref())) |
|
486 | 486 | .collect(); |
|
487 | 487 | |
|
488 | 488 | let mut other_parent = HashSet::new(); |
|
489 | 489 | other_parent.insert(HgPathBuf::from_bytes(b"f4")); |
|
490 | 490 | let entries = map.get_non_normal_other_parent_entries(); |
|
491 | 491 | |
|
492 | 492 | assert_eq!( |
|
493 | 493 | (&mut non_normal, &mut other_parent), |
|
494 | 494 | (entries.0, entries.1) |
|
495 | 495 | ); |
|
496 | 496 | } |
|
497 | 497 | } |
@@ -1,435 +1,435 b'' | |||
|
1 | 1 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
2 | 2 | // |
|
3 | 3 | // This software may be used and distributed according to the terms of the |
|
4 | 4 | // GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | 6 | use crate::utils::hg_path::HgPath; |
|
7 | 7 | use crate::{ |
|
8 | 8 | dirstate::{CopyMap, EntryState, StateMap}, |
|
9 | 9 | DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError, |
|
10 | 10 | }; |
|
11 | 11 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; |
|
12 | 12 | use micro_timer::timed; |
|
13 | 13 | use std::convert::{TryFrom, TryInto}; |
|
14 | 14 | use std::io::Cursor; |
|
15 | 15 | use std::time::Duration; |
|
16 | 16 | |
|
17 | 17 | /// Parents are stored in the dirstate as byte hashes. |
|
18 | 18 | pub const PARENT_SIZE: usize = 20; |
|
19 | 19 | /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. |
|
20 | 20 | const MIN_ENTRY_SIZE: usize = 17; |
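
The value 17 follows from the fixed-width prefix of every entry: one state byte plus four big-endian i32 fields (mode, size, mtime, path length), i.e. (8 + 4 * 32) / 8 = 17 bytes; the variable-length path, optionally followed by a NUL and a copy source, comes after it. A standalone sketch of that layout, reusing the field values from the packing tests below so the bytes match their expected vectors:

    // Not hg-core code: builds one record by hand to make the on-disk layout
    // used by `parse_dirstate`/`pack_dirstate` explicit. b'n' is the tag for
    // EntryState::Normal.
    fn main() {
        let mut packed: Vec<u8> = Vec::new();
        packed.extend_from_slice(b"12345678910111213141"); // p1 (20 bytes)
        packed.extend_from_slice(b"00000000000000000000"); // p2 (20 bytes)

        let path = b"f1\0copyname"; // filename, NUL separator, copy source
        packed.push(b'n');                                            // state
        packed.extend_from_slice(&0o644i32.to_be_bytes());            // mode
        packed.extend_from_slice(&0i32.to_be_bytes());                // size
        packed.extend_from_slice(&791231220i32.to_be_bytes());        // mtime
        packed.extend_from_slice(&(path.len() as i32).to_be_bytes()); // path length
        packed.extend_from_slice(path);

        assert_eq!(packed.len(), 40 + 17 + path.len()); // MIN_ENTRY_SIZE = 17
        println!("{:?}", packed);
    }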
|
21 | 21 | |
|
22 | 22 | // TODO parse/pack: is mutate-on-loop better for performance? |
|
23 | 23 | |
|
24 | 24 | #[timed] |
|
25 | 25 | pub fn parse_dirstate( |
|
26 | 26 | state_map: &mut StateMap, |
|
27 | 27 | copy_map: &mut CopyMap, |
|
28 | 28 | contents: &[u8], |
|
29 | 29 | ) -> Result<DirstateParents, DirstateParseError> { |
|
30 | 30 | if contents.len() < PARENT_SIZE * 2 { |
|
31 | 31 | return Err(DirstateParseError::TooLittleData); |
|
32 | 32 | } |
|
33 | 33 | |
|
34 | 34 | let mut curr_pos = PARENT_SIZE * 2; |
|
35 | 35 | let parents = DirstateParents { |
|
36 | 36 | p1: contents[..PARENT_SIZE].try_into().unwrap(), |
|
37 | 37 | p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(), |
|
38 | 38 | }; |
|
39 | 39 | |
|
40 | 40 | while curr_pos < contents.len() { |
|
41 | 41 | if curr_pos + MIN_ENTRY_SIZE > contents.len() { |
|
42 | 42 | return Err(DirstateParseError::Overflow); |
|
43 | 43 | } |
|
44 | 44 | let entry_bytes = &contents[curr_pos..]; |
|
45 | 45 | |
|
46 | 46 | let mut cursor = Cursor::new(entry_bytes); |
|
47 | 47 | let state = EntryState::try_from(cursor.read_u8()?)?; |
|
48 | 48 | let mode = cursor.read_i32::<BigEndian>()?; |
|
49 | 49 | let size = cursor.read_i32::<BigEndian>()?; |
|
50 | 50 | let mtime = cursor.read_i32::<BigEndian>()?; |
|
51 | 51 | let path_len = cursor.read_i32::<BigEndian>()? as usize; |
|
52 | 52 | |
|
53 | 53 | if path_len > contents.len() - curr_pos { |
|
54 | 54 | return Err(DirstateParseError::Overflow); |
|
55 | 55 | } |
|
56 | 56 | |
|
57 | 57 | // Slice instead of allocating a Vec needed for `read_exact` |
|
58 | 58 | let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; |
|
59 | 59 | |
|
60 | 60 | let (path, copy) = match memchr::memchr(0, path) { |
|
61 | 61 | None => (path, None), |
|
62 | 62 | Some(i) => (&path[..i], Some(&path[(i + 1)..])), |
|
63 | 63 | }; |
|
64 | 64 | |
|
65 | 65 | if let Some(copy_path) = copy { |
|
66 | 66 | copy_map.insert( |
|
67 | 67 | HgPath::new(path).to_owned(), |
|
68 | 68 | HgPath::new(copy_path).to_owned(), |
|
69 | 69 | ); |
|
70 | 70 | }; |
|
71 | 71 | state_map.insert( |
|
72 | 72 | HgPath::new(path).to_owned(), |
|
73 | 73 | DirstateEntry { |
|
74 | 74 | state, |
|
75 | 75 | mode, |
|
76 | 76 | size, |
|
77 | 77 | mtime, |
|
78 | 78 | }, |
|
79 | 79 | ); |
|
80 | 80 | curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); |
|
81 | 81 | } |
|
82 | 82 | |
|
83 | 83 | Ok(parents) |
|
84 | 84 | } |
|
85 | 85 | |
|
86 | 86 | /// `now` is the duration in seconds since the Unix epoch |
|
87 | 87 | pub fn pack_dirstate( |
|
88 | 88 | state_map: &mut StateMap, |
|
89 | 89 | copy_map: &CopyMap, |
|
90 | 90 | parents: DirstateParents, |
|
91 | 91 | now: Duration, |
|
92 | 92 | ) -> Result<Vec<u8>, DirstatePackError> { |
|
93 | 93 | // TODO move away from i32 before 2038. |
|
94 | 94 | let now: i32 = now.as_secs().try_into().expect("time overflow"); |
|
95 | 95 | |
|
96 | 96 | let expected_size: usize = state_map |
|
97 | 97 | .iter() |
|
98 | 98 | .map(|(filename, _)| { |
|
99 | 99 | let mut length = MIN_ENTRY_SIZE + filename.len(); |
|
100 | 100 | if let Some(copy) = copy_map.get(filename) { |
|
101 | 101 | length += copy.len() + 1; |
|
102 | 102 | } |
|
103 | 103 | length |
|
104 | 104 | }) |
|
105 | 105 | .sum(); |
|
106 | 106 | let expected_size = expected_size + PARENT_SIZE * 2; |
|
107 | 107 | |
|
108 | 108 | let mut packed = Vec::with_capacity(expected_size); |
|
109 | 109 | let mut new_state_map = vec![]; |
|
110 | 110 | |
|
111 | 111 | packed.extend(&parents.p1); |
|
112 | 112 | packed.extend(&parents.p2); |
|
113 | 113 | |
|
114 | 114 | for (filename, entry) in state_map.iter() { |
|
115 | 115 | let new_filename = filename.to_owned(); |
|
116 | 116 | let mut new_mtime: i32 = entry.mtime; |
|
117 | 117 | if entry.state == EntryState::Normal && entry.mtime == now { |
|
118 | 118 | // The file was last modified "simultaneously" with the current |
|
119 | 119 | // write to dirstate (i.e. within the same second for file- |
|
120 | 120 | // systems with a granularity of 1 sec). This commonly happens |
|
121 | 121 | // for at least a couple of files on 'update'. |
|
122 | 122 | // The user could change the file without changing its size |
|
123 | 123 | // within the same second. Invalidate the file's mtime in |
|
124 | 124 | // dirstate, forcing future 'status' calls to compare the |
|
125 | 125 | // contents of the file if the size is the same. This prevents |
|
126 | 126 | // mistakenly treating such files as clean. |
|
127 | 127 | new_mtime = -1; |
|
128 | 128 | new_state_map.push(( |
|
129 | 129 | filename.to_owned(), |
|
130 | 130 | DirstateEntry { |
|
131 | 131 | mtime: new_mtime, |
|
132 | 132 | ..*entry |
|
133 | 133 | }, |
|
134 | 134 | )); |
|
135 | 135 | } |
|
136 | 136 | let mut new_filename = new_filename.into_vec(); |
|
137 | 137 | if let Some(copy) = copy_map.get(filename) { |

138 | new_filename.push('\0' |

138 | new_filename.push(b'\0'); | |

139 | 139 | new_filename.extend(copy.bytes()); |
|
140 | 140 | } |
|
141 | 141 | |
|
142 | 142 | packed.write_u8(entry.state.into())?; |
|
143 | 143 | packed.write_i32::<BigEndian>(entry.mode)?; |
|
144 | 144 | packed.write_i32::<BigEndian>(entry.size)?; |
|
145 | 145 | packed.write_i32::<BigEndian>(new_mtime)?; |
|
146 | 146 | packed.write_i32::<BigEndian>(new_filename.len() as i32)?; |
|
147 | 147 | packed.extend(new_filename) |
|
148 | 148 | } |
|
149 | 149 | |
|
150 | 150 | if packed.len() != expected_size { |
|
151 | 151 | return Err(DirstatePackError::BadSize(expected_size, packed.len())); |
|
152 | 152 | } |
|
153 | 153 | |
|
154 | 154 | state_map.extend(new_state_map); |
|
155 | 155 | |
|
156 | 156 | Ok(packed) |
|
157 | 157 | } |
|
158 | 158 | |
|
159 | 159 | #[cfg(test)] |
|
160 | 160 | mod tests { |
|
161 | 161 | use super::*; |
|
162 | 162 | use crate::{utils::hg_path::HgPathBuf, FastHashMap}; |
|
163 | 163 | |
|
164 | 164 | #[test] |
|
165 | 165 | fn test_pack_dirstate_empty() { |
|
166 | 166 | let mut state_map: StateMap = FastHashMap::default(); |
|
167 | 167 | let copymap = FastHashMap::default(); |
|
168 | 168 | let parents = DirstateParents { |
|
169 | 169 | p1: *b"12345678910111213141", |
|
170 | 170 | p2: *b"00000000000000000000", |
|
171 | 171 | }; |
|
172 | 172 | let now = Duration::new(15000000, 0); |
|
173 | 173 | let expected = b"1234567891011121314100000000000000000000".to_vec(); |
|
174 | 174 | |
|
175 | 175 | assert_eq!( |
|
176 | 176 | expected, |
|
177 | 177 | pack_dirstate(&mut state_map, ©map, parents, now).unwrap() |
|
178 | 178 | ); |
|
179 | 179 | |
|
180 | 180 | assert!(state_map.is_empty()) |
|
181 | 181 | } |
|
182 | 182 | #[test] |
|
183 | 183 | fn test_pack_dirstate_one_entry() { |
|
184 | 184 | let expected_state_map: StateMap = [( |
|
185 | 185 | HgPathBuf::from_bytes(b"f1"), |
|
186 | 186 | DirstateEntry { |
|
187 | 187 | state: EntryState::Normal, |
|
188 | 188 | mode: 0o644, |
|
189 | 189 | size: 0, |
|
190 | 190 | mtime: 791231220, |
|
191 | 191 | }, |
|
192 | 192 | )] |
|
193 | 193 | .iter() |
|
194 | 194 | .cloned() |
|
195 | 195 | .collect(); |
|
196 | 196 | let mut state_map = expected_state_map.clone(); |
|
197 | 197 | |
|
198 | 198 | let copymap = FastHashMap::default(); |
|
199 | 199 | let parents = DirstateParents { |
|
200 | 200 | p1: *b"12345678910111213141", |
|
201 | 201 | p2: *b"00000000000000000000", |
|
202 | 202 | }; |
|
203 | 203 | let now = Duration::new(15000000, 0); |
|
204 | 204 | let expected = [ |
|
205 | 205 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49, |
|
206 | 206 | 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, |
|
207 | 207 | 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47, |
|
208 | 208 | 41, 58, 244, 0, 0, 0, 2, 102, 49, |
|
209 | 209 | ] |
|
210 | 210 | .to_vec(); |
|
211 | 211 | |
|
212 | 212 | assert_eq!( |
|
213 | 213 | expected, |
|
214 | 214 | pack_dirstate(&mut state_map, ©map, parents, now).unwrap() |
|
215 | 215 | ); |
|
216 | 216 | |
|
217 | 217 | assert_eq!(expected_state_map, state_map); |
|
218 | 218 | } |
|
219 | 219 | #[test] |
|
220 | 220 | fn test_pack_dirstate_one_entry_with_copy() { |
|
221 | 221 | let expected_state_map: StateMap = [( |
|
222 | 222 | HgPathBuf::from_bytes(b"f1"), |
|
223 | 223 | DirstateEntry { |
|
224 | 224 | state: EntryState::Normal, |
|
225 | 225 | mode: 0o644, |
|
226 | 226 | size: 0, |
|
227 | 227 | mtime: 791231220, |
|
228 | 228 | }, |
|
229 | 229 | )] |
|
230 | 230 | .iter() |
|
231 | 231 | .cloned() |
|
232 | 232 | .collect(); |
|
233 | 233 | let mut state_map = expected_state_map.clone(); |
|
234 | 234 | let mut copymap = FastHashMap::default(); |
|
235 | 235 | copymap.insert( |
|
236 | 236 | HgPathBuf::from_bytes(b"f1"), |
|
237 | 237 | HgPathBuf::from_bytes(b"copyname"), |
|
238 | 238 | ); |
|
239 | 239 | let parents = DirstateParents { |
|
240 | 240 | p1: *b"12345678910111213141", |
|
241 | 241 | p2: *b"00000000000000000000", |
|
242 | 242 | }; |
|
243 | 243 | let now = Duration::new(15000000, 0); |
|
244 | 244 | let expected = [ |
|
245 | 245 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49, |
|
246 | 246 | 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, |
|
247 | 247 | 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47, |
|
248 | 248 | 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97, |
|
249 | 249 | 109, 101, |
|
250 | 250 | ] |
|
251 | 251 | .to_vec(); |
|
252 | 252 | |
|
253 | 253 | assert_eq!( |
|
254 | 254 | expected, |
|
255 | 255 | pack_dirstate(&mut state_map, ©map, parents, now).unwrap() |
|
256 | 256 | ); |
|
257 | 257 | assert_eq!(expected_state_map, state_map); |
|
258 | 258 | } |
|
259 | 259 | |
|
260 | 260 | #[test] |
|
261 | 261 | fn test_parse_pack_one_entry_with_copy() { |
|
262 | 262 | let mut state_map: StateMap = [( |
|
263 | 263 | HgPathBuf::from_bytes(b"f1"), |
|
264 | 264 | DirstateEntry { |
|
265 | 265 | state: EntryState::Normal, |
|
266 | 266 | mode: 0o644, |
|
267 | 267 | size: 0, |
|
268 | 268 | mtime: 791231220, |
|
269 | 269 | }, |
|
270 | 270 | )] |
|
271 | 271 | .iter() |
|
272 | 272 | .cloned() |
|
273 | 273 | .collect(); |
|
274 | 274 | let mut copymap = FastHashMap::default(); |
|
275 | 275 | copymap.insert( |
|
276 | 276 | HgPathBuf::from_bytes(b"f1"), |
|
277 | 277 | HgPathBuf::from_bytes(b"copyname"), |
|
278 | 278 | ); |
|
279 | 279 | let parents = DirstateParents { |
|
280 | 280 | p1: *b"12345678910111213141", |
|
281 | 281 | p2: *b"00000000000000000000", |
|
282 | 282 | }; |
|
283 | 283 | let now = Duration::new(15000000, 0); |
|
284 | 284 | let result = |
|
285 | 285 | pack_dirstate(&mut state_map, ©map, parents.clone(), now) |
|
286 | 286 | .unwrap(); |
|
287 | 287 | |
|
288 | 288 | let mut new_state_map: StateMap = FastHashMap::default(); |
|
289 | 289 | let mut new_copy_map: CopyMap = FastHashMap::default(); |
|
290 | 290 | let new_parents = parse_dirstate( |
|
291 | 291 | &mut new_state_map, |
|
292 | 292 | &mut new_copy_map, |
|
293 | 293 | result.as_slice(), |
|
294 | 294 | ) |
|
295 | 295 | .unwrap(); |
|
296 | 296 | assert_eq!( |
|
297 | 297 | (parents, state_map, copymap), |
|
298 | 298 | (new_parents, new_state_map, new_copy_map) |
|
299 | 299 | ) |
|
300 | 300 | } |
|
301 | 301 | |
|
302 | 302 | #[test] |
|
303 | 303 | fn test_parse_pack_multiple_entries_with_copy() { |
|
304 | 304 | let mut state_map: StateMap = [ |
|
305 | 305 | ( |
|
306 | 306 | HgPathBuf::from_bytes(b"f1"), |
|
307 | 307 | DirstateEntry { |
|
308 | 308 | state: EntryState::Normal, |
|
309 | 309 | mode: 0o644, |
|
310 | 310 | size: 0, |
|
311 | 311 | mtime: 791231220, |
|
312 | 312 | }, |
|
313 | 313 | ), |
|
314 | 314 | ( |
|
315 | 315 | HgPathBuf::from_bytes(b"f2"), |
|
316 | 316 | DirstateEntry { |
|
317 | 317 | state: EntryState::Merged, |
|
318 | 318 | mode: 0o777, |
|
319 | 319 | size: 1000, |
|
320 | 320 | mtime: 791231220, |
|
321 | 321 | }, |
|
322 | 322 | ), |
|
323 | 323 | ( |
|
324 | 324 | HgPathBuf::from_bytes(b"f3"), |
|
325 | 325 | DirstateEntry { |
|
326 | 326 | state: EntryState::Removed, |
|
327 | 327 | mode: 0o644, |
|
328 | 328 | size: 234553, |
|
329 | 329 | mtime: 791231220, |
|
330 | 330 | }, |
|
331 | 331 | ), |
|
332 | 332 | ( |
|
333 | 333 | HgPathBuf::from_bytes(b"f4\xF6"), |
|
334 | 334 | DirstateEntry { |
|
335 | 335 | state: EntryState::Added, |
|
336 | 336 | mode: 0o644, |
|
337 | 337 | size: -1, |
|
338 | 338 | mtime: -1, |
|
339 | 339 | }, |
|
340 | 340 | ), |
|
341 | 341 | ] |
|
342 | 342 | .iter() |
|
343 | 343 | .cloned() |
|
344 | 344 | .collect(); |
|
345 | 345 | let mut copymap = FastHashMap::default(); |
|
346 | 346 | copymap.insert( |
|
347 | 347 | HgPathBuf::from_bytes(b"f1"), |
|
348 | 348 | HgPathBuf::from_bytes(b"copyname"), |
|
349 | 349 | ); |
|
350 | 350 | copymap.insert( |
|
351 | 351 | HgPathBuf::from_bytes(b"f4\xF6"), |
|
352 | 352 | HgPathBuf::from_bytes(b"copyname2"), |
|
353 | 353 | ); |
|
354 | 354 | let parents = DirstateParents { |
|
355 | 355 | p1: *b"12345678910111213141", |
|
356 | 356 | p2: *b"00000000000000000000", |
|
357 | 357 | }; |
|
358 | 358 | let now = Duration::new(15000000, 0); |
|
359 | 359 | let result = |
|
360 | 360 | pack_dirstate(&mut state_map, ©map, parents.clone(), now) |
|
361 | 361 | .unwrap(); |
|
362 | 362 | |
|
363 | 363 | let mut new_state_map: StateMap = FastHashMap::default(); |
|
364 | 364 | let mut new_copy_map: CopyMap = FastHashMap::default(); |
|
365 | 365 | let new_parents = parse_dirstate( |
|
366 | 366 | &mut new_state_map, |
|
367 | 367 | &mut new_copy_map, |
|
368 | 368 | result.as_slice(), |
|
369 | 369 | ) |
|
370 | 370 | .unwrap(); |
|
371 | 371 | assert_eq!( |
|
372 | 372 | (parents, state_map, copymap), |
|
373 | 373 | (new_parents, new_state_map, new_copy_map) |
|
374 | 374 | ) |
|
375 | 375 | } |
|
376 | 376 | |
|
377 | 377 | #[test] |
|
378 | 378 | /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4 |
|
379 | 379 | fn test_parse_pack_one_entry_with_copy_and_time_conflict() { |
|
380 | 380 | let mut state_map: StateMap = [( |
|
381 | 381 | HgPathBuf::from_bytes(b"f1"), |
|
382 | 382 | DirstateEntry { |
|
383 | 383 | state: EntryState::Normal, |
|
384 | 384 | mode: 0o644, |
|
385 | 385 | size: 0, |
|
386 | 386 | mtime: 15000000, |
|
387 | 387 | }, |
|
388 | 388 | )] |
|
389 | 389 | .iter() |
|
390 | 390 | .cloned() |
|
391 | 391 | .collect(); |
|
392 | 392 | let mut copymap = FastHashMap::default(); |
|
393 | 393 | copymap.insert( |
|
394 | 394 | HgPathBuf::from_bytes(b"f1"), |
|
395 | 395 | HgPathBuf::from_bytes(b"copyname"), |
|
396 | 396 | ); |
|
397 | 397 | let parents = DirstateParents { |
|
398 | 398 | p1: *b"12345678910111213141", |
|
399 | 399 | p2: *b"00000000000000000000", |
|
400 | 400 | }; |
|
401 | 401 | let now = Duration::new(15000000, 0); |
|
402 | 402 | let result = |
|
403 | 403 | pack_dirstate(&mut state_map, ©map, parents.clone(), now) |
|
404 | 404 | .unwrap(); |
|
405 | 405 | |
|
406 | 406 | let mut new_state_map: StateMap = FastHashMap::default(); |
|
407 | 407 | let mut new_copy_map: CopyMap = FastHashMap::default(); |
|
408 | 408 | let new_parents = parse_dirstate( |
|
409 | 409 | &mut new_state_map, |
|
410 | 410 | &mut new_copy_map, |
|
411 | 411 | result.as_slice(), |
|
412 | 412 | ) |
|
413 | 413 | .unwrap(); |
|
414 | 414 | |
|
415 | 415 | assert_eq!( |
|
416 | 416 | ( |
|
417 | 417 | parents, |
|
418 | 418 | [( |
|
419 | 419 | HgPathBuf::from_bytes(b"f1"), |
|
420 | 420 | DirstateEntry { |
|
421 | 421 | state: EntryState::Normal, |
|
422 | 422 | mode: 0o644, |
|
423 | 423 | size: 0, |
|
424 | 424 | mtime: -1 |
|
425 | 425 | } |
|
426 | 426 | )] |
|
427 | 427 | .iter() |
|
428 | 428 | .cloned() |
|
429 | 429 | .collect::<StateMap>(), |
|
430 | 430 | copymap, |
|
431 | 431 | ), |
|
432 | 432 | (new_parents, new_state_map, new_copy_map) |
|
433 | 433 | ) |
|
434 | 434 | } |
|
435 | 435 | } |
@@ -1,956 +1,953 b'' | |||
|
1 | 1 | // status.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Rust implementation of dirstate.status (dirstate.py). |
|
9 | 9 | //! It is currently missing a lot of functionality compared to the Python one |
|
10 | 10 | //! and will only be triggered in narrow cases. |
|
11 | 11 | |
|
12 | 12 | use crate::{ |
|
13 | 13 | dirstate::SIZE_FROM_OTHER_PARENT, |
|
14 | 14 | filepatterns::PatternFileWarning, |
|
15 | 15 | matchers::{get_ignore_function, Matcher, VisitChildrenSet}, |
|
16 | 16 | utils::{ |
|
17 | 17 | files::{find_dirs, HgMetadata}, |
|
18 | 18 | hg_path::{ |
|
19 | 19 | hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf, |
|
20 | 20 | HgPathError, |
|
21 | 21 | }, |
|
22 | 22 | path_auditor::PathAuditor, |
|
23 | 23 | }, |
|
24 | 24 | CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap, |
|
25 | 25 | PatternError, |
|
26 | 26 | }; |
|
27 | 27 | use lazy_static::lazy_static; |
|
28 | 28 | use micro_timer::timed; |
|
29 | 29 | use rayon::prelude::*; |
|
30 | 30 | use std::{ |
|
31 | 31 | borrow::Cow, |
|
32 | 32 | collections::HashSet, |
|
33 | 33 | fs::{read_dir, DirEntry}, |
|
34 | 34 | io::ErrorKind, |
|
35 | 35 | ops::Deref, |
|
36 | 36 | path::{Path, PathBuf}, |
|
37 | 37 | }; |
|
38 | 38 | |
|
39 | 39 | /// Wrong type of file from a `BadMatch` |
|
40 | 40 | /// Note: a lot of those don't exist on all platforms. |
|
41 | 41 | #[derive(Debug, Copy, Clone)] |
|
42 | 42 | pub enum BadType { |
|
43 | 43 | CharacterDevice, |
|
44 | 44 | BlockDevice, |
|
45 | 45 | FIFO, |
|
46 | 46 | Socket, |
|
47 | 47 | Directory, |
|
48 | 48 | Unknown, |
|
49 | 49 | } |
|
50 | 50 | |
|
51 | 51 | impl ToString for BadType { |
|
52 | 52 | fn to_string(&self) -> String { |
|
53 | 53 | match self { |
|
54 | 54 | BadType::CharacterDevice => "character device", |
|
55 | 55 | BadType::BlockDevice => "block device", |
|
56 | 56 | BadType::FIFO => "fifo", |
|
57 | 57 | BadType::Socket => "socket", |
|
58 | 58 | BadType::Directory => "directory", |
|
59 | 59 | BadType::Unknown => "unknown", |
|
60 | 60 | } |
|
61 | 61 | .to_string() |
|
62 | 62 | } |
|
63 | 63 | } |
|
64 | 64 | |
|
65 | 65 | /// Was explicitly matched but cannot be found/accessed |
|
66 | 66 | #[derive(Debug, Copy, Clone)] |
|
67 | 67 | pub enum BadMatch { |
|
68 | 68 | OsError(i32), |
|
69 | 69 | BadType(BadType), |
|
70 | 70 | } |
|
71 | 71 | |
|
72 | 72 | /// Marker enum used to dispatch new status entries into the right collections. |
|
73 | 73 | /// Is similar to `crate::EntryState`, but represents the transient state of |
|
74 | 74 | /// entries during the lifetime of a command. |
|
75 | 75 | #[derive(Debug, Copy, Clone)] |
|
76 | 76 | enum Dispatch { |
|
77 | 77 | Unsure, |
|
78 | 78 | Modified, |
|
79 | 79 | Added, |
|
80 | 80 | Removed, |
|
81 | 81 | Deleted, |
|
82 | 82 | Clean, |
|
83 | 83 | Unknown, |
|
84 | 84 | Ignored, |
|
85 | 85 | /// Empty dispatch, the file is not worth listing |
|
86 | 86 | None, |
|
87 | 87 | /// Was explicitly matched but cannot be found/accessed |
|
88 | 88 | Bad(BadMatch), |
|
89 | 89 | Directory { |
|
90 | 90 | /// True if the directory used to be a file in the dmap so we can say |
|
91 | 91 | /// that it's been removed. |
|
92 | 92 | was_file: bool, |
|
93 | 93 | }, |
|
94 | 94 | } |
|
95 | 95 | |
|
96 | 96 | type IoResult<T> = std::io::Result<T>; |
|
97 | 97 | /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add |
|
98 | 98 | /// an explicit lifetime here to not fight `'static` bounds "out of nowhere". |
|
99 | 99 | type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>; |
|
100 | 100 | |
|
101 | 101 | /// Dates and times that are outside the 31-bit signed range are compared |
|
102 | 102 | /// modulo 2^31. This should prevent hg from behaving badly with very large |
|
103 | 103 | /// files or corrupt dates while still having a high probability of detecting |
|
104 | 104 | /// changes. (issue2608) |
|
105 | 105 | /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>` |
|
106 | 106 | /// is not defined for `i32`, and there is no `As` trait. This forces the |
|
107 | 107 | /// caller to cast `b` as `i32`. |
|
108 | 108 | fn mod_compare(a: i32, b: i32) -> bool { |
|
109 | 109 | a & i32::max_value() != b & i32::max_value() |
|
110 | 110 | } |
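
A standalone check of what the masking buys: a size whose true value does not fit in 31 bits still compares equal to its stored low bits, while a genuine low-order change is detected.

    // Same comparison as `mod_compare` above.
    fn mod_compare(a: i32, b: i32) -> bool {
        a & i32::max_value() != b & i32::max_value()
    }

    fn main() {
        let real_size: u64 = 5_000_000_000; // > 2^31, e.g. a very large file
        let stored = real_size as i32;      // what a 32-bit field would hold
        assert!(!mod_compare(stored, (real_size & 0x7fff_ffff) as i32));
        assert!(mod_compare(stored, stored + 1)); // real difference still seen
    }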
|
111 | 111 | |
|
112 | 112 | /// Return a sorted list containing information about the entries |
|
113 | 113 | /// in the directory. |
|
114 | 114 | /// |
|
115 | 115 | /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory |
|
116 | 116 | fn list_directory( |
|
117 | 117 | path: impl AsRef<Path>, |
|
118 | 118 | skip_dot_hg: bool, |
|
119 | 119 | ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> { |
|
120 | 120 | let mut results = vec![]; |
|
121 | 121 | let entries = read_dir(path.as_ref())?; |
|
122 | 122 | |
|
123 | 123 | for entry in entries { |
|
124 | 124 | let entry = entry?; |
|
125 | 125 | let filename = os_string_to_hg_path_buf(entry.file_name())?; |
|
126 | 126 | let file_type = entry.file_type()?; |
|
127 | 127 | if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() { |
|
128 | 128 | return Ok(vec![]); |
|
129 | 129 | } else { |

130 | results.push(( |

130 | results.push((filename, entry)) | |

131 | 131 | } |
|
132 | 132 | } |
|
133 | 133 | |
|
134 | 134 | results.sort_unstable_by_key(|e| e.0.clone()); |
|
135 | 135 | Ok(results) |
|
136 | 136 | } |
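
A hypothetical in-crate caller, only to make the contract concrete: entries come back sorted by name, and with `skip_dot_hg` set a directory containing `.hg` yields an empty listing. The path is made up.

    // Sketch, not part of hg-core.
    fn print_listing() -> std::io::Result<()> {
        let entries = list_directory("/tmp/some-repo/subdir", true)?;
        for (name, _entry) in &entries {
            println!("{}", String::from_utf8_lossy(name.as_bytes()));
        }
        Ok(())
    }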
|
137 | 137 | |
|
138 | 138 | /// The file corresponding to the dirstate entry was found on the filesystem. |
|
139 | 139 | fn dispatch_found( |
|
140 | 140 | filename: impl AsRef<HgPath>, |
|
141 | 141 | entry: DirstateEntry, |
|
142 | 142 | metadata: HgMetadata, |
|
143 | 143 | copy_map: &CopyMap, |
|
144 | 144 | options: StatusOptions, |
|
145 | 145 | ) -> Dispatch { |
|
146 | 146 | let DirstateEntry { |
|
147 | 147 | state, |
|
148 | 148 | mode, |
|
149 | 149 | mtime, |
|
150 | 150 | size, |
|
151 | 151 | } = entry; |
|
152 | 152 | |
|
153 | 153 | let HgMetadata { |
|
154 | 154 | st_mode, |
|
155 | 155 | st_size, |
|
156 | 156 | st_mtime, |
|
157 | 157 | .. |
|
158 | 158 | } = metadata; |
|
159 | 159 | |
|
160 | 160 | match state { |
|
161 | 161 | EntryState::Normal => { |
|
162 | 162 | let size_changed = mod_compare(size, st_size as i32); |
|
163 | 163 | let mode_changed = |
|
164 | 164 | (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec; |
|
165 | 165 | let metadata_changed = size >= 0 && (size_changed || mode_changed); |
|
166 | 166 | let other_parent = size == SIZE_FROM_OTHER_PARENT; |
|
167 | ||
|
167 | 168 | if metadata_changed |
|
168 | 169 | || other_parent |
|
169 | 170 | || copy_map.contains_key(filename.as_ref()) |
|
170 | 171 | { |
|
171 | 172 | Dispatch::Modified |

172 | } else if mod_compare(mtime, st_mtime as i32) { |

173 | Dispatch::Unsure | |
|
174 | } else if st_mtime == options.last_normal_time { | |
|
173 | } else if mod_compare(mtime, st_mtime as i32) | |
|
174 | || st_mtime == options.last_normal_time | |
|
175 | { | |
|
175 | 176 | // the file may have just been marked as normal and |
|
176 | 177 | // it may have changed in the same second without |
|
177 | 178 | // changing its size. This can happen if we quickly |
|
178 | 179 | // do multiple commits. Force lookup, so we don't |
|
179 | 180 | // miss such a racy file change. |
|
180 | 181 | Dispatch::Unsure |
|
181 | 182 | } else if options.list_clean { |
|
182 | 183 | Dispatch::Clean |
|
183 | 184 | } else { |
|
184 | 185 | Dispatch::None |
|
185 | 186 | } |
|
186 | 187 | } |
|
187 | 188 | EntryState::Merged => Dispatch::Modified, |
|
188 | 189 | EntryState::Added => Dispatch::Added, |
|
189 | 190 | EntryState::Removed => Dispatch::Removed, |
|
190 | 191 | EntryState::Unknown => Dispatch::Unknown, |
|
191 | 192 | } |
|
192 | 193 | } |
|
193 | 194 | |
|
194 | 195 | /// The file corresponding to this Dirstate entry is missing. |
|
195 | 196 | fn dispatch_missing(state: EntryState) -> Dispatch { |
|
196 | 197 | match state { |
|
197 | 198 | // File was removed from the filesystem during commands |
|
198 | 199 | EntryState::Normal | EntryState::Merged | EntryState::Added => { |
|
199 | 200 | Dispatch::Deleted |
|
200 | 201 | } |
|
201 | 202 | // File was removed, everything is normal |
|
202 | 203 | EntryState::Removed => Dispatch::Removed, |
|
203 | 204 | // File is unknown to Mercurial, everything is normal |
|
204 | 205 | EntryState::Unknown => Dispatch::Unknown, |
|
205 | 206 | } |
|
206 | 207 | } |
|
207 | 208 | |
|
208 | 209 | lazy_static! { |
|
209 | 210 | static ref DEFAULT_WORK: HashSet<&'static HgPath> = { |
|
210 | 211 | let mut h = HashSet::new(); |
|
211 | 212 | h.insert(HgPath::new(b"")); |
|
212 | 213 | h |
|
213 | 214 | }; |
|
214 | 215 | } |
|
215 | 216 | |
|
216 | 217 | /// Get stat data about the files explicitly specified by match. |
|
217 | 218 | /// TODO subrepos |
|
218 | 219 | #[timed] |
|
219 | 220 | fn walk_explicit<'a>( |
|
220 | 221 | files: Option<&'a HashSet<&HgPath>>, |
|
221 | 222 | dmap: &'a DirstateMap, |
|
222 | 223 | root_dir: impl AsRef<Path> + Sync + Send + 'a, |
|
223 | 224 | options: StatusOptions, |
|
224 | 225 | traversed_sender: crossbeam::Sender<HgPathBuf>, |
|
225 | 226 | ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> { |
|
226 | 227 | files |
|
227 | 228 | .unwrap_or(&DEFAULT_WORK) |
|
228 | 229 | .par_iter() |
|
229 | .map(move |filename| { | |
|
230 | .map(move |&filename| { | |
|
230 | 231 | // TODO normalization |

231 | let normalized = filename |

232 | let normalized = filename; | |

232 | 233 | |
|
233 | 234 | let buf = match hg_path_to_path_buf(normalized) { |
|
234 | 235 | Ok(x) => x, |
|
235 | 236 | Err(e) => return Some(Err(e.into())), |
|
236 | 237 | }; |
|
237 | 238 | let target = root_dir.as_ref().join(buf); |
|
238 | 239 | let st = target.symlink_metadata(); |
|
239 | 240 | let in_dmap = dmap.get(normalized); |
|
240 | 241 | match st { |
|
241 | 242 | Ok(meta) => { |
|
242 | 243 | let file_type = meta.file_type(); |
|
243 | 244 | return if file_type.is_file() || file_type.is_symlink() { |
|
244 | 245 | if let Some(entry) = in_dmap { |
|
245 | 246 | return Some(Ok(( |
|
246 | 247 | normalized, |
|
247 | 248 | dispatch_found( |
|
248 | 249 | &normalized, |
|
249 | 250 | *entry, |
|
250 | 251 | HgMetadata::from_metadata(meta), |
|
251 | 252 | &dmap.copy_map, |
|
252 | 253 | options, |
|
253 | 254 | ), |
|
254 | 255 | ))); |
|
255 | 256 | } |
|
256 | 257 | Some(Ok((normalized, Dispatch::Unknown))) |
|
258 | } else if file_type.is_dir() { | |
|
259 | if options.collect_traversed_dirs { | |
|
260 | traversed_sender | |
|
261 | .send(normalized.to_owned()) | |
|
262 | .expect("receiver should outlive sender"); | |
|
263 | } | |
|
264 | Some(Ok(( | |
|
265 | normalized, | |
|
266 | Dispatch::Directory { | |
|
267 | was_file: in_dmap.is_some(), | |
|
268 | }, | |
|
269 | ))) | |
|
257 | 270 | } else { |

258 | if file_type.is_dir() { | |

259 | if options.collect_traversed_dirs { | |

260 | traversed_sender | |

261 | .send(normalized.to_owned()) | |

262 | .expect("receiver should outlive sender"); | |

263 | } | |

264 | Some(Ok(( | |

265 | normalized, | |

266 | Dispatch::Directory { | |

267 | was_file: in_dmap.is_some(), | |

268 | }, | |

269 | ))) | |

270 | } else { | |

271 | Some(Ok(( | |
|
272 | normalized, | |
|
273 | Dispatch::Bad(BadMatch::BadType( | |
|
274 | // TODO do more than unknown | |
|
275 | // Support for all `BadType` variant | |
|
276 | // varies greatly between platforms. | |
|
277 | // So far, no tests check the type and | |
|
278 | // this should be good enough for most | |
|
279 | // users. | |
|
280 | BadType::Unknown, | |
|
281 | )), | |
|
282 | ))) | |
|
283 | } | |
|
271 | Some(Ok(( | |
|
272 | normalized, | |
|
273 | Dispatch::Bad(BadMatch::BadType( | |
|
274 | // TODO do more than unknown | |
|
275 | // Support for all `BadType` variant | |
|
276 | // varies greatly between platforms. | |
|
277 | // So far, no tests check the type and | |
|
278 | // this should be good enough for most | |
|
279 | // users. | |
|
280 | BadType::Unknown, | |
|
281 | )), | |
|
282 | ))) | |
|
284 | 283 | }; |
|
285 | 284 | } |
|
286 | 285 | Err(_) => { |
|
287 | 286 | if let Some(entry) = in_dmap { |
|
288 | 287 | return Some(Ok(( |
|
289 | 288 | normalized, |
|
290 | 289 | dispatch_missing(entry.state), |
|
291 | 290 | ))); |
|
292 | 291 | } |
|
293 | 292 | } |
|
294 | 293 | }; |
|
295 | 294 | None |
|
296 | 295 | }) |
|
297 | 296 | .flatten() |
|
298 | 297 | } |
|
299 | 298 | |
|
300 | 299 | #[derive(Debug, Copy, Clone)] |
|
301 | 300 | pub struct StatusOptions { |
|
302 | 301 | /// Remember the most recent modification timeslot for status, to make |
|
303 | 302 | /// sure we won't miss future size-preserving file content modifications |
|
304 | 303 | /// that happen within the same timeslot. |
|
305 | 304 | pub last_normal_time: i64, |
|
306 | 305 | /// Whether we are on a filesystem with UNIX-like exec flags |
|
307 | 306 | pub check_exec: bool, |
|
308 | 307 | pub list_clean: bool, |
|
309 | 308 | pub list_unknown: bool, |
|
310 | 309 | pub list_ignored: bool, |
|
311 | 310 | /// Whether to collect traversed dirs for applying a callback later. |
|
312 | 311 | /// Used by `hg purge` for example. |
|
313 | 312 | pub collect_traversed_dirs: bool, |
|
314 | 313 | } |
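
For illustration, one plausible way to fill the struct for a plain status run; the field names are the ones declared above, the values are assumptions rather than what the Python callers actually pass:

    let options = StatusOptions {
        last_normal_time: 0,
        check_exec: true,
        list_clean: false,
        list_unknown: true,
        list_ignored: false,
        collect_traversed_dirs: false,
    };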
|
315 | 314 | |
|
316 | 315 | /// Dispatch a single entry (file, folder, symlink...) found during `traverse`. |
|
317 | 316 | /// If the entry is a folder that needs to be traversed, it will be handled |
|
318 | 317 | /// in a separate thread. |
|
319 | 318 | fn handle_traversed_entry<'a>( |
|
320 | 319 | scope: &rayon::Scope<'a>, |
|
321 | 320 | files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, |
|
322 | 321 | matcher: &'a (impl Matcher + Sync), |
|
323 | 322 | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, |
|
324 | 323 | dmap: &'a DirstateMap, |
|
325 | 324 | old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, |
|
326 | 325 | ignore_fn: &'a IgnoreFnType, |
|
327 | 326 | dir_ignore_fn: &'a IgnoreFnType, |
|
328 | 327 | options: StatusOptions, |
|
329 | 328 | filename: HgPathBuf, |
|
330 | 329 | dir_entry: DirEntry, |
|
331 | 330 | traversed_sender: crossbeam::Sender<HgPathBuf>, |
|
332 | 331 | ) -> IoResult<()> { |
|
333 | 332 | let file_type = dir_entry.file_type()?; |
|
334 | 333 | let entry_option = dmap.get(&filename); |
|
335 | 334 | |
|
336 | 335 | if filename.as_bytes() == b".hg" { |
|
337 | 336 | // Could be a directory or a symlink |
|
338 | 337 | return Ok(()); |
|
339 | 338 | } |
|
340 | 339 | |
|
341 | 340 | if file_type.is_dir() { |
|
342 | 341 | handle_traversed_dir( |
|
343 | 342 | scope, |
|
344 | 343 | files_sender, |
|
345 | 344 | matcher, |
|
346 | 345 | root_dir, |
|
347 | 346 | dmap, |
|
348 | 347 | old_results, |
|
349 | 348 | ignore_fn, |
|
350 | 349 | dir_ignore_fn, |
|
351 | 350 | options, |
|
352 | 351 | entry_option, |
|
353 | 352 | filename, |
|
354 | 353 | traversed_sender, |
|
355 | 354 | ); |
|
356 | 355 | } else if file_type.is_file() || file_type.is_symlink() { |
|
357 | 356 | if let Some(entry) = entry_option { |
|
358 | 357 | if matcher.matches_everything() || matcher.matches(&filename) { |
|
359 | 358 | let metadata = dir_entry.metadata()?; |
|
360 | 359 | files_sender |
|
361 | 360 | .send(Ok(( |
|
362 | 361 | filename.to_owned(), |
|
363 | 362 | dispatch_found( |
|
364 | 363 | &filename, |
|
365 | 364 | *entry, |
|
366 | 365 | HgMetadata::from_metadata(metadata), |
|
367 | 366 | &dmap.copy_map, |
|
368 | 367 | options, |
|
369 | 368 | ), |
|
370 | 369 | ))) |
|
371 | 370 | .unwrap(); |
|
372 | 371 | } |
|
373 | 372 | } else if (matcher.matches_everything() || matcher.matches(&filename)) |
|
374 | 373 | && !ignore_fn(&filename) |
|
375 | 374 | { |
|
376 | 375 | if (options.list_ignored || matcher.exact_match(&filename)) |
|
377 | 376 | && dir_ignore_fn(&filename) |
|
378 | 377 | { |
|
379 | 378 | if options.list_ignored { |
|
380 | 379 | files_sender |
|
381 | 380 | .send(Ok((filename.to_owned(), Dispatch::Ignored))) |
|
382 | 381 | .unwrap(); |
|
383 | 382 | } |
|
384 | } else { | |
|
385 | if options.list_unknown { | |
|
386 | files_sender | |
|
387 | .send(Ok((filename.to_owned(), Dispatch::Unknown))) | |
|
388 | .unwrap(); | |
|
389 | } | |
|
383 | } else if options.list_unknown { | |
|
384 | files_sender | |
|
385 | .send(Ok((filename.to_owned(), Dispatch::Unknown))) | |
|
386 | .unwrap(); | |
|
390 | 387 | } |
|
391 | 388 | } else if ignore_fn(&filename) && options.list_ignored { |
|
392 | 389 | files_sender |
|
393 | 390 | .send(Ok((filename.to_owned(), Dispatch::Ignored))) |
|
394 | 391 | .unwrap(); |
|
395 | 392 | } |
|
396 | 393 | } else if let Some(entry) = entry_option { |
|
397 | 394 | // Used to be a file or a folder, now something else. |
|
398 | 395 | if matcher.matches_everything() || matcher.matches(&filename) { |
|
399 | 396 | files_sender |
|
400 | 397 | .send(Ok((filename.to_owned(), dispatch_missing(entry.state)))) |
|
401 | 398 | .unwrap(); |
|
402 | 399 | } |
|
403 | 400 | } |
|
404 | 401 | |
|
405 | 402 | Ok(()) |
|
406 | 403 | } |
|
407 | 404 | |
|
408 | 405 | /// A directory was found in the filesystem and needs to be traversed |
|
409 | 406 | fn handle_traversed_dir<'a>( |
|
410 | 407 | scope: &rayon::Scope<'a>, |
|
411 | 408 | files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, |
|
412 | 409 | matcher: &'a (impl Matcher + Sync), |
|
413 | 410 | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a, |
|
414 | 411 | dmap: &'a DirstateMap, |
|
415 | 412 | old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>, |
|
416 | 413 | ignore_fn: &'a IgnoreFnType, |
|
417 | 414 | dir_ignore_fn: &'a IgnoreFnType, |
|
418 | 415 | options: StatusOptions, |
|
419 | 416 | entry_option: Option<&'a DirstateEntry>, |
|
420 | 417 | directory: HgPathBuf, |
|
421 | 418 | traversed_sender: crossbeam::Sender<HgPathBuf>, |
|
422 | 419 | ) { |
|
423 | 420 | scope.spawn(move |_| { |
|
424 | 421 | // Nested `if` until `rust-lang/rust#53668` is stable |
|
425 | 422 | if let Some(entry) = entry_option { |
|
426 | 423 | // Used to be a file, is now a folder |
|
427 | 424 | if matcher.matches_everything() || matcher.matches(&directory) { |
|
428 | 425 | files_sender |
|
429 | 426 | .send(Ok(( |
|
430 | 427 | directory.to_owned(), |
|
431 | 428 | dispatch_missing(entry.state), |
|
432 | 429 | ))) |
|
433 | 430 | .unwrap(); |
|
434 | 431 | } |
|
435 | 432 | } |
|
436 | 433 | // Do we need to traverse it? |
|
437 | 434 | if !ignore_fn(&directory) || options.list_ignored { |
|
438 | 435 | traverse_dir( |
|
439 | 436 | files_sender, |
|
440 | 437 | matcher, |
|
441 | 438 | root_dir, |
|
442 | 439 | dmap, |
|
443 | 440 | directory, |
|
444 | 441 | &old_results, |
|
445 | 442 | ignore_fn, |
|
446 | 443 | dir_ignore_fn, |
|
447 | 444 | options, |
|
448 | 445 | traversed_sender, |
|
449 | 446 | ) |
|
450 | 447 | .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap()) |
|
451 | 448 | } |
|
452 | 449 | }); |
|
453 | 450 | } |
|
454 | 451 | |
|
455 | 452 | /// Decides whether the directory needs to be listed, and if so handles the |
|
456 | 453 | /// entries in a separate thread. |
|
457 | 454 | fn traverse_dir<'a>( |
|
458 | 455 | files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>, |
|
459 | 456 | matcher: &'a (impl Matcher + Sync), |
|
460 | 457 | root_dir: impl AsRef<Path> + Sync + Send + Copy, |
|
461 | 458 | dmap: &'a DirstateMap, |
|
462 | 459 | directory: impl AsRef<HgPath>, |
|
463 | 460 | old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, |
|
464 | 461 | ignore_fn: &IgnoreFnType, |
|
465 | 462 | dir_ignore_fn: &IgnoreFnType, |
|
466 | 463 | options: StatusOptions, |
|
467 | 464 | traversed_sender: crossbeam::Sender<HgPathBuf>, |
|
468 | 465 | ) -> IoResult<()> { |
|
469 | 466 | let directory = directory.as_ref(); |
|
470 | 467 | |
|
471 | 468 | if options.collect_traversed_dirs { |
|
472 | 469 | traversed_sender |
|
473 | 470 | .send(directory.to_owned()) |
|
474 | 471 | .expect("receiver should outlive sender"); |
|
475 | 472 | } |
|
476 | 473 | |
|
477 | 474 | let visit_entries = match matcher.visit_children_set(directory) { |
|
478 | 475 | VisitChildrenSet::Empty => return Ok(()), |
|
479 | 476 | VisitChildrenSet::This | VisitChildrenSet::Recursive => None, |
|
480 | 477 | VisitChildrenSet::Set(set) => Some(set), |
|
481 | 478 | }; |
|
482 | 479 | let buf = hg_path_to_path_buf(directory)?; |
|
483 | 480 | let dir_path = root_dir.as_ref().join(buf); |
|
484 | 481 | |
|
485 | 482 | let skip_dot_hg = !directory.as_bytes().is_empty(); |
|
486 | 483 | let entries = match list_directory(dir_path, skip_dot_hg) { |
|
487 | 484 | Err(e) => match e.kind() { |
|
488 | 485 | ErrorKind::NotFound | ErrorKind::PermissionDenied => { |
|
489 | 486 | files_sender |
|
490 | 487 | .send(Ok(( |
|
491 | 488 | directory.to_owned(), |
|
492 | 489 | Dispatch::Bad(BadMatch::OsError( |
|
493 | 490 | // Unwrapping here is OK because the error always |
|
494 | 491 | // is a real os error |
|
495 | 492 | e.raw_os_error().unwrap(), |
|
496 | 493 | )), |
|
497 | 494 | ))) |
|
498 | 495 | .unwrap(); |
|
499 | 496 | return Ok(()); |
|
500 | 497 | } |
|
501 | 498 | _ => return Err(e), |
|
502 | 499 | }, |
|
503 | 500 | Ok(entries) => entries, |
|
504 | 501 | }; |
|
505 | 502 | |
|
506 | 503 | rayon::scope(|scope| -> IoResult<()> { |
|
507 | 504 | for (filename, dir_entry) in entries { |
|
508 | 505 | if let Some(ref set) = visit_entries { |
|
509 | 506 | if !set.contains(filename.deref()) { |
|
510 | 507 | continue; |
|
511 | 508 | } |
|
512 | 509 | } |
|
513 | 510 | // TODO normalize |
|
514 | 511 | let filename = if directory.is_empty() { |
|
515 | 512 | filename.to_owned() |
|
516 | 513 | } else { |
|
517 | 514 | directory.join(&filename) |
|
518 | 515 | }; |
|
519 | 516 | |
|
520 | 517 | if !old_results.contains_key(filename.deref()) { |
|
521 | 518 | handle_traversed_entry( |
|
522 | 519 | scope, |
|
523 | 520 | files_sender, |
|
524 | 521 | matcher, |
|
525 | 522 | root_dir, |
|
526 | 523 | dmap, |
|
527 | 524 | old_results, |
|
528 | 525 | ignore_fn, |
|
529 | 526 | dir_ignore_fn, |
|
530 | 527 | options, |
|
531 | 528 | filename, |
|
532 | 529 | dir_entry, |
|
533 | 530 | traversed_sender.clone(), |
|
534 | 531 | )?; |
|
535 | 532 | } |
|
536 | 533 | } |
|
537 | 534 | Ok(()) |
|
538 | 535 | }) |
|
539 | 536 | } |
|
540 | 537 | |
|
541 | 538 | /// Walk the working directory recursively to look for changes compared to the |
|
542 | 539 | /// current `DirstateMap`. |
|
543 | 540 | /// |
|
544 | 541 | /// This takes a mutable reference to the results to account for the `extend` |
|
545 | 542 | /// in timings |
|
546 | 543 | #[timed] |
|
547 | 544 | fn traverse<'a>( |
|
548 | 545 | matcher: &'a (impl Matcher + Sync), |
|
549 | 546 | root_dir: impl AsRef<Path> + Sync + Send + Copy, |
|
550 | 547 | dmap: &'a DirstateMap, |
|
551 | 548 | path: impl AsRef<HgPath>, |
|
552 | 549 | old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>, |
|
553 | 550 | ignore_fn: &IgnoreFnType, |
|
554 | 551 | dir_ignore_fn: &IgnoreFnType, |
|
555 | 552 | options: StatusOptions, |
|
556 | 553 | results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, |
|
557 | 554 | traversed_sender: crossbeam::Sender<HgPathBuf>, |
|
558 | 555 | ) -> IoResult<()> { |
|
559 | 556 | let root_dir = root_dir.as_ref(); |
|
560 | 557 | |
|
561 | 558 | // The traversal is done in parallel, so use a channel to gather entries. |
|
562 | 559 | // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not. |
|
563 | 560 | let (files_transmitter, files_receiver) = crossbeam::channel::unbounded(); |
|
564 | 561 | |
|
565 | 562 | traverse_dir( |
|
566 | 563 | &files_transmitter, |
|
567 | 564 | matcher, |
|
568 | 565 | root_dir, |
|
569 | 566 | &dmap, |
|
570 | 567 | path, |
|
571 | 568 | &old_results, |
|
572 | 569 | &ignore_fn, |
|
573 | 570 | &dir_ignore_fn, |
|
574 | 571 | options, |
|
575 | 572 | traversed_sender, |
|
576 | 573 | )?; |
|
577 | 574 | |
|
578 | 575 | // Disconnect the channel so the receiver stops waiting |
|
579 | 576 | drop(files_transmitter); |
|
580 | 577 | |
|
581 | 578 | // TODO don't collect. Find a way of replicating the behavior of |
|
582 | 579 | // `itertools::process_results`, but for `rayon::ParallelIterator` |
|
583 | 580 | let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> = |
|
584 | 581 | files_receiver |
|
585 | 582 | .into_iter() |
|
586 | 583 | .map(|item| { |
|
587 | 584 | let (f, d) = item?; |
|
588 | 585 | Ok((Cow::Owned(f), d)) |
|
589 | 586 | }) |
|
590 | 587 | .collect(); |
|
591 | 588 | |
|
592 | 589 | results.par_extend(new_results?); |
|
593 | 590 | |
|
594 | 591 | Ok(()) |
|
595 | 592 | } |
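
For illustration, a minimal, self-contained sketch of the channel-plus-scope pattern that `traverse` above relies on (not the hg-core API; only the `crossbeam` and `rayon` crates already imported by this file are assumed): worker tasks spawned in a scope send their results through an unbounded channel, the sender is dropped once the scope ends, and the receiver is then drained.

    use crossbeam::channel;

    fn gather_squares(inputs: &[u64]) -> Vec<u64> {
        // `crossbeam::Sender` is `Sync`, so clones can be moved into scoped tasks.
        let (tx, rx) = channel::unbounded();
        rayon::scope(|s| {
            for &n in inputs {
                let tx = tx.clone();
                // One task per entry, mirroring the per-entry dispatch above.
                s.spawn(move |_| tx.send(n * n).unwrap());
            }
        });
        // Disconnect the channel so the receiving iterator terminates.
        drop(tx);
        rx.into_iter().collect()
    }
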
|
596 | 593 | |
|
597 | 594 | /// Stat all entries in the `DirstateMap` and mark them for dispatch. |
|
598 | 595 | fn stat_dmap_entries( |
|
599 | 596 | dmap: &DirstateMap, |
|
600 | 597 | root_dir: impl AsRef<Path> + Sync + Send, |
|
601 | 598 | options: StatusOptions, |
|
602 | 599 | ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> { |
|
603 | 600 | dmap.par_iter().map(move |(filename, entry)| { |
|
604 | 601 | let filename: &HgPath = filename; |
|
605 | 602 | let filename_as_path = hg_path_to_path_buf(filename)?; |
|
606 | 603 | let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata(); |
|
607 | 604 | |
|
608 | 605 | match meta { |
|
609 | 606 | Ok(ref m) |
|
610 | 607 | if !(m.file_type().is_file() |
|
611 | 608 | || m.file_type().is_symlink()) => |
|
612 | 609 | { |
|
613 | 610 | Ok((filename, dispatch_missing(entry.state))) |
|
614 | 611 | } |
|
615 | 612 | Ok(m) => Ok(( |
|
616 | 613 | filename, |
|
617 | 614 | dispatch_found( |
|
618 | 615 | filename, |
|
619 | 616 | *entry, |
|
620 | 617 | HgMetadata::from_metadata(m), |
|
621 | 618 | &dmap.copy_map, |
|
622 | 619 | options, |
|
623 | 620 | ), |
|
624 | 621 | )), |
|
625 | 622 | Err(ref e) |
|
626 | 623 | if e.kind() == ErrorKind::NotFound |
|
627 | 624 | || e.raw_os_error() == Some(20) => |
|
628 | 625 | { |
|
629 | 626 | // Rust does not yet have an `ErrorKind` for |
|
630 | 627 | // `NotADirectory` (errno 20) |
|
631 | 628 | // It happens if the dirstate contains `foo/bar` and |
|
632 | 629 | // foo is not a directory |
|
633 | 630 | Ok((filename, dispatch_missing(entry.state))) |
|
634 | 631 | } |
|
635 | 632 | Err(e) => Err(e), |
|
636 | 633 | } |
|
637 | 634 | }) |
|
638 | 635 | } |
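
A standalone sketch of the error classification performed above (the helper name is made up for the example): a stat error counts as "missing" when the file does not exist, or when an ancestor path component is not a directory (ENOTDIR, raw errno 20 on Linux), which has no stable `ErrorKind` variant at the time of writing.

    use std::io::ErrorKind;

    /// Hypothetical helper: does this stat error mean "treat the file as missing"?
    fn counts_as_missing(err: &std::io::Error) -> bool {
        err.kind() == ErrorKind::NotFound || err.raw_os_error() == Some(20)
    }
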
|
639 | 636 | |
|
640 | 637 | /// This takes a mutable reference to the results to account for the `extend` |
|
641 | 638 | /// in timings |
|
642 | 639 | #[timed] |
|
643 | 640 | fn extend_from_dmap<'a>( |
|
644 | 641 | dmap: &'a DirstateMap, |
|
645 | 642 | root_dir: impl AsRef<Path> + Sync + Send, |
|
646 | 643 | options: StatusOptions, |
|
647 | 644 | results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, |
|
648 | 645 | ) { |
|
649 | 646 | results.par_extend( |
|
650 | 647 | stat_dmap_entries(dmap, root_dir, options) |
|
651 | 648 | .flatten() |
|
652 | 649 | .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)), |
|
653 | 650 | ); |
|
654 | 651 | } |
|
655 | 652 | |
|
656 | 653 | #[derive(Debug)] |
|
657 | 654 | pub struct DirstateStatus<'a> { |
|
658 | 655 | pub modified: Vec<Cow<'a, HgPath>>, |
|
659 | 656 | pub added: Vec<Cow<'a, HgPath>>, |
|
660 | 657 | pub removed: Vec<Cow<'a, HgPath>>, |
|
661 | 658 | pub deleted: Vec<Cow<'a, HgPath>>, |
|
662 | 659 | pub clean: Vec<Cow<'a, HgPath>>, |
|
663 | 660 | pub ignored: Vec<Cow<'a, HgPath>>, |
|
664 | 661 | pub unknown: Vec<Cow<'a, HgPath>>, |
|
665 | 662 | pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>, |
|
666 | 663 | /// Only filled if `collect_traversed_dirs` is `true` |
|
667 | 664 | pub traversed: Vec<HgPathBuf>, |
|
668 | 665 | } |
|
669 | 666 | |
|
670 | 667 | #[timed] |
|
671 | 668 | fn build_response<'a>( |
|
672 | 669 | results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>, |
|
673 | 670 | traversed: Vec<HgPathBuf>, |
|
674 | 671 | ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) { |
|
675 | 672 | let mut lookup = vec![]; |
|
676 | 673 | let mut modified = vec![]; |
|
677 | 674 | let mut added = vec![]; |
|
678 | 675 | let mut removed = vec![]; |
|
679 | 676 | let mut deleted = vec![]; |
|
680 | 677 | let mut clean = vec![]; |
|
681 | 678 | let mut ignored = vec![]; |
|
682 | 679 | let mut unknown = vec![]; |
|
683 | 680 | let mut bad = vec![]; |
|
684 | 681 | |
|
685 | 682 | for (filename, dispatch) in results.into_iter() { |
|
686 | 683 | match dispatch { |
|
687 | 684 | Dispatch::Unknown => unknown.push(filename), |
|
688 | 685 | Dispatch::Unsure => lookup.push(filename), |
|
689 | 686 | Dispatch::Modified => modified.push(filename), |
|
690 | 687 | Dispatch::Added => added.push(filename), |
|
691 | 688 | Dispatch::Removed => removed.push(filename), |
|
692 | 689 | Dispatch::Deleted => deleted.push(filename), |
|
693 | 690 | Dispatch::Clean => clean.push(filename), |
|
694 | 691 | Dispatch::Ignored => ignored.push(filename), |
|
695 | 692 | Dispatch::None => {} |
|
696 | 693 | Dispatch::Bad(reason) => bad.push((filename, reason)), |
|
697 | 694 | Dispatch::Directory { .. } => {} |
|
698 | 695 | } |
|
699 | 696 | } |
|
700 | 697 | |
|
701 | 698 | ( |
|
702 | 699 | lookup, |
|
703 | 700 | DirstateStatus { |
|
704 | 701 | modified, |
|
705 | 702 | added, |
|
706 | 703 | removed, |
|
707 | 704 | deleted, |
|
708 | 705 | clean, |
|
709 | 706 | ignored, |
|
710 | 707 | unknown, |
|
711 | 708 | bad, |
|
712 | 709 | traversed, |
|
713 | 710 | }, |
|
714 | 711 | ) |
|
715 | 712 | } |
|
716 | 713 | |
|
717 | 714 | #[derive(Debug)] |
|
718 | 715 | pub enum StatusError { |
|
719 | 716 | IO(std::io::Error), |
|
720 | 717 | Path(HgPathError), |
|
721 | 718 | Pattern(PatternError), |
|
722 | 719 | } |
|
723 | 720 | |
|
724 | 721 | pub type StatusResult<T> = Result<T, StatusError>; |
|
725 | 722 | |
|
726 | 723 | impl From<PatternError> for StatusError { |
|
727 | 724 | fn from(e: PatternError) -> Self { |
|
728 | 725 | StatusError::Pattern(e) |
|
729 | 726 | } |
|
730 | 727 | } |
|
731 | 728 | impl From<HgPathError> for StatusError { |
|
732 | 729 | fn from(e: HgPathError) -> Self { |
|
733 | 730 | StatusError::Path(e) |
|
734 | 731 | } |
|
735 | 732 | } |
|
736 | 733 | impl From<std::io::Error> for StatusError { |
|
737 | 734 | fn from(e: std::io::Error) -> Self { |
|
738 | 735 | StatusError::IO(e) |
|
739 | 736 | } |
|
740 | 737 | } |
|
741 | 738 | |
|
742 | 739 | impl ToString for StatusError { |
|
743 | 740 | fn to_string(&self) -> String { |
|
744 | 741 | match self { |
|
745 | 742 | StatusError::IO(e) => e.to_string(), |
|
746 | 743 | StatusError::Path(e) => e.to_string(), |
|
747 | 744 | StatusError::Pattern(e) => e.to_string(), |
|
748 | 745 | } |
|
749 | 746 | } |
|
750 | 747 | } |
|
751 | 748 | |
|
752 | 749 | /// This takes a mutable reference to the results to account for the `extend` |
|
753 | 750 | /// in timings |
|
754 | 751 | #[timed] |
|
755 | 752 | fn handle_unknowns<'a>( |
|
756 | 753 | dmap: &'a DirstateMap, |
|
757 | 754 | matcher: &(impl Matcher + Sync), |
|
758 | 755 | root_dir: impl AsRef<Path> + Sync + Send + Copy, |
|
759 | 756 | options: StatusOptions, |
|
760 | 757 | results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, |
|
761 | 758 | ) -> IoResult<()> { |
|
762 | 759 | let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty() |
|
763 | 760 | && matcher.matches_everything() |
|
764 | 761 | { |
|
765 | 762 | dmap.iter().map(|(f, e)| (f.deref(), e)).collect() |
|
766 | 763 | } else { |
|
767 | 764 | // Only convert to a hashmap if needed. |
|
768 | 765 | let old_results: FastHashMap<_, _> = results.iter().cloned().collect(); |
|
769 | 766 | dmap.iter() |
|
770 | 767 | .filter_map(move |(f, e)| { |
|
771 | 768 | if !old_results.contains_key(f.deref()) && matcher.matches(f) { |
|
772 | 769 | Some((f.deref(), e)) |
|
773 | 770 | } else { |
|
774 | 771 | None |
|
775 | 772 | } |
|
776 | 773 | }) |
|
777 | 774 | .collect() |
|
778 | 775 | }; |
|
779 | 776 | |
|
780 | 777 | // We walked all dirs under the roots that weren't ignored, and |
|
781 | 778 | // everything that matched was stat'ed and is already in results. |
|
782 | 779 | // The rest must thus be ignored or under a symlink. |
|
783 | 780 | let path_auditor = PathAuditor::new(root_dir); |
|
784 | 781 | |
|
785 | 782 | // TODO don't collect. Find a way of replicating the behavior of |
|
786 | 783 | // `itertools::process_results`, but for `rayon::ParallelIterator` |
|
787 | 784 | let new_results: IoResult<Vec<_>> = to_visit |
|
788 | 785 | .into_par_iter() |
|
789 | 786 | .filter_map(|(filename, entry)| -> Option<IoResult<_>> { |
|
790 | 787 | // Report ignored items in the dmap as long as they are not |
|
791 | 788 | // under a symlink directory. |
|
792 | 789 | if path_auditor.check(filename) { |
|
793 | 790 | // TODO normalize for case-insensitive filesystems |
|
794 | 791 | let buf = match hg_path_to_path_buf(filename) { |
|
795 | 792 | Ok(x) => x, |
|
796 | 793 | Err(e) => return Some(Err(e.into())), |
|
797 | 794 | }; |
|
798 | 795 | Some(Ok(( |
|
799 | 796 | Cow::Borrowed(filename), |
|
800 | 797 | match root_dir.as_ref().join(&buf).symlink_metadata() { |
|
801 | 798 | // File was just ignored, no links, and exists |
|
802 | 799 | Ok(meta) => { |
|
803 | 800 | let metadata = HgMetadata::from_metadata(meta); |
|
804 | 801 | dispatch_found( |
|
805 | 802 | filename, |
|
806 | 803 | *entry, |
|
807 | 804 | metadata, |
|
808 | 805 | &dmap.copy_map, |
|
809 | 806 | options, |
|
810 | 807 | ) |
|
811 | 808 | } |
|
812 | 809 | // File doesn't exist |
|
813 | 810 | Err(_) => dispatch_missing(entry.state), |
|
814 | 811 | }, |
|
815 | 812 | ))) |
|
816 | 813 | } else { |
|
817 | 814 | // It's either missing or under a symlink directory which |
|
818 | 815 | // we, in this case, report as missing. |
|
819 | 816 | Some(Ok(( |
|
820 | 817 | Cow::Borrowed(filename), |
|
821 | 818 | dispatch_missing(entry.state), |
|
822 | 819 | ))) |
|
823 | 820 | } |
|
824 | 821 | }) |
|
825 | 822 | .collect(); |
|
826 | 823 | |
|
827 | 824 | results.par_extend(new_results?); |
|
828 | 825 | |
|
829 | 826 | Ok(()) |
|
830 | 827 | } |
|
831 | 828 | |
|
832 | 829 | /// Get the status of files in the working directory. |
|
833 | 830 | /// |
|
834 | 831 | /// This is the current entry-point for `hg-core` and is realistically unusable |
|
835 | 832 | /// outside of a Python context because its arguments need to provide a lot of |
|
836 | 833 | /// information that will not be necessary in the future. |
|
837 | 834 | #[timed] |
|
838 | 835 | pub fn status<'a: 'c, 'b: 'c, 'c>( |
|
839 | 836 | dmap: &'a DirstateMap, |
|
840 | 837 | matcher: &'b (impl Matcher + Sync), |
|
841 | 838 | root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c, |
|
842 | 839 | ignore_files: Vec<PathBuf>, |
|
843 | 840 | options: StatusOptions, |
|
844 | 841 | ) -> StatusResult<( |
|
845 | 842 | (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>), |
|
846 | 843 | Vec<PatternFileWarning>, |
|
847 | 844 | )> { |
|
848 | 845 | // Needs to outlive `dir_ignore_fn` since it's captured. |
|
849 | 846 | let ignore_fn: IgnoreFnType; |
|
850 | 847 | |
|
851 | 848 | // Only involve real ignore mechanism if we're listing unknowns or ignored. |
|
852 | 849 | let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored |
|
853 | 850 | || options.list_unknown |
|
854 | 851 | { |
|
855 | 852 | let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?; |
|
856 | 853 | |
|
857 | 854 | ignore_fn = ignore; |
|
858 | 855 | let dir_ignore_fn = Box::new(|dir: &_| { |
|
859 | 856 | // Is the path or one of its ancestors ignored? |
|
860 | 857 | if ignore_fn(dir) { |
|
861 | 858 | true |
|
862 | 859 | } else { |
|
863 | 860 | for p in find_dirs(dir) { |
|
864 | 861 | if ignore_fn(p) { |
|
865 | 862 | return true; |
|
866 | 863 | } |
|
867 | 864 | } |
|
868 | 865 | false |
|
869 | 866 | } |
|
870 | 867 | }); |
|
871 | 868 | (dir_ignore_fn, warnings) |
|
872 | 869 | } else { |
|
873 | 870 | ignore_fn = Box::new(|&_| true); |
|
874 | 871 | (Box::new(|&_| true), vec![]) |
|
875 | 872 | }; |
|
876 | 873 | |
|
877 | 874 | let files = matcher.file_set(); |
|
878 | 875 | |
|
879 | 876 | // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not. |
|
880 | 877 | let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded(); |
|
881 | 878 | |
|
882 | 879 | // Step 1: check the files explicitly mentioned by the user |
|
883 | 880 | let explicit = walk_explicit( |
|
884 | 881 | files, |
|
885 | 882 | &dmap, |
|
886 | 883 | root_dir, |
|
887 | 884 | options, |
|
888 | 885 | traversed_sender.clone(), |
|
889 | 886 | ); |
|
890 | 887 | |
|
891 | 888 | // Collect results into a `Vec` because we do very few lookups in most |
|
892 | 889 | // cases. |
|
893 | 890 | let (work, mut results): (Vec<_>, Vec<_>) = explicit |
|
894 | 891 | .filter_map(Result::ok) |
|
895 | 892 | .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)) |
|
896 | 893 | .partition(|(_, dispatch)| match dispatch { |
|
897 | 894 | Dispatch::Directory { .. } => true, |
|
898 | 895 | _ => false, |
|
899 | 896 | }); |
|
900 | 897 | |
|
901 | 898 | if !work.is_empty() { |
|
902 | 899 | // Hashmaps are quite a bit slower to build than vecs, so only build it |
|
903 | 900 | // if needed. |
|
904 | 901 | let old_results = results.iter().cloned().collect(); |
|
905 | 902 | |
|
906 | 903 | // Step 2: recursively check the working directory for changes if |
|
907 | 904 | // needed |
|
908 | 905 | for (dir, dispatch) in work { |
|
909 | 906 | match dispatch { |
|
910 | 907 | Dispatch::Directory { was_file } => { |
|
911 | 908 | if was_file { |
|
912 | 909 | results.push((dir.to_owned(), Dispatch::Removed)); |
|
913 | 910 | } |
|
914 | 911 | if options.list_ignored |
|
915 | 912 | || options.list_unknown && !dir_ignore_fn(&dir) |
|
916 | 913 | { |
|
917 | 914 | traverse( |
|
918 | 915 | matcher, |
|
919 | 916 | root_dir, |
|
920 | 917 | &dmap, |
|
921 | 918 | &dir, |
|
922 | 919 | &old_results, |
|
923 | 920 | &ignore_fn, |
|
924 | 921 | &dir_ignore_fn, |
|
925 | 922 | options, |
|
926 | 923 | &mut results, |
|
927 | 924 | traversed_sender.clone(), |
|
928 | 925 | )?; |
|
929 | 926 | } |
|
930 | 927 | } |
|
931 | 928 | _ => unreachable!("There can only be directories in `work`"), |
|
932 | 929 | } |
|
933 | 930 | } |
|
934 | 931 | } |
|
935 | 932 | |
|
936 | 933 | if !matcher.is_exact() { |
|
937 | 934 | // Step 3: Check the remaining files from the dmap. |
|
938 | 935 | // If a dmap file is not in results yet, it was either |
|
939 | 936 | // a) not matched, b) ignored, c) missing, or d) under a 
939 | 936 | // a) not matched, b) ignored, c) missing, or d) under a 
|
940 | 937 | // symlink directory. |
|
941 | 938 | |
|
942 | 939 | if options.list_unknown { |
|
943 | 940 | handle_unknowns(dmap, matcher, root_dir, options, &mut results)?; |
|
944 | 941 | } else { |
|
945 | 942 | // We may not have walked the full directory tree above, so stat |
|
946 | 943 | // and check everything we missed. |
|
947 | 944 | extend_from_dmap(&dmap, root_dir, options, &mut results); |
|
948 | 945 | } |
|
949 | 946 | } |
|
950 | 947 | |
|
951 | 948 | // Close the channel |
|
952 | 949 | drop(traversed_sender); |
|
953 | 950 | let traversed_dirs = traversed_recv.into_iter().collect(); |
|
954 | 951 | |
|
955 | 952 | Ok((build_response(results, traversed_dirs), warnings)) |
|
956 | 953 | } |
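
A standalone sketch of the `dir_ignore_fn` closure built near the top of `status` above: a directory counts as ignored when it, or any of its ancestor directories, matches the ignore predicate. Here a plain `&str` path and a simple '/'-splitting loop stand in for `HgPath` and `find_dirs`; both are illustrative only.

    fn dir_is_ignored(dir: &str, ignore: &dyn Fn(&str) -> bool) -> bool {
        if ignore(dir) {
            return true;
        }
        // Walk up the ancestors: "a/b/c" -> "a/b" -> "a".
        let mut current = dir;
        while let Some(idx) = current.rfind('/') {
            current = &current[..idx];
            if ignore(current) {
                return true;
            }
        }
        false
    }
    // Example: dir_is_ignored("a/b/c", &|d| d == "a/b") is true because the
    // ancestor "a/b" is ignored.
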
@@ -1,695 +1,695 b'' | |||
|
1 | 1 | // discovery.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Discovery operations |
|
9 | 9 | //! |
|
10 | 10 | //! This is a Rust counterpart to the `partialdiscovery` class of |
|
11 | 11 | //! `mercurial.setdiscovery` |
|
12 | 12 | |
|
13 | 13 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
14 | 14 | use crate::{ancestors::MissingAncestors, dagops, FastHashMap}; |
|
15 | 15 | use rand::seq::SliceRandom; |
|
16 | 16 | use rand::{thread_rng, RngCore, SeedableRng}; |
|
17 | 17 | use std::cmp::{max, min}; |
|
18 | 18 | use std::collections::{HashSet, VecDeque}; |
|
19 | 19 | |
|
20 | 20 | type Rng = rand_pcg::Pcg32; |
|
21 | 21 | type Seed = [u8; 16]; |
|
22 | 22 | |
|
23 | 23 | pub struct PartialDiscovery<G: Graph + Clone> { |
|
24 | 24 | target_heads: Option<Vec<Revision>>, |
|
25 | 25 | graph: G, // plays the role of self._repo |
|
26 | 26 | common: MissingAncestors<G>, |
|
27 | 27 | undecided: Option<HashSet<Revision>>, |
|
28 | 28 | children_cache: Option<FastHashMap<Revision, Vec<Revision>>>, |
|
29 | 29 | missing: HashSet<Revision>, |
|
30 | 30 | rng: Rng, |
|
31 | 31 | respect_size: bool, |
|
32 | 32 | randomize: bool, |
|
33 | 33 | } |
|
34 | 34 | |
|
35 | 35 | pub struct DiscoveryStats { |
|
36 | 36 | pub undecided: Option<usize>, |
|
37 | 37 | } |
|
38 | 38 | |
|
39 | 39 | /// Update an existing sample to match the expected size |
|
40 | 40 | /// |
|
41 | 41 | /// The sample is updated with revisions exponentially distant from each |
|
42 | 42 | /// element of `heads`. |
|
43 | 43 | /// |
|
44 | 44 | /// If a target size is specified, the sampling will stop once this size is |
|
45 | 45 | /// reached. Otherwise sampling will happen until roots of the <revs> set are |
|
46 | 46 | /// reached. |
|
47 | 47 | /// |
|
48 | 48 | /// - `revs`: set of revs we want to discover (if None, assume the whole dag 

49 | 49 | /// represented by `parentsfn`) 
|
50 | 50 | /// - `heads`: set of DAG head revs |
|
51 | 51 | /// - `sample`: a sample to update |
|
52 | 52 | /// - `parentfn`: a callable to resolve parents for a revision |
|
53 | 53 | /// - `quicksamplesize`: optional target size of the sample |
|
54 | 54 | fn update_sample<I>( |
|
55 | 55 | revs: Option<&HashSet<Revision>>, |
|
56 | 56 | heads: impl IntoIterator<Item = Revision>, |
|
57 | 57 | sample: &mut HashSet<Revision>, |
|
58 | 58 | parentsfn: impl Fn(Revision) -> Result<I, GraphError>, |
|
59 | 59 | quicksamplesize: Option<usize>, |
|
60 | 60 | ) -> Result<(), GraphError> |
|
61 | 61 | where |
|
62 | 62 | I: Iterator<Item = Revision>, |
|
63 | 63 | { |
|
64 | 64 | let mut distances: FastHashMap<Revision, u32> = FastHashMap::default(); |
|
65 | 65 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); |
|
66 | 66 | let mut factor: u32 = 1; |
|
67 | 67 | let mut seen: HashSet<Revision> = HashSet::new(); |
|
68 | 68 | while let Some(current) = visit.pop_front() { |
|
69 | 69 | if !seen.insert(current) { |
|
70 | 70 | continue; |
|
71 | 71 | } |
|
72 | 72 | |
|
73 | 73 | let d = *distances.entry(current).or_insert(1); |
|
74 | 74 | if d > factor { |
|
75 | 75 | factor *= 2; |
|
76 | 76 | } |
|
77 | 77 | if d == factor { |
|
78 | 78 | sample.insert(current); |
|
79 | 79 | if let Some(sz) = quicksamplesize { |
|
80 | 80 | if sample.len() >= sz { |
|
81 | 81 | return Ok(()); |
|
82 | 82 | } |
|
83 | 83 | } |
|
84 | 84 | } |
|
85 | 85 | for p in parentsfn(current)? { |
|
86 | 86 | if let Some(revs) = revs { |
|
87 | 87 | if !revs.contains(&p) { |
|
88 | 88 | continue; |
|
89 | 89 | } |
|
90 | 90 | } |
|
91 | 91 | distances.entry(p).or_insert(d + 1); |
|
92 | 92 | visit.push_back(p); |
|
93 | 93 | } |
|
94 | 94 | } |
|
95 | 95 | Ok(()) |
|
96 | 96 | } |
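
To make the exponential sampling concrete, here is a hand-rolled sketch of the same distance rule on the simplest possible graph, a linear chain in which each revision's only parent is the previous one (the function and its shape are illustrative, not part of the crate): only revisions whose distance from the head is a power of two are kept.

    use std::collections::HashSet;

    fn chain_sample(head: u32) -> HashSet<u32> {
        let mut sample = HashSet::new();
        let (mut factor, mut dist, mut current) = (1u32, 1u32, head);
        loop {
            // Same rule as above: double `factor` once the distance overtakes it,
            // and keep the revision whenever the distance lands exactly on it.
            if dist > factor {
                factor *= 2;
            }
            if dist == factor {
                sample.insert(current);
            }
            if current == 0 {
                return sample; // reached the root of the chain
            }
            current -= 1;
            dist += 1;
        }
    }
    // chain_sample(10) keeps the revisions at distances 1, 2, 4 and 8 from the
    // head, i.e. {10, 9, 7, 3}.
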
|
97 | 97 | |
|
98 | 98 | struct ParentsIterator { |
|
99 | 99 | parents: [Revision; 2], |
|
100 | 100 | cur: usize, |
|
101 | 101 | } |
|
102 | 102 | |
|
103 | 103 | impl ParentsIterator { |
|
104 | 104 | fn graph_parents( |
|
105 | 105 | graph: &impl Graph, |
|
106 | 106 | r: Revision, |
|
107 | 107 | ) -> Result<ParentsIterator, GraphError> { |
|
108 | 108 | Ok(ParentsIterator { |
|
109 | 109 | parents: graph.parents(r)?, |
|
110 | 110 | cur: 0, |
|
111 | 111 | }) |
|
112 | 112 | } |
|
113 | 113 | } |
|
114 | 114 | |
|
115 | 115 | impl Iterator for ParentsIterator { |
|
116 | 116 | type Item = Revision; |
|
117 | 117 | |
|
118 | 118 | fn next(&mut self) -> Option<Revision> { |
|
119 | 119 | if self.cur > 1 { |
|
120 | 120 | return None; |
|
121 | 121 | } |
|
122 | 122 | let rev = self.parents[self.cur]; |
|
123 | 123 | self.cur += 1; |
|
124 | 124 | if rev == NULL_REVISION { |
|
125 | 125 | return self.next(); |
|
126 | 126 | } |
|
127 | 127 | Some(rev) |
|
128 | 128 | } |
|
129 | 129 | } |
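
A standalone sketch of the same idea as `ParentsIterator::next` above: yield at most two parents while silently skipping the null sentinel (`-1` stands in for `NULL_REVISION` here). A loop replaces the recursive call of the original, which is equivalent for a two-element array.

    struct Parents {
        parents: [i32; 2],
        cur: usize,
    }

    impl Iterator for Parents {
        type Item = i32;

        fn next(&mut self) -> Option<i32> {
            while self.cur < 2 {
                let rev = self.parents[self.cur];
                self.cur += 1;
                if rev != -1 {
                    return Some(rev);
                }
            }
            None
        }
    }
    // Parents { parents: [7, -1], cur: 0 } yields just 7.
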
|
130 | 130 | |
|
131 | 131 | impl<G: Graph + Clone> PartialDiscovery<G> { |
|
132 | 132 | /// Create a PartialDiscovery object, with the intent |
|
133 | 133 | /// of comparing our `::<target_heads>` revset to the contents of another |
|
134 | 134 | /// repo. |
|
135 | 135 | /// |
|
136 | 136 | /// For now `target_heads` is passed as a vector, and will be used |
|
137 | 137 | /// at the first call to `ensure_undecided()`. |
|
138 | 138 | /// |
|
139 | 139 | /// If we want to make the signature more flexible, |
|
140 | 140 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait |
|
141 | 141 | /// object, since we'll be keeping it around in the meantime 
|
142 | 142 | /// |
|
143 | 143 | /// The `respect_size` boolean controls how the sampling methods |
|
144 | 144 | /// will interpret the size argument requested by the caller. If it's |
|
145 | 145 | /// `false`, they are allowed to produce a sample whose size is more |
|
146 | 146 | /// appropriate to the situation (typically bigger). |
|
147 | 147 | /// |
|
148 | 148 | /// The `randomize` boolean affects sampling, and specifically how |
|
149 | 149 | /// limiting or last-minute expanding is being done: 
|
150 | 150 | /// |
|
151 | 151 | /// If `true`, both will perform random picking from `self.undecided`. |
|
152 | 152 | /// This is currently the best for actual discoveries. |
|
153 | 153 | /// |
|
154 | 154 | /// If `false`, a reproducible picking strategy is performed. This is 
|
155 | 155 | /// useful for integration tests. |
|
156 | 156 | pub fn new( |
|
157 | 157 | graph: G, |
|
158 | 158 | target_heads: Vec<Revision>, |
|
159 | 159 | respect_size: bool, |
|
160 | 160 | randomize: bool, |
|
161 | 161 | ) -> Self { |
|
162 | 162 | let mut seed = [0; 16]; |
|
163 | 163 | if randomize { |
|
164 | 164 | thread_rng().fill_bytes(&mut seed); |
|
165 | 165 | } |
|
166 | 166 | Self::new_with_seed(graph, target_heads, seed, respect_size, randomize) |
|
167 | 167 | } |
|
168 | 168 | |
|
169 | 169 | pub fn new_with_seed( |
|
170 | 170 | graph: G, |
|
171 | 171 | target_heads: Vec<Revision>, |
|
172 | 172 | seed: Seed, |
|
173 | 173 | respect_size: bool, |
|
174 | 174 | randomize: bool, |
|
175 | 175 | ) -> Self { |
|
176 | 176 | PartialDiscovery { |
|
177 | 177 | undecided: None, |
|
178 | 178 | children_cache: None, |
|
179 | 179 | target_heads: Some(target_heads), |
|
180 | 180 | graph: graph.clone(), |
|
181 | 181 | common: MissingAncestors::new(graph, vec![]), |
|
182 | 182 | missing: HashSet::new(), |
|
183 | 183 | rng: Rng::from_seed(seed), |
|
184 |
respect_size |
|
|
185 |
randomize |
|
|
184 | respect_size, | |
|
185 | randomize, | |
|
186 | 186 | } |
|
187 | 187 | } |
|
188 | 188 | |
|
189 | 189 | /// Extract at most `size` random elements from sample and return them |
|
190 | 190 | /// as a vector |
|
191 | 191 | fn limit_sample( |
|
192 | 192 | &mut self, |
|
193 | 193 | mut sample: Vec<Revision>, |
|
194 | 194 | size: usize, |
|
195 | 195 | ) -> Vec<Revision> { |
|
196 | 196 | if !self.randomize { |
|
197 | 197 | sample.sort(); |
|
198 | 198 | sample.truncate(size); |
|
199 | 199 | return sample; |
|
200 | 200 | } |
|
201 | 201 | let sample_len = sample.len(); |
|
202 | 202 | if sample_len <= size { |
|
203 | 203 | return sample; |
|
204 | 204 | } |
|
205 | 205 | let rng = &mut self.rng; |
|
206 | 206 | let dropped_size = sample_len - size; |
|
207 | 207 | let limited_slice = if size < dropped_size { |
|
208 | 208 | sample.partial_shuffle(rng, size).0 |
|
209 | 209 | } else { |
|
210 | 210 | sample.partial_shuffle(rng, dropped_size).1 |
|
211 | 211 | }; |
|
212 | 212 | limited_slice.to_owned() |
|
213 | 213 | } |
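
The asymmetric use of `partial_shuffle` above is a small optimisation: `partial_shuffle(rng, amount)` only randomises `amount` slots, so it is cheaper to shuffle whichever of the kept part and the dropped part is smaller. A standalone sketch of the same idea, assuming only the `rand` crate already used by this module (the helper name is made up):

    use rand::seq::SliceRandom;

    fn keep_random(mut v: Vec<u32>, size: usize) -> Vec<u32> {
        if v.len() <= size {
            return v;
        }
        let mut rng = rand::thread_rng();
        let dropped = v.len() - size;
        let kept = if size < dropped {
            // Few survivors: randomly pick them directly.
            v.partial_shuffle(&mut rng, size).0
        } else {
            // Few victims: randomly pick those instead and keep everything else.
            v.partial_shuffle(&mut rng, dropped).1
        };
        kept.to_vec()
    }
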
|
214 | 214 | |
|
215 | 215 | /// Register revisions known as being common |
|
216 | 216 | pub fn add_common_revisions( |
|
217 | 217 | &mut self, |
|
218 | 218 | common: impl IntoIterator<Item = Revision>, |
|
219 | 219 | ) -> Result<(), GraphError> { |
|
220 | 220 | let before_len = self.common.get_bases().len(); |
|
221 | 221 | self.common.add_bases(common); |
|
222 | 222 | if self.common.get_bases().len() == before_len { |
|
223 | 223 | return Ok(()); |
|
224 | 224 | } |
|
225 | 225 | if let Some(ref mut undecided) = self.undecided { |
|
226 | 226 | self.common.remove_ancestors_from(undecided)?; |
|
227 | 227 | } |
|
228 | 228 | Ok(()) |
|
229 | 229 | } |
|
230 | 230 | |
|
231 | 231 | /// Register revisions known as being missing |
|
232 | 232 | /// |
|
233 | 233 | /// # Performance note |
|
234 | 234 | /// |
|
235 | 235 | /// Except in the most trivial case, the first call of this method has |
|
236 | 236 | /// the side effect of computing `self.undecided` set for the first time, |
|
237 | 237 | /// and the related caches it might need for efficiency of its internal |
|
238 | 238 | /// computation. This is typically faster if more information is |
|
239 | 239 | /// available in `self.common`. Therefore, for good performance, the |
|
240 | 240 | /// caller should avoid calling this too early. |
|
241 | 241 | pub fn add_missing_revisions( |
|
242 | 242 | &mut self, |
|
243 | 243 | missing: impl IntoIterator<Item = Revision>, |
|
244 | 244 | ) -> Result<(), GraphError> { |
|
245 | 245 | let mut tovisit: VecDeque<Revision> = missing.into_iter().collect(); |
|
246 | 246 | if tovisit.is_empty() { |
|
247 | 247 | return Ok(()); |
|
248 | 248 | } |
|
249 | 249 | self.ensure_children_cache()?; |
|
250 | 250 | self.ensure_undecided()?; // for safety of possible future refactors |
|
251 | 251 | let children = self.children_cache.as_ref().unwrap(); |
|
252 | 252 | let mut seen: HashSet<Revision> = HashSet::new(); |
|
253 | 253 | let undecided_mut = self.undecided.as_mut().unwrap(); |
|
254 | 254 | while let Some(rev) = tovisit.pop_front() { |
|
255 | 255 | if !self.missing.insert(rev) { |
|
256 | 256 | // either it's known to be missing from a previous |
|
257 | 257 | // invocation, and there's no need to iterate on its |
|
258 | 258 | // children (we know they are all missing) 
|
259 | 259 | // or it's from a previous iteration of this loop |
|
260 | 260 | // and its children have already been queued |
|
261 | 261 | continue; |
|
262 | 262 | } |
|
263 | 263 | undecided_mut.remove(&rev); |
|
264 | 264 | match children.get(&rev) { |
|
265 | 265 | None => { |
|
266 | 266 | continue; |
|
267 | 267 | } |
|
268 | 268 | Some(this_children) => { |
|
269 | 269 | for child in this_children.iter().cloned() { |
|
270 | 270 | if seen.insert(child) { |
|
271 | 271 | tovisit.push_back(child); |
|
272 | 272 | } |
|
273 | 273 | } |
|
274 | 274 | } |
|
275 | 275 | } |
|
276 | 276 | } |
|
277 | 277 | Ok(()) |
|
278 | 278 | } |
|
279 | 279 | |
|
280 | 280 | /// Do we have any information about the peer? |
|
281 | 281 | pub fn has_info(&self) -> bool { |
|
282 | 282 | self.common.has_bases() |
|
283 | 283 | } |
|
284 | 284 | |
|
285 | 285 | /// Did we acquire full knowledge of our Revisions that the peer has? |
|
286 | 286 | pub fn is_complete(&self) -> bool { |
|
287 |
self.undecided.as_ref().map_or(false, |
|
|
287 | self.undecided.as_ref().map_or(false, HashSet::is_empty) | |
|
288 | 288 | } |
|
289 | 289 | |
|
290 | 290 | /// Return the heads of the currently known common set of revisions. |
|
291 | 291 | /// |
|
292 | 292 | /// If the discovery process is not complete (see `is_complete()`), the |
|
293 | 293 | /// caller must be aware that this is an intermediate state. |
|
294 | 294 | /// |
|
295 | 295 | /// On the other hand, if it is complete, then this is currently |
|
296 | 296 | /// the only way to retrieve the end results of the discovery process. |
|
297 | 297 | /// |
|
298 | 298 | /// We may introduce in the future an `into_common_heads` call that |
|
299 | 299 | /// would be more appropriate for normal Rust callers, dropping `self` |
|
300 | 300 | /// if it is complete. |
|
301 | 301 | pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> { |
|
302 | 302 | self.common.bases_heads() |
|
303 | 303 | } |
|
304 | 304 | |
|
305 | 305 | /// Force first computation of `self.undecided` |
|
306 | 306 | /// |
|
307 | 307 | /// After this, `self.undecided.as_ref()` and `.as_mut()` can be |
|
308 | 308 | /// unwrapped to get workable immutable or mutable references without |
|
309 | 309 | /// any panic. |
|
310 | 310 | /// |
|
311 | 311 | /// This is an imperative call instead of a lazy accessor so that the 

312 | 312 | /// caller can easily keep the scope of the mutable borrow small, 

313 | 313 | /// compared to undecided(&'a mut self) -> &'a… which would keep the 

314 | 314 | /// mutable borrow alive as long as the resulting immutable one. 
|
315 | 315 | fn ensure_undecided(&mut self) -> Result<(), GraphError> { |
|
316 | 316 | if self.undecided.is_some() { |
|
317 | 317 | return Ok(()); |
|
318 | 318 | } |
|
319 | 319 | let tgt = self.target_heads.take().unwrap(); |
|
320 | 320 | self.undecided = |
|
321 | 321 | Some(self.common.missing_ancestors(tgt)?.into_iter().collect()); |
|
322 | 322 | Ok(()) |
|
323 | 323 | } |
|
324 | 324 | |
|
325 | 325 | fn ensure_children_cache(&mut self) -> Result<(), GraphError> { |
|
326 | 326 | if self.children_cache.is_some() { |
|
327 | 327 | return Ok(()); |
|
328 | 328 | } |
|
329 | 329 | self.ensure_undecided()?; |
|
330 | 330 | |
|
331 | 331 | let mut children: FastHashMap<Revision, Vec<Revision>> = |
|
332 | 332 | FastHashMap::default(); |
|
333 | 333 | for &rev in self.undecided.as_ref().unwrap() { |
|
334 | 334 | for p in ParentsIterator::graph_parents(&self.graph, rev)? { |
|
335 |
children.entry(p).or_insert_with( |
|
|
335 | children.entry(p).or_insert_with(Vec::new).push(rev); | |
|
336 | 336 | } |
|
337 | 337 | } |
|
338 | 338 | self.children_cache = Some(children); |
|
339 | 339 | Ok(()) |
|
340 | 340 | } |
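
For illustration, a standalone sketch of the inversion performed above (the array-of-parents input and `-1` standing in for `NULL_REVISION` are made up for the example): walk each revision's parents and record the reverse edge into a children map.

    use std::collections::HashMap;

    fn children_map(parents: &[[i32; 2]]) -> HashMap<i32, Vec<i32>> {
        let mut children: HashMap<i32, Vec<i32>> = HashMap::new();
        for (rev, ps) in parents.iter().enumerate() {
            for &p in ps {
                if p != -1 {
                    // Reverse edge: `rev` is a child of `p`.
                    children.entry(p).or_insert_with(Vec::new).push(rev as i32);
                }
            }
        }
        children
    }
    // children_map(&[[-1, -1], [0, -1], [0, -1], [1, 2]]) yields
    // {0: [1, 2], 1: [3], 2: [3]}.
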
|
341 | 341 | |
|
342 | 342 | /// Provide statistics about the current state of the discovery process |
|
343 | 343 | pub fn stats(&self) -> DiscoveryStats { |
|
344 | 344 | DiscoveryStats { |
|
345 |
undecided: self.undecided.as_ref().map( |
|
|
345 | undecided: self.undecided.as_ref().map(HashSet::len), | |
|
346 | 346 | } |
|
347 | 347 | } |
|
348 | 348 | |
|
349 | 349 | pub fn take_quick_sample( |
|
350 | 350 | &mut self, |
|
351 | 351 | headrevs: impl IntoIterator<Item = Revision>, |
|
352 | 352 | size: usize, |
|
353 | 353 | ) -> Result<Vec<Revision>, GraphError> { |
|
354 | 354 | self.ensure_undecided()?; |
|
355 | 355 | let mut sample = { |
|
356 | 356 | let undecided = self.undecided.as_ref().unwrap(); |
|
357 | 357 | if undecided.len() <= size { |
|
358 | 358 | return Ok(undecided.iter().cloned().collect()); |
|
359 | 359 | } |
|
360 | 360 | dagops::heads(&self.graph, undecided.iter())? |
|
361 | 361 | }; |
|
362 | 362 | if sample.len() >= size { |
|
363 | 363 | return Ok(self.limit_sample(sample.into_iter().collect(), size)); |
|
364 | 364 | } |
|
365 | 365 | update_sample( |
|
366 | 366 | None, |
|
367 | 367 | headrevs, |
|
368 | 368 | &mut sample, |
|
369 | 369 | |r| ParentsIterator::graph_parents(&self.graph, r), |
|
370 | 370 | Some(size), |
|
371 | 371 | )?; |
|
372 | 372 | Ok(sample.into_iter().collect()) |
|
373 | 373 | } |
|
374 | 374 | |
|
375 | 375 | /// Extract a sample from `self.undecided`, going from its heads and roots. |
|
376 | 376 | /// |
|
377 | 377 | /// The `size` parameter is used to avoid useless computations if |
|
378 | 378 | /// it turns out to be bigger than the whole set of undecided Revisions. |
|
379 | 379 | /// |
|
380 | 380 | /// The sample is taken by using `update_sample` from the heads, then |
|
381 | 381 | /// from the roots, working on the reverse DAG, |
|
382 | 382 | /// expressed by `self.children_cache`. |
|
383 | 383 | /// |
|
384 | 384 | /// No effort is being made to complete or limit the sample to `size` |
|
385 | 385 | /// but this method returns another interesting size that it derives |
|
386 | 386 | /// from its knowledge of the structure of the various sets, leaving |
|
387 | 387 | /// to the caller the decision to use it or not. |
|
388 | 388 | fn bidirectional_sample( |
|
389 | 389 | &mut self, |
|
390 | 390 | size: usize, |
|
391 | 391 | ) -> Result<(HashSet<Revision>, usize), GraphError> { |
|
392 | 392 | self.ensure_undecided()?; |
|
393 | 393 | { |
|
394 | 394 | // we don't want to compute children_cache before this |
|
395 | 395 | // but doing it after extracting self.undecided takes a mutable |
|
396 | 396 | // ref to self while a shareable one is still active. |
|
397 | 397 | let undecided = self.undecided.as_ref().unwrap(); |
|
398 | 398 | if undecided.len() <= size { |
|
399 | 399 | return Ok((undecided.clone(), size)); |
|
400 | 400 | } |
|
401 | 401 | } |
|
402 | 402 | |
|
403 | 403 | self.ensure_children_cache()?; |
|
404 | 404 | let revs = self.undecided.as_ref().unwrap(); |
|
405 | 405 | let mut sample: HashSet<Revision> = revs.clone(); |
|
406 | 406 | |
|
407 | 407 | // it's possible that leveraging the children cache would be more |
|
408 | 408 | // efficient here |
|
409 | 409 | dagops::retain_heads(&self.graph, &mut sample)?; |
|
410 | 410 | let revsheads = sample.clone(); // was again heads(revs) in python |
|
411 | 411 | |
|
412 | 412 | // update from heads |
|
413 | 413 | update_sample( |
|
414 | 414 | Some(revs), |
|
415 | 415 | revsheads.iter().cloned(), |
|
416 | 416 | &mut sample, |
|
417 | 417 | |r| ParentsIterator::graph_parents(&self.graph, r), |
|
418 | 418 | None, |
|
419 | 419 | )?; |
|
420 | 420 | |
|
421 | 421 | // update from roots |
|
422 | 422 | let revroots: HashSet<Revision> = |
|
423 | 423 | dagops::roots(&self.graph, revs)?.into_iter().collect(); |
|
424 | 424 | let prescribed_size = max(size, min(revroots.len(), revsheads.len())); |
|
425 | 425 | |
|
426 | 426 | let children = self.children_cache.as_ref().unwrap(); |
|
427 | 427 | let empty_vec: Vec<Revision> = Vec::new(); |
|
428 | 428 | update_sample( |
|
429 | 429 | Some(revs), |
|
430 | 430 | revroots, |
|
431 | 431 | &mut sample, |
|
432 | 432 | |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()), |
|
433 | 433 | None, |
|
434 | 434 | )?; |
|
435 | 435 | Ok((sample, prescribed_size)) |
|
436 | 436 | } |
|
437 | 437 | |
|
438 | 438 | /// Fill up sample up to the wished size with random undecided Revisions. |
|
439 | 439 | /// |
|
440 | 440 | /// This is intended to be used as a last resort completion if the |
|
441 | 441 | /// regular sampling algorithm returns too few elements. |
|
442 | 442 | fn random_complete_sample( |
|
443 | 443 | &mut self, |
|
444 | 444 | sample: &mut Vec<Revision>, |
|
445 | 445 | size: usize, |
|
446 | 446 | ) { |
|
447 | 447 | let sample_len = sample.len(); |
|
448 | 448 | if size <= sample_len { |
|
449 | 449 | return; |
|
450 | 450 | } |
|
451 | 451 | let take_from: Vec<Revision> = self |
|
452 | 452 | .undecided |
|
453 | 453 | .as_ref() |
|
454 | 454 | .unwrap() |
|
455 | 455 | .iter() |
|
456 | 456 | .filter(|&r| !sample.contains(r)) |
|
457 | 457 | .cloned() |
|
458 | 458 | .collect(); |
|
459 | 459 | sample.extend(self.limit_sample(take_from, size - sample_len)); |
|
460 | 460 | } |
|
461 | 461 | |
|
462 | 462 | pub fn take_full_sample( |
|
463 | 463 | &mut self, |
|
464 | 464 | size: usize, |
|
465 | 465 | ) -> Result<Vec<Revision>, GraphError> { |
|
466 | 466 | let (sample_set, prescribed_size) = self.bidirectional_sample(size)?; |
|
467 | 467 | let size = if self.respect_size { |
|
468 | 468 | size |
|
469 | 469 | } else { |
|
470 | 470 | prescribed_size |
|
471 | 471 | }; |
|
472 | 472 | let mut sample = |
|
473 | 473 | self.limit_sample(sample_set.into_iter().collect(), size); |
|
474 | 474 | self.random_complete_sample(&mut sample, size); |
|
475 | 475 | Ok(sample) |
|
476 | 476 | } |
|
477 | 477 | } |
|
478 | 478 | |
|
479 | 479 | #[cfg(test)] |
|
480 | 480 | mod tests { |
|
481 | 481 | use super::*; |
|
482 | 482 | use crate::testing::SampleGraph; |
|
483 | 483 | |
|
484 | 484 | /// A PartialDiscovery as for pushing all the heads of `SampleGraph` |
|
485 | 485 | /// |
|
486 | 486 | /// To avoid actual randomness in these tests, we give it a fixed |
|
487 | 487 | /// random seed, but by default we'll test the random version. |
|
488 | 488 | fn full_disco() -> PartialDiscovery<SampleGraph> { |
|
489 | 489 | PartialDiscovery::new_with_seed( |
|
490 | 490 | SampleGraph, |
|
491 | 491 | vec![10, 11, 12, 13], |
|
492 | 492 | [0; 16], |
|
493 | 493 | true, |
|
494 | 494 | true, |
|
495 | 495 | ) |
|
496 | 496 | } |
|
497 | 497 | |
|
498 | 498 | /// A PartialDiscovery as for pushing head 12 of `SampleGraph` 
|
499 | 499 | /// |
|
500 | 500 | /// To avoid actual randomness in tests, we give it a fixed random seed. |
|
501 | 501 | fn disco12() -> PartialDiscovery<SampleGraph> { |
|
502 | 502 | PartialDiscovery::new_with_seed( |
|
503 | 503 | SampleGraph, |
|
504 | 504 | vec![12], |
|
505 | 505 | [0; 16], |
|
506 | 506 | true, |
|
507 | 507 | true, |
|
508 | 508 | ) |
|
509 | 509 | } |
|
510 | 510 | |
|
511 | 511 | fn sorted_undecided( |
|
512 | 512 | disco: &PartialDiscovery<SampleGraph>, |
|
513 | 513 | ) -> Vec<Revision> { |
|
514 | 514 | let mut as_vec: Vec<Revision> = |
|
515 | 515 | disco.undecided.as_ref().unwrap().iter().cloned().collect(); |
|
516 | 516 | as_vec.sort(); |
|
517 | 517 | as_vec |
|
518 | 518 | } |
|
519 | 519 | |
|
520 | 520 | fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> { |
|
521 | 521 | let mut as_vec: Vec<Revision> = |
|
522 | 522 | disco.missing.iter().cloned().collect(); |
|
523 | 523 | as_vec.sort(); |
|
524 | 524 | as_vec |
|
525 | 525 | } |
|
526 | 526 | |
|
527 | 527 | fn sorted_common_heads( |
|
528 | 528 | disco: &PartialDiscovery<SampleGraph>, |
|
529 | 529 | ) -> Result<Vec<Revision>, GraphError> { |
|
530 | 530 | let mut as_vec: Vec<Revision> = |
|
531 | 531 | disco.common_heads()?.iter().cloned().collect(); |
|
532 | 532 | as_vec.sort(); |
|
533 | 533 | Ok(as_vec) |
|
534 | 534 | } |
|
535 | 535 | |
|
536 | 536 | #[test] |
|
537 | 537 | fn test_add_common_get_undecided() -> Result<(), GraphError> { |
|
538 | 538 | let mut disco = full_disco(); |
|
539 | 539 | assert_eq!(disco.undecided, None); |
|
540 | 540 | assert!(!disco.has_info()); |
|
541 | 541 | assert_eq!(disco.stats().undecided, None); |
|
542 | 542 | |
|
543 | 543 | disco.add_common_revisions(vec![11, 12])?; |
|
544 | 544 | assert!(disco.has_info()); |
|
545 | 545 | assert!(!disco.is_complete()); |
|
546 | 546 | assert!(disco.missing.is_empty()); |
|
547 | 547 | |
|
548 | 548 | // add_common_revisions did not trigger a premature computation |
|
549 | 549 | // of `undecided`, let's check that and ask for them |
|
550 | 550 | assert_eq!(disco.undecided, None); |
|
551 | 551 | disco.ensure_undecided()?; |
|
552 | 552 | assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]); |
|
553 | 553 | assert_eq!(disco.stats().undecided, Some(4)); |
|
554 | 554 | Ok(()) |
|
555 | 555 | } |
|
556 | 556 | |
|
557 | 557 | /// in this test, we pretend that our peer misses exactly (8+10):: |
|
558 | 558 | /// and we're comparing all our repo to it (as in a bare push) |
|
559 | 559 | #[test] |
|
560 | 560 | fn test_discovery() -> Result<(), GraphError> { |
|
561 | 561 | let mut disco = full_disco(); |
|
562 | 562 | disco.add_common_revisions(vec![11, 12])?; |
|
563 | 563 | disco.add_missing_revisions(vec![8, 10])?; |
|
564 | 564 | assert_eq!(sorted_undecided(&disco), vec![5]); |
|
565 | 565 | assert_eq!(sorted_missing(&disco), vec![8, 10, 13]); |
|
566 | 566 | assert!(!disco.is_complete()); |
|
567 | 567 | |
|
568 | 568 | disco.add_common_revisions(vec![5])?; |
|
569 | 569 | assert_eq!(sorted_undecided(&disco), vec![]); |
|
570 | 570 | assert_eq!(sorted_missing(&disco), vec![8, 10, 13]); |
|
571 | 571 | assert!(disco.is_complete()); |
|
572 | 572 | assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]); |
|
573 | 573 | Ok(()) |
|
574 | 574 | } |
|
575 | 575 | |
|
576 | 576 | #[test] |
|
577 | 577 | fn test_add_missing_early_continue() -> Result<(), GraphError> { |
|
578 | 578 | eprintln!("test_add_missing_early_stop"); |
|
579 | 579 | let mut disco = full_disco(); |
|
580 | 580 | disco.add_common_revisions(vec![13, 3, 4])?; |
|
581 | 581 | disco.ensure_children_cache()?; |
|
582 | 582 | // 12 is grand-child of 6 through 9 |
|
583 | 583 | // passing them in this order maximizes the chances of the |
|
584 | 584 | // early continue to do the wrong thing |
|
585 | 585 | disco.add_missing_revisions(vec![6, 9, 12])?; |
|
586 | 586 | assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]); |
|
587 | 587 | assert_eq!(sorted_missing(&disco), vec![6, 9, 12]); |
|
588 | 588 | assert!(!disco.is_complete()); |
|
589 | 589 | Ok(()) |
|
590 | 590 | } |
|
591 | 591 | |
|
592 | 592 | #[test] |
|
593 | 593 | fn test_limit_sample_no_need_to() { |
|
594 | 594 | let sample = vec![1, 2, 3, 4]; |
|
595 | 595 | assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]); |
|
596 | 596 | } |
|
597 | 597 | |
|
598 | 598 | #[test] |
|
599 | 599 | fn test_limit_sample_less_than_half() { |
|
600 | 600 | assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]); |
|
601 | 601 | } |
|
602 | 602 | |
|
603 | 603 | #[test] |
|
604 | 604 | fn test_limit_sample_more_than_half() { |
|
605 | 605 | assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]); |
|
606 | 606 | } |
|
607 | 607 | |
|
608 | 608 | #[test] |
|
609 | 609 | fn test_limit_sample_no_random() { |
|
610 | 610 | let mut disco = full_disco(); |
|
611 | 611 | disco.randomize = false; |
|
612 | 612 | assert_eq!( |
|
613 | 613 | disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4), |
|
614 | 614 | vec![1, 3, 5, 7] |
|
615 | 615 | ); |
|
616 | 616 | } |
|
617 | 617 | |
|
618 | 618 | #[test] |
|
619 | 619 | fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { |
|
620 | 620 | let mut disco = full_disco(); |
|
621 | 621 | disco.undecided = Some((1..=13).collect()); |
|
622 | 622 | |
|
623 | 623 | let mut sample_vec = disco.take_quick_sample(vec![], 4)?; |
|
624 | 624 | sample_vec.sort(); |
|
625 | 625 | assert_eq!(sample_vec, vec![10, 11, 12, 13]); |
|
626 | 626 | Ok(()) |
|
627 | 627 | } |
|
628 | 628 | |
|
629 | 629 | #[test] |
|
630 | 630 | fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> { |
|
631 | 631 | let mut disco = disco12(); |
|
632 | 632 | disco.ensure_undecided()?; |
|
633 | 633 | |
|
634 | 634 | let mut sample_vec = disco.take_quick_sample(vec![12], 4)?; |
|
635 | 635 | sample_vec.sort(); |
|
636 | 636 | // r12's only parent is r9, whose unique grand-parent through the |
|
637 | 637 | // diamond shape is r4. This ends there because the distance from r4 |
|
638 | 638 | // to the root is only 3. |
|
639 | 639 | assert_eq!(sample_vec, vec![4, 9, 12]); |
|
640 | 640 | Ok(()) |
|
641 | 641 | } |
|
642 | 642 | |
|
643 | 643 | #[test] |
|
644 | 644 | fn test_children_cache() -> Result<(), GraphError> { |
|
645 | 645 | let mut disco = full_disco(); |
|
646 | 646 | disco.ensure_children_cache()?; |
|
647 | 647 | |
|
648 | 648 | let cache = disco.children_cache.unwrap(); |
|
649 | 649 | assert_eq!(cache.get(&2).cloned(), Some(vec![4])); |
|
650 | 650 | assert_eq!(cache.get(&10).cloned(), None); |
|
651 | 651 | |
|
652 | 652 | let mut children_4 = cache.get(&4).cloned().unwrap(); |
|
653 | 653 | children_4.sort(); |
|
654 | 654 | assert_eq!(children_4, vec![5, 6, 7]); |
|
655 | 655 | |
|
656 | 656 | let mut children_7 = cache.get(&7).cloned().unwrap(); |
|
657 | 657 | children_7.sort(); |
|
658 | 658 | assert_eq!(children_7, vec![9, 11]); |
|
659 | 659 | |
|
660 | 660 | Ok(()) |
|
661 | 661 | } |
|
662 | 662 | |
|
663 | 663 | #[test] |
|
664 | 664 | fn test_complete_sample() { |
|
665 | 665 | let mut disco = full_disco(); |
|
666 | 666 | let undecided: HashSet<Revision> = |
|
667 | 667 | [4, 7, 9, 2, 3].iter().cloned().collect(); |
|
668 | 668 | disco.undecided = Some(undecided); |
|
669 | 669 | |
|
670 | 670 | let mut sample = vec![0]; |
|
671 | 671 | disco.random_complete_sample(&mut sample, 3); |
|
672 | 672 | assert_eq!(sample.len(), 3); |
|
673 | 673 | |
|
674 | 674 | let mut sample = vec![2, 4, 7]; |
|
675 | 675 | disco.random_complete_sample(&mut sample, 1); |
|
676 | 676 | assert_eq!(sample.len(), 3); |
|
677 | 677 | } |
|
678 | 678 | |
|
679 | 679 | #[test] |
|
680 | 680 | fn test_bidirectional_sample() -> Result<(), GraphError> { |
|
681 | 681 | let mut disco = full_disco(); |
|
682 | 682 | disco.undecided = Some((0..=13).into_iter().collect()); |
|
683 | 683 | |
|
684 | 684 | let (sample_set, size) = disco.bidirectional_sample(7)?; |
|
685 | 685 | assert_eq!(size, 7); |
|
686 | 686 | let mut sample: Vec<Revision> = sample_set.into_iter().collect(); |
|
687 | 687 | sample.sort(); |
|
688 | 688 | // our DAG is a bit too small for the results to be really interesting |
|
689 | 689 | // at least it shows that |
|
690 | 690 | // - we went both ways |
|
691 | 691 | // - we didn't take all Revisions (6 is not in the sample) |
|
692 | 692 | assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]); |
|
693 | 693 | Ok(()) |
|
694 | 694 | } |
|
695 | 695 | } |
@@ -1,669 +1,670 b'' | |||
|
1 | 1 | // filepatterns.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Handling of Mercurial-specific patterns. |
|
9 | 9 | |
|
10 | 10 | use crate::{ |
|
11 | 11 | utils::{ |
|
12 | 12 | files::{canonical_path, get_bytes_from_path, get_path_from_bytes}, |
|
13 | 13 | hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError}, |
|
14 | 14 | SliceExt, |
|
15 | 15 | }, |
|
16 | 16 | FastHashMap, PatternError, |
|
17 | 17 | }; |
|
18 | 18 | use lazy_static::lazy_static; |
|
19 | 19 | use regex::bytes::{NoExpand, Regex}; |
|
20 | 20 | use std::fs::File; |
|
21 | 21 | use std::io::Read; |
|
22 | 22 | use std::ops::Deref; |
|
23 | 23 | use std::path::{Path, PathBuf}; |
|
24 | 24 | use std::vec::Vec; |
|
25 | 25 | |
|
26 | 26 | lazy_static! { |
|
27 | 27 | static ref RE_ESCAPE: Vec<Vec<u8>> = { |
|
28 | 28 | let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect(); |
|
29 | 29 | let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c"; |
|
30 | 30 | for byte in to_escape { |
|
31 | 31 | v[*byte as usize].insert(0, b'\\'); |
|
32 | 32 | } |
|
33 | 33 | v |
|
34 | 34 | }; |
|
35 | 35 | } |
|
36 | 36 | |
|
37 | 37 | /// These are matched in order |
|
38 | 38 | const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = |
|
39 | 39 | &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; |
|
40 | 40 | |
|
41 | 41 | /// Appended to the regexp of globs |
|
42 | 42 | const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; |
|
43 | 43 | |
|
44 | 44 | #[derive(Debug, Copy, Clone, PartialEq, Eq)] |
|
45 | 45 | pub enum PatternSyntax { |
|
46 | 46 | /// A regular expression |
|
47 | 47 | Regexp, |
|
48 | 48 | /// Glob that matches at the front of the path |
|
49 | 49 | RootGlob, |
|
50 | 50 | /// Glob that matches at any suffix of the path (still anchored at |
|
51 | 51 | /// slashes) |
|
52 | 52 | Glob, |
|
53 | 53 | /// A path relative to repository root, which is matched recursively 
|
54 | 54 | Path, |
|
55 | 55 | /// A path relative to cwd |
|
56 | 56 | RelPath, |
|
57 | 57 | /// An unrooted glob (*.rs matches Rust files in all dirs) 
|
58 | 58 | RelGlob, |
|
59 | 59 | /// A regexp that needn't match the start of a name |
|
60 | 60 | RelRegexp, |
|
61 | 61 | /// A path relative to repository root, which is matched non-recursively |
|
62 | 62 | /// (will not match subdirectories) |
|
63 | 63 | RootFiles, |
|
64 | 64 | /// A file of patterns to read and include |
|
65 | 65 | Include, |
|
66 | 66 | /// A file of patterns to match against files under the same directory |
|
67 | 67 | SubInclude, |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | /// Transforms a glob pattern into a regex |
|
71 | 71 | fn glob_to_re(pat: &[u8]) -> Vec<u8> { |
|
72 | 72 | let mut input = pat; |
|
73 | 73 | let mut res: Vec<u8> = vec![]; |
|
74 | 74 | let mut group_depth = 0; |
|
75 | 75 | |
|
76 | 76 | while let Some((c, rest)) = input.split_first() { |
|
77 | 77 | input = rest; |
|
78 | 78 | |
|
79 | 79 | match c { |
|
80 | 80 | b'*' => { |
|
81 | 81 | for (source, repl) in GLOB_REPLACEMENTS { |
|
82 | 82 | if let Some(rest) = input.drop_prefix(source) { |
|
83 | 83 | input = rest; |
|
84 | 84 | res.extend(*repl); |
|
85 | 85 | break; |
|
86 | 86 | } |
|
87 | 87 | } |
|
88 | 88 | } |
|
89 | 89 | b'?' => res.extend(b"."), |
|
90 | 90 | b'[' => { |
|
91 | 91 | match input.iter().skip(1).position(|b| *b == b']') { |
|
92 | 92 | None => res.extend(b"\\["), |
|
93 | 93 | Some(end) => { |
|
94 | 94 | // Account for the one we skipped |
|
95 | 95 | let end = end + 1; |
|
96 | 96 | |
|
97 | 97 | res.extend(b"["); |
|
98 | 98 | |
|
99 | 99 | for (i, b) in input[..end].iter().enumerate() { |
|
100 | 100 | if *b == b'!' && i == 0 { |
|
101 | 101 | res.extend(b"^") |
|
102 | 102 | } else if *b == b'^' && i == 0 { |
|
103 | 103 | res.extend(b"\\^") |
|
104 | 104 | } else if *b == b'\\' { |
|
105 | 105 | res.extend(b"\\\\") |
|
106 | 106 | } else { |
|
107 | 107 | res.push(*b) |
|
108 | 108 | } |
|
109 | 109 | } |
|
110 | 110 | res.extend(b"]"); |
|
111 | 111 | input = &input[end + 1..]; |
|
112 | 112 | } |
|
113 | 113 | } |
|
114 | 114 | } |
|
115 | 115 | b'{' => { |
|
116 | 116 | group_depth += 1; |
|
117 | 117 | res.extend(b"(?:") |
|
118 | 118 | } |
|
119 | 119 | b'}' if group_depth > 0 => { |
|
120 | 120 | group_depth -= 1; |
|
121 | 121 | res.extend(b")"); |
|
122 | 122 | } |
|
123 | 123 | b',' if group_depth > 0 => res.extend(b"|"), |
|
124 | 124 | b'\\' => { |
|
125 | 125 | let c = { |
|
126 | 126 | if let Some((c, rest)) = input.split_first() { |
|
127 | 127 | input = rest; |
|
128 | 128 | c |
|
129 | 129 | } else { |
|
130 | 130 | c |
|
131 | 131 | } |
|
132 | 132 | }; |
|
133 | 133 | res.extend(&RE_ESCAPE[*c as usize]) |
|
134 | 134 | } |
|
135 | 135 | _ => res.extend(&RE_ESCAPE[*c as usize]), |
|
136 | 136 | } |
|
137 | 137 | } |
|
138 | 138 | res |
|
139 | 139 | } |
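As an illustration of the translation above, here is a small sketch (not part of the change itself) of the regex bytes one would expect for a few globs; the same cases are exercised by the `glob_test` unit test further down.

```rust
// Illustrative sketch only: expected output of `glob_to_re` for a few globs.
fn glob_to_re_examples() {
    // `*` stays within one path component...
    assert_eq!(glob_to_re(b"*.elc"), b"[^/]*\\.elc".to_vec());
    // ...while `**/` may cross any number of components.
    assert_eq!(glob_to_re(b"**/a"), b"(?:.*/)?a".to_vec());
    // Brace alternation becomes a non-capturing regex group.
    assert_eq!(glob_to_re(b"{a,b}"), b"(?:a|b)".to_vec());
}
```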
|
140 | 140 | |
|
141 | 141 | fn escape_pattern(pattern: &[u8]) -> Vec<u8> { |
|
142 | 142 | pattern |
|
143 | 143 | .iter() |
|
144 | 144 | .flat_map(|c| RE_ESCAPE[*c as usize].clone()) |
|
145 | 145 | .collect() |
|
146 | 146 | } |
|
147 | 147 | |
|
148 | 148 | pub fn parse_pattern_syntax( |
|
149 | 149 | kind: &[u8], |
|
150 | 150 | ) -> Result<PatternSyntax, PatternError> { |
|
151 | 151 | match kind { |
|
152 | 152 | b"re:" => Ok(PatternSyntax::Regexp), |
|
153 | 153 | b"path:" => Ok(PatternSyntax::Path), |
|
154 | 154 | b"relpath:" => Ok(PatternSyntax::RelPath), |
|
155 | 155 | b"rootfilesin:" => Ok(PatternSyntax::RootFiles), |
|
156 | 156 | b"relglob:" => Ok(PatternSyntax::RelGlob), |
|
157 | 157 | b"relre:" => Ok(PatternSyntax::RelRegexp), |
|
158 | 158 | b"glob:" => Ok(PatternSyntax::Glob), |
|
159 | 159 | b"rootglob:" => Ok(PatternSyntax::RootGlob), |
|
160 | 160 | b"include:" => Ok(PatternSyntax::Include), |
|
161 | 161 | b"subinclude:" => Ok(PatternSyntax::SubInclude), |
|
162 | 162 | _ => Err(PatternError::UnsupportedSyntax( |
|
163 | 163 | String::from_utf8_lossy(kind).to_string(), |
|
164 | 164 | )), |
|
165 | 165 | } |
|
166 | 166 | } |
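A quick sketch (illustration only) of the mapping above; note that the trailing colon is part of the expected `kind`.

```rust
// Sketch: a known prefix maps to its variant, an unknown one is an error.
fn parse_pattern_syntax_examples() {
    assert_eq!(
        parse_pattern_syntax(b"rootglob:").unwrap(),
        PatternSyntax::RootGlob
    );
    assert!(parse_pattern_syntax(b"size:").is_err());
}
```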
|
167 | 167 | |
|
168 | 168 | /// Builds the regex that corresponds to the given pattern. |
|
169 | 169 | /// If within a `syntax: regexp` context, returns the pattern, |
|
170 | 170 | /// otherwise, returns the corresponding regex. |
|
171 | 171 | fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> { |
|
172 | 172 | let IgnorePattern { |
|
173 | 173 | syntax, pattern, .. |
|
174 | 174 | } = entry; |
|
175 | 175 | if pattern.is_empty() { |
|
176 | 176 | return vec![]; |
|
177 | 177 | } |
|
178 | 178 | match syntax { |
|
179 | 179 | PatternSyntax::Regexp => pattern.to_owned(), |
|
180 | 180 | PatternSyntax::RelRegexp => { |
|
181 | 181 | // The `regex` crate accepts `**` while `re2` and Python's `re` |
|
182 | 182 | // do not. Checking for `*` correctly triggers the same error in all |
|
183 | 183 | // engines. |
|
184 | 184 | if pattern[0] == b'^' |
|
185 | 185 | || pattern[0] == b'*' |
|
186 | 186 | || pattern.starts_with(b".*") |
|
187 | 187 | { |
|
188 | 188 | return pattern.to_owned(); |
|
189 | 189 | } |
|
190 | 190 | [&b".*"[..], pattern].concat() |
|
191 | 191 | } |
|
192 | 192 | PatternSyntax::Path | PatternSyntax::RelPath => { |
|
193 | 193 | if pattern == b"." { |
|
194 | 194 | return vec![]; |
|
195 | 195 | } |
|
196 | 196 | [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() |
|
197 | 197 | } |
|
198 | 198 | PatternSyntax::RootFiles => { |
|
199 | 199 | let mut res = if pattern == b"." { |
|
200 | 200 | vec![] |
|
201 | 201 | } else { |
|
202 | 202 | // Pattern is a directory name. |
|
203 | 203 | [escape_pattern(pattern).as_slice(), b"/"].concat() |
|
204 | 204 | }; |
|
205 | 205 | |
|
206 | 206 | // Anything after the pattern must be a non-directory. |
|
207 | 207 | res.extend(b"[^/]+$"); |
|
208 | 208 | res |
|
209 | 209 | } |
|
210 | 210 | PatternSyntax::RelGlob => { |
|
211 | 211 | let glob_re = glob_to_re(pattern); |
|
212 | 212 | if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
|
213 | 213 | [b".*", rest, GLOB_SUFFIX].concat() |
|
214 | 214 | } else { |
|
215 | 215 | [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() |
|
216 | 216 | } |
|
217 | 217 | } |
|
218 | 218 | PatternSyntax::Glob | PatternSyntax::RootGlob => { |
|
219 | 219 | [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() |
|
220 | 220 | } |
|
221 | 221 | PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), |
|
222 | 222 | } |
|
223 | 223 | } |
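Two more sketched translations for syntaxes not covered by the unit tests at the bottom of the file (`relpath:` and `rootfilesin:`); this is an illustration only, not part of the change.

```rust
// Sketch: regexes produced by `_build_single_regex` for two more syntaxes.
fn single_regex_examples() {
    let relpath =
        IgnorePattern::new(PatternSyntax::RelPath, b"foo/bar", Path::new(""));
    // Matches "foo/bar" itself and anything below it.
    assert_eq!(_build_single_regex(&relpath), b"foo/bar(?:/|$)".to_vec());

    let rootfiles =
        IgnorePattern::new(PatternSyntax::RootFiles, b"foo/bar", Path::new(""));
    // Matches direct, non-directory children of "foo/bar" only.
    assert_eq!(_build_single_regex(&rootfiles), b"foo/bar/[^/]+$".to_vec());
}
```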
|
224 | 224 | |
|
225 | 225 | const GLOB_SPECIAL_CHARACTERS: [u8; 7] = |
|
226 | 226 | [b'*', b'?', b'[', b']', b'{', b'}', b'\\']; |
|
227 | 227 | |
|
228 | 228 | /// TODO support other platforms |
|
229 | 229 | #[cfg(unix)] |
|
230 | 230 | pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> { |
|
231 | 231 | if bytes.is_empty() { |
|
232 | 232 | return b".".to_vec(); |
|
233 | 233 | } |
|
234 | 234 | let sep = b'/'; |
|
235 | 235 | |
|
236 | 236 | let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count(); |
|
237 | 237 | if initial_slashes > 2 { |
|
238 | 238 | // POSIX allows one or two initial slashes, but treats three or more |
|
239 | 239 | // as a single slash. |
|
240 | 240 | initial_slashes = 1; |
|
241 | 241 | } |
|
242 | 242 | let components = bytes |
|
243 | 243 | .split(|b| *b == sep) |
|
244 | 244 | .filter(|c| !(c.is_empty() || c == b".")) |
|
245 | 245 | .fold(vec![], |mut acc, component| { |
|
246 | 246 | if component != b".." |
|
247 | 247 | || (initial_slashes == 0 && acc.is_empty()) |
|
248 | 248 | || (!acc.is_empty() && acc[acc.len() - 1] == b"..") |
|
249 | 249 | { |
|
250 | 250 | acc.push(component) |
|
251 | 251 | } else if !acc.is_empty() { |
|
252 | 252 | acc.pop(); |
|
253 | 253 | } |
|
254 | 254 | acc |
|
255 | 255 | }); |
|
256 | 256 | let mut new_bytes = components.join(&sep); |
|
257 | 257 | |
|
258 | 258 | if initial_slashes > 0 { |
|
259 | 259 | let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect(); |
|
260 | 260 | buf.extend(new_bytes); |
|
261 | 261 | new_bytes = buf; |
|
262 | 262 | } |
|
263 | 263 | if new_bytes.is_empty() { |
|
264 | 264 | b".".to_vec() |
|
265 | 265 | } else { |
|
266 | 266 | new_bytes |
|
267 | 267 | } |
|
268 | 268 | } |
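A few sketched inputs and outputs for the normalization above (illustration only, assuming the function is in scope):

```rust
// Sketch: behaviour of `normalize_path_bytes` on a few byte paths.
fn normalize_path_examples() {
    assert_eq!(normalize_path_bytes(b""), b".".to_vec());
    assert_eq!(normalize_path_bytes(b"foo//./bar/"), b"foo/bar".to_vec());
    assert_eq!(normalize_path_bytes(b"/a/b/../c"), b"/a/c".to_vec());
    // Leading `..` components of a relative path are preserved.
    assert_eq!(normalize_path_bytes(b"../a"), b"../a".to_vec());
}
```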
|
269 | 269 | |
|
270 | 270 | /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs |
|
271 | 271 | /// that don't need to be transformed into a regex. |
|
272 | 272 | pub fn build_single_regex( |
|
273 | 273 | entry: &IgnorePattern, |
|
274 | 274 | ) -> Result<Option<Vec<u8>>, PatternError> { |
|
275 | 275 | let IgnorePattern { |
|
276 | 276 | pattern, syntax, .. |
|
277 | 277 | } = entry; |
|
278 | 278 | let pattern = match syntax { |
|
279 | 279 | PatternSyntax::RootGlob |
|
280 | 280 | | PatternSyntax::Path |
|
281 | 281 | | PatternSyntax::RelGlob |
|
282 | 282 | | PatternSyntax::RootFiles => normalize_path_bytes(&pattern), |
|
283 | 283 | PatternSyntax::Include | PatternSyntax::SubInclude => { |
|
284 | 284 | return Err(PatternError::NonRegexPattern(entry.clone())) |
|
285 | 285 | } |
|
286 | 286 | _ => pattern.to_owned(), |
|
287 | 287 | }; |
|
288 | 288 | if *syntax == PatternSyntax::RootGlob |
|
289 | 289 | && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) |
|
290 | 290 | { |
|
291 | 291 | Ok(None) |
|
292 | 292 | } else { |
|
293 | 293 | let mut entry = entry.clone(); |
|
294 | 294 | entry.pattern = pattern; |
|
295 | 295 | Ok(Some(_build_single_regex(&entry))) |
|
296 | 296 | } |
|
297 | 297 | } |
|
298 | 298 | |
|
299 | 299 | lazy_static! { |
|
300 | 300 | static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { |
|
301 | 301 | let mut m = FastHashMap::default(); |
|
302 | 302 | |
|
303 | 303 | m.insert(b"re".as_ref(), b"relre:".as_ref()); |
|
304 | 304 | m.insert(b"regexp".as_ref(), b"relre:".as_ref()); |
|
305 | 305 | m.insert(b"glob".as_ref(), b"relglob:".as_ref()); |
|
306 | 306 | m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); |
|
307 | 307 | m.insert(b"include".as_ref(), b"include:".as_ref()); |
|
308 | 308 | m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref()); |
|
309 | 309 | m |
|
310 | 310 | }; |
|
311 | 311 | } |
|
312 | 312 | |
|
313 | 313 | #[derive(Debug)] |
|
314 | 314 | pub enum PatternFileWarning { |
|
315 | 315 | /// (file path, syntax bytes) |
|
316 | 316 | InvalidSyntax(PathBuf, Vec<u8>), |
|
317 | 317 | /// File path |
|
318 | 318 | NoSuchFile(PathBuf), |
|
319 | 319 | } |
|
320 | 320 | |
|
321 | 321 | pub fn parse_pattern_file_contents<P: AsRef<Path>>( |
|
322 | 322 | lines: &[u8], |
|
323 | 323 | file_path: P, |
|
324 | 324 | warn: bool, |
|
325 | 325 | ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { |
|
326 | 326 | let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); |
|
327 | ||
|
328 | #[allow(clippy::trivial_regex)] | |
|
327 | 329 | let comment_escape_regex = Regex::new(r"\\#").unwrap(); |
|
328 | 330 | let mut inputs: Vec<IgnorePattern> = vec![]; |
|
329 | 331 | let mut warnings: Vec<PatternFileWarning> = vec![]; |
|
330 | 332 | |
|
331 | 333 | let mut current_syntax = b"relre:".as_ref(); |
|
332 | 334 | |
|
333 | 335 | for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { |
|
334 | 336 | let line_number = line_number + 1; |
|
335 | 337 | |
|
336 | 338 | let line_buf; |
|
337 | 339 | if line.contains(&b'#') { |
|
338 | 340 | if let Some(cap) = comment_regex.captures(line) { |
|
339 | 341 | line = &line[..cap.get(1).unwrap().end()] |
|
340 | 342 | } |
|
341 | 343 | line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#")); |
|
342 | 344 | line = &line_buf; |
|
343 | 345 | } |
|
344 | 346 | |
|
345 | 347 | let mut line = line.trim_end(); |
|
346 | 348 | |
|
347 | 349 | if line.is_empty() { |
|
348 | 350 | continue; |
|
349 | 351 | } |
|
350 | 352 | |
|
351 | 353 | if let Some(syntax) = line.drop_prefix(b"syntax:") { |
|
352 | 354 | let syntax = syntax.trim(); |
|
353 | 355 | |
|
354 | 356 | if let Some(rel_syntax) = SYNTAXES.get(syntax) { |
|
355 | 357 | current_syntax = rel_syntax; |
|
356 | 358 | } else if warn { |
|
357 | 359 | warnings.push(PatternFileWarning::InvalidSyntax( |
|
358 | 360 | file_path.as_ref().to_owned(), |
|
359 | 361 | syntax.to_owned(), |
|
360 | 362 | )); |
|
361 | 363 | } |
|
362 | 364 | continue; |
|
363 | 365 | } |
|
364 | 366 | |
|
365 | 367 | let mut line_syntax: &[u8] = ¤t_syntax; |
|
366 | 368 | |
|
367 | 369 | for (s, rels) in SYNTAXES.iter() { |
|
368 | 370 | if let Some(rest) = line.drop_prefix(rels) { |
|
369 | 371 | line_syntax = rels; |
|
370 | 372 | line = rest; |
|
371 | 373 | break; |
|
372 | 374 | } |
|
373 | 375 | if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) { |
|
374 | 376 | line_syntax = rels; |
|
375 | 377 | line = rest; |
|
376 | 378 | break; |
|
377 | 379 | } |
|
378 | 380 | } |
|
379 | 381 | |
|
380 | 382 | inputs.push(IgnorePattern::new( |
|
381 | 383 | parse_pattern_syntax(&line_syntax).map_err(|e| match e { |
|
382 | 384 | PatternError::UnsupportedSyntax(syntax) => { |
|
383 | 385 | PatternError::UnsupportedSyntaxInFile( |
|
384 | 386 | syntax, |
|
385 | 387 | file_path.as_ref().to_string_lossy().into(), |
|
386 | 388 | line_number, |
|
387 | 389 | ) |
|
388 | 390 | } |
|
389 | 391 | _ => e, |
|
390 | 392 | })?, |
|
391 | 393 | &line, |
|
392 | 394 | &file_path, |
|
393 | 395 | )); |
|
394 | 396 | } |
|
395 | 397 | Ok((inputs, warnings)) |
|
396 | 398 | } |
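A sketch of the parser above on a small in-memory ignore file; the file path argument is only used as a label on the returned patterns and in error messages, and the body shown here is invented for illustration.

```rust
// Sketch: parsing a small ignore-file body with `parse_pattern_file_contents`.
fn parse_example() -> Result<(), PatternError> {
    let body = b"# build artifacts\nsyntax: glob\n*.tmp\nre:^build/\n";
    let (patterns, warnings) =
        parse_pattern_file_contents(body, Path::new(".hgignore"), false)?;
    assert!(warnings.is_empty());
    assert_eq!(
        patterns,
        vec![
            IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.tmp",
                Path::new(".hgignore")
            ),
            IgnorePattern::new(
                PatternSyntax::RelRegexp,
                b"^build/",
                Path::new(".hgignore")
            ),
        ]
    );
    Ok(())
}
```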
|
397 | 399 | |
|
398 | 400 | pub fn read_pattern_file<P: AsRef<Path>>( |
|
399 | 401 | file_path: P, |
|
400 | 402 | warn: bool, |
|
401 | 403 | ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { |
|
402 | 404 | let mut f = match File::open(file_path.as_ref()) { |
|
403 | 405 | Ok(f) => Ok(f), |
|
404 | 406 | Err(e) => match e.kind() { |
|
405 | 407 | std::io::ErrorKind::NotFound => { |
|
406 | 408 | return Ok(( |
|
407 | 409 | vec![], |
|
408 | 410 | vec![PatternFileWarning::NoSuchFile( |
|
409 | 411 | file_path.as_ref().to_owned(), |
|
410 | 412 | )], |
|
411 | 413 | )) |
|
412 | 414 | } |
|
413 | 415 | _ => Err(e), |
|
414 | 416 | }, |
|
415 | 417 | }?; |
|
416 | 418 | let mut contents = Vec::new(); |
|
417 | 419 | |
|
418 | 420 | f.read_to_end(&mut contents)?; |
|
419 | 421 | |
|
420 | 422 | Ok(parse_pattern_file_contents(&contents, file_path, warn)?) |
|
421 | 423 | } |
|
422 | 424 | |
|
423 | 425 | /// Represents an entry in an "ignore" file. |
|
424 | 426 | #[derive(Debug, Eq, PartialEq, Clone)] |
|
425 | 427 | pub struct IgnorePattern { |
|
426 | 428 | pub syntax: PatternSyntax, |
|
427 | 429 | pub pattern: Vec<u8>, |
|
428 | 430 | pub source: PathBuf, |
|
429 | 431 | } |
|
430 | 432 | |
|
431 | 433 | impl IgnorePattern { |
|
432 | 434 | pub fn new( |
|
433 | 435 | syntax: PatternSyntax, |
|
434 | 436 | pattern: &[u8], |
|
435 | 437 | source: impl AsRef<Path>, |
|
436 | 438 | ) -> Self { |
|
437 | 439 | Self { |
|
438 | 440 | syntax, |
|
439 | 441 | pattern: pattern.to_owned(), |
|
440 | 442 | source: source.as_ref().to_owned(), |
|
441 | 443 | } |
|
442 | 444 | } |
|
443 | 445 | } |
|
444 | 446 | |
|
445 | 447 | pub type PatternResult<T> = Result<T, PatternError>; |
|
446 | 448 | |
|
447 | 449 | /// Wrapper for `read_pattern_file` that also recursively expands `include:` |
|
448 | 450 | /// patterns. |
|
449 | 451 | /// |
|
450 | 452 | /// `subinclude:` is not treated as a special pattern here: unraveling them |
|
451 | 453 | /// needs to occur in the "ignore" phase. |
|
452 | 454 | pub fn get_patterns_from_file( |
|
453 | 455 | pattern_file: impl AsRef<Path>, |
|
454 | 456 | root_dir: impl AsRef<Path>, |
|
455 | 457 | ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> { |
|
456 | 458 | let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?; |
|
457 | 459 | let patterns = patterns |
|
458 | 460 | .into_iter() |
|
459 | 461 | .flat_map(|entry| -> PatternResult<_> { |
|
460 | 462 | let IgnorePattern { |
|
461 | syntax, | |
|
462 | pattern, | |
|
463 | source: _, | |
|
463 | syntax, pattern, .. | |
|
464 | 464 | } = &entry; |
|
465 | 465 | Ok(match syntax { |
|
466 | 466 | PatternSyntax::Include => { |
|
467 | 467 | let inner_include = |
|
468 | 468 | root_dir.as_ref().join(get_path_from_bytes(&pattern)); |
|
469 | 469 | let (inner_pats, inner_warnings) = get_patterns_from_file( |
|
470 | 470 | &inner_include, |
|
471 | 471 | root_dir.as_ref(), |
|
472 | 472 | )?; |
|
473 | 473 | warnings.extend(inner_warnings); |
|
474 | 474 | inner_pats |
|
475 | 475 | } |
|
476 | 476 | _ => vec![entry], |
|
477 | 477 | }) |
|
478 | 478 | }) |
|
479 | 479 | .flatten() |
|
480 | 480 | .collect(); |
|
481 | 481 | |
|
482 | 482 | Ok((patterns, warnings)) |
|
483 | 483 | } |
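A hypothetical usage sketch; the repository path and the helper name are invented and not part of this change.

```rust
// Hypothetical sketch: read a repository's root ignore file, with `include:`
// patterns already expanded by `get_patterns_from_file` above.
fn load_root_ignore_patterns() -> PatternResult<Vec<IgnorePattern>> {
    let root_dir = Path::new("/path/to/repo");
    let (patterns, warnings) =
        get_patterns_from_file(root_dir.join(".hgignore"), root_dir)?;
    for warning in &warnings {
        // E.g. `NoSuchFile` for a missing included file.
        eprintln!("ignore file warning: {:?}", warning);
    }
    Ok(patterns)
}
```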
|
484 | 484 | |
|
485 | 485 | /// Holds all the information needed to handle a `subinclude:` pattern. |
|
486 | 486 | pub struct SubInclude { |
|
487 | 487 | /// Will be used for repository (hg) paths that start with this prefix. |
|
488 | 488 | /// It is relative to the current working directory, so comparing against |
|
489 | 489 | /// repository paths is painless. |
|
490 | 490 | pub prefix: HgPathBuf, |
|
491 | 491 | /// The file itself, containing the patterns |
|
492 | 492 | pub path: PathBuf, |
|
493 | 493 | /// Folder in the filesystem where it applies |
|
494 | 494 | pub root: PathBuf, |
|
495 | 495 | } |
|
496 | 496 | |
|
497 | 497 | impl SubInclude { |
|
498 | 498 | pub fn new( |
|
499 | 499 | root_dir: impl AsRef<Path>, |
|
500 | 500 | pattern: &[u8], |
|
501 | 501 | source: impl AsRef<Path>, |
|
502 | 502 | ) -> Result<SubInclude, HgPathError> { |
|
503 | 503 | let normalized_source = |
|
504 | 504 | normalize_path_bytes(&get_bytes_from_path(source)); |
|
505 | 505 | |
|
506 | 506 | let source_root = get_path_from_bytes(&normalized_source); |
|
507 | let source_root = source_root.parent().unwrap_or(source_root.deref()); | |
|
507 | let source_root = | |
|
508 | source_root.parent().unwrap_or_else(|| source_root.deref()); | |
|
508 | 509 | |
|
509 | 510 | let path = source_root.join(get_path_from_bytes(pattern)); |
|
510 | let new_root = path.parent().unwrap_or(path.deref()); | |
|
511 | let new_root = path.parent().unwrap_or_else(|| path.deref()); | |
|
511 | 512 | |
|
512 | 513 | let prefix = canonical_path(&root_dir, &root_dir, new_root)?; |
|
513 | 514 | |
|
514 | 515 | Ok(Self { |
|
515 | 516 | prefix: path_to_hg_path_buf(prefix).and_then(|mut p| { |
|
516 | 517 | if !p.is_empty() { |
|
517 | 518 | p.push(b'/'); |
|
518 | 519 | } |
|
519 | 520 | Ok(p) |
|
520 | 521 | })?, |
|
521 | 522 | path: path.to_owned(), |
|
522 | 523 | root: new_root.to_owned(), |
|
523 | 524 | }) |
|
524 | 525 | } |
|
525 | 526 | } |
|
526 | 527 | |
|
527 | 528 | /// Separate and pre-process subincludes from other patterns for the "ignore" |
|
528 | 529 | /// phase. |
|
529 | 530 | pub fn filter_subincludes( |
|
530 | 531 | ignore_patterns: &[IgnorePattern], |
|
531 | 532 | root_dir: impl AsRef<Path>, |
|
532 | 533 | ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> { |
|
533 | 534 | let mut subincludes = vec![]; |
|
534 | 535 | let mut others = vec![]; |
|
535 | 536 | |
|
536 | 537 | for ignore_pattern in ignore_patterns.iter() { |
|
537 | 538 | let IgnorePattern { |
|
538 | 539 | syntax, |
|
539 | 540 | pattern, |
|
540 | 541 | source, |
|
541 | 542 | } = ignore_pattern; |
|
542 | 543 | if *syntax == PatternSyntax::SubInclude { |
|
543 | 544 | subincludes.push(SubInclude::new(&root_dir, pattern, &source)?); |
|
544 | 545 | } else { |
|
545 | 546 | others.push(ignore_pattern) |
|
546 | 547 | } |
|
547 | 548 | } |
|
548 | 549 | Ok((subincludes, others)) |
|
549 | 550 | } |
|
550 | 551 | |
|
551 | 552 | #[cfg(test)] |
|
552 | 553 | mod tests { |
|
553 | 554 | use super::*; |
|
554 | 555 | use pretty_assertions::assert_eq; |
|
555 | 556 | |
|
556 | 557 | #[test] |
|
557 | 558 | fn escape_pattern_test() { |
|
558 | 559 | let untouched = |
|
559 | 560 | br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#; |
|
560 | 561 | assert_eq!(escape_pattern(untouched), untouched.to_vec()); |
|
561 | 562 | // All escape codes |
|
562 | 563 | assert_eq!( |
|
563 | 564 | escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#), |
|
564 | 565 | br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"# |
|
565 | 566 | .to_vec() |
|
566 | 567 | ); |
|
567 | 568 | } |
|
568 | 569 | |
|
569 | 570 | #[test] |
|
570 | 571 | fn glob_test() { |
|
571 | 572 | assert_eq!(glob_to_re(br#"?"#), br#"."#); |
|
572 | 573 | assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#); |
|
573 | 574 | assert_eq!(glob_to_re(br#"**"#), br#".*"#); |
|
574 | 575 | assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#); |
|
575 | 576 | assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#); |
|
576 | 577 | assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#); |
|
577 | 578 | assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#); |
|
578 | 579 | assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#); |
|
579 | 580 | } |
|
580 | 581 | |
|
581 | 582 | #[test] |
|
582 | 583 | fn test_parse_pattern_file_contents() { |
|
583 | 584 | let lines = b"syntax: glob\n*.elc"; |
|
584 | 585 | |
|
585 | 586 | assert_eq!( |
|
586 | 587 | parse_pattern_file_contents(lines, Path::new("file_path"), false) |
|
587 | 588 | .unwrap() |
|
588 | 589 | .0, |
|
589 | 590 | vec![IgnorePattern::new( |
|
590 | 591 | PatternSyntax::RelGlob, |
|
591 | 592 | b"*.elc", |
|
592 | 593 | Path::new("file_path") |
|
593 | 594 | )], |
|
594 | 595 | ); |
|
595 | 596 | |
|
596 | 597 | let lines = b"syntax: include\nsyntax: glob"; |
|
597 | 598 | |
|
598 | 599 | assert_eq!( |
|
599 | 600 | parse_pattern_file_contents(lines, Path::new("file_path"), false) |
|
600 | 601 | .unwrap() |
|
601 | 602 | .0, |
|
602 | 603 | vec![] |
|
603 | 604 | ); |
|
604 | 605 | let lines = b"glob:**.o"; |
|
605 | 606 | assert_eq!( |
|
606 | 607 | parse_pattern_file_contents(lines, Path::new("file_path"), false) |
|
607 | 608 | .unwrap() |
|
608 | 609 | .0, |
|
609 | 610 | vec![IgnorePattern::new( |
|
610 | 611 | PatternSyntax::RelGlob, |
|
611 | 612 | b"**.o", |
|
612 | 613 | Path::new("file_path") |
|
613 | 614 | )] |
|
614 | 615 | ); |
|
615 | 616 | } |
|
616 | 617 | |
|
617 | 618 | #[test] |
|
618 | 619 | fn test_build_single_regex() { |
|
619 | 620 | assert_eq!( |
|
620 | 621 | build_single_regex(&IgnorePattern::new( |
|
621 | 622 | PatternSyntax::RelGlob, |
|
622 | 623 | b"rust/target/", |
|
623 | 624 | Path::new("") |
|
624 | 625 | )) |
|
625 | 626 | .unwrap(), |
|
626 | 627 | Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), |
|
627 | 628 | ); |
|
628 | 629 | assert_eq!( |
|
629 | 630 | build_single_regex(&IgnorePattern::new( |
|
630 | 631 | PatternSyntax::Regexp, |
|
631 | 632 | br"rust/target/\d+", |
|
632 | 633 | Path::new("") |
|
633 | 634 | )) |
|
634 | 635 | .unwrap(), |
|
635 | 636 | Some(br"rust/target/\d+".to_vec()), |
|
636 | 637 | ); |
|
637 | 638 | } |
|
638 | 639 | |
|
639 | 640 | #[test] |
|
640 | 641 | fn test_build_single_regex_shortcut() { |
|
641 | 642 | assert_eq!( |
|
642 | 643 | build_single_regex(&IgnorePattern::new( |
|
643 | 644 | PatternSyntax::RootGlob, |
|
644 | 645 | b"", |
|
645 | 646 | Path::new("") |
|
646 | 647 | )) |
|
647 | 648 | .unwrap(), |
|
648 | 649 | None, |
|
649 | 650 | ); |
|
650 | 651 | assert_eq!( |
|
651 | 652 | build_single_regex(&IgnorePattern::new( |
|
652 | 653 | PatternSyntax::RootGlob, |
|
653 | 654 | b"whatever", |
|
654 | 655 | Path::new("") |
|
655 | 656 | )) |
|
656 | 657 | .unwrap(), |
|
657 | 658 | None, |
|
658 | 659 | ); |
|
659 | 660 | assert_eq!( |
|
660 | 661 | build_single_regex(&IgnorePattern::new( |
|
661 | 662 | PatternSyntax::RootGlob, |
|
662 | 663 | b"*.o", |
|
663 | 664 | Path::new("") |
|
664 | 665 | )) |
|
665 | 666 | .unwrap(), |
|
666 | 667 | Some(br"[^/]*\.o(?:/|$)".to_vec()), |
|
667 | 668 | ); |
|
668 | 669 | } |
|
669 | 670 | } |
@@ -1,937 +1,937 b'' | |||
|
1 | 1 | // matchers.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Structs and types for matching files and directories. |
|
9 | 9 | |
|
10 | 10 | use crate::{ |
|
11 | 11 | dirstate::dirs_multiset::DirsChildrenMultiset, |
|
12 | 12 | filepatterns::{ |
|
13 | 13 | build_single_regex, filter_subincludes, get_patterns_from_file, |
|
14 | 14 | PatternFileWarning, PatternResult, SubInclude, |
|
15 | 15 | }, |
|
16 | 16 | utils::{ |
|
17 | 17 | files::find_dirs, |
|
18 | 18 | hg_path::{HgPath, HgPathBuf}, |
|
19 | 19 | Escaped, |
|
20 | 20 | }, |
|
21 | 21 | DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError, |
|
22 | 22 | PatternSyntax, |
|
23 | 23 | }; |
|
24 | 24 | |
|
25 | 25 | use crate::filepatterns::normalize_path_bytes; |
|
26 | 26 | use std::borrow::ToOwned; |
|
27 | 27 | use std::collections::HashSet; |
|
28 | 28 | use std::fmt::{Display, Error, Formatter}; |
|
29 | 29 | use std::iter::FromIterator; |
|
30 | 30 | use std::ops::Deref; |
|
31 | 31 | use std::path::{Path, PathBuf}; |
|
32 | 32 | |
|
33 | 33 | use micro_timer::timed; |
|
34 | 34 | |
|
35 | 35 | #[derive(Debug, PartialEq)] |
|
36 | 36 | pub enum VisitChildrenSet<'a> { |
|
37 | 37 | /// Don't visit anything |
|
38 | 38 | Empty, |
|
39 | 39 | /// Only visit this directory |
|
40 | 40 | This, |
|
41 | 41 | /// Visit this directory and these subdirectories |
|
42 | 42 | /// TODO Should we implement a `NonEmptyHashSet`? |
|
43 | 43 | Set(HashSet<&'a HgPath>), |
|
44 | 44 | /// Visit this directory and all subdirectories |
|
45 | 45 | Recursive, |
|
46 | 46 | } |
|
47 | 47 | |
|
48 | 48 | pub trait Matcher { |
|
49 | 49 | /// Explicitly listed files |
|
50 | 50 | fn file_set(&self) -> Option<&HashSet<&HgPath>>; |
|
51 | 51 | /// Returns whether `filename` is in `file_set` |
|
52 | 52 | fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool; |
|
53 | 53 | /// Returns whether `filename` is matched by this matcher |
|
54 | 54 | fn matches(&self, filename: impl AsRef<HgPath>) -> bool; |
|
55 | 55 | /// Decides whether a directory should be visited based on whether it |
|
56 | 56 | /// has potential matches in it or one of its subdirectories, and |
|
57 | 57 | /// potentially lists which subdirectories of that directory should be |
|
58 | 58 | /// visited. This is based on the match's primary, included, and excluded |
|
59 | 59 | /// patterns. |
|
60 | 60 | /// |
|
61 | 61 | /// # Example |
|
62 | 62 | /// |
|
63 | 63 | /// Given matchers `['path:foo/bar', 'rootfilesin:qux']`, we would |
|
64 | 64 | /// return the following values (assuming the implementation of |
|
65 | 65 | /// visit_children_set is capable of recognizing this; some implementations |
|
66 | 66 | /// are not). |
|
67 | 67 | /// |
|
68 | 68 | /// ```text |
|
69 | 69 | /// ```ignore |
|
70 | 70 | /// '' -> {'foo', 'qux'} |
|
71 | 71 | /// 'baz' -> set() |
|
72 | 72 | /// 'foo' -> {'bar'} |
|
73 | 73 | /// // Ideally this would be `Recursive`, but since the prefix nature of |
|
74 | 74 | /// // matchers is applied to the entire matcher, we have to downgrade this |
|
75 | 75 | /// // to `This` due to the (yet to be implemented in Rust) non-prefix |
|
76 | 76 | /// // `RootFilesIn'-kind matcher being mixed in. |
|
77 | 77 | /// 'foo/bar' -> 'this' |
|
78 | 78 | /// 'qux' -> 'this' |
|
79 | 79 | /// ``` |
|
80 | 80 | /// # Important |
|
81 | 81 | /// |
|
82 | 82 | /// Most matchers do not know if they're representing files or |
|
83 | 83 | /// directories. They see `['path:dir/f']` and don't know whether `f` is a |
|
84 | 84 | /// file or a directory, so `visit_children_set('dir')` for most matchers |
|
85 | 85 | /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's |
|
86 | 86 | /// a file (like the yet to be implemented in Rust `ExactMatcher` does), |
|
87 | 87 | /// it may return `VisitChildrenSet::This`. |
|
88 | 88 | /// Do not rely on the return being a `HashSet` indicating that there are |
|
89 | 89 | /// no files in this dir to investigate (or equivalently that if there are |
|
90 | 90 | /// files to investigate in 'dir' that it will always return |
|
91 | 91 | /// `VisitChildrenSet::This`). |
|
92 | 92 | fn visit_children_set( |
|
93 | 93 | &self, |
|
94 | 94 | directory: impl AsRef<HgPath>, |
|
95 | 95 | ) -> VisitChildrenSet; |
|
96 | 96 | /// Matcher will match everything and `file_set()` will be empty: |
|
97 | 97 | /// optimization might be possible. |
|
98 | 98 | fn matches_everything(&self) -> bool; |
|
99 | 99 | /// Matcher will match exactly the files in `file_set()`: optimization |
|
100 | 100 | /// might be possible. |
|
101 | 101 | fn is_exact(&self) -> bool; |
|
102 | 102 | } |
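As an illustration of the trait above, here is a hypothetical matcher that matches nothing at all, mirroring the `AlwaysMatcher` defined next; it is a sketch, not part of this change.

```rust
// Hypothetical example: the smallest possible `Matcher` implementation.
struct NeverMatcher;

impl Matcher for NeverMatcher {
    fn file_set(&self) -> Option<&HashSet<&HgPath>> {
        None
    }
    fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
        false
    }
    fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
        false
    }
    fn visit_children_set(
        &self,
        _directory: impl AsRef<HgPath>,
    ) -> VisitChildrenSet {
        // Nothing can match, so no directory is worth visiting.
        VisitChildrenSet::Empty
    }
    fn matches_everything(&self) -> bool {
        false
    }
    fn is_exact(&self) -> bool {
        false
    }
}
```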
|
103 | 103 | |
|
104 | 104 | /// Matches everything. |
|
105 | 105 | ///``` |
|
106 | 106 | /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath }; |
|
107 | 107 | /// |
|
108 | 108 | /// let matcher = AlwaysMatcher; |
|
109 | 109 | /// |
|
110 | 110 | /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true); |
|
111 | 111 | /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true); |
|
112 | 112 | /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); |
|
113 | 113 | /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true); |
|
114 | 114 | /// ``` |
|
115 | 115 | #[derive(Debug)] |
|
116 | 116 | pub struct AlwaysMatcher; |
|
117 | 117 | |
|
118 | 118 | impl Matcher for AlwaysMatcher { |
|
119 | 119 | fn file_set(&self) -> Option<&HashSet<&HgPath>> { |
|
120 | 120 | None |
|
121 | 121 | } |
|
122 | 122 | fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool { |
|
123 | 123 | false |
|
124 | 124 | } |
|
125 | 125 | fn matches(&self, _filename: impl AsRef<HgPath>) -> bool { |
|
126 | 126 | true |
|
127 | 127 | } |
|
128 | 128 | fn visit_children_set( |
|
129 | 129 | &self, |
|
130 | 130 | _directory: impl AsRef<HgPath>, |
|
131 | 131 | ) -> VisitChildrenSet { |
|
132 | 132 | VisitChildrenSet::Recursive |
|
133 | 133 | } |
|
134 | 134 | fn matches_everything(&self) -> bool { |
|
135 | 135 | true |
|
136 | 136 | } |
|
137 | 137 | fn is_exact(&self) -> bool { |
|
138 | 138 | false |
|
139 | 139 | } |
|
140 | 140 | } |
|
141 | 141 | |
|
142 | 142 | /// Matches the input files exactly. They are interpreted as paths, not |
|
143 | 143 | /// patterns. |
|
144 | 144 | /// |
|
145 | 145 | ///``` |
|
146 | 146 | /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath }; |
|
147 | 147 | /// |
|
148 | 148 | /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")]; |
|
149 | 149 | /// let matcher = FileMatcher::new(&files).unwrap(); |
|
150 | 150 | /// |
|
151 | 151 | /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true); |
|
152 | 152 | /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false); |
|
153 | 153 | /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false); |
|
154 | 154 | /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true); |
|
155 | 155 | /// ``` |
|
156 | 156 | #[derive(Debug)] |
|
157 | 157 | pub struct FileMatcher<'a> { |
|
158 | 158 | files: HashSet<&'a HgPath>, |
|
159 | 159 | dirs: DirsMultiset, |
|
160 | 160 | } |
|
161 | 161 | |
|
162 | 162 | impl<'a> FileMatcher<'a> { |
|
163 | 163 | pub fn new( |
|
164 | 164 | files: &'a [impl AsRef<HgPath>], |
|
165 | 165 | ) -> Result<Self, DirstateMapError> { |
|
166 | 166 | Ok(Self { |
|
167 |
files: HashSet::from_iter(files.iter().map( |
|
|
167 | files: HashSet::from_iter(files.iter().map(AsRef::as_ref)), | |
|
168 | 168 | dirs: DirsMultiset::from_manifest(files)?, |
|
169 | 169 | }) |
|
170 | 170 | } |
|
171 | 171 | fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool { |
|
172 | 172 | self.files.contains(filename.as_ref()) |
|
173 | 173 | } |
|
174 | 174 | } |
|
175 | 175 | |
|
176 | 176 | impl<'a> Matcher for FileMatcher<'a> { |
|
177 | 177 | fn file_set(&self) -> Option<&HashSet<&HgPath>> { |
|
178 | 178 | Some(&self.files) |
|
179 | 179 | } |
|
180 | 180 | fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool { |
|
181 | 181 | self.inner_matches(filename) |
|
182 | 182 | } |
|
183 | 183 | fn matches(&self, filename: impl AsRef<HgPath>) -> bool { |
|
184 | 184 | self.inner_matches(filename) |
|
185 | 185 | } |
|
186 | 186 | fn visit_children_set( |
|
187 | 187 | &self, |
|
188 | 188 | directory: impl AsRef<HgPath>, |
|
189 | 189 | ) -> VisitChildrenSet { |
|
190 | 190 | if self.files.is_empty() || !self.dirs.contains(&directory) { |
|
191 | 191 | return VisitChildrenSet::Empty; |
|
192 | 192 | } |
|
193 |
let dirs_as_set = self.dirs.iter().map( |
|
|
193 | let dirs_as_set = self.dirs.iter().map(Deref::deref).collect(); | |
|
194 | 194 | |
|
195 | 195 | let mut candidates: HashSet<&HgPath> = |
|
196 |
self.files.union(&dirs_as_set). |
|
|
196 | self.files.union(&dirs_as_set).cloned().collect(); | |
|
197 | 197 | candidates.remove(HgPath::new(b"")); |
|
198 | 198 | |
|
199 | 199 | if !directory.as_ref().is_empty() { |
|
200 | 200 | let directory = [directory.as_ref().as_bytes(), b"/"].concat(); |
|
201 | 201 | candidates = candidates |
|
202 | 202 | .iter() |
|
203 | 203 | .filter_map(|c| { |
|
204 | 204 | if c.as_bytes().starts_with(&directory) { |
|
205 | 205 | Some(HgPath::new(&c.as_bytes()[directory.len()..])) |
|
206 | 206 | } else { |
|
207 | 207 | None |
|
208 | 208 | } |
|
209 | 209 | }) |
|
210 | 210 | .collect(); |
|
211 | 211 | } |
|
212 | 212 | |
|
213 | 213 | // `self.dirs` includes all of the directories, recursively, so if |
|
214 | 214 | // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo', |
|
215 | 215 | // 'foo/bar' in it. Thus we can safely ignore a candidate that has a |
|
216 | 216 | // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate |
|
217 | 217 | // subdir will be in there without a slash. |
|
218 | 218 | VisitChildrenSet::Set( |
|
219 | 219 | candidates |
|
220 | 220 | .iter() |
|
221 | 221 | .filter_map(|c| { |
|
222 | 222 | if c.bytes().all(|b| *b != b'/') { |
|
223 | 223 | Some(*c) |
|
224 | 224 | } else { |
|
225 | 225 | None |
|
226 | 226 | } |
|
227 | 227 | }) |
|
228 | 228 | .collect(), |
|
229 | 229 | ) |
|
230 | 230 | } |
|
231 | 231 | fn matches_everything(&self) -> bool { |
|
232 | 232 | false |
|
233 | 233 | } |
|
234 | 234 | fn is_exact(&self) -> bool { |
|
235 | 235 | true |
|
236 | 236 | } |
|
237 | 237 | } |
|
238 | 238 | |
|
239 | 239 | /// Matches files that are included in the ignore rules. |
|
240 | 240 | /// ``` |
|
241 | 241 | /// use hg::{ |
|
242 | 242 | /// matchers::{IncludeMatcher, Matcher}, |
|
243 | 243 | /// IgnorePattern, |
|
244 | 244 | /// PatternSyntax, |
|
245 | 245 | /// utils::hg_path::HgPath |
|
246 | 246 | /// }; |
|
247 | 247 | /// use std::path::Path; |
|
248 | 248 | /// /// |
|
249 | 249 | /// let ignore_patterns = |
|
250 | 250 | /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))]; |
|
251 | 251 | /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap(); |
|
252 | 252 | /// /// |
|
253 | 253 | /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false); |
|
254 | 254 | /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true); |
|
255 | 255 | /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true); |
|
256 | 256 | /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false); |
|
257 | 257 | /// ``` |
|
258 | 258 | pub struct IncludeMatcher<'a> { |
|
259 | 259 | patterns: Vec<u8>, |
|
260 | 260 | match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>, |
|
261 | 261 | /// Whether all the patterns match a prefix (i.e. recursively) |
|
262 | 262 | prefix: bool, |
|
263 | 263 | roots: HashSet<HgPathBuf>, |
|
264 | 264 | dirs: HashSet<HgPathBuf>, |
|
265 | 265 | parents: HashSet<HgPathBuf>, |
|
266 | 266 | } |
|
267 | 267 | |
|
268 | 268 | impl<'a> Matcher for IncludeMatcher<'a> { |
|
269 | 269 | fn file_set(&self) -> Option<&HashSet<&HgPath>> { |
|
270 | 270 | None |
|
271 | 271 | } |
|
272 | 272 | |
|
273 | 273 | fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool { |
|
274 | 274 | false |
|
275 | 275 | } |
|
276 | 276 | |
|
277 | 277 | fn matches(&self, filename: impl AsRef<HgPath>) -> bool { |
|
278 | 278 | (self.match_fn)(filename.as_ref()) |
|
279 | 279 | } |
|
280 | 280 | |
|
281 | 281 | fn visit_children_set( |
|
282 | 282 | &self, |
|
283 | 283 | directory: impl AsRef<HgPath>, |
|
284 | 284 | ) -> VisitChildrenSet { |
|
285 | 285 | let dir = directory.as_ref(); |
|
286 | 286 | if self.prefix && self.roots.contains(dir) { |
|
287 | 287 | return VisitChildrenSet::Recursive; |
|
288 | 288 | } |
|
289 | 289 | if self.roots.contains(HgPath::new(b"")) |
|
290 | 290 | || self.roots.contains(dir) |
|
291 | 291 | || self.dirs.contains(dir) |
|
292 | 292 | || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir)) |
|
293 | 293 | { |
|
294 | 294 | return VisitChildrenSet::This; |
|
295 | 295 | } |
|
296 | 296 | |
|
297 | 297 | if self.parents.contains(directory.as_ref()) { |
|
298 | 298 | let multiset = self.get_all_parents_children(); |
|
299 | 299 | if let Some(children) = multiset.get(dir) { |
|
300 | 300 | return VisitChildrenSet::Set(children.to_owned()); |
|
301 | 301 | } |
|
302 | 302 | } |
|
303 | 303 | VisitChildrenSet::Empty |
|
304 | 304 | } |
|
305 | 305 | |
|
306 | 306 | fn matches_everything(&self) -> bool { |
|
307 | 307 | false |
|
308 | 308 | } |
|
309 | 309 | |
|
310 | 310 | fn is_exact(&self) -> bool { |
|
311 | 311 | false |
|
312 | 312 | } |
|
313 | 313 | } |
|
314 | 314 | |
|
315 | 315 | /// Returns a function that matches an `HgPath` against the given regex |
|
316 | 316 | /// pattern. |
|
317 | 317 | /// |
|
318 | 318 | /// This can fail when the pattern is invalid or not supported by the |
|
319 | 319 | /// underlying engine (the `regex` crate), for instance anything with |
|
320 | 320 | /// back-references. |
|
321 | 321 | #[timed] |
|
322 | 322 | fn re_matcher( |
|
323 | 323 | pattern: &[u8], |
|
324 | 324 | ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> { |
|
325 | 325 | use std::io::Write; |
|
326 | 326 | |
|
327 | 327 | // The `regex` crate adds `.*` to the start and end of expressions if there |
|
328 | 328 | // are no anchors, so add the start anchor. |
|
329 | 329 | let mut escaped_bytes = vec![b'^', b'(', b'?', b':']; |
|
330 | 330 | for byte in pattern { |
|
331 | 331 | if *byte > 127 { |
|
332 | 332 | write!(escaped_bytes, "\\x{:x}", *byte).unwrap(); |
|
333 | 333 | } else { |
|
334 | 334 | escaped_bytes.push(*byte); |
|
335 | 335 | } |
|
336 | 336 | } |
|
337 | 337 | escaped_bytes.push(b')'); |
|
338 | 338 | |
|
339 | 339 | // Avoid the cost of UTF8 checking |
|
340 | 340 | // |
|
341 | 341 | // # Safety |
|
342 | 342 | // This is safe because we escaped all non-ASCII bytes. |
|
343 | 343 | let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; |
|
344 | 344 | let re = regex::bytes::RegexBuilder::new(&pattern_string) |
|
345 | 345 | .unicode(false) |
|
346 | 346 | // Big repos with big `.hgignore` will hit the default limit and |
|
347 | 347 | // incur a significant performance hit. One repo's `hg status` hit |
|
348 | 348 | // multiple *minutes*. |
|
349 | 349 | .dfa_size_limit(50 * (1 << 20)) |
|
350 | 350 | .build() |
|
351 | 351 | .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; |
|
352 | 352 | |
|
353 | 353 | Ok(move |path: &HgPath| re.is_match(path.as_bytes())) |
|
354 | 354 | } |
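A small sketch of using the helper above with an invented pattern (illustration only):

```rust
// Sketch: compile one regex with `re_matcher` and query it.
fn re_matcher_example() -> PatternResult<()> {
    let is_c_file = re_matcher(br".*\.c$")?;
    assert!(is_c_file(HgPath::new(b"main.c")));
    assert!(!is_c_file(HgPath::new(b"main.rs")));
    Ok(())
}
```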
|
355 | 355 | |
|
356 | 356 | /// Returns the regex pattern and a function that matches an `HgPath` against |
|
357 | 357 | /// said regex formed by the given ignore patterns. |
|
358 | 358 | fn build_regex_match<'a>( |
|
359 | 359 | ignore_patterns: &'a [&'a IgnorePattern], |
|
360 | 360 | ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> { |
|
361 | 361 | let mut regexps = vec![]; |
|
362 | 362 | let mut exact_set = HashSet::new(); |
|
363 | 363 | |
|
364 | 364 | for pattern in ignore_patterns { |
|
365 | 365 | if let Some(re) = build_single_regex(pattern)? { |
|
366 | 366 | regexps.push(re); |
|
367 | 367 | } else { |
|
368 | 368 | let exact = normalize_path_bytes(&pattern.pattern); |
|
369 | 369 | exact_set.insert(HgPathBuf::from_bytes(&exact)); |
|
370 | 370 | } |
|
371 | 371 | } |
|
372 | 372 | |
|
373 | 373 | let full_regex = regexps.join(&b'|'); |
|
374 | 374 | |
|
375 | 375 | // An empty pattern would cause the regex engine to incorrectly match the |
|
376 | 376 | // (empty) root directory |
|
377 | 377 | let func = if !(regexps.is_empty()) { |
|
378 | 378 | let matcher = re_matcher(&full_regex)?; |
|
379 | 379 | let func = move |filename: &HgPath| { |
|
380 | 380 | exact_set.contains(filename) || matcher(filename) |
|
381 | 381 | }; |
|
382 | 382 | Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync> |
|
383 | 383 | } else { |
|
384 | 384 | let func = move |filename: &HgPath| exact_set.contains(filename); |
|
385 | 385 | Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync> |
|
386 | 386 | }; |
|
387 | 387 | |
|
388 | 388 | Ok((full_regex, func)) |
|
389 | 389 | } |
|
390 | 390 | |
|
391 | 391 | /// Returns roots and directories corresponding to each pattern. |
|
392 | 392 | /// |
|
393 | 393 | /// This calculates the roots and directories exactly matching the patterns and |
|
394 | 394 | /// returns a tuple of (roots, dirs). It does not return other directories |
|
395 | 395 | /// which may also need to be considered, like the parent directories. |
|
396 | 396 | fn roots_and_dirs( |
|
397 | 397 | ignore_patterns: &[IgnorePattern], |
|
398 | 398 | ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) { |
|
399 | 399 | let mut roots = Vec::new(); |
|
400 | 400 | let mut dirs = Vec::new(); |
|
401 | 401 | |
|
402 | 402 | for ignore_pattern in ignore_patterns { |
|
403 | 403 | let IgnorePattern { |
|
404 | 404 | syntax, pattern, .. |
|
405 | 405 | } = ignore_pattern; |
|
406 | 406 | match syntax { |
|
407 | 407 | PatternSyntax::RootGlob | PatternSyntax::Glob => { |
|
408 | 408 | let mut root = vec![]; |
|
409 | 409 | |
|
410 | 410 | for p in pattern.split(|c| *c == b'/') { |
|
411 | 411 | if p.iter().any(|c| match *c { |
|
412 | 412 | b'[' | b'{' | b'*' | b'?' => true, |
|
413 | 413 | _ => false, |
|
414 | 414 | }) { |
|
415 | 415 | break; |
|
416 | 416 | } |
|
417 | 417 | root.push(HgPathBuf::from_bytes(p)); |
|
418 | 418 | } |
|
419 | 419 | let buf = |
|
420 | 420 | root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r)); |
|
421 | 421 | roots.push(buf); |
|
422 | 422 | } |
|
423 | 423 | PatternSyntax::Path | PatternSyntax::RelPath => { |
|
424 | 424 | let pat = HgPath::new(if pattern == b"." { |
|
425 | 425 | &[] as &[u8] |
|
426 | 426 | } else { |
|
427 | 427 | pattern |
|
428 | 428 | }); |
|
429 | 429 | roots.push(pat.to_owned()); |
|
430 | 430 | } |
|
431 | 431 | PatternSyntax::RootFiles => { |
|
432 | 432 | let pat = if pattern == b"." { |
|
433 | 433 | &[] as &[u8] |
|
434 | 434 | } else { |
|
435 | 435 | pattern |
|
436 | 436 | }; |
|
437 | 437 | dirs.push(HgPathBuf::from_bytes(pat)); |
|
438 | 438 | } |
|
439 | 439 | _ => { |
|
440 | 440 | roots.push(HgPathBuf::new()); |
|
441 | 441 | } |
|
442 | 442 | } |
|
443 | 443 | } |
|
444 | 444 | (roots, dirs) |
|
445 | 445 | } |
|
446 | 446 | |
|
447 | 447 | /// Paths extracted from patterns |
|
448 | 448 | #[derive(Debug, PartialEq)] |
|
449 | 449 | struct RootsDirsAndParents { |
|
450 | 450 | /// Directories to match recursively |
|
451 | 451 | pub roots: HashSet<HgPathBuf>, |
|
452 | 452 | /// Directories to match non-recursively |
|
453 | 453 | pub dirs: HashSet<HgPathBuf>, |
|
454 | 454 | /// Implicitly required directories to go to items in either roots or dirs |
|
455 | 455 | pub parents: HashSet<HgPathBuf>, |
|
456 | 456 | } |
|
457 | 457 | |
|
458 | 458 | /// Extract roots, dirs and parents from patterns. |
|
459 | 459 | fn roots_dirs_and_parents( |
|
460 | 460 | ignore_patterns: &[IgnorePattern], |
|
461 | 461 | ) -> PatternResult<RootsDirsAndParents> { |
|
462 | 462 | let (roots, dirs) = roots_and_dirs(ignore_patterns); |
|
463 | 463 | |
|
464 | 464 | let mut parents = HashSet::new(); |
|
465 | 465 | |
|
466 | 466 | parents.extend( |
|
467 | 467 | DirsMultiset::from_manifest(&dirs) |
|
468 | 468 | .map_err(|e| match e { |
|
469 | 469 | DirstateMapError::InvalidPath(e) => e, |
|
470 | 470 | _ => unreachable!(), |
|
471 | 471 | })? |
|
472 | 472 | .iter() |
|
473 |
.map( |
|
|
473 | .map(ToOwned::to_owned), | |
|
474 | 474 | ); |
|
475 | 475 | parents.extend( |
|
476 | 476 | DirsMultiset::from_manifest(&roots) |
|
477 | 477 | .map_err(|e| match e { |
|
478 | 478 | DirstateMapError::InvalidPath(e) => e, |
|
479 | 479 | _ => unreachable!(), |
|
480 | 480 | })? |
|
481 | 481 | .iter() |
|
482 |
.map( |
|
|
482 | .map(ToOwned::to_owned), | |
|
483 | 483 | ); |
|
484 | 484 | |
|
485 | 485 | Ok(RootsDirsAndParents { |
|
486 | 486 | roots: HashSet::from_iter(roots), |
|
487 | 487 | dirs: HashSet::from_iter(dirs), |
|
488 | 488 | parents, |
|
489 | 489 | }) |
|
490 | 490 | } |
|
491 | 491 | |
|
492 | 492 | /// Returns a function that checks whether a given file (in the general sense) |
|
493 | 493 | /// should be matched. |
|
494 | 494 | fn build_match<'a, 'b>( |
|
495 | 495 | ignore_patterns: &'a [IgnorePattern], |
|
496 | 496 | root_dir: impl AsRef<Path>, |
|
497 | 497 | ) -> PatternResult<( |
|
498 | 498 | Vec<u8>, |
|
499 | 499 | Box<dyn Fn(&HgPath) -> bool + 'b + Sync>, |
|
500 | 500 | Vec<PatternFileWarning>, |
|
501 | 501 | )> { |
|
502 | 502 | let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![]; |
|
503 | 503 | // For debugging and printing |
|
504 | 504 | let mut patterns = vec![]; |
|
505 | 505 | let mut all_warnings = vec![]; |
|
506 | 506 | |
|
507 | 507 | let (subincludes, ignore_patterns) = |
|
508 | 508 | filter_subincludes(ignore_patterns, root_dir)?; |
|
509 | 509 | |
|
510 | 510 | if !subincludes.is_empty() { |
|
511 | 511 | // Build prefix-based matcher functions for subincludes |
|
512 | 512 | let mut submatchers = FastHashMap::default(); |
|
513 | 513 | let mut prefixes = vec![]; |
|
514 | 514 | |
|
515 | 515 | for SubInclude { prefix, root, path } in subincludes.into_iter() { |
|
516 | 516 | let (match_fn, warnings) = |
|
517 | 517 | get_ignore_function(vec![path.to_path_buf()], root)?; |
|
518 | 518 | all_warnings.extend(warnings); |
|
519 | 519 | prefixes.push(prefix.to_owned()); |
|
520 | 520 | submatchers.insert(prefix.to_owned(), match_fn); |
|
521 | 521 | } |
|
522 | 522 | |
|
523 | 523 | let match_subinclude = move |filename: &HgPath| { |
|
524 | 524 | for prefix in prefixes.iter() { |
|
525 | 525 | if let Some(rel) = filename.relative_to(prefix) { |
|
526 |
if (submatchers |
|
|
526 | if (submatchers[prefix])(rel) { | |
|
527 | 527 | return true; |
|
528 | 528 | } |
|
529 | 529 | } |
|
530 | 530 | } |
|
531 | 531 | false |
|
532 | 532 | }; |
|
533 | 533 | |
|
534 | 534 | match_funcs.push(Box::new(match_subinclude)); |
|
535 | 535 | } |
|
536 | 536 | |
|
537 | 537 | if !ignore_patterns.is_empty() { |
|
538 | 538 | // Either do dumb matching if all patterns are rootfiles, or match |
|
539 | 539 | // with a regex. |
|
540 | 540 | if ignore_patterns |
|
541 | 541 | .iter() |
|
542 | 542 | .all(|k| k.syntax == PatternSyntax::RootFiles) |
|
543 | 543 | { |
|
544 | 544 | let dirs: HashSet<_> = ignore_patterns |
|
545 | 545 | .iter() |
|
546 | 546 | .map(|k| k.pattern.to_owned()) |
|
547 | 547 | .collect(); |
|
548 | 548 | let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect(); |
|
549 | 549 | |
|
550 | 550 | let match_func = move |path: &HgPath| -> bool { |
|
551 | 551 | let path = path.as_bytes(); |
|
552 | 552 | let i = path.iter().rfind(|a| **a == b'/'); |
|
553 | 553 | let dir = if let Some(i) = i { |
|
554 | 554 | &path[..*i as usize] |
|
555 | 555 | } else { |
|
556 | 556 | b"." |
|
557 | 557 | }; |
|
558 | 558 | dirs.contains(dir.deref()) |
|
559 | 559 | }; |
|
560 | 560 | match_funcs.push(Box::new(match_func)); |
|
561 | 561 | |
|
562 | 562 | patterns.extend(b"rootfilesin: "); |
|
563 | 563 | dirs_vec.sort(); |
|
564 | 564 | patterns.extend(dirs_vec.escaped_bytes()); |
|
565 | 565 | } else { |
|
566 | 566 | let (new_re, match_func) = build_regex_match(&ignore_patterns)?; |
|
567 | 567 | patterns = new_re; |
|
568 | 568 | match_funcs.push(match_func) |
|
569 | 569 | } |
|
570 | 570 | } |
|
571 | 571 | |
|
572 | 572 | Ok(if match_funcs.len() == 1 { |
|
573 | 573 | (patterns, match_funcs.remove(0), all_warnings) |
|
574 | 574 | } else { |
|
575 | 575 | ( |
|
576 | 576 | patterns, |
|
577 | 577 | Box::new(move |f: &HgPath| -> bool { |
|
578 | 578 | match_funcs.iter().any(|match_func| match_func(f)) |
|
579 | 579 | }), |
|
580 | 580 | all_warnings, |
|
581 | 581 | ) |
|
582 | 582 | }) |
|
583 | 583 | } |
|
584 | 584 | |
|
585 | 585 | /// Parses all "ignore" files with their recursive includes and returns a |
|
586 | 586 | /// function that checks whether a given file (in the general sense) should be |
|
587 | 587 | /// ignored. |
|
588 | 588 | pub fn get_ignore_function<'a>( |
|
589 | 589 | all_pattern_files: Vec<PathBuf>, |
|
590 | 590 | root_dir: impl AsRef<Path>, |
|
591 | 591 | ) -> PatternResult<( |
|
592 | 592 | Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>, |
|
593 | 593 | Vec<PatternFileWarning>, |
|
594 | 594 | )> { |
|
595 | 595 | let mut all_patterns = vec![]; |
|
596 | 596 | let mut all_warnings = vec![]; |
|
597 | 597 | |
|
598 | 598 | for pattern_file in all_pattern_files.into_iter() { |
|
599 | 599 | let (patterns, warnings) = |
|
600 | 600 | get_patterns_from_file(pattern_file, &root_dir)?; |
|
601 | 601 | |
|
602 | 602 | all_patterns.extend(patterns.to_owned()); |
|
603 | 603 | all_warnings.extend(warnings); |
|
604 | 604 | } |
|
605 | 605 | let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?; |
|
606 | 606 | all_warnings.extend(warnings); |
|
607 | 607 | Ok(( |
|
608 | 608 | Box::new(move |path: &HgPath| matcher.matches(path)), |
|
609 | 609 | all_warnings, |
|
610 | 610 | )) |
|
611 | 611 | } |
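A hypothetical end-to-end sketch of the function above; the repository path is invented for illustration.

```rust
// Hypothetical sketch: build the "is this path ignored?" predicate for a
// repository and query it once.
fn ignored_example() -> PatternResult<bool> {
    let root_dir = PathBuf::from("/path/to/repo");
    let (ignore_fn, _warnings) =
        get_ignore_function(vec![root_dir.join(".hgignore")], &root_dir)?;
    Ok(ignore_fn(HgPath::new(b"rust/target/some-artifact")))
}
```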
|
612 | 612 | |
|
613 | 613 | impl<'a> IncludeMatcher<'a> { |
|
614 | 614 | pub fn new( |
|
615 | 615 | ignore_patterns: Vec<IgnorePattern>, |
|
616 | 616 | root_dir: impl AsRef<Path>, |
|
617 | 617 | ) -> PatternResult<(Self, Vec<PatternFileWarning>)> { |
|
618 | 618 | let (patterns, match_fn, warnings) = |
|
619 | 619 | build_match(&ignore_patterns, root_dir)?; |
|
620 | 620 | let RootsDirsAndParents { |
|
621 | 621 | roots, |
|
622 | 622 | dirs, |
|
623 | 623 | parents, |
|
624 | 624 | } = roots_dirs_and_parents(&ignore_patterns)?; |
|
625 | 625 | |
|
626 | 626 | let prefix = ignore_patterns.iter().any(|k| match k.syntax { |
|
627 | 627 | PatternSyntax::Path | PatternSyntax::RelPath => true, |
|
628 | 628 | _ => false, |
|
629 | 629 | }); |
|
630 | 630 | |
|
631 | 631 | Ok(( |
|
632 | 632 | Self { |
|
633 | 633 | patterns, |
|
634 | 634 | match_fn, |
|
635 | 635 | prefix, |
|
636 | 636 | roots, |
|
637 | 637 | dirs, |
|
638 | 638 | parents, |
|
639 | 639 | }, |
|
640 | 640 | warnings, |
|
641 | 641 | )) |
|
642 | 642 | } |
|
643 | 643 | |
|
644 | 644 | fn get_all_parents_children(&self) -> DirsChildrenMultiset { |
|
645 | 645 | // TODO cache |
|
646 | 646 | let thing = self |
|
647 | 647 | .dirs |
|
648 | 648 | .iter() |
|
649 | 649 | .chain(self.roots.iter()) |
|
650 | 650 | .chain(self.parents.iter()); |
|
651 | 651 | DirsChildrenMultiset::new(thing, Some(&self.parents)) |
|
652 | 652 | } |
|
653 | 653 | } |
|
654 | 654 | |
|
655 | 655 | impl<'a> Display for IncludeMatcher<'a> { |
|
656 | 656 | fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { |
|
657 | 657 | // XXX What about exact matches? |
|
658 | 658 | // I'm not sure it's worth it to clone the HashSet and keep it |
|
659 | 659 | // around just in case someone wants to display the matcher, plus |
|
660 | 660 | // it's going to be unreadable after a few entries, but we need to |
|
661 | 661 | // inform in this display that exact matches are being used and are |
|
662 | 662 | // (on purpose) missing from the `includes`. |
|
663 | 663 | write!( |
|
664 | 664 | f, |
|
665 | 665 | "IncludeMatcher(includes='{}')", |
|
666 | 666 | String::from_utf8_lossy(&self.patterns.escaped_bytes()) |
|
667 | 667 | ) |
|
668 | 668 | } |
|
669 | 669 | } |
|
670 | 670 | |
|
671 | 671 | #[cfg(test)] |
|
672 | 672 | mod tests { |
|
673 | 673 | use super::*; |
|
674 | 674 | use pretty_assertions::assert_eq; |
|
675 | 675 | use std::path::Path; |
|
676 | 676 | |
|
677 | 677 | #[test] |
|
678 | 678 | fn test_roots_and_dirs() { |
|
679 | 679 | let pats = vec![ |
|
680 | 680 | IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")), |
|
681 | 681 | IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")), |
|
682 | 682 | IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")), |
|
683 | 683 | ]; |
|
684 | 684 | let (roots, dirs) = roots_and_dirs(&pats); |
|
685 | 685 | |
|
686 | 686 | assert_eq!( |
|
687 | 687 | roots, |
|
688 | 688 | vec!( |
|
689 | 689 | HgPathBuf::from_bytes(b"g/h"), |
|
690 | 690 | HgPathBuf::from_bytes(b"g/h"), |
|
691 | 691 | HgPathBuf::new() |
|
692 | 692 | ), |
|
693 | 693 | ); |
|
694 | 694 | assert_eq!(dirs, vec!()); |
|
695 | 695 | } |
|
696 | 696 | |
|
697 | 697 | #[test] |
|
698 | 698 | fn test_roots_dirs_and_parents() { |
|
699 | 699 | let pats = vec![ |
|
700 | 700 | IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")), |
|
701 | 701 | IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")), |
|
702 | 702 | IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")), |
|
703 | 703 | ]; |
|
704 | 704 | |
|
705 | 705 | let mut roots = HashSet::new(); |
|
706 | 706 | roots.insert(HgPathBuf::from_bytes(b"g/h")); |
|
707 | 707 | roots.insert(HgPathBuf::new()); |
|
708 | 708 | |
|
709 | 709 | let dirs = HashSet::new(); |
|
710 | 710 | |
|
711 | 711 | let mut parents = HashSet::new(); |
|
712 | 712 | parents.insert(HgPathBuf::new()); |
|
713 | 713 | parents.insert(HgPathBuf::from_bytes(b"g")); |
|
714 | 714 | |
|
715 | 715 | assert_eq!( |
|
716 | 716 | roots_dirs_and_parents(&pats).unwrap(), |
|
717 | 717 | RootsDirsAndParents { |
|
718 | 718 | roots, |
|
719 | 719 | dirs, |
|
720 | 720 | parents |
|
721 | 721 | } |
|
722 | 722 | ); |
|
723 | 723 | } |
|
724 | 724 | |
|
725 | 725 | #[test] |
|
726 | 726 | fn test_filematcher_visit_children_set() { |
|
727 | 727 | // Visitchildrenset |
|
728 | 728 | let files = vec![HgPath::new(b"dir/subdir/foo.txt")]; |
|
729 | 729 | let matcher = FileMatcher::new(&files).unwrap(); |
|
730 | 730 | |
|
731 | 731 | let mut set = HashSet::new(); |
|
732 | 732 | set.insert(HgPath::new(b"dir")); |
|
733 | 733 | assert_eq!( |
|
734 | 734 | matcher.visit_children_set(HgPath::new(b"")), |
|
735 | 735 | VisitChildrenSet::Set(set) |
|
736 | 736 | ); |
|
737 | 737 | |
|
738 | 738 | let mut set = HashSet::new(); |
|
739 | 739 | set.insert(HgPath::new(b"subdir")); |
|
740 | 740 | assert_eq!( |
|
741 | 741 | matcher.visit_children_set(HgPath::new(b"dir")), |
|
742 | 742 | VisitChildrenSet::Set(set) |
|
743 | 743 | ); |
|
744 | 744 | |
|
745 | 745 | let mut set = HashSet::new(); |
|
746 | 746 | set.insert(HgPath::new(b"foo.txt")); |
|
747 | 747 | assert_eq!( |
|
748 | 748 | matcher.visit_children_set(HgPath::new(b"dir/subdir")), |
|
749 | 749 | VisitChildrenSet::Set(set) |
|
750 | 750 | ); |
|
751 | 751 | |
|
752 | 752 | assert_eq!( |
|
753 | 753 | matcher.visit_children_set(HgPath::new(b"dir/subdir/x")), |
|
754 | 754 | VisitChildrenSet::Empty |
|
755 | 755 | ); |
|
756 | 756 | assert_eq!( |
|
757 | 757 | matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")), |
|
758 | 758 | VisitChildrenSet::Empty |
|
759 | 759 | ); |
|
760 | 760 | assert_eq!( |
|
761 | 761 | matcher.visit_children_set(HgPath::new(b"folder")), |
|
762 | 762 | VisitChildrenSet::Empty |
|
763 | 763 | ); |
|
764 | 764 | } |
|
765 | 765 | |
|
766 | 766 | #[test] |
|
767 | 767 | fn test_filematcher_visit_children_set_files_and_dirs() { |
|
768 | 768 | let files = vec![ |
|
769 | 769 | HgPath::new(b"rootfile.txt"), |
|
770 | 770 | HgPath::new(b"a/file1.txt"), |
|
771 | 771 | HgPath::new(b"a/b/file2.txt"), |
|
772 | 772 | // No file in a/b/c |
|
773 | 773 | HgPath::new(b"a/b/c/d/file4.txt"), |
|
774 | 774 | ]; |
|
775 | 775 | let matcher = FileMatcher::new(&files).unwrap(); |
|
776 | 776 | |
|
777 | 777 | let mut set = HashSet::new(); |
|
778 | 778 | set.insert(HgPath::new(b"a")); |
|
779 | 779 | set.insert(HgPath::new(b"rootfile.txt")); |
|
780 | 780 | assert_eq!( |
|
781 | 781 | matcher.visit_children_set(HgPath::new(b"")), |
|
782 | 782 | VisitChildrenSet::Set(set) |
|
783 | 783 | ); |
|
784 | 784 | |
|
785 | 785 | let mut set = HashSet::new(); |
|
786 | 786 | set.insert(HgPath::new(b"b")); |
|
787 | 787 | set.insert(HgPath::new(b"file1.txt")); |
|
788 | 788 | assert_eq!( |
|
789 | 789 | matcher.visit_children_set(HgPath::new(b"a")), |
|
790 | 790 | VisitChildrenSet::Set(set) |
|
791 | 791 | ); |
|
792 | 792 | |
|
793 | 793 | let mut set = HashSet::new(); |
|
794 | 794 | set.insert(HgPath::new(b"c")); |
|
795 | 795 | set.insert(HgPath::new(b"file2.txt")); |
|
796 | 796 | assert_eq!( |
|
797 | 797 | matcher.visit_children_set(HgPath::new(b"a/b")), |
|
798 | 798 | VisitChildrenSet::Set(set) |
|
799 | 799 | ); |
|
800 | 800 | |
|
801 | 801 | let mut set = HashSet::new(); |
|
802 | 802 | set.insert(HgPath::new(b"d")); |
|
803 | 803 | assert_eq!( |
|
804 | 804 | matcher.visit_children_set(HgPath::new(b"a/b/c")), |
|
805 | 805 | VisitChildrenSet::Set(set) |
|
806 | 806 | ); |
|
807 | 807 | let mut set = HashSet::new(); |
|
808 | 808 | set.insert(HgPath::new(b"file4.txt")); |
|
809 | 809 | assert_eq!( |
|
810 | 810 | matcher.visit_children_set(HgPath::new(b"a/b/c/d")), |
|
811 | 811 | VisitChildrenSet::Set(set) |
|
812 | 812 | ); |
|
813 | 813 | |
|
814 | 814 | assert_eq!( |
|
815 | 815 | matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")), |
|
816 | 816 | VisitChildrenSet::Empty |
|
817 | 817 | ); |
|
818 | 818 | assert_eq!( |
|
819 | 819 | matcher.visit_children_set(HgPath::new(b"folder")), |
|
820 | 820 | VisitChildrenSet::Empty |
|
821 | 821 | ); |
|
822 | 822 | } |
|
823 | 823 | |
|
824 | 824 | #[test] |
|
825 | 825 | fn test_includematcher() { |
|
826 | 826 | // VisitchildrensetPrefix |
|
827 | 827 | let (matcher, _) = IncludeMatcher::new( |
|
828 | 828 | vec![IgnorePattern::new( |
|
829 | 829 | PatternSyntax::RelPath, |
|
830 | 830 | b"dir/subdir", |
|
831 | 831 | Path::new(""), |
|
832 | 832 | )], |
|
833 | 833 | "", |
|
834 | 834 | ) |
|
835 | 835 | .unwrap(); |
|
836 | 836 | |
|
837 | 837 | let mut set = HashSet::new(); |
|
838 | 838 | set.insert(HgPath::new(b"dir")); |
|
839 | 839 | assert_eq!( |
|
840 | 840 | matcher.visit_children_set(HgPath::new(b"")), |
|
841 | 841 | VisitChildrenSet::Set(set) |
|
842 | 842 | ); |
|
843 | 843 | |
|
844 | 844 | let mut set = HashSet::new(); |
|
845 | 845 | set.insert(HgPath::new(b"subdir")); |
|
846 | 846 | assert_eq!( |
|
847 | 847 | matcher.visit_children_set(HgPath::new(b"dir")), |
|
848 | 848 | VisitChildrenSet::Set(set) |
|
849 | 849 | ); |
|
850 | 850 | assert_eq!( |
|
851 | 851 | matcher.visit_children_set(HgPath::new(b"dir/subdir")), |
|
852 | 852 | VisitChildrenSet::Recursive |
|
853 | 853 | ); |
|
854 | 854 | // OPT: This should probably be 'all' if its parent is? |
|
855 | 855 | assert_eq!( |
|
856 | 856 | matcher.visit_children_set(HgPath::new(b"dir/subdir/x")), |
|
857 | 857 | VisitChildrenSet::This |
|
858 | 858 | ); |
|
859 | 859 | assert_eq!( |
|
860 | 860 | matcher.visit_children_set(HgPath::new(b"folder")), |
|
861 | 861 | VisitChildrenSet::Empty |
|
862 | 862 | ); |
|
863 | 863 | |
|
864 | 864 | // VisitchildrensetRootfilesin |
|
865 | 865 | let (matcher, _) = IncludeMatcher::new( |
|
866 | 866 | vec![IgnorePattern::new( |
|
867 | 867 | PatternSyntax::RootFiles, |
|
868 | 868 | b"dir/subdir", |
|
869 | 869 | Path::new(""), |
|
870 | 870 | )], |
|
871 | 871 | "", |
|
872 | 872 | ) |
|
873 | 873 | .unwrap(); |
|
874 | 874 | |
|
875 | 875 | let mut set = HashSet::new(); |
|
876 | 876 | set.insert(HgPath::new(b"dir")); |
|
877 | 877 | assert_eq!( |
|
878 | 878 | matcher.visit_children_set(HgPath::new(b"")), |
|
879 | 879 | VisitChildrenSet::Set(set) |
|
880 | 880 | ); |
|
881 | 881 | |
|
882 | 882 | let mut set = HashSet::new(); |
|
883 | 883 | set.insert(HgPath::new(b"subdir")); |
|
884 | 884 | assert_eq!( |
|
885 | 885 | matcher.visit_children_set(HgPath::new(b"dir")), |
|
886 | 886 | VisitChildrenSet::Set(set) |
|
887 | 887 | ); |
|
888 | 888 | |
|
889 | 889 | assert_eq!( |
|
890 | 890 | matcher.visit_children_set(HgPath::new(b"dir/subdir")), |
|
891 | 891 | VisitChildrenSet::This |
|
892 | 892 | ); |
|
893 | 893 | assert_eq!( |
|
894 | 894 | matcher.visit_children_set(HgPath::new(b"dir/subdir/x")), |
|
895 | 895 | VisitChildrenSet::Empty |
|
896 | 896 | ); |
|
897 | 897 | assert_eq!( |
|
898 | 898 | matcher.visit_children_set(HgPath::new(b"folder")), |
|
899 | 899 | VisitChildrenSet::Empty |
|
900 | 900 | ); |
|
901 | 901 | |
|
902 | 902 | // VisitchildrensetGlob |
|
903 | 903 | let (matcher, _) = IncludeMatcher::new( |
|
904 | 904 | vec![IgnorePattern::new( |
|
905 | 905 | PatternSyntax::Glob, |
|
906 | 906 | b"dir/z*", |
|
907 | 907 | Path::new(""), |
|
908 | 908 | )], |
|
909 | 909 | "", |
|
910 | 910 | ) |
|
911 | 911 | .unwrap(); |
|
912 | 912 | |
|
913 | 913 | let mut set = HashSet::new(); |
|
914 | 914 | set.insert(HgPath::new(b"dir")); |
|
915 | 915 | assert_eq!( |
|
916 | 916 | matcher.visit_children_set(HgPath::new(b"")), |
|
917 | 917 | VisitChildrenSet::Set(set) |
|
918 | 918 | ); |
|
919 | 919 | assert_eq!( |
|
920 | 920 | matcher.visit_children_set(HgPath::new(b"folder")), |
|
921 | 921 | VisitChildrenSet::Empty |
|
922 | 922 | ); |
|
923 | 923 | assert_eq!( |
|
924 | 924 | matcher.visit_children_set(HgPath::new(b"dir")), |
|
925 | 925 | VisitChildrenSet::This |
|
926 | 926 | ); |
|
927 | 927 | // OPT: these should probably be set(). |
|
928 | 928 | assert_eq!( |
|
929 | 929 | matcher.visit_children_set(HgPath::new(b"dir/subdir")), |
|
930 | 930 | VisitChildrenSet::This |
|
931 | 931 | ); |
|
932 | 932 | assert_eq!( |
|
933 | 933 | matcher.visit_children_set(HgPath::new(b"dir/subdir/x")), |
|
934 | 934 | VisitChildrenSet::This |
|
935 | 935 | ); |
|
936 | 936 | } |
|
937 | 937 | } |
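
The assertions above pin down the `VisitChildrenSet` contract: `Empty` means nothing under the directory can match, `Set(...)` names the only children worth descending into, `This` gives no guidance about children, and `Recursive` means everything below matches. A minimal standalone sketch of how a directory walk could consume such answers; the enum here is a simplified stand-in, not the crate's actual type:

use std::collections::HashSet;

enum VisitChildrenSet {
    Empty,                // nothing under this directory can match
    This,                 // visit this directory, no guidance about children
    Set(HashSet<String>), // only these named children are worth visiting
    Recursive,            // everything below matches
}

fn children_to_visit(answer: &VisitChildrenSet, children: &[String]) -> Vec<String> {
    match answer {
        VisitChildrenSet::Empty => Vec::new(),
        VisitChildrenSet::This | VisitChildrenSet::Recursive => children.to_vec(),
        VisitChildrenSet::Set(names) => children
            .iter()
            .filter(|c| names.contains(*c))
            .cloned()
            .collect(),
    }
}

fn main() {
    let children = vec!["dir".to_string(), "folder".to_string()];
    let mut only = HashSet::new();
    only.insert("dir".to_string());
    assert_eq!(
        children_to_visit(&VisitChildrenSet::Set(only), &children),
        vec!["dir".to_string()]
    );
    assert!(children_to_visit(&VisitChildrenSet::Empty, &children).is_empty());
}
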
@@ -1,56 +1,61 b'' | |||
|
1 | 1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // and Mercurial contributors |
|
3 | 3 | // |
|
4 | 4 | // This software may be used and distributed according to the terms of the |
|
5 | 5 | // GNU General Public License version 2 or any later version. |
|
6 | 6 | //! Mercurial concepts for handling revision history |
|
7 | 7 | |
|
8 | 8 | pub mod node; |
|
9 | 9 | pub mod nodemap; |
|
10 | 10 | pub use node::{Node, NodeError, NodePrefix, NodePrefixRef}; |
|
11 | 11 | |
|
12 | 12 | /// Mercurial revision numbers |
|
13 | 13 | /// |
|
14 | 14 | /// As noted in revlog.c, revision numbers are actually encoded in |
|
15 | 15 | /// 4 bytes, and are liberally converted to ints, whence the i32 |
|
16 | 16 | pub type Revision = i32; |
|
17 | 17 | |
|
18 | 18 | /// Marker expressing the absence of a parent |
|
19 | 19 | /// |
|
20 | 20 | /// Independently of the actual representation, `NULL_REVISION` is guaranteed |
|
21 | 21 | /// to be smaller than all existing revisions. |
|
22 | 22 | pub const NULL_REVISION: Revision = -1; |
|
23 | 23 | |
|
24 | 24 | /// Same as `mercurial.node.wdirrev` |
|
25 | 25 | /// |
|
26 | 26 | /// This is also equal to `i32::max_value()`, but it's better to spell |
|
27 | 27 | /// it out explicitly, same as in `mercurial.node` |
|
28 | #[allow(clippy::unreadable_literal)] | |
|
28 | 29 | pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff; |
|
29 | 30 | |
|
30 | 31 | /// The simplest expression of what we need of Mercurial DAGs. |
|
31 | 32 | pub trait Graph { |
|
32 | 33 | /// Return the two parents of the given `Revision`. |
|
33 | 34 | /// |
|
34 | 35 | /// Each of the parents can be independently `NULL_REVISION` |
|
35 | 36 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>; |
|
36 | 37 | } |
|
37 | 38 | |
|
38 | 39 | #[derive(Clone, Debug, PartialEq)] |
|
39 | 40 | pub enum GraphError { |
|
40 | 41 | ParentOutOfRange(Revision), |
|
41 | 42 | WorkingDirectoryUnsupported, |
|
42 | 43 | } |
|
43 | 44 | |
|
44 | 45 | /// The Mercurial Revlog Index |
|
45 | 46 | /// |
|
46 | 47 | /// This is currently limited to the minimal interface that is needed for |
|
47 | 48 | /// the [`nodemap`](nodemap/index.html) module |
|
48 | 49 | pub trait RevlogIndex { |
|
49 | 50 | /// Total number of Revisions referenced in this index |
|
50 | 51 | fn len(&self) -> usize; |
|
51 | 52 | |
|
53 | fn is_empty(&self) -> bool { | |
|
54 | self.len() == 0 | |
|
55 | } | |
|
56 | ||
|
52 | 57 | /// Return a reference to the Node or `None` if rev is out of bounds |
|
53 | 58 | /// |
|
54 | 59 | /// `NULL_REVISION` is not considered to be out of bounds. |
|
55 | 60 | fn node(&self, rev: Revision) -> Option<&Node>; |
|
56 | 61 | } |
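
The `Graph` and `RevlogIndex` traits above are the whole surface the ancestors and nodemap code needs from an index. A minimal standalone sketch of a `Graph` over an in-memory parent table, with local copies of the types so it compiles on its own (not the crate's actual index):

type Revision = i32;
const NULL_REVISION: Revision = -1;

#[derive(Clone, Debug, PartialEq)]
enum GraphError {
    ParentOutOfRange(Revision),
}

trait Graph {
    /// Return the two parents of the given `Revision`, as in the trait above.
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}

/// Parents stored positionally: `self.0[r]` holds the parents of revision `r`.
struct VecGraph(Vec<[Revision; 2]>);

impl Graph for VecGraph {
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.0
            .get(rev as usize)
            .copied()
            .ok_or(GraphError::ParentOutOfRange(rev))
    }
}

fn main() {
    // 0 is a root, 1 and 2 are children of 0, 3 is a merge of 1 and 2.
    let g = VecGraph(vec![
        [NULL_REVISION, NULL_REVISION],
        [0, NULL_REVISION],
        [0, NULL_REVISION],
        [1, 2],
    ]);
    assert_eq!(g.parents(3), Ok([1, 2]));
    assert_eq!(g.parents(42), Err(GraphError::ParentOutOfRange(42)));
}
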
@@ -1,429 +1,433 b'' | |||
|
1 | 1 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // |
|
3 | 3 | // This software may be used and distributed according to the terms of the |
|
4 | 4 | // GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | 6 | //! Definitions and utilities for Revision nodes |
|
7 | 7 | //! |
|
8 | 8 | //! In Mercurial code base, it is customary to call "a node" the binary SHA |
|
9 | 9 | //! of a revision. |
|
10 | 10 | |
|
11 | 11 | use hex::{self, FromHex, FromHexError}; |
|
12 | 12 | |
|
13 | 13 | /// The length in bytes of a `Node` |
|
14 | 14 | /// |
|
15 | 15 | /// This constant is meant to ease refactors of this module, and |
|
16 | 16 | /// is private so that calling code does not expect all nodes to have |
|
17 | 17 | /// the same size, should we support several formats concurrently in |
|
18 | 18 | /// the future. |
|
19 | 19 | const NODE_BYTES_LENGTH: usize = 20; |
|
20 | 20 | |
|
21 | 21 | /// The length in nybbles (hexadecimal digits) of a `Node` |
|
22 | 22 | /// |
|
23 | 23 | /// see also `NODE_BYTES_LENGTH` about it being private. |
|
24 | 24 | const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; |
|
25 | 25 | |
|
26 | 26 | /// Private alias for readability and to ease future change |
|
27 | 27 | type NodeData = [u8; NODE_BYTES_LENGTH]; |
|
28 | 28 | |
|
29 | 29 | /// Binary revision SHA |
|
30 | 30 | /// |
|
31 | 31 | /// ## Future changes of hash size |
|
32 | 32 | /// |
|
33 | 33 | /// To accommodate future changes of hash size, Rust callers |
|
34 | 34 | /// should use the conversion methods at the boundaries (FFI, actual |
|
35 | 35 | /// computation of hashes and I/O) only, and only if required. |
|
36 | 36 | /// |
|
37 | 37 | /// All other callers outside of unit tests should just handle `Node` values |
|
38 | 38 | /// and never make any assumption on the actual length, using [`nybbles_len`] |
|
39 | 39 | /// if they need a loop boundary. |
|
40 | 40 | /// |
|
41 | 41 | /// All methods that create a `Node` either take a type that enforces |
|
42 | 42 | /// the size or fail immediately at runtime with [`ExactLengthRequired`]. |
|
43 | 43 | /// |
|
44 | 44 | /// [`nybbles_len`]: #method.nybbles_len |
|
45 | 45 | /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired |
|
46 | 46 | #[derive(Clone, Debug, PartialEq)] |
|
47 | 47 | #[repr(transparent)] |
|
48 | 48 | pub struct Node { |
|
49 | 49 | data: NodeData, |
|
50 | 50 | } |
|
51 | 51 | |
|
52 | 52 | /// The node value for NULL_REVISION |
|
53 | 53 | pub const NULL_NODE: Node = Node { |
|
54 | 54 | data: [0; NODE_BYTES_LENGTH], |
|
55 | 55 | }; |
|
56 | 56 | |
|
57 | 57 | impl From<NodeData> for Node { |
|
58 | 58 | fn from(data: NodeData) -> Node { |
|
59 | 59 | Node { data } |
|
60 | 60 | } |
|
61 | 61 | } |
|
62 | 62 | |
|
63 | 63 | #[derive(Debug, PartialEq)] |
|
64 | 64 | pub enum NodeError { |
|
65 | 65 | ExactLengthRequired(usize, String), |
|
66 | 66 | PrefixTooLong(String), |
|
67 | 67 | HexError(FromHexError, String), |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | /// Low level utility function, also for prefixes |
|
71 | 71 | fn get_nybble(s: &[u8], i: usize) -> u8 { |
|
72 | 72 | if i % 2 == 0 { |
|
73 | 73 | s[i / 2] >> 4 |
|
74 | 74 | } else { |
|
75 | 75 | s[i / 2] & 0x0f |
|
76 | 76 | } |
|
77 | 77 | } |
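
`get_nybble` picks the high or low half of byte `i / 2`. A standalone check with a local copy of the function above, run on the two bytes `[0x12, 0xca]`:

// Local copy of the utility above, checked on the bytes of hex "12ca".
fn get_nybble(s: &[u8], i: usize) -> u8 {
    if i % 2 == 0 {
        s[i / 2] >> 4
    } else {
        s[i / 2] & 0x0f
    }
}

fn main() {
    let data = [0x12u8, 0xca];
    let nybbles: Vec<u8> = (0..4).map(|i| get_nybble(&data, i)).collect();
    // the hex digits of "12ca", i.e. 1, 2, 12, 10
    assert_eq!(nybbles, vec![0x1, 0x2, 0xc, 0xa]);
}
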
|
78 | 78 | |
|
79 | 79 | impl Node { |
|
80 | 80 | /// Retrieve the `i`th half-byte of the binary data. |
|
81 | 81 | /// |
|
82 | 82 | /// This is also the `i`th hexadecimal digit in numeric form, |
|
83 | 83 | /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). |
|
84 | 84 | pub fn get_nybble(&self, i: usize) -> u8 { |
|
85 | 85 | get_nybble(&self.data, i) |
|
86 | 86 | } |
|
87 | 87 | |
|
88 | 88 | /// Length of the data, in nybbles |
|
89 | 89 | pub fn nybbles_len(&self) -> usize { |
|
90 | 90 | // public exposure as an instance method only, so that we can |
|
91 | 91 | // easily support several sizes of hashes if needed in the future. |
|
92 | 92 | NODE_NYBBLES_LENGTH |
|
93 | 93 | } |
|
94 | 94 | |
|
95 | 95 | /// Convert from hexadecimal string representation |
|
96 | 96 | /// |
|
97 | 97 | /// Exact length is required. |
|
98 | 98 | /// |
|
99 | 99 | /// To be used in FFI and I/O only, in order to facilitate future |
|
100 | 100 | /// changes of hash format. |
|
101 | 101 | pub fn from_hex(hex: &str) -> Result<Node, NodeError> { |
|
102 | 102 | Ok(NodeData::from_hex(hex) |
|
103 | 103 | .map_err(|e| NodeError::from((e, hex)))? |
|
104 | 104 | .into()) |
|
105 | 105 | } |
|
106 | 106 | |
|
107 | 107 | /// Convert to hexadecimal string representation |
|
108 | 108 | /// |
|
109 | 109 | /// To be used in FFI and I/O only, in order to facilitate future |
|
110 | 110 | /// changes of hash format. |
|
111 | 111 | pub fn encode_hex(&self) -> String { |
|
112 | 112 | hex::encode(self.data) |
|
113 | 113 | } |
|
114 | 114 | |
|
115 | 115 | /// Provide access to binary data |
|
116 | 116 | /// |
|
117 | 117 | /// This is needed by FFI layers, for instance to return expected |
|
118 | 118 | /// binary values to Python. |
|
119 | 119 | pub fn as_bytes(&self) -> &[u8] { |
|
120 | 120 | &self.data |
|
121 | 121 | } |
|
122 | 122 | } |
|
123 | 123 | |
|
124 | 124 | impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError { |
|
125 | 125 | fn from(err_offender: (FromHexError, T)) -> Self { |
|
126 | 126 | let (err, offender) = err_offender; |
|
127 | 127 | match err { |
|
128 | 128 | FromHexError::InvalidStringLength => { |
|
129 | 129 | NodeError::ExactLengthRequired( |
|
130 | 130 | NODE_NYBBLES_LENGTH, |
|
131 | 131 | offender.as_ref().to_owned(), |
|
132 | 132 | ) |
|
133 | 133 | } |
|
134 | 134 | _ => NodeError::HexError(err, offender.as_ref().to_owned()), |
|
135 | 135 | } |
|
136 | 136 | } |
|
137 | 137 | } |
|
138 | 138 | |
|
139 | 139 | /// The beginning of a binary revision SHA. |
|
140 | 140 | /// |
|
141 | 141 | /// Since it can potentially come from an hexadecimal representation with |
|
142 | 142 | /// odd length, it needs to carry around whether the last 4 bits are relevant |
|
143 | 143 | /// or not. |
|
144 | 144 | #[derive(Debug, PartialEq)] |
|
145 | 145 | pub struct NodePrefix { |
|
146 | 146 | buf: Vec<u8>, |
|
147 | 147 | is_odd: bool, |
|
148 | 148 | } |
|
149 | 149 | |
|
150 | 150 | impl NodePrefix { |
|
151 | 151 | /// Convert from hexadecimal string representation |
|
152 | 152 | /// |
|
153 | 153 | /// Similarly to `hex::decode`, can be used with Unicode string types |
|
154 | 154 | /// (`String`, `&str`) as well as bytes. |
|
155 | 155 | /// |
|
156 | 156 | /// To be used in FFI and I/O only, in order to facilitate future |
|
157 | 157 | /// changes of hash format. |
|
158 | 158 | pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> { |
|
159 | 159 | let hex = hex.as_ref(); |
|
160 | 160 | let len = hex.len(); |
|
161 | 161 | if len > NODE_NYBBLES_LENGTH { |
|
162 | 162 | return Err(NodeError::PrefixTooLong( |
|
163 | 163 | String::from_utf8_lossy(hex).to_owned().to_string(), |
|
164 | 164 | )); |
|
165 | 165 | } |
|
166 | 166 | |
|
167 | 167 | let is_odd = len % 2 == 1; |
|
168 | 168 | let even_part = if is_odd { &hex[..len - 1] } else { hex }; |
|
169 | 169 | let mut buf: Vec<u8> = Vec::from_hex(&even_part) |
|
170 | 170 | .map_err(|e| (e, String::from_utf8_lossy(hex)))?; |
|
171 | 171 | |
|
172 | 172 | if is_odd { |
|
173 | 173 | let latest_char = char::from(hex[len - 1]); |
|
174 | 174 | let latest_nybble = latest_char.to_digit(16).ok_or_else(|| { |
|
175 | 175 | ( |
|
176 | 176 | FromHexError::InvalidHexCharacter { |
|
177 | 177 | c: latest_char, |
|
178 | 178 | index: len - 1, |
|
179 | 179 | }, |
|
180 | 180 | String::from_utf8_lossy(hex), |
|
181 | 181 | ) |
|
182 | 182 | })? as u8; |
|
183 | 183 | buf.push(latest_nybble << 4); |
|
184 | 184 | } |
|
185 | 185 | Ok(NodePrefix { buf, is_odd }) |
|
186 | 186 | } |
|
187 | 187 | |
|
188 | 188 | pub fn borrow(&self) -> NodePrefixRef { |
|
189 | 189 | NodePrefixRef { |
|
190 | 190 | buf: &self.buf, |
|
191 | 191 | is_odd: self.is_odd, |
|
192 | 192 | } |
|
193 | 193 | } |
|
194 | 194 | } |
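
For an odd-length prefix such as "0e1", `from_hex` above decodes the even-length part normally and stores the trailing digit as the high nybble of one extra byte. A standalone sketch of that arithmetic (plain `u8` parsing instead of the `hex` crate), matching the `from_hex("0e1")` test further down:

fn main() {
    let hex = "0e1";
    let is_odd = hex.len() % 2 == 1;
    // decode the even-length part byte by byte
    let even_part = &hex[..hex.len() - (is_odd as usize)];
    let mut buf: Vec<u8> = (0..even_part.len() / 2)
        .map(|i| u8::from_str_radix(&even_part[2 * i..2 * i + 2], 16).unwrap())
        .collect();
    if is_odd {
        // the trailing digit becomes the high nybble of one extra byte
        let last = hex.as_bytes()[hex.len() - 1] as char;
        buf.push((last.to_digit(16).unwrap() as u8) << 4);
    }
    assert!(is_odd);
    assert_eq!(buf, vec![0x0e, 0x10]); // same values as the from_hex("0e1") test below
}
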
|
195 | 195 | |
|
196 | 196 | #[derive(Clone, Debug, PartialEq)] |
|
197 | 197 | pub struct NodePrefixRef<'a> { |
|
198 | 198 | buf: &'a [u8], |
|
199 | 199 | is_odd: bool, |
|
200 | 200 | } |
|
201 | 201 | |
|
202 | 202 | impl<'a> NodePrefixRef<'a> { |
|
203 | 203 | pub fn len(&self) -> usize { |
|
204 | 204 | if self.is_odd { |
|
205 | 205 | self.buf.len() * 2 - 1 |
|
206 | 206 | } else { |
|
207 | 207 | self.buf.len() * 2 |
|
208 | 208 | } |
|
209 | 209 | } |
|
210 | 210 | |
|
211 | pub fn is_empty(&self) -> bool { | |
|
212 | self.len() == 0 | |
|
213 | } | |
|
214 | ||
|
211 | 215 | pub fn is_prefix_of(&self, node: &Node) -> bool { |
|
212 | 216 | if self.is_odd { |
|
213 | 217 | let buf = self.buf; |
|
214 | 218 | let last_pos = buf.len() - 1; |
|
215 | 219 | node.data.starts_with(buf.split_at(last_pos).0) |
|
216 | 220 | && node.data[last_pos] >> 4 == buf[last_pos] >> 4 |
|
217 | 221 | } else { |
|
218 | 222 | node.data.starts_with(self.buf) |
|
219 | 223 | } |
|
220 | 224 | } |
|
221 | 225 | |
|
222 | 226 | /// Retrieve the `i`th half-byte from the prefix. |
|
223 | 227 | /// |
|
224 | 228 | /// This is also the `i`th hexadecimal digit in numeric form, |
|
225 | 229 | /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). |
|
226 | 230 | pub fn get_nybble(&self, i: usize) -> u8 { |
|
227 | 231 | assert!(i < self.len()); |
|
228 | 232 | get_nybble(self.buf, i) |
|
229 | 233 | } |
|
230 | 234 | |
|
231 | 235 | /// Return the index first nybble that's different from `node` |
|
232 | 236 | /// |
|
233 | 237 | /// If the return value is `None` that means that `self` is |
|
234 | 238 | /// a prefix of `node`, but the current method is a bit slower |
|
235 | 239 | /// than `is_prefix_of`. |
|
236 | 240 | /// |
|
237 | 241 | /// Returned index is as in `get_nybble`, i.e., starting at 0. |
|
238 | 242 | pub fn first_different_nybble(&self, node: &Node) -> Option<usize> { |
|
239 | 243 | let buf = self.buf; |
|
240 | 244 | let until = if self.is_odd { |
|
241 | 245 | buf.len() - 1 |
|
242 | 246 | } else { |
|
243 | 247 | buf.len() |
|
244 | 248 | }; |
|
245 | for i in 0..until { | |
|
246 | if buf[i] != node.data[i] { | |

247 | if buf[i] & 0xf0 == node.data[i] & 0xf0 { | |

248 | return Some(2 * i + 1); | |

249 | for (i, item) in buf.iter().enumerate().take(until) { | |

250 | if *item != node.data[i] { | |

251 | return if *item & 0xf0 == node.data[i] & 0xf0 { | |

252 | Some(2 * i + 1) | |

249 | 253 | } else { |

250 | return Some(2 * i); | |

251 | } | |

254 | Some(2 * i) | |

255 | }; | |
|
252 | 256 | } |
|
253 | 257 | } |
|
254 | 258 | if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 { |
|
255 | 259 | Some(until * 2) |
|
256 | 260 | } else { |
|
257 | 261 | None |
|
258 | 262 | } |
|
259 | 263 | } |
|
260 | 264 | } |
|
261 | 265 | |
|
262 | 266 | /// A shortcut for full `Node` references |
|
263 | 267 | impl<'a> From<&'a Node> for NodePrefixRef<'a> { |
|
264 | 268 | fn from(node: &'a Node) -> Self { |
|
265 | 269 | NodePrefixRef { |
|
266 | 270 | buf: &node.data, |
|
267 | 271 | is_odd: false, |
|
268 | 272 | } |
|
269 | 273 | } |
|
270 | 274 | } |
|
271 | 275 | |
|
272 | 276 | #[cfg(test)] |
|
273 | 277 | mod tests { |
|
274 | 278 | use super::*; |
|
275 | 279 | |
|
276 | 280 | fn sample_node() -> Node { |
|
277 | 281 | let mut data = [0; NODE_BYTES_LENGTH]; |
|
278 | 282 | data.copy_from_slice(&[ |
|
279 | 283 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, |
|
280 | 284 | 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, |
|
281 | 285 | ]); |
|
282 | 286 | data.into() |
|
283 | 287 | } |
|
284 | 288 | |
|
285 | 289 | /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` |
|
286 | 290 | /// |
|
287 | 291 | /// The padding is made with zeros |
|
288 | 292 | pub fn hex_pad_right(hex: &str) -> String { |
|
289 | 293 | let mut res = hex.to_string(); |
|
290 | 294 | while res.len() < NODE_NYBBLES_LENGTH { |
|
291 | 295 | res.push('0'); |
|
292 | 296 | } |
|
293 | 297 | res |
|
294 | 298 | } |
|
295 | 299 | |
|
296 | 300 | fn sample_node_hex() -> String { |
|
297 | 301 | hex_pad_right("0123456789abcdeffedcba9876543210deadbeef") |
|
298 | 302 | } |
|
299 | 303 | |
|
300 | 304 | #[test] |
|
301 | 305 | fn test_node_from_hex() { |
|
302 | 306 | assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node())); |
|
303 | 307 | |
|
304 | 308 | let mut short = hex_pad_right("0123"); |
|
305 | 309 | short.pop(); |
|
306 | 310 | short.pop(); |
|
307 | 311 | assert_eq!( |
|
308 | 312 | Node::from_hex(&short), |
|
309 | 313 | Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)), |
|
310 | 314 | ); |
|
311 | 315 | |
|
312 | 316 | let not_hex = hex_pad_right("012... oops"); |
|
313 | 317 | assert_eq!( |
|
314 | 318 | Node::from_hex(¬_hex), |
|
315 | 319 | Err(NodeError::HexError( |
|
316 | 320 | FromHexError::InvalidHexCharacter { c: '.', index: 3 }, |
|
317 | 321 | not_hex, |
|
318 | 322 | )), |
|
319 | 323 | ); |
|
320 | 324 | } |
|
321 | 325 | |
|
322 | 326 | #[test] |
|
323 | 327 | fn test_node_encode_hex() { |
|
324 | 328 | assert_eq!(sample_node().encode_hex(), sample_node_hex()); |
|
325 | 329 | } |
|
326 | 330 | |
|
327 | 331 | #[test] |
|
328 | 332 | fn test_prefix_from_hex() -> Result<(), NodeError> { |
|
329 | 333 | assert_eq!( |
|
330 | 334 | NodePrefix::from_hex("0e1")?, |
|
331 | 335 | NodePrefix { |
|
332 | 336 | buf: vec![14, 16], |
|
333 | 337 | is_odd: true |
|
334 | 338 | } |
|
335 | 339 | ); |
|
336 | 340 | assert_eq!( |
|
337 | 341 | NodePrefix::from_hex("0e1a")?, |
|
338 | 342 | NodePrefix { |
|
339 | 343 | buf: vec![14, 26], |
|
340 | 344 | is_odd: false |
|
341 | 345 | } |
|
342 | 346 | ); |
|
343 | 347 | |
|
344 | 348 | // checking limit case |
|
345 | 349 | let node_as_vec = sample_node().data.iter().cloned().collect(); |
|
346 | 350 | assert_eq!( |
|
347 | 351 | NodePrefix::from_hex(sample_node_hex())?, |
|
348 | 352 | NodePrefix { |
|
349 | 353 | buf: node_as_vec, |
|
350 | 354 | is_odd: false |
|
351 | 355 | } |
|
352 | 356 | ); |
|
353 | 357 | |
|
354 | 358 | Ok(()) |
|
355 | 359 | } |
|
356 | 360 | |
|
357 | 361 | #[test] |
|
358 | 362 | fn test_prefix_from_hex_errors() { |
|
359 | 363 | assert_eq!( |
|
360 | 364 | NodePrefix::from_hex("testgr"), |
|
361 | 365 | Err(NodeError::HexError( |
|
362 | 366 | FromHexError::InvalidHexCharacter { c: 't', index: 0 }, |
|
363 | 367 | "testgr".to_string() |
|
364 | 368 | )) |
|
365 | 369 | ); |
|
366 | 370 | let mut long = NULL_NODE.encode_hex(); |
|
367 | 371 | long.push('c'); |
|
368 | 372 | match NodePrefix::from_hex(&long) |
|
369 | 373 | .expect_err("should be refused as too long") |
|
370 | 374 | { |
|
371 | 375 | NodeError::PrefixTooLong(s) => assert_eq!(s, long), |
|
372 | 376 | err => panic!(format!("Should have been TooLong, got {:?}", err)), |
|
373 | 377 | } |
|
374 | 378 | } |
|
375 | 379 | |
|
376 | 380 | #[test] |
|
377 | 381 | fn test_is_prefix_of() -> Result<(), NodeError> { |
|
378 | 382 | let mut node_data = [0; NODE_BYTES_LENGTH]; |
|
379 | 383 | node_data[0] = 0x12; |
|
380 | 384 | node_data[1] = 0xca; |
|
381 | 385 | let node = Node::from(node_data); |
|
382 | 386 | assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node)); |
|
383 | 387 | assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node)); |
|
384 | 388 | assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node)); |
|
385 | 389 | assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node)); |
|
386 | 390 | Ok(()) |
|
387 | 391 | } |
|
388 | 392 | |
|
389 | 393 | #[test] |
|
390 | 394 | fn test_get_nybble() -> Result<(), NodeError> { |
|
391 | 395 | let prefix = NodePrefix::from_hex("dead6789cafe")?; |
|
392 | 396 | assert_eq!(prefix.borrow().get_nybble(0), 13); |
|
393 | 397 | assert_eq!(prefix.borrow().get_nybble(7), 9); |
|
394 | 398 | Ok(()) |
|
395 | 399 | } |
|
396 | 400 | |
|
397 | 401 | #[test] |
|
398 | 402 | fn test_first_different_nybble_even_prefix() { |
|
399 | 403 | let prefix = NodePrefix::from_hex("12ca").unwrap(); |
|
400 | 404 | let prefref = prefix.borrow(); |
|
401 | 405 | let mut node = Node::from([0; NODE_BYTES_LENGTH]); |
|
402 | 406 | assert_eq!(prefref.first_different_nybble(&node), Some(0)); |
|
403 | 407 | node.data[0] = 0x13; |
|
404 | 408 | assert_eq!(prefref.first_different_nybble(&node), Some(1)); |
|
405 | 409 | node.data[0] = 0x12; |
|
406 | 410 | assert_eq!(prefref.first_different_nybble(&node), Some(2)); |
|
407 | 411 | node.data[1] = 0xca; |
|
408 | 412 | // now it is a prefix |
|
409 | 413 | assert_eq!(prefref.first_different_nybble(&node), None); |
|
410 | 414 | } |
|
411 | 415 | |
|
412 | 416 | #[test] |
|
413 | 417 | fn test_first_different_nybble_odd_prefix() { |
|
414 | 418 | let prefix = NodePrefix::from_hex("12c").unwrap(); |
|
415 | 419 | let prefref = prefix.borrow(); |
|
416 | 420 | let mut node = Node::from([0; NODE_BYTES_LENGTH]); |
|
417 | 421 | assert_eq!(prefref.first_different_nybble(&node), Some(0)); |
|
418 | 422 | node.data[0] = 0x13; |
|
419 | 423 | assert_eq!(prefref.first_different_nybble(&node), Some(1)); |
|
420 | 424 | node.data[0] = 0x12; |
|
421 | 425 | assert_eq!(prefref.first_different_nybble(&node), Some(2)); |
|
422 | 426 | node.data[1] = 0xca; |
|
423 | 427 | // now it is a prefix |
|
424 | 428 | assert_eq!(prefref.first_different_nybble(&node), None); |
|
425 | 429 | } |
|
426 | 430 | } |
|
427 | 431 | |
|
428 | 432 | #[cfg(test)] |
|
429 | 433 | pub use tests::hex_pad_right; |
@@ -1,1122 +1,1118 b'' | |||
|
1 | 1 | // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> |
|
2 | 2 | // and Mercurial contributors |
|
3 | 3 | // |
|
4 | 4 | // This software may be used and distributed according to the terms of the |
|
5 | 5 | // GNU General Public License version 2 or any later version. |
|
6 | 6 | //! Indexing facilities for fast retrieval of `Revision` from `Node` |
|
7 | 7 | //! |
|
8 | 8 | //! This provides a variation on the 16-ary radix tree that is |
|
9 | 9 | //! provided as "nodetree" in revlog.c, ready for append-only persistence |
|
10 | 10 | //! on disk. |
|
11 | 11 | //! |
|
12 | 12 | //! Following existing implicit conventions, the "nodemap" terminology |
|
13 | 13 | //! is used in a more abstract context. |
|
14 | 14 | |
|
15 | 15 | use super::{ |
|
16 | 16 | node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision, |
|
17 | 17 | RevlogIndex, NULL_REVISION, |
|
18 | 18 | }; |
|
19 | 19 | |
|
20 | 20 | use std::cmp::max; |
|
21 | 21 | use std::fmt; |
|
22 | 22 | use std::mem; |
|
23 | 23 | use std::ops::Deref; |
|
24 | 24 | use std::ops::Index; |
|
25 | 25 | use std::slice; |
|
26 | 26 | |
|
27 | 27 | #[derive(Debug, PartialEq)] |
|
28 | 28 | pub enum NodeMapError { |
|
29 | 29 | MultipleResults, |
|
30 | 30 | InvalidNodePrefix(NodeError), |
|
31 | 31 | /// A `Revision` stored in the nodemap could not be found in the index |
|
32 | 32 | RevisionNotInIndex(Revision), |
|
33 | 33 | } |
|
34 | 34 | |
|
35 | 35 | impl From<NodeError> for NodeMapError { |
|
36 | 36 | fn from(err: NodeError) -> Self { |
|
37 | 37 | NodeMapError::InvalidNodePrefix(err) |
|
38 | 38 | } |
|
39 | 39 | } |
|
40 | 40 | |
|
41 | 41 | /// Mapping system from Mercurial nodes to revision numbers. |
|
42 | 42 | /// |
|
43 | 43 | /// ## `RevlogIndex` and `NodeMap` |
|
44 | 44 | /// |
|
45 | 45 | /// One way to think about their relationship is that |
|
46 | 46 | /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information |
|
47 | 47 | /// carried by a [`RevlogIndex`]. |
|
48 | 48 | /// |
|
49 | 49 | /// Many of the methods in this trait take a `RevlogIndex` argument |
|
50 | 50 | /// which is used for validation of their results. This index must naturally |
|
51 | 51 | /// be the one the `NodeMap` is about, and it must be consistent. |
|
52 | 52 | /// |
|
53 | 53 | /// Notably, the `NodeMap` must not store |
|
54 | 54 | /// information about more `Revision` values than there are in the index. |
|
55 | 55 | /// In these methods, if an encountered `Revision` is not in the index, a |
|
56 | 56 | /// [`RevisionNotInIndex`] error is returned. |
|
57 | 57 | /// |
|
58 | 58 | /// In insert operations, the rule is thus that the `NodeMap` must always |

59 | 59 | /// be updated after the `RevlogIndex`: the index must |

60 | 60 | /// be updated first, and the `NodeMap` second. |
|
61 | 61 | /// |
|
62 | 62 | /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex |
|
63 | 63 | /// [`RevlogIndex`]: ../trait.RevlogIndex.html |
|
64 | 64 | pub trait NodeMap { |
|
65 | 65 | /// Find the unique `Revision` having the given `Node` |
|
66 | 66 | /// |
|
67 | 67 | /// If no Revision matches the given `Node`, `Ok(None)` is returned. |
|
68 | 68 | fn find_node( |
|
69 | 69 | &self, |
|
70 | 70 | index: &impl RevlogIndex, |
|
71 | 71 | node: &Node, |
|
72 | 72 | ) -> Result<Option<Revision>, NodeMapError> { |
|
73 | 73 | self.find_bin(index, node.into()) |
|
74 | 74 | } |
|
75 | 75 | |
|
76 | 76 | /// Find the unique Revision whose `Node` starts with a given binary prefix |
|
77 | 77 | /// |
|
78 | 78 | /// If no Revision matches the given prefix, `Ok(None)` is returned. |
|
79 | 79 | /// |
|
80 | 80 | /// If several Revisions match the given prefix, a [`MultipleResults`] |
|
81 | 81 | /// error is returned. |
|
82 | 82 | fn find_bin<'a>( |
|
83 | 83 | &self, |
|
84 | 84 | idx: &impl RevlogIndex, |
|
85 | 85 | prefix: NodePrefixRef<'a>, |
|
86 | 86 | ) -> Result<Option<Revision>, NodeMapError>; |
|
87 | 87 | |
|
88 | 88 | /// Find the unique Revision whose `Node` hexadecimal string representation |
|
89 | 89 | /// starts with a given prefix |
|
90 | 90 | /// |
|
91 | 91 | /// If no Revision matches the given prefix, `Ok(None)` is returned. |
|
92 | 92 | /// |
|
93 | 93 | /// If several Revisions match the given prefix, a [`MultipleResults`] |
|
94 | 94 | /// error is returned. |
|
95 | 95 | fn find_hex( |
|
96 | 96 | &self, |
|
97 | 97 | idx: &impl RevlogIndex, |
|
98 | 98 | prefix: &str, |
|
99 | 99 | ) -> Result<Option<Revision>, NodeMapError> { |
|
100 | 100 | self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow()) |
|
101 | 101 | } |
|
102 | 102 | |
|
103 | 103 | /// Give the size of the shortest node prefix that determines |
|
104 | 104 | /// the revision uniquely. |
|
105 | 105 | /// |
|
106 | 106 | /// From a binary node prefix, if it is matched in the node map, this |
|
107 | 107 | /// returns the number of hexadecimal digits that would have sufficed |
|
108 | 108 | /// to find the revision uniquely. |
|
109 | 109 | /// |
|
110 | 110 | /// Returns `None` if no `Revision` could be found for the prefix. |
|
111 | 111 | /// |
|
112 | 112 | /// If several Revisions match the given prefix, a [`MultipleResults`] |
|
113 | 113 | /// error is returned. |
|
114 | 114 | fn unique_prefix_len_bin<'a>( |
|
115 | 115 | &self, |
|
116 | 116 | idx: &impl RevlogIndex, |
|
117 | 117 | node_prefix: NodePrefixRef<'a>, |
|
118 | 118 | ) -> Result<Option<usize>, NodeMapError>; |
|
119 | 119 | |
|
120 | 120 | /// Same as `unique_prefix_len_bin`, with the hexadecimal representation |
|
121 | 121 | /// of the prefix as input. |
|
122 | 122 | fn unique_prefix_len_hex( |
|
123 | 123 | &self, |
|
124 | 124 | idx: &impl RevlogIndex, |
|
125 | 125 | prefix: &str, |
|
126 | 126 | ) -> Result<Option<usize>, NodeMapError> { |
|
127 | 127 | self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow()) |
|
128 | 128 | } |
|
129 | 129 | |
|
130 | 130 | /// Same as `unique_prefix_len_bin`, with a full `Node` as input |
|
131 | 131 | fn unique_prefix_len_node( |
|
132 | 132 | &self, |
|
133 | 133 | idx: &impl RevlogIndex, |
|
134 | 134 | node: &Node, |
|
135 | 135 | ) -> Result<Option<usize>, NodeMapError> { |
|
136 | 136 | self.unique_prefix_len_bin(idx, node.into()) |
|
137 | 137 | } |
|
138 | 138 | } |
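
The trait's contract is easiest to see on a naive model: scan every node of the index, return `Ok(None)` when nothing matches the prefix, the unique revision when exactly one does, and an ambiguity error otherwise. A standalone sketch with nodes modelled as hex strings; this is what the `NodeTree` below answers without a full scan, not the crate's implementation:

#[derive(Debug, PartialEq)]
enum LookupError {
    MultipleResults,
}

/// Scan the whole "index": the revision number is the position in the slice.
fn find_prefix(index: &[&str], prefix: &str) -> Result<Option<usize>, LookupError> {
    let mut found = None;
    for (rev, node_hex) in index.iter().enumerate() {
        if node_hex.starts_with(prefix) {
            if found.is_some() {
                return Err(LookupError::MultipleResults);
            }
            found = Some(rev);
        }
    }
    Ok(found)
}

fn main() {
    let index = ["1234deadcafe", "1a34000aabb0"];
    assert_eq!(find_prefix(&index, "12"), Ok(Some(0)));
    assert_eq!(find_prefix(&index, "ff"), Ok(None));
    assert_eq!(find_prefix(&index, "1"), Err(LookupError::MultipleResults));
}
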
|
139 | 139 | |
|
140 | 140 | pub trait MutableNodeMap: NodeMap { |
|
141 | 141 | fn insert<I: RevlogIndex>( |
|
142 | 142 | &mut self, |
|
143 | 143 | index: &I, |
|
144 | 144 | node: &Node, |
|
145 | 145 | rev: Revision, |
|
146 | 146 | ) -> Result<(), NodeMapError>; |
|
147 | 147 | } |
|
148 | 148 | |
|
149 | 149 | /// Low level NodeTree [`Blocks`] elements |
|
150 | 150 | /// |
|
151 | 151 | /// These are exactly as for instance on persistent storage. |
|
152 | 152 | type RawElement = i32; |
|
153 | 153 | |
|
154 | 154 | /// High level representation of values in NodeTree |
|
155 | 155 | /// [`Blocks`](struct.Block.html) |
|
156 | 156 | /// |
|
157 | 157 | /// This is the high level representation that most algorithms should |
|
158 | 158 | /// use. |
|
159 | 159 | #[derive(Clone, Debug, Eq, PartialEq)] |
|
160 | 160 | enum Element { |
|
161 | 161 | Rev(Revision), |
|
162 | 162 | Block(usize), |
|
163 | 163 | None, |
|
164 | 164 | } |
|
165 | 165 | |
|
166 | 166 | impl From<RawElement> for Element { |
|
167 | 167 | /// Conversion from low level representation, after endianness conversion. |
|
168 | 168 | /// |
|
169 | 169 | /// See [`Block`](struct.Block.html) for explanation about the encoding. |
|
170 | 170 | fn from(raw: RawElement) -> Element { |
|
171 | 171 | if raw >= 0 { |
|
172 | 172 | Element::Block(raw as usize) |
|
173 | 173 | } else if raw == -1 { |
|
174 | 174 | Element::None |
|
175 | 175 | } else { |
|
176 | 176 | Element::Rev(-raw - 2) |
|
177 | 177 | } |
|
178 | 178 | } |
|
179 | 179 | } |
|
180 | 180 | |
|
181 | 181 | impl From<Element> for RawElement { |
|
182 | 182 | fn from(element: Element) -> RawElement { |
|
183 | 183 | match element { |
|
184 | 184 | Element::None => 0, |
|
185 | 185 | Element::Block(i) => i as RawElement, |
|
186 | 186 | Element::Rev(rev) => -rev - 2, |
|
187 | 187 | } |
|
188 | 188 | } |
|
189 | 189 | } |
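
A standalone check of the signed encoding just defined: raw values >= 0 are block indices, -1 marks an absent edge, and a revision `r` is stored as `-r - 2` so that revision 0 can coexist with block 0:

/// Decode a raw i32 slot the way `From<RawElement> for Element` above does.
fn decode(raw: i32) -> String {
    if raw >= 0 {
        format!("Block({})", raw)
    } else if raw == -1 {
        "None".to_string()
    } else {
        format!("Rev({})", -raw - 2)
    }
}

fn main() {
    assert_eq!(decode(0), "Block(0)");
    assert_eq!(decode(-1), "None");
    assert_eq!(decode(-2), "Rev(0)"); // a revision r is stored as -r - 2
    assert_eq!(decode(-7), "Rev(5)");
}
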
|
190 | 190 | |
|
191 | 191 | /// A logical block of the `NodeTree`, packed with a fixed size. |
|
192 | 192 | /// |
|
193 | 193 | /// These are always used in container types implementing `Index<Block>`, |
|
194 | 194 | /// such as `&Block` |
|
195 | 195 | /// |
|
196 | 196 | /// As an array of integers, its ith element encodes the ith potential edge |

197 | 197 | /// from the block, i.e. the one representing the ith hexadecimal digit |

198 | 198 | /// (nybble) `i`. It is either: |
|
199 | 199 | /// |
|
200 | 200 | /// - absent (value -1) |
|
201 | 201 | /// - another `Block` in the same indexable container (value ≥ 0) |
|
202 | 202 | /// - a `Revision` leaf (value ≤ -2) |
|
203 | 203 | /// |
|
204 | 204 | /// Endianness has to be fixed for consistency on shared storage across |
|
205 | 205 | /// different architectures. |
|
206 | 206 | /// |
|
207 | 207 | /// A key difference with the C `nodetree` is that we need to be |
|
208 | 208 | /// able to represent the [`Block`] at index 0, hence -1 is the empty marker |
|
209 | 209 | /// rather than 0 and the `Revision` range upper limit of -2 instead of -1. |
|
210 | 210 | /// |
|
211 | 211 | /// Another related difference is that `NULL_REVISION` (-1) is not |
|
212 | 212 | /// represented at all, because we want an immutable empty nodetree |
|
213 | 213 | /// to be valid. |
|
214 | 214 | |
|
215 | 215 | #[derive(Copy, Clone)] |
|
216 | 216 | pub struct Block([u8; BLOCK_SIZE]); |
|
217 | 217 | |
|
218 | 218 | /// Not derivable for arrays of length >32 until const generics are stable |
|
219 | 219 | impl PartialEq for Block { |
|
220 | 220 | fn eq(&self, other: &Self) -> bool { |
|
221 | &self.0[..] == &other.0[..] | |

221 | self.0[..] == other.0[..] | |
|
222 | 222 | } |
|
223 | 223 | } |
|
224 | 224 | |
|
225 | 225 | pub const BLOCK_SIZE: usize = 64; |
|
226 | 226 | |
|
227 | 227 | impl Block { |
|
228 | 228 | fn new() -> Self { |
|
229 | 229 | // -1 in 2's complement to create an absent node |
|
230 | 230 | let byte: u8 = 255; |
|
231 | 231 | Block([byte; BLOCK_SIZE]) |
|
232 | 232 | } |
|
233 | 233 | |
|
234 | 234 | fn get(&self, nybble: u8) -> Element { |
|
235 | 235 | let index = nybble as usize * mem::size_of::<RawElement>(); |
|
236 | 236 | Element::from(RawElement::from_be_bytes([ |
|
237 | 237 | self.0[index], |
|
238 | 238 | self.0[index + 1], |
|
239 | 239 | self.0[index + 2], |
|
240 | 240 | self.0[index + 3], |
|
241 | 241 | ])) |
|
242 | 242 | } |
|
243 | 243 | |
|
244 | 244 | fn set(&mut self, nybble: u8, element: Element) { |
|
245 | 245 | let values = RawElement::to_be_bytes(element.into()); |
|
246 | 246 | let index = nybble as usize * mem::size_of::<RawElement>(); |
|
247 | 247 | self.0[index] = values[0]; |
|
248 | 248 | self.0[index + 1] = values[1]; |
|
249 | 249 | self.0[index + 2] = values[2]; |
|
250 | 250 | self.0[index + 3] = values[3]; |
|
251 | 251 | } |
|
252 | 252 | } |
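
A standalone sketch of the byte layout that `get` and `set` rely on: 16 slots of 4 big-endian bytes, all initialised to -1 by `Block::new`:

fn main() {
    // Block::new(): 64 bytes of 0xff, so every 4-byte slot reads back as -1 ("absent")
    let mut raw = [0xffu8; 64];
    let empty = i32::from_be_bytes([raw[0], raw[1], raw[2], raw[3]]);
    assert_eq!(empty, -1);

    // set(2, Element::Rev(0)): revision 0 encodes to -2, written big-endian
    // at byte offset 2 * size_of::<i32>() == 8
    raw[8..12].copy_from_slice(&(-2i32).to_be_bytes());
    let val = i32::from_be_bytes([raw[8], raw[9], raw[10], raw[11]]);
    assert_eq!(val, -2); // get(2) would decode this as Element::Rev(0)
}
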
|
253 | 253 | |
|
254 | 254 | impl fmt::Debug for Block { |
|
255 | 255 | /// sparse representation for testing and debugging purposes |
|
256 | 256 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
257 | 257 | f.debug_map() |
|
258 | 258 | .entries((0..16).filter_map(|i| match self.get(i) { |
|
259 | 259 | Element::None => None, |
|
260 | 260 | element => Some((i, element)), |
|
261 | 261 | })) |
|
262 | 262 | .finish() |
|
263 | 263 | } |
|
264 | 264 | } |
|
265 | 265 | |
|
266 | 266 | /// A mutable 16-radix tree with the root block logically at the end |
|
267 | 267 | /// |
|
268 | 268 | /// Because of the append only nature of our node trees, we need to |
|
269 | 269 | /// keep the original untouched and store new blocks separately. |
|
270 | 270 | /// |
|
271 | 271 | /// The mutable root `Block` is kept apart so that we don't have to rebump |
|
272 | 272 | /// it on each insertion. |
|
273 | 273 | pub struct NodeTree { |
|
274 | 274 | readonly: Box<dyn Deref<Target = [Block]> + Send>, |
|
275 | 275 | growable: Vec<Block>, |
|
276 | 276 | root: Block, |
|
277 | 277 | masked_inner_blocks: usize, |
|
278 | 278 | } |
|
279 | 279 | |
|
280 | 280 | impl Index<usize> for NodeTree { |
|
281 | 281 | type Output = Block; |
|
282 | 282 | |
|
283 | 283 | fn index(&self, i: usize) -> &Block { |
|
284 | 284 | let ro_len = self.readonly.len(); |
|
285 | 285 | if i < ro_len { |
|
286 | 286 | &self.readonly[i] |
|
287 | 287 | } else if i == ro_len + self.growable.len() { |
|
288 | 288 | &self.root |
|
289 | 289 | } else { |
|
290 | 290 | &self.growable[i - ro_len] |
|
291 | 291 | } |
|
292 | 292 | } |
|
293 | 293 | } |
|
294 | 294 | |
|
295 | 295 | /// Return `None` unless the `Node` for `rev` has given prefix in `index`. |
|
296 | 296 | fn has_prefix_or_none( |
|
297 | 297 | idx: &impl RevlogIndex, |
|
298 | 298 | prefix: NodePrefixRef, |
|
299 | 299 | rev: Revision, |
|
300 | 300 | ) -> Result<Option<Revision>, NodeMapError> { |
|
301 | 301 | idx.node(rev) |
|
302 | 302 | .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev)) |
|
303 | 303 | .map(|node| { |
|
304 | 304 | if prefix.is_prefix_of(node) { |
|
305 | 305 | Some(rev) |
|
306 | 306 | } else { |
|
307 | 307 | None |
|
308 | 308 | } |
|
309 | 309 | }) |
|
310 | 310 | } |
|
311 | 311 | |
|
312 | 312 | /// validate that the candidate's node starts indeed with given prefix, |
|
313 | 313 | /// and treat ambiguities related to `NULL_REVISION`. |
|
314 | 314 | /// |
|
315 | 315 | /// From the data in the NodeTree, one can only conclude that some |
|
316 | 316 | /// revision is the only one for a *subprefix* of the one being looked up. |
|
317 | 317 | fn validate_candidate( |
|
318 | 318 | idx: &impl RevlogIndex, |
|
319 | 319 | prefix: NodePrefixRef, |
|
320 | 320 | candidate: (Option<Revision>, usize), |
|
321 | 321 | ) -> Result<(Option<Revision>, usize), NodeMapError> { |
|
322 | 322 | let (rev, steps) = candidate; |
|
323 | 323 | if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) { |
|
324 | 324 | rev.map_or(Ok((None, steps)), |r| { |
|
325 | 325 | has_prefix_or_none(idx, prefix, r) |
|
326 | 326 | .map(|opt| (opt, max(steps, nz_nybble + 1))) |
|
327 | 327 | }) |
|
328 | 328 | } else { |
|
329 | 329 | // the prefix is only made of zeros; NULL_REVISION always matches it |
|
330 | 330 | // and any other *valid* result is an ambiguity |
|
331 | 331 | match rev { |
|
332 | 332 | None => Ok((Some(NULL_REVISION), steps + 1)), |
|
333 | 333 | Some(r) => match has_prefix_or_none(idx, prefix, r)? { |
|
334 | 334 | None => Ok((Some(NULL_REVISION), steps + 1)), |
|
335 | 335 | _ => Err(NodeMapError::MultipleResults), |
|
336 | 336 | }, |
|
337 | 337 | } |
|
338 | 338 | } |
|
339 | 339 | } |
|
340 | 340 | |
|
341 | 341 | impl NodeTree { |
|
342 | 342 | /// Initiate a NodeTree from an immutable slice-like of `Block` |
|
343 | 343 | /// |
|
344 | 344 | /// We keep `readonly` and clone its root block if it isn't empty. |
|
345 | 345 | fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self { |
|
346 | let root = readonly | |
|
347 | .last() | |
|
348 | .map(|b| b.clone()) | |
|
349 | .unwrap_or_else(|| Block::new()); | |
|
346 | let root = readonly.last().cloned().unwrap_or_else(Block::new); | |
|
350 | 347 | NodeTree { |
|
351 | readonly: readonly, | |

348 | readonly, | |
|
352 | 349 | growable: Vec::new(), |
|
353 | root: root, | |

350 | root, | |
|
354 | 351 | masked_inner_blocks: 0, |
|
355 | 352 | } |
|
356 | 353 | } |
|
357 | 354 | |
|
358 | 355 | /// Create from an opaque bunch of bytes |
|
359 | 356 | /// |
|
360 | 357 | /// The created `NodeTreeBytes` from `buffer`, |
|
361 | 358 | /// of which exactly `amount` bytes are used. |
|
362 | 359 | /// |
|
363 | 360 | /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects. |
|
364 | 361 | /// - `offset` allows for the final file format to include fixed data |
|
365 | 362 | /// (generation number, behavioural flags) |
|
366 | 363 | /// - `amount` is expressed in bytes, and is not automatically derived from |
|
367 | 364 | /// `bytes`, so that a caller that manages them atomically can perform |
|
368 | 365 | /// temporary disk serializations and still rollback easily if needed. |
|
369 | 366 | /// First use-case for this would be to support Mercurial shell hooks. |
|
370 | 367 | /// |
|
371 | 368 | /// panics if `buffer` is smaller than `amount` |
|
372 | 369 | pub fn load_bytes( |
|
373 | 370 | bytes: Box<dyn Deref<Target = [u8]> + Send>, |
|
374 | 371 | amount: usize, |
|
375 | 372 | ) -> Self { |
|
376 | 373 | NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount))) |
|
377 | 374 | } |
|
378 | 375 | |
|
379 | 376 | /// Retrieve added `Block` and the original immutable data |
|
380 | 377 | pub fn into_readonly_and_added( |
|
381 | 378 | self, |
|
382 | 379 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) { |
|
383 | 380 | let mut vec = self.growable; |
|
384 | 381 | let readonly = self.readonly; |
|
385 | 382 | if readonly.last() != Some(&self.root) { |
|
386 | 383 | vec.push(self.root); |
|
387 | 384 | } |
|
388 | 385 | (readonly, vec) |
|
389 | 386 | } |
|
390 | 387 | |
|
391 | 388 | /// Retrieve added `Blocks` as bytes, ready to be written to persistent |
|
392 | 389 | /// storage |
|
393 | 390 | pub fn into_readonly_and_added_bytes( |
|
394 | 391 | self, |
|
395 | 392 | ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) { |
|
396 | 393 | let (readonly, vec) = self.into_readonly_and_added(); |
|
397 | 394 | // Prevent running `v`'s destructor so we are in complete control |
|
398 | 395 | // of the allocation. |
|
399 | 396 | let vec = mem::ManuallyDrop::new(vec); |
|
400 | 397 | |
|
401 | 398 | // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous |
|
402 | 399 | // bytes, so this is perfectly safe. |
|
403 | 400 | let bytes = unsafe { |
|
404 | 401 | // Assert that `Block` hasn't been changed and has no padding |
|
405 | 402 | let _: [u8; 4 * BLOCK_SIZE] = |
|
406 | 403 | std::mem::transmute([Block::new(); 4]); |
|
407 | 404 | |
|
408 | 405 | // /!\ Any use of `vec` after this is use-after-free. |
|
409 | 406 | // TODO: use `into_raw_parts` once stabilized |
|
410 | 407 | Vec::from_raw_parts( |
|
411 | 408 | vec.as_ptr() as *mut u8, |
|
412 | 409 | vec.len() * BLOCK_SIZE, |
|
413 | 410 | vec.capacity() * BLOCK_SIZE, |
|
414 | 411 | ) |
|
415 | 412 | }; |
|
416 | 413 | (readonly, bytes) |
|
417 | 414 | } |
|
418 | 415 | |
|
419 | 416 | /// Total number of blocks |
|
420 | 417 | fn len(&self) -> usize { |
|
421 | 418 | self.readonly.len() + self.growable.len() + 1 |
|
422 | 419 | } |
|
423 | 420 | |
|
424 | 421 | /// Implemented for completeness |
|
425 | 422 | /// |
|
426 | 423 | /// A `NodeTree` always has at least the mutable root block. |
|
427 | 424 | #[allow(dead_code)] |
|
428 | 425 | fn is_empty(&self) -> bool { |
|
429 | 426 | false |
|
430 | 427 | } |
|
431 | 428 | |
|
432 | 429 | /// Main working method for `NodeTree` searches |
|
433 | 430 | /// |
|
434 | 431 | /// The first returned value is the result of analysing `NodeTree` data |
|
435 | 432 | /// *alone*: whereas `None` guarantees that the given prefix is absent |
|
436 | 433 | /// from the `NodeTree` data (but still could match `NULL_NODE`), with |
|
437 | 434 | /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision` |
|
438 | 435 | /// that could match the prefix. Actually, all that can be inferred from |
|
439 | 436 | /// the `NodeTree` data is that `rev` is the revision with the longest |
|
440 | 437 | /// common node prefix with the given prefix. |
|
441 | 438 | /// |
|
442 | 439 | /// The second returned value is the size of the smallest subprefix |
|
443 | 440 | /// of `prefix` that would give the same result, i.e. not the |
|
444 | 441 | /// `MultipleResults` error variant (again, using only the data of the |
|
445 | 442 | /// `NodeTree`). |
|
446 | 443 | fn lookup( |
|
447 | 444 | &self, |
|
448 | 445 | prefix: NodePrefixRef, |
|
449 | 446 | ) -> Result<(Option<Revision>, usize), NodeMapError> { |
|
450 | 447 | for (i, visit_item) in self.visit(prefix).enumerate() { |
|
451 | 448 | if let Some(opt) = visit_item.final_revision() { |
|
452 | 449 | return Ok((opt, i + 1)); |
|
453 | 450 | } |
|
454 | 451 | } |
|
455 | 452 | Err(NodeMapError::MultipleResults) |
|
456 | 453 | } |
|
457 | 454 | |
|
458 | 455 | fn visit<'n, 'p>( |
|
459 | 456 | &'n self, |
|
460 | 457 | prefix: NodePrefixRef<'p>, |
|
461 | 458 | ) -> NodeTreeVisitor<'n, 'p> { |
|
462 | 459 | NodeTreeVisitor { |
|
463 | 460 | nt: self, |
|
464 | prefix: prefix, | |

461 | prefix, | |
|
465 | 462 | visit: self.len() - 1, |
|
466 | 463 | nybble_idx: 0, |
|
467 | 464 | done: false, |
|
468 | 465 | } |
|
469 | 466 | } |
|
470 | 467 | /// Return a mutable reference for `Block` at index `idx`. |
|
471 | 468 | /// |
|
472 | 469 | /// If `idx` lies in the immutable area, then the reference is to |
|
473 | 470 | /// a newly appended copy. |
|
474 | 471 | /// |
|
475 | 472 | /// Returns (new_idx, glen, mut_ref) where |
|
476 | 473 | /// |
|
477 | 474 | /// - `new_idx` is the index of the mutable `Block` |
|
478 | 475 | /// - `mut_ref` is a mutable reference to the mutable Block. |
|
479 | 476 | /// - `glen` is the new length of `self.growable` |
|
480 | 477 | /// |
|
481 | 478 | /// Note: the caller wouldn't be allowed to query `self.growable.len()` |
|
482 | 479 | /// itself because of the mutable borrow taken with the returned `Block` |
|
483 | 480 | fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) { |
|
484 | 481 | let ro_blocks = &self.readonly; |
|
485 | 482 | let ro_len = ro_blocks.len(); |
|
486 | 483 | let glen = self.growable.len(); |
|
487 | 484 | if idx < ro_len { |
|
488 | 485 | self.masked_inner_blocks += 1; |
|
489 | // TODO OPTIM I think this makes two copies | |
|
490 | self.growable.push(ro_blocks[idx].clone()); | |
|
486 | self.growable.push(ro_blocks[idx]); | |
|
491 | 487 | (glen + ro_len, &mut self.growable[glen], glen + 1) |
|
492 | 488 | } else if glen + ro_len == idx { |
|
493 | 489 | (idx, &mut self.root, glen) |
|
494 | 490 | } else { |
|
495 | 491 | (idx, &mut self.growable[idx - ro_len], glen) |
|
496 | 492 | } |
|
497 | 493 | } |
|
498 | 494 | |
|
499 | 495 | /// Main insertion method |
|
500 | 496 | /// |
|
501 | 497 | /// This will dive in the node tree to find the deepest `Block` for |
|
502 | 498 | /// `node`, split it as much as needed and record `node` in there. |
|
503 | 499 | /// The method then backtracks, updating references in all the visited |
|
504 | 500 | /// blocks from the root. |
|
505 | 501 | /// |
|
506 | 502 | /// All the mutated `Block` are copied first to the growable part if |
|
507 | 503 | /// needed. That happens for those in the immutable part except the root. |
|
508 | 504 | pub fn insert<I: RevlogIndex>( |
|
509 | 505 | &mut self, |
|
510 | 506 | index: &I, |
|
511 | 507 | node: &Node, |
|
512 | 508 | rev: Revision, |
|
513 | 509 | ) -> Result<(), NodeMapError> { |
|
514 | 510 | let ro_len = &self.readonly.len(); |
|
515 | 511 | |
|
516 | 512 | let mut visit_steps: Vec<_> = self.visit(node.into()).collect(); |
|
517 | 513 | let read_nybbles = visit_steps.len(); |
|
518 | 514 | // visit_steps cannot be empty, since we always visit the root block |
|
519 | 515 | let deepest = visit_steps.pop().unwrap(); |
|
520 | 516 | |
|
521 | 517 | let (mut block_idx, mut block, mut glen) = |
|
522 | 518 | self.mutable_block(deepest.block_idx); |
|
523 | 519 | |
|
524 | 520 | if let Element::Rev(old_rev) = deepest.element { |
|
525 | 521 | let old_node = index |
|
526 | 522 | .node(old_rev) |
|
527 | 523 | .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?; |
|
528 | 524 | if old_node == node { |
|
529 | 525 | return Ok(()); // avoid creating lots of useless blocks |
|
530 | 526 | } |
|
531 | 527 | |
|
532 | 528 | // Looping over the tail of nybbles in both nodes, creating |
|
533 | 529 | // new blocks until we find the difference |
|
534 | 530 | let mut new_block_idx = ro_len + glen; |
|
535 | 531 | let mut nybble = deepest.nybble; |
|
536 | 532 | for nybble_pos in read_nybbles..node.nybbles_len() { |
|
537 | 533 | block.set(nybble, Element::Block(new_block_idx)); |
|
538 | 534 | |
|
539 | 535 | let new_nybble = node.get_nybble(nybble_pos); |
|
540 | 536 | let old_nybble = old_node.get_nybble(nybble_pos); |
|
541 | 537 | |
|
542 | 538 | if old_nybble == new_nybble { |
|
543 | 539 | self.growable.push(Block::new()); |
|
544 | 540 | block = &mut self.growable[glen]; |
|
545 | 541 | glen += 1; |
|
546 | 542 | new_block_idx += 1; |
|
547 | 543 | nybble = new_nybble; |
|
548 | 544 | } else { |
|
549 | 545 | let mut new_block = Block::new(); |
|
550 | 546 | new_block.set(old_nybble, Element::Rev(old_rev)); |
|
551 | 547 | new_block.set(new_nybble, Element::Rev(rev)); |
|
552 | 548 | self.growable.push(new_block); |
|
553 | 549 | break; |
|
554 | 550 | } |
|
555 | 551 | } |
|
556 | 552 | } else { |
|
557 | 553 | // Free slot in the deepest block: no splitting has to be done |
|
558 | 554 | block.set(deepest.nybble, Element::Rev(rev)); |
|
559 | 555 | } |
|
560 | 556 | |
|
561 | 557 | // Backtrack over visit steps to update references |
|
562 | 558 | while let Some(visited) = visit_steps.pop() { |
|
563 | 559 | let to_write = Element::Block(block_idx); |
|
564 | 560 | if visit_steps.is_empty() { |
|
565 | 561 | self.root.set(visited.nybble, to_write); |
|
566 | 562 | break; |
|
567 | 563 | } |
|
568 | 564 | let (new_idx, block, _) = self.mutable_block(visited.block_idx); |
|
569 | 565 | if block.get(visited.nybble) == to_write { |
|
570 | 566 | break; |
|
571 | 567 | } |
|
572 | 568 | block.set(visited.nybble, to_write); |
|
573 | 569 | block_idx = new_idx; |
|
574 | 570 | } |
|
575 | 571 | Ok(()) |
|
576 | 572 | } |
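
Putting `insert` together with the lookup methods, here is a sketch in the style of this module's tests below; it assumes the test helpers `TestIndex` and `pad_insert` defined further down, so it is illustrative rather than part of the patch. Two nodes sharing the prefix `12` force a block split, after which one more nybble disambiguates them:

#[test]
fn test_insert_then_find() -> Result<(), NodeMapError> {
    let mut idx = TestIndex::new();
    pad_insert(&mut idx, 0, "1234");
    pad_insert(&mut idx, 1, "12a0");

    // Build the tree by inserting every revision of the index, in order.
    let mut nt = NodeTree::default();
    for rev in 0..idx.len() as Revision {
        nt.insert(&idx, idx.node(rev).unwrap(), rev)?;
    }

    // "12" is shared by both nodes, so one more nybble is needed
    assert_eq!(nt.find_hex(&idx, "123")?, Some(0));
    assert_eq!(nt.find_hex(&idx, "12a")?, Some(1));
    assert_eq!(nt.find_hex(&idx, "12"), Err(NodeMapError::MultipleResults));
    Ok(())
}
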
|
577 | 573 | |
|
578 | 574 | /// Make the whole `NodeTree` logically empty, without touching the |
|
579 | 575 | /// immutable part. |
|
580 | 576 | pub fn invalidate_all(&mut self) { |
|
581 | 577 | self.root = Block::new(); |
|
582 | 578 | self.growable = Vec::new(); |
|
583 | 579 | self.masked_inner_blocks = self.readonly.len(); |
|
584 | 580 | } |
|
585 | 581 | |
|
586 | 582 | /// Return the number of blocks in the readonly part that are currently |
|
587 | 583 | /// masked in the mutable part. |
|
588 | 584 | /// |
|
589 | 585 | /// The `NodeTree` structure has no efficient way to know how many blocks |
|
590 | 586 | /// are already unreachable in the readonly part. |
|
591 | 587 | /// |
|
592 | 588 | /// After a call to `invalidate_all()`, the returned number can be actually |
|
593 | 589 | /// bigger than the whole readonly part, a conventional way to mean that |
|
594 | 590 | /// all the readonly blocks have been masked. This is what is really |
|
595 | 591 | /// useful to the caller and does not require to know how many were |
|
596 | 592 | /// actually unreachable to begin with. |
|
597 | 593 | pub fn masked_readonly_blocks(&self) -> usize { |
|
598 | 594 | if let Some(readonly_root) = self.readonly.last() { |
|
599 | 595 | if readonly_root == &self.root { |
|
600 | 596 | return 0; |
|
601 | 597 | } |
|
602 | 598 | } else { |
|
603 | 599 | return 0; |
|
604 | 600 | } |
|
605 | 601 | self.masked_inner_blocks + 1 |
|
606 | 602 | } |
|
607 | 603 | } |
|
608 | 604 | |
|
609 | 605 | pub struct NodeTreeBytes { |
|
610 | 606 | buffer: Box<dyn Deref<Target = [u8]> + Send>, |
|
611 | 607 | len_in_blocks: usize, |
|
612 | 608 | } |
|
613 | 609 | |
|
614 | 610 | impl NodeTreeBytes { |
|
615 | 611 | fn new( |
|
616 | 612 | buffer: Box<dyn Deref<Target = [u8]> + Send>, |
|
617 | 613 | amount: usize, |
|
618 | 614 | ) -> Self { |
|
619 | 615 | assert!(buffer.len() >= amount); |
|
620 | 616 | let len_in_blocks = amount / BLOCK_SIZE; |
|
621 | 617 | NodeTreeBytes { |
|
622 | 618 | buffer, |
|
623 | 619 | len_in_blocks, |
|
624 | 620 | } |
|
625 | 621 | } |
|
626 | 622 | } |
|
627 | 623 | |
|
628 | 624 | impl Deref for NodeTreeBytes { |
|
629 | 625 | type Target = [Block]; |
|
630 | 626 | |
|
631 | 627 | fn deref(&self) -> &[Block] { |
|
632 | 628 | unsafe { |
|
633 | 629 | slice::from_raw_parts( |
|
634 | 630 | (&self.buffer).as_ptr() as *const Block, |
|
635 | 631 | self.len_in_blocks, |
|
636 | 632 | ) |
|
637 | 633 | } |
|
638 | 634 | } |
|
639 | 635 | } |
|
640 | 636 | |
|
641 | 637 | struct NodeTreeVisitor<'n, 'p> { |
|
642 | 638 | nt: &'n NodeTree, |
|
643 | 639 | prefix: NodePrefixRef<'p>, |
|
644 | 640 | visit: usize, |
|
645 | 641 | nybble_idx: usize, |
|
646 | 642 | done: bool, |
|
647 | 643 | } |
|
648 | 644 | |
|
649 | 645 | #[derive(Debug, PartialEq, Clone)] |
|
650 | 646 | struct NodeTreeVisitItem { |
|
651 | 647 | block_idx: usize, |
|
652 | 648 | nybble: u8, |
|
653 | 649 | element: Element, |
|
654 | 650 | } |
|
655 | 651 | |
|
656 | 652 | impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> { |
|
657 | 653 | type Item = NodeTreeVisitItem; |
|
658 | 654 | |
|
659 | 655 | fn next(&mut self) -> Option<Self::Item> { |
|
660 | 656 | if self.done || self.nybble_idx >= self.prefix.len() { |
|
661 | 657 | return None; |
|
662 | 658 | } |
|
663 | 659 | |
|
664 | 660 | let nybble = self.prefix.get_nybble(self.nybble_idx); |
|
665 | 661 | self.nybble_idx += 1; |
|
666 | 662 | |
|
667 | 663 | let visit = self.visit; |
|
668 | 664 | let element = self.nt[visit].get(nybble); |
|
669 | 665 | if let Element::Block(idx) = element { |
|
670 | 666 | self.visit = idx; |
|
671 | 667 | } else { |
|
672 | 668 | self.done = true; |
|
673 | 669 | } |
|
674 | 670 | |
|
675 | 671 | Some(NodeTreeVisitItem { |
|
676 | 672 | block_idx: visit, |
|
677 | nybble: nybble, | |

678 | element: element, | |

673 | nybble, | |

674 | element, | |
|
679 | 675 | }) |
|
680 | 676 | } |
|
681 | 677 | } |
|
682 | 678 | |
|
683 | 679 | impl NodeTreeVisitItem { |
|
684 | 680 | // Return `Some(opt)` if this item is final, with `opt` being the |
|
685 | 681 | // `Revision` that it may represent. |
|
686 | 682 | // |
|
687 | 683 | // If the item is not terminal, return `None` |
|
688 | 684 | fn final_revision(&self) -> Option<Option<Revision>> { |
|
689 | 685 | match self.element { |
|
690 | 686 | Element::Block(_) => None, |
|
691 | 687 | Element::Rev(r) => Some(Some(r)), |
|
692 | 688 | Element::None => Some(None), |
|
693 | 689 | } |
|
694 | 690 | } |
|
695 | 691 | } |
|
696 | 692 | |
|
697 | 693 | impl From<Vec<Block>> for NodeTree { |
|
698 | 694 | fn from(vec: Vec<Block>) -> Self { |
|
699 | 695 | Self::new(Box::new(vec)) |
|
700 | 696 | } |
|
701 | 697 | } |
|
702 | 698 | |
|
703 | 699 | impl fmt::Debug for NodeTree { |
|
704 | 700 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
705 | 701 | let readonly: &[Block] = &*self.readonly; |
|
706 | 702 | write!( |
|
707 | 703 | f, |
|
708 | 704 | "readonly: {:?}, growable: {:?}, root: {:?}", |
|
709 | 705 | readonly, self.growable, self.root |
|
710 | 706 | ) |
|
711 | 707 | } |
|
712 | 708 | } |
|
713 | 709 | |
|
714 | 710 | impl Default for NodeTree { |
|
715 | 711 | /// Create a fully mutable empty NodeTree |
|
716 | 712 | fn default() -> Self { |
|
717 | 713 | NodeTree::new(Box::new(Vec::new())) |
|
718 | 714 | } |
|
719 | 715 | } |
|
720 | 716 | |
|
721 | 717 | impl NodeMap for NodeTree { |
|
722 | 718 | fn find_bin<'a>( |
|
723 | 719 | &self, |
|
724 | 720 | idx: &impl RevlogIndex, |
|
725 | 721 | prefix: NodePrefixRef<'a>, |
|
726 | 722 | ) -> Result<Option<Revision>, NodeMapError> { |
|
727 | 723 | validate_candidate(idx, prefix.clone(), self.lookup(prefix)?) |
|
728 | 724 | .map(|(opt, _shortest)| opt) |
|
729 | 725 | } |
|
730 | 726 | |
|
731 | 727 | fn unique_prefix_len_bin<'a>( |
|
732 | 728 | &self, |
|
733 | 729 | idx: &impl RevlogIndex, |
|
734 | 730 | prefix: NodePrefixRef<'a>, |
|
735 | 731 | ) -> Result<Option<usize>, NodeMapError> { |
|
736 | 732 | validate_candidate(idx, prefix.clone(), self.lookup(prefix)?) |
|
737 | 733 | .map(|(opt, shortest)| opt.map(|_rev| shortest)) |
|
738 | 734 | } |
|
739 | 735 | } |
|
740 | 736 | |
|
741 | 737 | #[cfg(test)] |
|
742 | 738 | mod tests { |
|
743 | 739 | use super::NodeMapError::*; |
|
744 | 740 | use super::*; |
|
745 | 741 | use crate::revlog::node::{hex_pad_right, Node}; |
|
746 | 742 | use std::collections::HashMap; |
|
747 | 743 | |
|
748 | 744 | /// Creates a `Block` using a syntax close to the `Debug` output |
|
749 | 745 | macro_rules! block { |
|
750 | 746 | {$($nybble:tt : $variant:ident($val:tt)),*} => ( |
|
751 | 747 | { |
|
752 | 748 | let mut block = Block::new(); |
|
753 | 749 | $(block.set($nybble, Element::$variant($val)));*; |
|
754 | 750 | block |
|
755 | 751 | } |
|
756 | 752 | ) |
|
757 | 753 | } |
|
758 | 754 | |
|
759 | 755 | #[test] |
|
760 | 756 | fn test_block_debug() { |
|
761 | 757 | let mut block = Block::new(); |
|
762 | 758 | block.set(1, Element::Rev(3)); |
|
763 | 759 | block.set(10, Element::Block(0)); |
|
764 | 760 | assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}"); |
|
765 | 761 | } |
|
766 | 762 | |
|
767 | 763 | #[test] |
|
768 | 764 | fn test_block_macro() { |
|
769 | 765 | let block = block! {5: Block(2)}; |
|
770 | 766 | assert_eq!(format!("{:?}", block), "{5: Block(2)}"); |
|
771 | 767 | |
|
772 | 768 | let block = block! {13: Rev(15), 5: Block(2)}; |
|
773 | 769 | assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}"); |
|
774 | 770 | } |
|
775 | 771 | |
|
776 | 772 | #[test] |
|
777 | 773 | fn test_raw_block() { |
|
778 | 774 | let mut raw = [255u8; 64]; |
|
779 | 775 | |
|
780 | 776 | let mut counter = 0; |
|
781 | 777 | for val in [0, 15, -2, -1, -3].iter() { |
|
782 | 778 | for byte in RawElement::to_be_bytes(*val).iter() { |
|
783 | 779 | raw[counter] = *byte; |
|
784 | 780 | counter += 1; |
|
785 | 781 | } |
|
786 | 782 | } |
|
787 | 783 | let block = Block(raw); |
|
788 | 784 | assert_eq!(block.get(0), Element::Block(0)); |
|
789 | 785 | assert_eq!(block.get(1), Element::Block(15)); |
|
790 | 786 | assert_eq!(block.get(3), Element::None); |
|
791 | 787 | assert_eq!(block.get(2), Element::Rev(0)); |
|
792 | 788 | assert_eq!(block.get(4), Element::Rev(1)); |
|
793 | 789 | } |
|
794 | 790 | |
|
795 | 791 | type TestIndex = HashMap<Revision, Node>; |
|
796 | 792 | |
|
797 | 793 | impl RevlogIndex for TestIndex { |
|
798 | 794 | fn node(&self, rev: Revision) -> Option<&Node> { |
|
799 | 795 | self.get(&rev) |
|
800 | 796 | } |
|
801 | 797 | |
|
802 | 798 | fn len(&self) -> usize { |
|
803 | 799 | self.len() |
|
804 | 800 | } |
|
805 | 801 | } |
|
806 | 802 | |
|
807 | 803 | /// Pad hexadecimal Node prefix with zeros on the right |
|
808 | 804 | /// |
|
809 | 805 | /// This avoids having to repeatedly write very long hexadecimal |
|
810 | 806 | /// strings for test data, and brings actual hash size independence.
|
811 | 807 | #[cfg(test)] |
|
812 | 808 | fn pad_node(hex: &str) -> Node { |
|
813 | 809 | Node::from_hex(&hex_pad_right(hex)).unwrap() |
|
814 | 810 | } |
|
815 | 811 | |
|
816 | 812 | /// Pad hexadecimal Node prefix with zeros on the right, then insert |
|
817 | 813 | fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { |
|
818 | 814 | idx.insert(rev, pad_node(hex)); |
|
819 | 815 | } |
|
820 | 816 | |
|
821 | 817 | fn sample_nodetree() -> NodeTree { |
|
822 | 818 | NodeTree::from(vec![ |
|
823 | 819 | block![0: Rev(9)], |
|
824 | 820 | block![0: Rev(0), 1: Rev(9)], |
|
825 | 821 | block![0: Block(1), 1:Rev(1)], |
|
826 | 822 | ]) |
|
827 | 823 | } |
|
828 | 824 | |
|
829 | 825 | #[test] |
|
830 | 826 | fn test_nt_debug() { |
|
831 | 827 | let nt = sample_nodetree(); |
|
832 | 828 | assert_eq!( |
|
833 | 829 | format!("{:?}", nt), |
|
834 | 830 | "readonly: \ |
|
835 | 831 | [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \ |
|
836 | 832 | growable: [], \ |
|
837 | 833 | root: {0: Block(1), 1: Rev(1)}", |
|
838 | 834 | ); |
|
839 | 835 | } |
|
840 | 836 | |
|
841 | 837 | #[test] |
|
842 | 838 | fn test_immutable_find_simplest() -> Result<(), NodeMapError> { |
|
843 | 839 | let mut idx: TestIndex = HashMap::new(); |
|
844 | 840 | pad_insert(&mut idx, 1, "1234deadcafe"); |
|
845 | 841 | |
|
846 | 842 | let nt = NodeTree::from(vec![block! {1: Rev(1)}]); |
|
847 | 843 | assert_eq!(nt.find_hex(&idx, "1")?, Some(1)); |
|
848 | 844 | assert_eq!(nt.find_hex(&idx, "12")?, Some(1)); |
|
849 | 845 | assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1)); |
|
850 | 846 | assert_eq!(nt.find_hex(&idx, "1a")?, None); |
|
851 | 847 | assert_eq!(nt.find_hex(&idx, "ab")?, None); |
|
852 | 848 | |
|
853 | 849 | // and with full binary Nodes |
|
854 | 850 | assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1)); |
|
855 | 851 | let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); |
|
856 | 852 | assert_eq!(nt.find_node(&idx, &unknown)?, None); |
|
857 | 853 | Ok(()) |
|
858 | 854 | } |
|
859 | 855 | |
|
860 | 856 | #[test] |
|
861 | 857 | fn test_immutable_find_one_jump() { |
|
862 | 858 | let mut idx = TestIndex::new(); |
|
863 | 859 | pad_insert(&mut idx, 9, "012"); |
|
864 | 860 | pad_insert(&mut idx, 0, "00a"); |
|
865 | 861 | |
|
866 | 862 | let nt = sample_nodetree(); |
|
867 | 863 | |
|
868 | 864 | assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults)); |
|
869 | 865 | assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9))); |
|
870 | 866 | assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults)); |
|
871 | 867 | assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0))); |
|
872 | 868 | assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3))); |
|
873 | 869 | assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION))); |
|
874 | 870 | } |
|
875 | 871 | |
|
876 | 872 | #[test] |
|
877 | 873 | fn test_mutated_find() -> Result<(), NodeMapError> { |
|
878 | 874 | let mut idx = TestIndex::new(); |
|
879 | 875 | pad_insert(&mut idx, 9, "012"); |
|
880 | 876 | pad_insert(&mut idx, 0, "00a"); |
|
881 | 877 | pad_insert(&mut idx, 2, "cafe"); |
|
882 | 878 | pad_insert(&mut idx, 3, "15"); |
|
883 | 879 | pad_insert(&mut idx, 1, "10"); |
|
884 | 880 | |
|
885 | 881 | let nt = NodeTree { |
|
886 | 882 | readonly: sample_nodetree().readonly, |
|
887 | 883 | growable: vec![block![0: Rev(1), 5: Rev(3)]], |
|
888 | 884 | root: block![0: Block(1), 1:Block(3), 12: Rev(2)], |
|
889 | 885 | masked_inner_blocks: 1, |
|
890 | 886 | }; |
|
891 | 887 | assert_eq!(nt.find_hex(&idx, "10")?, Some(1)); |
|
892 | 888 | assert_eq!(nt.find_hex(&idx, "c")?, Some(2)); |
|
893 | 889 | assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1)); |
|
894 | 890 | assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults)); |
|
895 | 891 | assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION)); |
|
896 | 892 | assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3)); |
|
897 | 893 | assert_eq!(nt.find_hex(&idx, "01")?, Some(9)); |
|
898 | 894 | assert_eq!(nt.masked_readonly_blocks(), 2); |
|
899 | 895 | Ok(()) |
|
900 | 896 | } |
|
901 | 897 | |
|
902 | 898 | struct TestNtIndex { |
|
903 | 899 | index: TestIndex, |
|
904 | 900 | nt: NodeTree, |
|
905 | 901 | } |
|
906 | 902 | |
|
907 | 903 | impl TestNtIndex { |
|
908 | 904 | fn new() -> Self { |
|
909 | 905 | TestNtIndex { |
|
910 | 906 | index: HashMap::new(), |
|
911 | 907 | nt: NodeTree::default(), |
|
912 | 908 | } |
|
913 | 909 | } |
|
914 | 910 | |
|
915 | 911 | fn insert( |
|
916 | 912 | &mut self, |
|
917 | 913 | rev: Revision, |
|
918 | 914 | hex: &str, |
|
919 | 915 | ) -> Result<(), NodeMapError> { |
|
920 | 916 | let node = pad_node(hex); |
|
921 | 917 | self.index.insert(rev, node.clone()); |
|
922 | 918 | self.nt.insert(&self.index, &node, rev)?; |
|
923 | 919 | Ok(()) |
|
924 | 920 | } |
|
925 | 921 | |
|
926 | 922 | fn find_hex( |
|
927 | 923 | &self, |
|
928 | 924 | prefix: &str, |
|
929 | 925 | ) -> Result<Option<Revision>, NodeMapError> { |
|
930 | 926 | self.nt.find_hex(&self.index, prefix) |
|
931 | 927 | } |
|
932 | 928 | |
|
933 | 929 | fn unique_prefix_len_hex( |
|
934 | 930 | &self, |
|
935 | 931 | prefix: &str, |
|
936 | 932 | ) -> Result<Option<usize>, NodeMapError> { |
|
937 | 933 | self.nt.unique_prefix_len_hex(&self.index, prefix) |
|
938 | 934 | } |
|
939 | 935 | |
|
940 | 936 | /// Drain `added` and start a new one
|
941 | 937 | fn commit(self) -> Self { |
|
942 | 938 | let mut as_vec: Vec<Block> = |
|
943 | 939 | self.nt.readonly.iter().map(|block| block.clone()).collect(); |
|
944 | 940 | as_vec.extend(self.nt.growable); |
|
945 | 941 | as_vec.push(self.nt.root); |
|
946 | 942 | |
|
947 | 943 | Self { |
|
948 | 944 | index: self.index, |
|
949 | 945 | nt: NodeTree::from(as_vec).into(), |
|
950 | 946 | } |
|
951 | 947 | } |
|
952 | 948 | } |
|
953 | 949 | |
|
954 | 950 | #[test] |
|
955 | 951 | fn test_insert_full_mutable() -> Result<(), NodeMapError> { |
|
956 | 952 | let mut idx = TestNtIndex::new(); |
|
957 | 953 | idx.insert(0, "1234")?; |
|
958 | 954 | assert_eq!(idx.find_hex("1")?, Some(0)); |
|
959 | 955 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
960 | 956 | |
|
961 | 957 | // let's trigger a simple split |
|
962 | 958 | idx.insert(1, "1a34")?; |
|
963 | 959 | assert_eq!(idx.nt.growable.len(), 1); |
|
964 | 960 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
965 | 961 | assert_eq!(idx.find_hex("1a")?, Some(1)); |
|
966 | 962 | |
|
967 | 963 | // reinserting is a no-op
|
968 | 964 | idx.insert(1, "1a34")?; |
|
969 | 965 | assert_eq!(idx.nt.growable.len(), 1); |
|
970 | 966 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
971 | 967 | assert_eq!(idx.find_hex("1a")?, Some(1)); |
|
972 | 968 | |
|
973 | 969 | idx.insert(2, "1a01")?; |
|
974 | 970 | assert_eq!(idx.nt.growable.len(), 2); |
|
975 | 971 | assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults)); |
|
976 | 972 | assert_eq!(idx.find_hex("12")?, Some(0)); |
|
977 | 973 | assert_eq!(idx.find_hex("1a3")?, Some(1)); |
|
978 | 974 | assert_eq!(idx.find_hex("1a0")?, Some(2)); |
|
979 | 975 | assert_eq!(idx.find_hex("1a12")?, None); |
|
980 | 976 | |
|
981 | 977 | // now let's make it split and create more than one additional block |
|
982 | 978 | idx.insert(3, "1a345")?; |
|
983 | 979 | assert_eq!(idx.nt.growable.len(), 4); |
|
984 | 980 | assert_eq!(idx.find_hex("1a340")?, Some(1)); |
|
985 | 981 | assert_eq!(idx.find_hex("1a345")?, Some(3)); |
|
986 | 982 | assert_eq!(idx.find_hex("1a341")?, None); |
|
987 | 983 | |
|
988 | 984 | // there's no readonly block to mask |
|
989 | 985 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); |
|
990 | 986 | Ok(()) |
|
991 | 987 | } |
|
992 | 988 | |
|
993 | 989 | #[test] |
|
994 | 990 | fn test_unique_prefix_len_zero_prefix() { |
|
995 | 991 | let mut idx = TestNtIndex::new(); |
|
996 | 992 | idx.insert(0, "00000abcd").unwrap(); |
|
997 | 993 | |
|
998 | 994 | assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults)); |
|
999 | 995 | // in the nodetree proper, this will be found at the first nybble |
|
1000 | 996 | // yet the correct answer for unique_prefix_len is not 1, nor 1+1, |
|
1001 | 997 | // but the first difference with `NULL_NODE` |
|
1002 | 998 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); |
|
1003 | 999 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); |
|
1004 | 1000 | |
|
1005 | 1001 | // same with odd result |
|
1006 | 1002 | idx.insert(1, "00123").unwrap(); |
|
1007 | 1003 | assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3))); |
|
1008 | 1004 | assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3))); |
|
1009 | 1005 | |
|
1010 | 1006 | // these are unchanged of course |
|
1011 | 1007 | assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6))); |
|
1012 | 1008 | assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6))); |
|
1013 | 1009 | } |
|
1014 | 1010 | |
|
1015 | 1011 | #[test] |
|
1016 | 1012 | fn test_insert_extreme_splitting() -> Result<(), NodeMapError> { |
|
1017 | 1013 | // check that the splitting loop is long enough |
|
1018 | 1014 | let mut nt_idx = TestNtIndex::new(); |
|
1019 | 1015 | let nt = &mut nt_idx.nt; |
|
1020 | 1016 | let idx = &mut nt_idx.index; |
|
1021 | 1017 | |
|
1022 | 1018 | let node0_hex = hex_pad_right("444444"); |
|
1023 | 1019 | let mut node1_hex = hex_pad_right("444444").clone(); |
|
1024 | 1020 | node1_hex.pop(); |
|
1025 | 1021 | node1_hex.push('5'); |
|
1026 | 1022 | let node0 = Node::from_hex(&node0_hex).unwrap(); |
|
1027 | 1023 | let node1 = Node::from_hex(&node1_hex).unwrap(); |
|
1028 | 1024 | |
|
1029 | 1025 | idx.insert(0, node0.clone()); |
|
1030 | 1026 | nt.insert(idx, &node0, 0)?; |
|
1031 | 1027 | idx.insert(1, node1.clone()); |
|
1032 | 1028 | nt.insert(idx, &node1, 1)?; |
|
1033 | 1029 | |
|
1034 | 1030 | assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0)); |
|
1035 | 1031 | assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1)); |
|
1036 | 1032 | Ok(()) |
|
1037 | 1033 | } |
|
1038 | 1034 | |
|
1039 | 1035 | #[test] |
|
1040 | 1036 | fn test_insert_partly_immutable() -> Result<(), NodeMapError> { |
|
1041 | 1037 | let mut idx = TestNtIndex::new(); |
|
1042 | 1038 | idx.insert(0, "1234")?; |
|
1043 | 1039 | idx.insert(1, "1235")?; |
|
1044 | 1040 | idx.insert(2, "131")?; |
|
1045 | 1041 | idx.insert(3, "cafe")?; |
|
1046 | 1042 | let mut idx = idx.commit(); |
|
1047 | 1043 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1048 | 1044 | assert_eq!(idx.find_hex("1235")?, Some(1)); |
|
1049 | 1045 | assert_eq!(idx.find_hex("131")?, Some(2)); |
|
1050 | 1046 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1051 | 1047 | // we did not add anything since init from readonly |
|
1052 | 1048 | assert_eq!(idx.nt.masked_readonly_blocks(), 0); |
|
1053 | 1049 | |
|
1054 | 1050 | idx.insert(4, "123A")?; |
|
1055 | 1051 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1056 | 1052 | assert_eq!(idx.find_hex("1235")?, Some(1)); |
|
1057 | 1053 | assert_eq!(idx.find_hex("131")?, Some(2)); |
|
1058 | 1054 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1059 | 1055 | assert_eq!(idx.find_hex("123A")?, Some(4)); |
|
1060 | 1056 | // we masked blocks for all prefixes of "123", including the root |
|
1061 | 1057 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); |
|
1062 | 1058 | |
|
1063 | 1059 | eprintln!("{:?}", idx.nt); |
|
1064 | 1060 | idx.insert(5, "c0")?; |
|
1065 | 1061 | assert_eq!(idx.find_hex("cafe")?, Some(3)); |
|
1066 | 1062 | assert_eq!(idx.find_hex("c0")?, Some(5)); |
|
1067 | 1063 | assert_eq!(idx.find_hex("c1")?, None); |
|
1068 | 1064 | assert_eq!(idx.find_hex("1234")?, Some(0)); |
|
1069 | 1065 | // inserting "c0" is just splitting the 'c' slot of the mutable root, |
|
1070 | 1066 | // it doesn't mask anything |
|
1071 | 1067 | assert_eq!(idx.nt.masked_readonly_blocks(), 4); |
|
1072 | 1068 | |
|
1073 | 1069 | Ok(()) |
|
1074 | 1070 | } |
|
1075 | 1071 | |
|
1076 | 1072 | #[test] |
|
1077 | 1073 | fn test_invalidate_all() -> Result<(), NodeMapError> { |
|
1078 | 1074 | let mut idx = TestNtIndex::new(); |
|
1079 | 1075 | idx.insert(0, "1234")?; |
|
1080 | 1076 | idx.insert(1, "1235")?; |
|
1081 | 1077 | idx.insert(2, "131")?; |
|
1082 | 1078 | idx.insert(3, "cafe")?; |
|
1083 | 1079 | let mut idx = idx.commit(); |
|
1084 | 1080 | |
|
1085 | 1081 | idx.nt.invalidate_all(); |
|
1086 | 1082 | |
|
1087 | 1083 | assert_eq!(idx.find_hex("1234")?, None); |
|
1088 | 1084 | assert_eq!(idx.find_hex("1235")?, None); |
|
1089 | 1085 | assert_eq!(idx.find_hex("131")?, None); |
|
1090 | 1086 | assert_eq!(idx.find_hex("cafe")?, None); |
|
1091 | 1087 | // all the readonly blocks have been masked, this is the |
|
1092 | 1088 | // conventional expected response |
|
1093 | 1089 | assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1); |
|
1094 | 1090 | Ok(()) |
|
1095 | 1091 | } |
|
1096 | 1092 | |
|
1097 | 1093 | #[test] |
|
1098 | 1094 | fn test_into_added_empty() { |
|
1099 | 1095 | assert!(sample_nodetree().into_readonly_and_added().1.is_empty()); |
|
1100 | 1096 | assert!(sample_nodetree() |
|
1101 | 1097 | .into_readonly_and_added_bytes() |
|
1102 | 1098 | .1 |
|
1103 | 1099 | .is_empty()); |
|
1104 | 1100 | } |
|
1105 | 1101 | |
|
1106 | 1102 | #[test] |
|
1107 | 1103 | fn test_into_added_bytes() -> Result<(), NodeMapError> { |
|
1108 | 1104 | let mut idx = TestNtIndex::new(); |
|
1109 | 1105 | idx.insert(0, "1234")?; |
|
1110 | 1106 | let mut idx = idx.commit(); |
|
1111 | 1107 | idx.insert(4, "cafe")?; |
|
1112 | 1108 | let (_, bytes) = idx.nt.into_readonly_and_added_bytes(); |
|
1113 | 1109 | |
|
1114 | 1110 | // only the root block has been changed |
|
1115 | 1111 | assert_eq!(bytes.len(), BLOCK_SIZE); |
|
1116 | 1112 | // big endian for -2 |
|
1117 | 1113 | assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]); |
|
1118 | 1114 | // big endian for -6 |
|
1119 | 1115 | assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]); |
|
1120 | 1116 | Ok(()) |
|
1121 | 1117 | } |
|
1122 | 1118 | } |
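Reviewer note, not part of the changeset: test_raw_block and test_into_added_bytes above both rely on the same convention for a raw block slot, where a non-negative big-endian value names a child block, -1 marks an empty slot, and revision R is stored as -R - 2. A standalone sketch of that mapping, with a local Element mirror and hypothetical to_raw/from_raw helpers:

    // Illustrative only: mirrors the mapping exercised by the tests above.
    #[derive(Debug, PartialEq)]
    enum Element {
        Block(usize),
        Rev(i32),
        None,
    }

    // Non-negative values are child block indexes, -1 is an empty slot,
    // and revision R is stored as -R - 2 (so Rev(0) -> -2, Rev(1) -> -3).
    fn to_raw(e: &Element) -> i32 {
        match *e {
            Element::Block(i) => i as i32,
            Element::None => -1,
            Element::Rev(r) => -r - 2,
        }
    }

    fn from_raw(v: i32) -> Element {
        match v {
            i if i >= 0 => Element::Block(i as usize),
            -1 => Element::None,
            v => Element::Rev(-v - 2),
        }
    }

    fn main() {
        // The raw values written by test_raw_block: 0, 15, -2, -1, -3.
        assert_eq!(from_raw(0), Element::Block(0));
        assert_eq!(from_raw(-2), Element::Rev(0));
        assert_eq!(from_raw(-1), Element::None);
        // The -2 and -6 bytes checked in test_into_added_bytes.
        assert_eq!(to_raw(&Element::Rev(0)), -2);
        assert_eq!(to_raw(&Element::Rev(4)), -6);
    }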
@@ -1,168 +1,169 b''
|
1 | 1 | // utils module |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Contains useful functions, traits, structs, etc. for use in core. |
|
9 | 9 | |
|
10 | 10 | use crate::utils::hg_path::HgPath; |
|
11 | 11 | use std::{io::Write, ops::Deref}; |
|
12 | 12 | |
|
13 | 13 | pub mod files; |
|
14 | 14 | pub mod hg_path; |
|
15 | 15 | pub mod path_auditor; |
|
16 | 16 | |
|
17 | 17 | /// Useful until rust/issues/56345 is stable |
|
18 | 18 | /// |
|
19 | 19 | /// # Examples |
|
20 | 20 | /// |
|
21 | 21 | /// ``` |
|
22 | 22 | /// use crate::hg::utils::find_slice_in_slice; |
|
23 | 23 | /// |
|
24 | 24 | /// let haystack = b"This is the haystack".to_vec(); |
|
25 | 25 | /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8)); |
|
26 | 26 | /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None); |
|
27 | 27 | /// ``` |
|
28 | 28 | pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize> |
|
29 | 29 | where |
|
30 | 30 | for<'a> &'a [T]: PartialEq, |
|
31 | 31 | { |
|
32 | 32 | slice |
|
33 | 33 | .windows(needle.len()) |
|
34 | 34 | .position(|window| window == needle) |
|
35 | 35 | } |
|
36 | 36 | |
|
37 | 37 | /// Replaces the `from` slice with the `to` slice inside the `buf` slice. |
|
38 | 38 | /// |
|
39 | 39 | /// # Examples |
|
40 | 40 | /// |
|
41 | 41 | /// ``` |
|
42 | 42 | /// use crate::hg::utils::replace_slice; |
|
43 | 43 | /// let mut line = b"I hate writing tests!".to_vec(); |
|
44 | 44 | /// replace_slice(&mut line, b"hate", b"love"); |
|
45 | 45 | /// assert_eq!( |
|
46 | 46 | /// line, |
|
47 | 47 | /// b"I love writing tests!".to_vec() |
|
48 | 48 | /// ); |
|
49 | 49 | /// ``` |
|
50 | 50 | pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T]) |
|
51 | 51 | where |
|
52 | 52 | T: Clone + PartialEq, |
|
53 | 53 | { |
|
54 | 54 | if buf.len() < from.len() || from.len() != to.len() { |
|
55 | 55 | return; |
|
56 | 56 | } |
|
57 | 57 | for i in 0..=buf.len() - from.len() { |
|
58 | 58 | if buf[i..].starts_with(from) { |
|
59 | 59 | buf[i..(i + from.len())].clone_from_slice(to); |
|
60 | 60 | } |
|
61 | 61 | } |
|
62 | 62 | } |
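Reviewer note, not part of the changeset: the guard at the top of replace_slice above means it only substitutes slices of equal length and silently does nothing otherwise. A minimal sketch of both cases, assuming the crate is used as hg:

    use hg::utils::replace_slice;

    fn main() {
        let mut line = b"I hate bugs".to_vec();
        // Equal lengths: the in-place replacement happens.
        replace_slice(&mut line, b"hate", b"love");
        assert_eq!(line, b"I love bugs".to_vec());
        // Different lengths: the function returns early and leaves `line` untouched.
        replace_slice(&mut line, b"bugs", b"everything");
        assert_eq!(line, b"I love bugs".to_vec());
    }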
|
63 | 63 | |
|
64 | 64 | pub trait SliceExt { |
|
65 | 65 | fn trim_end(&self) -> &Self; |
|
66 | 66 | fn trim_start(&self) -> &Self; |
|
67 | 67 | fn trim(&self) -> &Self; |
|
68 | 68 | fn drop_prefix(&self, needle: &Self) -> Option<&Self>; |
|
69 | 69 | } |
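Reviewer note, not part of the changeset: drop_prefix is the only SliceExt method without a doctest here; a quick illustrative use, assuming the trait is imported as in the trim doctest below:

    use hg::utils::SliceExt;

    fn main() {
        let line = &b"key: value"[..];
        assert_eq!(line.drop_prefix(b"key: "), Some(&b"value"[..]));
        assert_eq!(line.drop_prefix(b"nope: "), None);
    }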
|
70 | 70 | |
|
71 | #[allow(clippy::trivially_copy_pass_by_ref)] | |
|
71 | 72 | fn is_not_whitespace(c: &u8) -> bool { |
|
72 | 73 | !(*c as char).is_whitespace() |
|
73 | 74 | } |
|
74 | 75 | |
|
75 | 76 | impl SliceExt for [u8] { |
|
76 | 77 | fn trim_end(&self) -> &[u8] { |
|
77 | 78 | if let Some(last) = self.iter().rposition(is_not_whitespace) { |
|
78 |
&self[..last + 1]
|
|
79 | &self[..=last] | |
|
79 | 80 | } else { |
|
80 | 81 | &[] |
|
81 | 82 | } |
|
82 | 83 | } |
|
83 | 84 | fn trim_start(&self) -> &[u8] { |
|
84 | 85 | if let Some(first) = self.iter().position(is_not_whitespace) { |
|
85 | 86 | &self[first..] |
|
86 | 87 | } else { |
|
87 | 88 | &[] |
|
88 | 89 | } |
|
89 | 90 | } |
|
90 | 91 | |
|
91 | 92 | /// ``` |
|
92 | 93 | /// use hg::utils::SliceExt; |
|
93 | 94 | /// assert_eq!( |
|
94 | 95 | /// b" to trim ".trim(), |
|
95 | 96 | /// b"to trim" |
|
96 | 97 | /// ); |
|
97 | 98 | /// assert_eq!( |
|
98 | 99 | /// b"to trim ".trim(), |
|
99 | 100 | /// b"to trim" |
|
100 | 101 | /// ); |
|
101 | 102 | /// assert_eq!( |
|
102 | 103 | /// b" to trim".trim(), |
|
103 | 104 | /// b"to trim" |
|
104 | 105 | /// ); |
|
105 | 106 | /// ``` |
|
106 | 107 | fn trim(&self) -> &[u8] { |
|
107 | 108 | self.trim_start().trim_end() |
|
108 | 109 | } |
|
109 | 110 | |
|
110 | 111 | fn drop_prefix(&self, needle: &Self) -> Option<&Self> { |
|
111 | 112 | if self.starts_with(needle) { |
|
112 | 113 | Some(&self[needle.len()..]) |
|
113 | 114 | } else { |
|
114 | 115 | None |
|
115 | 116 | } |
|
116 | 117 | } |
|
117 | 118 | } |
|
118 | 119 | |
|
119 | 120 | pub trait Escaped { |
|
120 | 121 | /// Return bytes escaped for display to the user |
|
121 | 122 | fn escaped_bytes(&self) -> Vec<u8>; |
|
122 | 123 | } |
|
123 | 124 | |
|
124 | 125 | impl Escaped for u8 { |
|
125 | 126 | fn escaped_bytes(&self) -> Vec<u8> { |
|
126 | 127 | let mut acc = vec![]; |
|
127 | 128 | match self { |
|
128 | 129 | c @ b'\'' | c @ b'\\' => { |
|
129 | 130 | acc.push(b'\\'); |
|
130 | 131 | acc.push(*c); |
|
131 | 132 | } |
|
132 | 133 | b'\t' => { |
|
133 | 134 | acc.extend(br"\\t"); |
|
134 | 135 | } |
|
135 | 136 | b'\n' => { |
|
136 | 137 | acc.extend(br"\\n"); |
|
137 | 138 | } |
|
138 | 139 | b'\r' => { |
|
139 | 140 | acc.extend(br"\\r"); |
|
140 | 141 | } |
|
141 | 142 | c if (*c < b' ' || *c >= 127) => { |
|
142 | 143 | write!(acc, "\\x{:x}", self).unwrap(); |
|
143 | 144 | } |
|
144 | 145 | c => { |
|
145 | 146 | acc.push(*c); |
|
146 | 147 | } |
|
147 | 148 | } |
|
148 | 149 | acc |
|
149 | 150 | } |
|
150 | 151 | } |
|
151 | 152 | |
|
152 | 153 | impl<'a, T: Escaped> Escaped for &'a [T] { |
|
153 | 154 | fn escaped_bytes(&self) -> Vec<u8> { |
|
154 |
self.iter().flat_map(|e| e.escaped_bytes()).collect()
|
|
155 | self.iter().flat_map(Escaped::escaped_bytes).collect() | |
|
155 | 156 | } |
|
156 | 157 | } |
|
157 | 158 | |
|
158 | 159 | impl<T: Escaped> Escaped for Vec<T> { |
|
159 | 160 | fn escaped_bytes(&self) -> Vec<u8> { |
|
160 | 161 | self.deref().escaped_bytes() |
|
161 | 162 | } |
|
162 | 163 | } |
|
163 | 164 | |
|
164 | 165 | impl<'a> Escaped for &'a HgPath { |
|
165 | 166 | fn escaped_bytes(&self) -> Vec<u8> { |
|
166 | 167 | self.as_bytes().escaped_bytes() |
|
167 | 168 | } |
|
168 | 169 | } |
@@ -1,384 +1,382 b''
|
1 | 1 | // files.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 |
|
4 | 4 | // Raphaël Gomès <rgomes@octobus.net>, |
|
5 | 5 | // Yuya Nishihara <yuya@tcha.org> |
|
6 | 6 | // |
|
7 | 7 | // This software may be used and distributed according to the terms of the |
|
8 | 8 | // GNU General Public License version 2 or any later version. |
|
9 | 9 | |
|
10 | 10 | //! Functions for fiddling with files. |
|
11 | 11 | |
|
12 | 12 | use crate::utils::{ |
|
13 | 13 | hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError}, |
|
14 | 14 | path_auditor::PathAuditor, |
|
15 | 15 | replace_slice, |
|
16 | 16 | }; |
|
17 | 17 | use lazy_static::lazy_static; |
|
18 | 18 | use same_file::is_same_file; |
|
19 | 19 | use std::borrow::ToOwned; |
|
20 | 20 | use std::fs::Metadata; |
|
21 | 21 | use std::iter::FusedIterator; |
|
22 | 22 | use std::ops::Deref; |
|
23 | 23 | use std::path::{Path, PathBuf}; |
|
24 | 24 | |
|
25 | 25 | pub fn get_path_from_bytes(bytes: &[u8]) -> &Path { |
|
26 | 26 | let os_str; |
|
27 | 27 | #[cfg(unix)] |
|
28 | 28 | { |
|
29 | 29 | use std::os::unix::ffi::OsStrExt; |
|
30 | 30 | os_str = std::ffi::OsStr::from_bytes(bytes); |
|
31 | 31 | } |
|
32 | 32 | // TODO Handle other platforms |
|
33 | 33 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). |
|
34 | 34 | // Perhaps, the return type would have to be Result<PathBuf>. |
|
35 | 35 | |
|
36 | 36 | Path::new(os_str) |
|
37 | 37 | } |
|
38 | 38 | |
|
39 | 39 | // TODO: need to convert from WTF8 to MBCS bytes on Windows. |
|
40 | 40 | // that's why Vec<u8> is returned. |
|
41 | 41 | #[cfg(unix)] |
|
42 | 42 | pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> { |
|
43 | 43 | use std::os::unix::ffi::OsStrExt; |
|
44 | 44 | path.as_ref().as_os_str().as_bytes().to_vec() |
|
45 | 45 | } |
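Reviewer note, not part of the changeset: on Unix these two helpers are byte-for-byte inverses of each other, since OsStr is just bytes there. A small sketch, module path assumed:

    use hg::utils::files::{get_bytes_from_path, get_path_from_bytes};

    fn main() {
        let path = get_path_from_bytes(b"dir/file.txt");
        assert_eq!(get_bytes_from_path(path), b"dir/file.txt".to_vec());
    }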
|
46 | 46 | |
|
47 | 47 | /// An iterator over a repository path, yielding itself and its ancestors.
|
48 | 48 | #[derive(Copy, Clone, Debug)] |
|
49 | 49 | pub struct Ancestors<'a> { |
|
50 | 50 | next: Option<&'a HgPath>, |
|
51 | 51 | } |
|
52 | 52 | |
|
53 | 53 | impl<'a> Iterator for Ancestors<'a> { |
|
54 | 54 | type Item = &'a HgPath; |
|
55 | 55 | |
|
56 | 56 | fn next(&mut self) -> Option<Self::Item> { |
|
57 | 57 | let next = self.next; |
|
58 | 58 | self.next = match self.next { |
|
59 | 59 | Some(s) if s.is_empty() => None, |
|
60 | 60 | Some(s) => { |
|
61 | 61 | let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0); |
|
62 | 62 | Some(HgPath::new(&s.as_bytes()[..p])) |
|
63 | 63 | } |
|
64 | 64 | None => None, |
|
65 | 65 | }; |
|
66 | 66 | next |
|
67 | 67 | } |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | impl<'a> FusedIterator for Ancestors<'a> {} |
|
71 | 71 | |
|
72 | 72 | /// An iterator over a repository path, yielding itself and its ancestors.
|
73 | 73 | #[derive(Copy, Clone, Debug)] |
|
74 | 74 | pub(crate) struct AncestorsWithBase<'a> { |
|
75 | 75 | next: Option<(&'a HgPath, &'a HgPath)>, |
|
76 | 76 | } |
|
77 | 77 | |
|
78 | 78 | impl<'a> Iterator for AncestorsWithBase<'a> { |
|
79 | 79 | type Item = (&'a HgPath, &'a HgPath); |
|
80 | 80 | |
|
81 | 81 | fn next(&mut self) -> Option<Self::Item> { |
|
82 | 82 | let next = self.next; |
|
83 | 83 | self.next = match self.next { |
|
84 | 84 | Some((s, _)) if s.is_empty() => None, |
|
85 | 85 | Some((s, _)) => Some(s.split_filename()), |
|
86 | 86 | None => None, |
|
87 | 87 | }; |
|
88 | 88 | next |
|
89 | 89 | } |
|
90 | 90 | } |
|
91 | 91 | |
|
92 | 92 | impl<'a> FusedIterator for AncestorsWithBase<'a> {} |
|
93 | 93 | |
|
94 | 94 | /// Returns an iterator yielding ancestor directories of the given repository |
|
95 | 95 | /// path. |
|
96 | 96 | /// |
|
97 | 97 | /// The path is separated by '/', and must not start with '/'. |
|
98 | 98 | /// |
|
99 | 99 | /// The path itself isn't included unless it is b"" (meaning the root |
|
100 | 100 | /// directory.) |
|
101 |
pub fn find_dirs<'a>(path: &'a HgPath) -> Ancestors<'a> {
|
|
101 | pub fn find_dirs(path: &HgPath) -> Ancestors { | |
|
102 | 102 | let mut dirs = Ancestors { next: Some(path) }; |
|
103 | 103 | if !path.is_empty() { |
|
104 | 104 | dirs.next(); // skip itself |
|
105 | 105 | } |
|
106 | 106 | dirs |
|
107 | 107 | } |
|
108 | 108 | |
|
109 | 109 | /// Returns an iterator yielding ancestor directories of the given repository |
|
110 | 110 | /// path. |
|
111 | 111 | /// |
|
112 | 112 | /// The path is separated by '/', and must not start with '/'. |
|
113 | 113 | /// |
|
114 | 114 | /// The path itself isn't included unless it is b"" (meaning the root |
|
115 | 115 | /// directory.) |
|
116 |
pub(crate) fn find_dirs_with_base<'a>(
|
|
117 | path: &'a HgPath, | |
|
118 | ) -> AncestorsWithBase<'a> { | |
|
116 | pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase { | |
|
119 | 117 | let mut dirs = AncestorsWithBase { |
|
120 | 118 | next: Some((path, HgPath::new(b""))), |
|
121 | 119 | }; |
|
122 | 120 | if !path.is_empty() { |
|
123 | 121 | dirs.next(); // skip itself |
|
124 | 122 | } |
|
125 | 123 | dirs |
|
126 | 124 | } |
|
127 | 125 | |
|
128 | 126 | /// TODO more than ASCII? |
|
129 | 127 | pub fn normalize_case(path: &HgPath) -> HgPathBuf { |
|
130 | 128 | #[cfg(windows)] // NTFS compares via upper() |
|
131 | 129 | return path.to_ascii_uppercase(); |
|
132 | 130 | #[cfg(unix)] |
|
133 | 131 | path.to_ascii_lowercase() |
|
134 | 132 | } |
|
135 | 133 | |
|
136 | 134 | lazy_static! { |
|
137 | 135 | static ref IGNORED_CHARS: Vec<Vec<u8>> = { |
|
138 | 136 | [ |
|
139 | 137 | 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, |
|
140 | 138 | 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff, |
|
141 | 139 | ] |
|
142 | 140 | .iter() |
|
143 | 141 | .map(|code| { |
|
144 | 142 | std::char::from_u32(*code) |
|
145 | 143 | .unwrap() |
|
146 | 144 | .encode_utf8(&mut [0; 3]) |
|
147 | 145 | .bytes() |
|
148 | 146 | .collect() |
|
149 | 147 | }) |
|
150 | 148 | .collect() |
|
151 | 149 | }; |
|
152 | 150 | } |
|
153 | 151 | |
|
154 | 152 | fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> { |
|
155 | 153 | let mut buf = bytes.to_owned(); |
|
156 | 154 | let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef'); |
|
157 | 155 | if needs_escaping { |
|
158 | 156 | for forbidden in IGNORED_CHARS.iter() { |
|
159 | 157 | replace_slice(&mut buf, forbidden, &[]) |
|
160 | 158 | } |
|
161 | 159 | buf |
|
162 | 160 | } else { |
|
163 | 161 | buf |
|
164 | 162 | } |
|
165 | 163 | } |
|
166 | 164 | |
|
167 | 165 | pub fn lower_clean(bytes: &[u8]) -> Vec<u8> { |
|
168 | 166 | hfs_ignore_clean(&bytes.to_ascii_lowercase()) |
|
169 | 167 | } |
|
170 | 168 | |
|
171 | 169 | #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)] |
|
172 | 170 | pub struct HgMetadata { |
|
173 | 171 | pub st_dev: u64, |
|
174 | 172 | pub st_mode: u32, |
|
175 | 173 | pub st_nlink: u64, |
|
176 | 174 | pub st_size: u64, |
|
177 | 175 | pub st_mtime: i64, |
|
178 | 176 | pub st_ctime: i64, |
|
179 | 177 | } |
|
180 | 178 | |
|
181 | 179 | // TODO support other platforms
|
182 | 180 | #[cfg(unix)] |
|
183 | 181 | impl HgMetadata { |
|
184 | 182 | pub fn from_metadata(metadata: Metadata) -> Self { |
|
185 | 183 | use std::os::unix::fs::MetadataExt; |
|
186 | 184 | Self { |
|
187 | 185 | st_dev: metadata.dev(), |
|
188 | 186 | st_mode: metadata.mode(), |
|
189 | 187 | st_nlink: metadata.nlink(), |
|
190 | 188 | st_size: metadata.size(), |
|
191 | 189 | st_mtime: metadata.mtime(), |
|
192 | 190 | st_ctime: metadata.ctime(), |
|
193 | 191 | } |
|
194 | 192 | } |
|
195 | 193 | } |
|
196 | 194 | |
|
197 | 195 | /// Returns the canonical path of `name`, given `cwd` and `root` |
|
198 | 196 | pub fn canonical_path( |
|
199 | 197 | root: impl AsRef<Path>, |
|
200 | 198 | cwd: impl AsRef<Path>, |
|
201 | 199 | name: impl AsRef<Path>, |
|
202 | 200 | ) -> Result<PathBuf, HgPathError> { |
|
203 | 201 | // TODO add missing normalization for other platforms |
|
204 | 202 | let root = root.as_ref(); |
|
205 | 203 | let cwd = cwd.as_ref(); |
|
206 | 204 | let name = name.as_ref(); |
|
207 | 205 | |
|
208 | 206 | let name = if !name.is_absolute() { |
|
209 | 207 | root.join(&cwd).join(&name) |
|
210 | 208 | } else { |
|
211 | 209 | name.to_owned() |
|
212 | 210 | }; |
|
213 | 211 | let auditor = PathAuditor::new(&root); |
|
214 | 212 | if name != root && name.starts_with(&root) { |
|
215 | 213 | let name = name.strip_prefix(&root).unwrap(); |
|
216 | 214 | auditor.audit_path(path_to_hg_path_buf(name)?)?; |
|
217 |
|
|
|
215 | Ok(name.to_owned()) | |
|
218 | 216 | } else if name == root { |
|
219 |
|
|
|
217 | Ok("".into()) | |
|
220 | 218 | } else { |
|
221 | 219 | // Determine whether `name' is in the hierarchy at or beneath `root', |
|
222 | 220 | // by iterating name=name.parent() until it returns `None` (can't |
|
223 | 221 | // check name == '/', because that doesn't work on windows). |
|
224 | 222 | let mut name = name.deref(); |
|
225 | 223 | let original_name = name.to_owned(); |
|
226 | 224 | loop { |
|
227 | 225 | let same = is_same_file(&name, &root).unwrap_or(false); |
|
228 | 226 | if same { |
|
229 | 227 | if name == original_name { |
|
230 | 228 | // `name` was actually the same as root (maybe a symlink) |
|
231 | 229 | return Ok("".into()); |
|
232 | 230 | } |
|
233 | 231 | // `name` is a symlink to root, so `original_name` is under |
|
234 | 232 | // root |
|
235 | 233 | let rel_path = original_name.strip_prefix(&name).unwrap(); |
|
236 | 234 | auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?; |
|
237 | 235 | return Ok(rel_path.to_owned()); |
|
238 | 236 | } |
|
239 | 237 | name = match name.parent() { |
|
240 | 238 | None => break, |
|
241 | 239 | Some(p) => p, |
|
242 | 240 | }; |
|
243 | 241 | } |
|
244 | 242 | // TODO hint to the user about using --cwd |
|
245 | 243 | // Bubble up the responsibility to Python for now |
|
246 | 244 | Err(HgPathError::NotUnderRoot { |
|
247 | 245 | path: original_name.to_owned(), |
|
248 | 246 | root: root.to_owned(), |
|
249 | 247 | }) |
|
250 | 248 | } |
|
251 | 249 | } |
|
252 | 250 | |
|
253 | 251 | #[cfg(test)] |
|
254 | 252 | mod tests { |
|
255 | 253 | use super::*; |
|
256 | 254 | use pretty_assertions::assert_eq; |
|
257 | 255 | |
|
258 | 256 | #[test] |
|
259 | 257 | fn find_dirs_some() { |
|
260 | 258 | let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz")); |
|
261 | 259 | assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar"))); |
|
262 | 260 | assert_eq!(dirs.next(), Some(HgPath::new(b"foo"))); |
|
263 | 261 | assert_eq!(dirs.next(), Some(HgPath::new(b""))); |
|
264 | 262 | assert_eq!(dirs.next(), None); |
|
265 | 263 | assert_eq!(dirs.next(), None); |
|
266 | 264 | } |
|
267 | 265 | |
|
268 | 266 | #[test] |
|
269 | 267 | fn find_dirs_empty() { |
|
270 | 268 | // looks weird, but mercurial.pathutil.finddirs(b"") yields b"" |
|
271 | 269 | let mut dirs = super::find_dirs(HgPath::new(b"")); |
|
272 | 270 | assert_eq!(dirs.next(), Some(HgPath::new(b""))); |
|
273 | 271 | assert_eq!(dirs.next(), None); |
|
274 | 272 | assert_eq!(dirs.next(), None); |
|
275 | 273 | } |
|
276 | 274 | |
|
277 | 275 | #[test] |
|
278 | 276 | fn test_find_dirs_with_base_some() { |
|
279 | 277 | let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz")); |
|
280 | 278 | assert_eq!( |
|
281 | 279 | dirs.next(), |
|
282 | 280 | Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz"))) |
|
283 | 281 | ); |
|
284 | 282 | assert_eq!( |
|
285 | 283 | dirs.next(), |
|
286 | 284 | Some((HgPath::new(b"foo"), HgPath::new(b"bar"))) |
|
287 | 285 | ); |
|
288 | 286 | assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo")))); |
|
289 | 287 | assert_eq!(dirs.next(), None); |
|
290 | 288 | assert_eq!(dirs.next(), None); |
|
291 | 289 | } |
|
292 | 290 | |
|
293 | 291 | #[test] |
|
294 | 292 | fn test_find_dirs_with_base_empty() { |
|
295 | 293 | let mut dirs = super::find_dirs_with_base(HgPath::new(b"")); |
|
296 | 294 | assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"")))); |
|
297 | 295 | assert_eq!(dirs.next(), None); |
|
298 | 296 | assert_eq!(dirs.next(), None); |
|
299 | 297 | } |
|
300 | 298 | |
|
301 | 299 | #[test] |
|
302 | 300 | fn test_canonical_path() { |
|
303 | 301 | let root = Path::new("/repo"); |
|
304 | 302 | let cwd = Path::new("/dir"); |
|
305 | 303 | let name = Path::new("filename"); |
|
306 | 304 | assert_eq!( |
|
307 | 305 | canonical_path(root, cwd, name), |
|
308 | 306 | Err(HgPathError::NotUnderRoot { |
|
309 | 307 | path: PathBuf::from("/dir/filename"), |
|
310 | 308 | root: root.to_path_buf() |
|
311 | 309 | }) |
|
312 | 310 | ); |
|
313 | 311 | |
|
314 | 312 | let root = Path::new("/repo"); |
|
315 | 313 | let cwd = Path::new("/"); |
|
316 | 314 | let name = Path::new("filename"); |
|
317 | 315 | assert_eq!( |
|
318 | 316 | canonical_path(root, cwd, name), |
|
319 | 317 | Err(HgPathError::NotUnderRoot { |
|
320 | 318 | path: PathBuf::from("/filename"), |
|
321 | 319 | root: root.to_path_buf() |
|
322 | 320 | }) |
|
323 | 321 | ); |
|
324 | 322 | |
|
325 | 323 | let root = Path::new("/repo"); |
|
326 | 324 | let cwd = Path::new("/"); |
|
327 | 325 | let name = Path::new("repo/filename"); |
|
328 | 326 | assert_eq!( |
|
329 | 327 | canonical_path(root, cwd, name), |
|
330 | 328 | Ok(PathBuf::from("filename")) |
|
331 | 329 | ); |
|
332 | 330 | |
|
333 | 331 | let root = Path::new("/repo"); |
|
334 | 332 | let cwd = Path::new("/repo"); |
|
335 | 333 | let name = Path::new("filename"); |
|
336 | 334 | assert_eq!( |
|
337 | 335 | canonical_path(root, cwd, name), |
|
338 | 336 | Ok(PathBuf::from("filename")) |
|
339 | 337 | ); |
|
340 | 338 | |
|
341 | 339 | let root = Path::new("/repo"); |
|
342 | 340 | let cwd = Path::new("/repo/subdir"); |
|
343 | 341 | let name = Path::new("filename"); |
|
344 | 342 | assert_eq!( |
|
345 | 343 | canonical_path(root, cwd, name), |
|
346 | 344 | Ok(PathBuf::from("subdir/filename")) |
|
347 | 345 | ); |
|
348 | 346 | } |
|
349 | 347 | |
|
350 | 348 | #[test] |
|
351 | 349 | fn test_canonical_path_not_rooted() { |
|
352 | 350 | use std::fs::create_dir; |
|
353 | 351 | use tempfile::tempdir; |
|
354 | 352 | |
|
355 | 353 | let base_dir = tempdir().unwrap(); |
|
356 | 354 | let base_dir_path = base_dir.path(); |
|
357 | 355 | let beneath_repo = base_dir_path.join("a"); |
|
358 | 356 | let root = base_dir_path.join("a/b"); |
|
359 | 357 | let out_of_repo = base_dir_path.join("c"); |
|
360 | 358 | let under_repo_symlink = out_of_repo.join("d"); |
|
361 | 359 | |
|
362 | 360 | create_dir(&beneath_repo).unwrap(); |
|
363 | 361 | create_dir(&root).unwrap(); |
|
364 | 362 | |
|
365 | 363 | // TODO make portable |
|
366 | 364 | std::os::unix::fs::symlink(&root, &out_of_repo).unwrap(); |
|
367 | 365 | |
|
368 | 366 | assert_eq!( |
|
369 | 367 | canonical_path(&root, Path::new(""), out_of_repo), |
|
370 | 368 | Ok(PathBuf::from("")) |
|
371 | 369 | ); |
|
372 | 370 | assert_eq!( |
|
373 | 371 | canonical_path(&root, Path::new(""), &beneath_repo), |
|
374 | 372 | Err(HgPathError::NotUnderRoot { |
|
375 | 373 | path: beneath_repo.to_owned(), |
|
376 | 374 | root: root.to_owned() |
|
377 | 375 | }) |
|
378 | 376 | ); |
|
379 | 377 | assert_eq!( |
|
380 | 378 | canonical_path(&root, Path::new(""), &under_repo_symlink), |
|
381 | 379 | Ok(PathBuf::from("d")) |
|
382 | 380 | ); |
|
383 | 381 | } |
|
384 | 382 | } |
@@ -1,768 +1,765 b'' | |||
|
1 | 1 | // hg_path.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use std::borrow::Borrow; |
|
9 | 9 | use std::ffi::{OsStr, OsString}; |
|
10 | 10 | use std::fmt; |
|
11 | 11 | use std::ops::Deref; |
|
12 | 12 | use std::path::{Path, PathBuf}; |
|
13 | 13 | |
|
14 | 14 | #[derive(Debug, Eq, PartialEq)] |
|
15 | 15 | pub enum HgPathError { |
|
16 | 16 | /// Bytes from the invalid `HgPath` |
|
17 | 17 | LeadingSlash(Vec<u8>), |
|
18 | 18 | ConsecutiveSlashes { |
|
19 | 19 | bytes: Vec<u8>, |
|
20 | 20 | second_slash_index: usize, |
|
21 | 21 | }, |
|
22 | 22 | ContainsNullByte { |
|
23 | 23 | bytes: Vec<u8>, |
|
24 | 24 | null_byte_index: usize, |
|
25 | 25 | }, |
|
26 | 26 | /// Bytes |
|
27 | 27 | DecodeError(Vec<u8>), |
|
28 | 28 | /// The rest come from audit errors |
|
29 | 29 | EndsWithSlash(HgPathBuf), |
|
30 | 30 | ContainsIllegalComponent(HgPathBuf), |
|
31 | 31 | /// Path is inside the `.hg` folder |
|
32 | 32 | InsideDotHg(HgPathBuf), |
|
33 | 33 | IsInsideNestedRepo { |
|
34 | 34 | path: HgPathBuf, |
|
35 | 35 | nested_repo: HgPathBuf, |
|
36 | 36 | }, |
|
37 | 37 | TraversesSymbolicLink { |
|
38 | 38 | path: HgPathBuf, |
|
39 | 39 | symlink: HgPathBuf, |
|
40 | 40 | }, |
|
41 | 41 | NotFsCompliant(HgPathBuf), |
|
42 | 42 | /// `path` is the smallest invalid path |
|
43 | 43 | NotUnderRoot { |
|
44 | 44 | path: PathBuf, |
|
45 | 45 | root: PathBuf, |
|
46 | 46 | }, |
|
47 | 47 | } |
|
48 | 48 | |
|
49 | 49 | impl ToString for HgPathError { |
|
50 | 50 | fn to_string(&self) -> String { |
|
51 | 51 | match self { |
|
52 | 52 | HgPathError::LeadingSlash(bytes) => { |
|
53 | 53 | format!("Invalid HgPath '{:?}': has a leading slash.", bytes) |
|
54 | 54 | } |
|
55 | 55 | HgPathError::ConsecutiveSlashes { |
|
56 | 56 | bytes, |
|
57 | 57 | second_slash_index: pos, |
|
58 | 58 | } => format!( |
|
59 | 59 | "Invalid HgPath '{:?}': consecutive slashes at pos {}.", |
|
60 | 60 | bytes, pos |
|
61 | 61 | ), |
|
62 | 62 | HgPathError::ContainsNullByte { |
|
63 | 63 | bytes, |
|
64 | 64 | null_byte_index: pos, |
|
65 | 65 | } => format!( |
|
66 | 66 | "Invalid HgPath '{:?}': contains null byte at pos {}.", |
|
67 | 67 | bytes, pos |
|
68 | 68 | ), |
|
69 | 69 | HgPathError::DecodeError(bytes) => { |
|
70 | 70 | format!("Invalid HgPath '{:?}': could not be decoded.", bytes) |
|
71 | 71 | } |
|
72 | 72 | HgPathError::EndsWithSlash(path) => { |
|
73 | 73 | format!("Audit failed for '{}': ends with a slash.", path) |
|
74 | 74 | } |
|
75 | 75 | HgPathError::ContainsIllegalComponent(path) => format!( |
|
76 | 76 | "Audit failed for '{}': contains an illegal component.", |
|
77 | 77 | path |
|
78 | 78 | ), |
|
79 | 79 | HgPathError::InsideDotHg(path) => format!( |
|
80 | 80 | "Audit failed for '{}': is inside the '.hg' folder.", |
|
81 | 81 | path |
|
82 | 82 | ), |
|
83 | 83 | HgPathError::IsInsideNestedRepo { |
|
84 | 84 | path, |
|
85 | 85 | nested_repo: nested, |
|
86 | 86 | } => format!( |
|
87 | 87 | "Audit failed for '{}': is inside a nested repository '{}'.", |
|
88 | 88 | path, nested |
|
89 | 89 | ), |
|
90 | 90 | HgPathError::TraversesSymbolicLink { path, symlink } => format!( |
|
91 | 91 | "Audit failed for '{}': traverses symbolic link '{}'.", |
|
92 | 92 | path, symlink |
|
93 | 93 | ), |
|
94 | 94 | HgPathError::NotFsCompliant(path) => format!( |
|
95 | 95 | "Audit failed for '{}': cannot be turned into a \ |
|
96 | 96 | filesystem path.", |
|
97 | 97 | path |
|
98 | 98 | ), |
|
99 | 99 | HgPathError::NotUnderRoot { path, root } => format!( |
|
100 | 100 | "Audit failed for '{}': not under root {}.", |
|
101 | 101 | path.display(), |
|
102 | 102 | root.display() |
|
103 | 103 | ), |
|
104 | 104 | } |
|
105 | 105 | } |
|
106 | 106 | } |
|
107 | 107 | |
|
108 | 108 | impl From<HgPathError> for std::io::Error { |
|
109 | 109 | fn from(e: HgPathError) -> Self { |
|
110 | 110 | std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()) |
|
111 | 111 | } |
|
112 | 112 | } |
|
113 | 113 | |
|
114 | 114 | /// This is a repository-relative path (or canonical path): |
|
115 | 115 | /// - no null characters |
|
116 | 116 | /// - `/` separates directories |
|
117 | 117 | /// - no consecutive slashes |
|
118 | 118 | /// - no leading slash, |
|
119 | 119 | /// - no `.` nor `..` of special meaning |
|
120 | 120 | /// - stored in repository and shared across platforms |
|
121 | 121 | /// |
|
122 | 122 | /// Note: there is no guarantee of any `HgPath` being well-formed at any point |
|
123 | 123 | /// in its lifetime for performance reasons and to ease ergonomics. It is |
|
124 | 124 | /// however checked using the `check_state` method before any file-system |
|
125 | 125 | /// operation. |
|
126 | 126 | /// |
|
127 | 127 | /// This allows us to be encoding-transparent as much as possible, until really |
|
128 | 128 | /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr` |
|
129 | 129 | /// or `Path`) whenever more complex operations are needed: |
|
130 | 130 | /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be |
|
131 | 131 | /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source |
|
132 | 132 | /// character encoding will be determined on a per-repository basis. |
|
133 | 133 | // |
|
134 | 134 | // FIXME: (adapted from a comment in the stdlib) |
|
135 | 135 | // `HgPath::new()` current implementation relies on `Slice` being |
|
136 | 136 | // layout-compatible with `[u8]`. |
|
137 | 137 | // When attribute privacy is implemented, `Slice` should be annotated as |
|
138 | 138 | // `#[repr(transparent)]`. |
|
139 | 139 | // Anyway, `Slice` representation and layout are considered implementation |
|
140 | 140 | // detail, are not documented and must not be relied upon. |
|
141 | 141 | #[derive(Eq, Ord, PartialEq, PartialOrd, Hash)] |
|
142 | 142 | pub struct HgPath { |
|
143 | 143 | inner: [u8], |
|
144 | 144 | } |
|
145 | 145 | |
|
146 | 146 | impl HgPath { |
|
147 | 147 | pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self { |
|
148 | 148 | unsafe { &*(s.as_ref() as *const [u8] as *const Self) } |
|
149 | 149 | } |
|
150 | 150 | pub fn is_empty(&self) -> bool { |
|
151 | 151 | self.inner.is_empty() |
|
152 | 152 | } |
|
153 | 153 | pub fn len(&self) -> usize { |
|
154 | 154 | self.inner.len() |
|
155 | 155 | } |
|
156 | 156 | fn to_hg_path_buf(&self) -> HgPathBuf { |
|
157 | 157 | HgPathBuf { |
|
158 | 158 | inner: self.inner.to_owned(), |
|
159 | 159 | } |
|
160 | 160 | } |
|
161 | 161 | pub fn bytes(&self) -> std::slice::Iter<u8> { |
|
162 | 162 | self.inner.iter() |
|
163 | 163 | } |
|
164 | 164 | pub fn to_ascii_uppercase(&self) -> HgPathBuf { |
|
165 | 165 | HgPathBuf::from(self.inner.to_ascii_uppercase()) |
|
166 | 166 | } |
|
167 | 167 | pub fn to_ascii_lowercase(&self) -> HgPathBuf { |
|
168 | 168 | HgPathBuf::from(self.inner.to_ascii_lowercase()) |
|
169 | 169 | } |
|
170 | 170 | pub fn as_bytes(&self) -> &[u8] { |
|
171 | 171 | &self.inner |
|
172 | 172 | } |
|
173 | 173 | pub fn contains(&self, other: u8) -> bool { |
|
174 | 174 | self.inner.contains(&other) |
|
175 | 175 | } |
|
176 | 176 | pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool { |
|
177 | 177 | self.inner.starts_with(needle.as_ref().as_bytes()) |
|
178 | 178 | } |
|
179 | 179 | pub fn trim_trailing_slash(&self) -> &Self { |
|
180 | 180 | Self::new(if self.inner.last() == Some(&b'/') { |
|
181 | 181 | &self.inner[..self.inner.len() - 1] |
|
182 | 182 | } else { |
|
183 | 183 | &self.inner[..] |
|
184 | 184 | }) |
|
185 | 185 | } |
|
186 | 186 | /// Returns a tuple of slices `(base, filename)` resulting from the split |
|
187 | 187 | /// at the rightmost `/`, if any. |
|
188 | 188 | /// |
|
189 | 189 | /// # Examples: |
|
190 | 190 | /// |
|
191 | 191 | /// ``` |
|
192 | 192 | /// use hg::utils::hg_path::HgPath; |
|
193 | 193 | /// |
|
194 | 194 | /// let path = HgPath::new(b"cool/hg/path").split_filename(); |
|
195 | 195 | /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path"))); |
|
196 | 196 | /// |
|
197 | 197 | /// let path = HgPath::new(b"pathwithoutsep").split_filename(); |
|
198 | 198 | /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep"))); |
|
199 | 199 | /// ``` |
|
200 | 200 | pub fn split_filename(&self) -> (&Self, &Self) { |
|
201 | 201 | match &self.inner.iter().rposition(|c| *c == b'/') { |
|
202 | 202 | None => (HgPath::new(""), &self), |
|
203 | 203 | Some(size) => ( |
|
204 | 204 | HgPath::new(&self.inner[..*size]), |
|
205 | 205 | HgPath::new(&self.inner[*size + 1..]), |
|
206 | 206 | ), |
|
207 | 207 | } |
|
208 | 208 | } |
|
209 | 209 | pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf { |
|
210 | 210 | let mut inner = self.inner.to_owned(); |
|
211 |
if inner. |
|
|
211 | if !inner.is_empty() && inner.last() != Some(&b'/') { | |
|
212 | 212 | inner.push(b'/'); |
|
213 | 213 | } |
|
214 | 214 | inner.extend(other.as_ref().bytes()); |
|
215 | 215 | HgPathBuf::from_bytes(&inner) |
|
216 | 216 | } |
|
217 | 217 | pub fn parent(&self) -> &Self { |
|
218 | 218 | let inner = self.as_bytes(); |
|
219 | 219 | HgPath::new(match inner.iter().rposition(|b| *b == b'/') { |
|
220 | 220 | Some(pos) => &inner[..pos], |
|
221 | 221 | None => &[], |
|
222 | 222 | }) |
|
223 | 223 | } |
|
224 | 224 | /// Given a base directory, returns the slice of `self` relative to the |
|
225 | 225 | /// base directory. If `base` is not a directory (does not end with a |
|
226 | 226 | /// `b'/'`), returns `None`. |
|
227 | 227 | pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> { |
|
228 | 228 | let base = base.as_ref(); |
|
229 | 229 | if base.is_empty() { |
|
230 | 230 | return Some(self); |
|
231 | 231 | } |
|
232 | 232 | let is_dir = base.as_bytes().ends_with(b"/"); |
|
233 | 233 | if is_dir && self.starts_with(base) { |
|
234 | 234 | Some(Self::new(&self.inner[base.len()..])) |
|
235 | 235 | } else { |
|
236 | 236 | None |
|
237 | 237 | } |
|
238 | 238 | } |
|
239 | 239 | |
|
240 | 240 | #[cfg(windows)] |
|
241 | 241 | /// Copied from the Python stdlib's `os.path.splitdrive` implementation. |
|
242 | 242 | /// |
|
243 | 243 | /// Split a pathname into drive/UNC sharepoint and relative path |
|
244 | 244 | /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may |
|
245 | 245 | /// be empty. |
|
246 | 246 | /// |
|
247 | 247 | /// If you assign |
|
248 | 248 | /// result = split_drive(p) |
|
249 | 249 | /// It is always true that: |
|
250 | 250 | /// result[0] + result[1] == p |
|
251 | 251 | /// |
|
252 | 252 | /// If the path contained a drive letter, drive_or_unc will contain |
|
253 | 253 | /// everything up to and including the colon. |
|
254 | 254 | /// e.g. split_drive("c:/dir") returns ("c:", "/dir") |
|
255 | 255 | /// |
|
256 | 256 | /// If the path contained a UNC path, the drive_or_unc will contain the |
|
257 | 257 | /// host name and share up to but not including the fourth directory |
|
258 | 258 | /// separator character. |
|
259 | 259 | /// e.g. split_drive("//host/computer/dir") returns ("//host/computer", |
|
260 | 260 | /// "/dir") |
|
261 | 261 | /// |
|
262 | 262 | /// Paths cannot contain both a drive letter and a UNC path. |
|
263 | 263 | pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) { |
|
264 | 264 | let bytes = self.as_bytes(); |
|
265 | 265 | let is_sep = |b| std::path::is_separator(b as char); |
|
266 | 266 | |
|
267 | 267 | if self.len() < 2 { |
|
268 | 268 | (HgPath::new(b""), &self) |
|
269 | 269 | } else if is_sep(bytes[0]) |
|
270 | 270 | && is_sep(bytes[1]) |
|
271 | 271 | && (self.len() == 2 || !is_sep(bytes[2])) |
|
272 | 272 | { |
|
273 | 273 | // Is a UNC path: |
|
274 | 274 | // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path |
|
275 | 275 | // \\machine\mountpoint\directory\etc\... |
|
276 | 276 | // directory ^^^^^^^^^^^^^^^ |
|
277 | 277 | |
|
278 | 278 | let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b)); |
|
279 | 279 | let mountpoint_start_index = if let Some(i) = machine_end_index { |
|
280 | 280 | i + 2 |
|
281 | 281 | } else { |
|
282 | 282 | return (HgPath::new(b""), &self); |
|
283 | 283 | }; |
|
284 | 284 | |
|
285 | 285 | match bytes[mountpoint_start_index + 1..] |
|
286 | 286 | .iter() |
|
287 | 287 | .position(|b| is_sep(*b)) |
|
288 | 288 | { |
|
289 | 289 | // A UNC path can't have two slashes in a row |
|
290 | 290 | // (after the initial two) |
|
291 | 291 | Some(0) => (HgPath::new(b""), &self), |
|
292 | 292 | Some(i) => { |
|
293 | 293 | let (a, b) = |
|
294 | 294 | bytes.split_at(mountpoint_start_index + 1 + i); |
|
295 | 295 | (HgPath::new(a), HgPath::new(b)) |
|
296 | 296 | } |
|
297 | 297 | None => (&self, HgPath::new(b"")), |
|
298 | 298 | } |
|
299 | 299 | } else if bytes[1] == b':' { |
|
300 | 300 | // Drive path c:\directory |
|
301 | 301 | let (a, b) = bytes.split_at(2); |
|
302 | 302 | (HgPath::new(a), HgPath::new(b)) |
|
303 | 303 | } else { |
|
304 | 304 | (HgPath::new(b""), &self) |
|
305 | 305 | } |
|
306 | 306 | } |
|
307 | 307 | |
|
308 | 308 | #[cfg(unix)] |
|
309 | 309 | /// Split a pathname into drive and path. On Posix, drive is always empty. |
|
310 | 310 | pub fn split_drive(&self) -> (&HgPath, &HgPath) { |
|
311 | 311 | (HgPath::new(b""), &self) |
|
312 | 312 | } |
|
313 | 313 | |
|
314 | 314 | /// Checks for errors in the path, short-circuiting at the first one. |
|
315 | 315 | /// This generates fine-grained errors useful for debugging. |
|
316 | 316 | /// To simply check if the path is valid during tests, use `is_valid`. |
|
317 | 317 | pub fn check_state(&self) -> Result<(), HgPathError> { |
|
318 |
if self. |
|
|
318 | if self.is_empty() { | |
|
319 | 319 | return Ok(()); |
|
320 | 320 | } |
|
321 | 321 | let bytes = self.as_bytes(); |
|
322 | 322 | let mut previous_byte = None; |
|
323 | 323 | |
|
324 | 324 | if bytes[0] == b'/' { |
|
325 | 325 | return Err(HgPathError::LeadingSlash(bytes.to_vec())); |
|
326 | 326 | } |
|
327 | 327 | for (index, byte) in bytes.iter().enumerate() { |
|
328 | 328 | match byte { |
|
329 | 329 | 0 => { |
|
330 | 330 | return Err(HgPathError::ContainsNullByte { |
|
331 | 331 | bytes: bytes.to_vec(), |
|
332 | 332 | null_byte_index: index, |
|
333 | 333 | }) |
|
334 | 334 | } |
|
335 | 335 | b'/' => { |
|
336 | 336 | if previous_byte.is_some() && previous_byte == Some(b'/') { |
|
337 | 337 | return Err(HgPathError::ConsecutiveSlashes { |
|
338 | 338 | bytes: bytes.to_vec(), |
|
339 | 339 | second_slash_index: index, |
|
340 | 340 | }); |
|
341 | 341 | } |
|
342 | 342 | } |
|
343 | 343 | _ => (), |
|
344 | 344 | }; |
|
345 | 345 | previous_byte = Some(*byte); |
|
346 | 346 | } |
|
347 | 347 | Ok(()) |
|
348 | 348 | } |
|
349 | 349 | |
|
350 | 350 | #[cfg(test)] |
|
351 | 351 | /// Only usable during tests to force developers to handle invalid states |
|
352 | 352 | fn is_valid(&self) -> bool { |
|
353 | 353 | self.check_state().is_ok() |
|
354 | 354 | } |
|
355 | 355 | } |
|
356 | 356 | |
|
357 | 357 | impl fmt::Debug for HgPath { |
|
358 | 358 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
359 | 359 | write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner)) |
|
360 | 360 | } |
|
361 | 361 | } |
|
362 | 362 | |
|
363 | 363 | impl fmt::Display for HgPath { |
|
364 | 364 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
365 | 365 | write!(f, "{}", String::from_utf8_lossy(&self.inner)) |
|
366 | 366 | } |
|
367 | 367 | } |
|
368 | 368 | |
|
369 | #[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Hash)] | |
|
369 | #[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash)] | |
|
370 | 370 | pub struct HgPathBuf { |
|
371 | 371 | inner: Vec<u8>, |
|
372 | 372 | } |
|
373 | 373 | |
|
374 | 374 | impl HgPathBuf { |
|
375 | 375 | pub fn new() -> Self { |
|
376 | Self { inner: Vec::new() } | |
|
376 | Default::default() | |
|
377 | 377 | } |
|
378 | 378 | pub fn push(&mut self, byte: u8) { |
|
379 | 379 | self.inner.push(byte); |
|
380 | 380 | } |
|
381 | 381 | pub fn from_bytes(s: &[u8]) -> HgPathBuf { |
|
382 | 382 | HgPath::new(s).to_owned() |
|
383 | 383 | } |
|
384 | 384 | pub fn into_vec(self) -> Vec<u8> { |
|
385 | 385 | self.inner |
|
386 | 386 | } |
|
387 | pub fn as_ref(&self) -> &[u8] { | |
|
388 | self.inner.as_ref() | |
|
389 | } | |
|
390 | 387 | } |
|
391 | 388 | |
|
392 | 389 | impl fmt::Debug for HgPathBuf { |
|
393 | 390 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
394 | 391 | write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner)) |
|
395 | 392 | } |
|
396 | 393 | } |
|
397 | 394 | |
|
398 | 395 | impl fmt::Display for HgPathBuf { |
|
399 | 396 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
|
400 | 397 | write!(f, "{}", String::from_utf8_lossy(&self.inner)) |
|
401 | 398 | } |
|
402 | 399 | } |
|
403 | 400 | |
|
404 | 401 | impl Deref for HgPathBuf { |
|
405 | 402 | type Target = HgPath; |
|
406 | 403 | |
|
407 | 404 | #[inline] |
|
408 | 405 | fn deref(&self) -> &HgPath { |
|
409 | 406 | &HgPath::new(&self.inner) |
|
410 | 407 | } |
|
411 | 408 | } |
|
412 | 409 | |
|
413 | 410 | impl From<Vec<u8>> for HgPathBuf { |
|
414 | 411 | fn from(vec: Vec<u8>) -> Self { |
|
415 | 412 | Self { inner: vec } |
|
416 | 413 | } |
|
417 | 414 | } |
|
418 | 415 | |
|
419 | 416 | impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf { |
|
420 | 417 | fn from(s: &T) -> HgPathBuf { |
|
421 | 418 | s.as_ref().to_owned() |
|
422 | 419 | } |
|
423 | 420 | } |
|
424 | 421 | |
|
425 | 422 | impl Into<Vec<u8>> for HgPathBuf { |
|
426 | 423 | fn into(self) -> Vec<u8> { |
|
427 | 424 | self.inner |
|
428 | 425 | } |
|
429 | 426 | } |
|
430 | 427 | |
|
431 | 428 | impl Borrow<HgPath> for HgPathBuf { |
|
432 | 429 | fn borrow(&self) -> &HgPath { |
|
433 | 430 | &HgPath::new(self.as_bytes()) |
|
434 | 431 | } |
|
435 | 432 | } |
|
436 | 433 | |
|
437 | 434 | impl ToOwned for HgPath { |
|
438 | 435 | type Owned = HgPathBuf; |
|
439 | 436 | |
|
440 | 437 | fn to_owned(&self) -> HgPathBuf { |
|
441 | 438 | self.to_hg_path_buf() |
|
442 | 439 | } |
|
443 | 440 | } |
|
444 | 441 | |
|
445 | 442 | impl AsRef<HgPath> for HgPath { |
|
446 | 443 | fn as_ref(&self) -> &HgPath { |
|
447 | 444 | self |
|
448 | 445 | } |
|
449 | 446 | } |
|
450 | 447 | |
|
451 | 448 | impl AsRef<HgPath> for HgPathBuf { |
|
452 | 449 | fn as_ref(&self) -> &HgPath { |
|
453 | 450 | self |
|
454 | 451 | } |
|
455 | 452 | } |
|
456 | 453 | |
|
457 | 454 | impl Extend<u8> for HgPathBuf { |
|
458 | 455 | fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) { |
|
459 | 456 | self.inner.extend(iter); |
|
460 | 457 | } |
|
461 | 458 | } |
|
462 | 459 | |
|
463 | 460 | /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is |
|
464 | 461 | /// implemented, these conversion utils will have to work differently depending |
|
465 | 462 | /// on the repository encoding: either `UTF-8` or `MBCS`. |
|
466 | 463 | |
|
467 | 464 | pub fn hg_path_to_os_string<P: AsRef<HgPath>>( |
|
468 | 465 | hg_path: P, |
|
469 | 466 | ) -> Result<OsString, HgPathError> { |
|
470 | 467 | hg_path.as_ref().check_state()?; |
|
471 | 468 | let os_str; |
|
472 | 469 | #[cfg(unix)] |
|
473 | 470 | { |
|
474 | 471 | use std::os::unix::ffi::OsStrExt; |
|
475 | 472 | os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes()); |
|
476 | 473 | } |
|
477 | 474 | // TODO Handle other platforms |
|
478 | 475 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). |
|
479 | 476 | Ok(os_str.to_os_string()) |
|
480 | 477 | } |
|
481 | 478 | |
|
482 | 479 | pub fn hg_path_to_path_buf<P: AsRef<HgPath>>( |
|
483 | 480 | hg_path: P, |
|
484 | 481 | ) -> Result<PathBuf, HgPathError> { |
|
485 | 482 | Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf()) |
|
486 | 483 | } |
|
487 | 484 | |
|
488 | 485 | pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>( |
|
489 | 486 | os_string: S, |
|
490 | 487 | ) -> Result<HgPathBuf, HgPathError> { |
|
491 | 488 | let buf; |
|
492 | 489 | #[cfg(unix)] |
|
493 | 490 | { |
|
494 | 491 | use std::os::unix::ffi::OsStrExt; |
|
495 | 492 | buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes()); |
|
496 | 493 | } |
|
497 | 494 | // TODO Handle other platforms |
|
498 | 495 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). |
|
499 | 496 | |
|
500 | 497 | buf.check_state()?; |
|
501 | 498 | Ok(buf) |
|
502 | 499 | } |
|
503 | 500 | |
|
504 | 501 | pub fn path_to_hg_path_buf<P: AsRef<Path>>( |
|
505 | 502 | path: P, |
|
506 | 503 | ) -> Result<HgPathBuf, HgPathError> { |
|
507 | 504 | let buf; |
|
508 | 505 | let os_str = path.as_ref().as_os_str(); |
|
509 | 506 | #[cfg(unix)] |
|
510 | 507 | { |
|
511 | 508 | use std::os::unix::ffi::OsStrExt; |
|
512 | 509 | buf = HgPathBuf::from_bytes(&os_str.as_bytes()); |
|
513 | 510 | } |
|
514 | 511 | // TODO Handle other platforms |
|
515 | 512 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). |
|
516 | 513 | |
|
517 | 514 | buf.check_state()?; |
|
518 | 515 | Ok(buf) |
|
519 | 516 | } |
|
520 | 517 | |
|
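The conversion helpers above currently only have a Unix code path (see the TODOs about other platforms). A minimal usage sketch, not part of the patch and assuming the hg-core crate is available as the `hg` dependency:

    use hg::utils::hg_path::{hg_path_to_os_string, os_string_to_hg_path_buf, HgPath};

    fn main() {
        let path = HgPath::new(b"a/b/c");
        // On Unix the conversion is a byte-for-byte reinterpretation of the path.
        let os = hg_path_to_os_string(path).expect("path passes check_state");
        let back = os_string_to_hg_path_buf(&os).expect("path passes check_state");
        assert_eq!(back.as_bytes(), b"a/b/c");
    }
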
521 | 518 | #[cfg(test)] |
|
522 | 519 | mod tests { |
|
523 | 520 | use super::*; |
|
524 | 521 | use pretty_assertions::assert_eq; |
|
525 | 522 | |
|
526 | 523 | #[test] |
|
527 | 524 | fn test_path_states() { |
|
528 | 525 | assert_eq!( |
|
529 | 526 | Err(HgPathError::LeadingSlash(b"/".to_vec())), |
|
530 | 527 | HgPath::new(b"/").check_state() |
|
531 | 528 | ); |
|
532 | 529 | assert_eq!( |
|
533 | 530 | Err(HgPathError::ConsecutiveSlashes { |
|
534 | 531 | bytes: b"a/b//c".to_vec(), |
|
535 | 532 | second_slash_index: 4 |
|
536 | 533 | }), |
|
537 | 534 | HgPath::new(b"a/b//c").check_state() |
|
538 | 535 | ); |
|
539 | 536 | assert_eq!( |
|
540 | 537 | Err(HgPathError::ContainsNullByte { |
|
541 | 538 | bytes: b"a/b/\0c".to_vec(), |
|
542 | 539 | null_byte_index: 4 |
|
543 | 540 | }), |
|
544 | 541 | HgPath::new(b"a/b/\0c").check_state() |
|
545 | 542 | ); |
|
546 | 543 | // TODO test HgPathError::DecodeError for the Windows implementation. |
|
547 | 544 | assert_eq!(true, HgPath::new(b"").is_valid()); |
|
548 | 545 | assert_eq!(true, HgPath::new(b"a/b/c").is_valid()); |
|
549 | 546 | // Backslashes in paths are not significant, but allowed |
|
550 | 547 | assert_eq!(true, HgPath::new(br"a\b/c").is_valid()); |
|
551 | 548 | // Dots in paths are not significant, but allowed |
|
552 | 549 | assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid()); |
|
553 | 550 | assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid()); |
|
554 | 551 | } |
|
555 | 552 | |
|
556 | 553 | #[test] |
|
557 | 554 | fn test_iter() { |
|
558 | 555 | let path = HgPath::new(b"a"); |
|
559 | 556 | let mut iter = path.bytes(); |
|
560 | 557 | assert_eq!(Some(&b'a'), iter.next()); |
|
561 | 558 | assert_eq!(None, iter.next_back()); |
|
562 | 559 | assert_eq!(None, iter.next()); |
|
563 | 560 | |
|
564 | 561 | let path = HgPath::new(b"a"); |
|
565 | 562 | let mut iter = path.bytes(); |
|
566 | 563 | assert_eq!(Some(&b'a'), iter.next_back()); |
|
567 | 564 | assert_eq!(None, iter.next_back()); |
|
568 | 565 | assert_eq!(None, iter.next()); |
|
569 | 566 | |
|
570 | 567 | let path = HgPath::new(b"abc"); |
|
571 | 568 | let mut iter = path.bytes(); |
|
572 | 569 | assert_eq!(Some(&b'a'), iter.next()); |
|
573 | 570 | assert_eq!(Some(&b'c'), iter.next_back()); |
|
574 | 571 | assert_eq!(Some(&b'b'), iter.next_back()); |
|
575 | 572 | assert_eq!(None, iter.next_back()); |
|
576 | 573 | assert_eq!(None, iter.next()); |
|
577 | 574 | |
|
578 | 575 | let path = HgPath::new(b"abc"); |
|
579 | 576 | let mut iter = path.bytes(); |
|
580 | 577 | assert_eq!(Some(&b'a'), iter.next()); |
|
581 | 578 | assert_eq!(Some(&b'b'), iter.next()); |
|
582 | 579 | assert_eq!(Some(&b'c'), iter.next()); |
|
583 | 580 | assert_eq!(None, iter.next_back()); |
|
584 | 581 | assert_eq!(None, iter.next()); |
|
585 | 582 | |
|
586 | 583 | let path = HgPath::new(b"abc"); |
|
587 | 584 | let iter = path.bytes(); |
|
588 | 585 | let mut vec = Vec::new(); |
|
589 | 586 | vec.extend(iter); |
|
590 | 587 | assert_eq!(vec![b'a', b'b', b'c'], vec); |
|
591 | 588 | |
|
592 | 589 | let path = HgPath::new(b"abc"); |
|
593 | 590 | let mut iter = path.bytes(); |
|
594 | 591 | assert_eq!(Some(2), iter.rposition(|c| *c == b'c')); |
|
595 | 592 | |
|
596 | 593 | let path = HgPath::new(b"abc"); |
|
597 | 594 | let mut iter = path.bytes(); |
|
598 | 595 | assert_eq!(None, iter.rposition(|c| *c == b'd')); |
|
599 | 596 | } |
|
600 | 597 | |
|
601 | 598 | #[test] |
|
602 | 599 | fn test_join() { |
|
603 | 600 | let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b")); |
|
604 | 601 | assert_eq!(b"a/b", path.as_bytes()); |
|
605 | 602 | |
|
606 | 603 | let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c")); |
|
607 | 604 | assert_eq!(b"a/b/c", path.as_bytes()); |
|
608 | 605 | |
|
609 | 606 | // No leading slash if empty before join |
|
610 | 607 | let path = HgPathBuf::new().join(HgPath::new(b"b/c")); |
|
611 | 608 | assert_eq!(b"b/c", path.as_bytes()); |
|
612 | 609 | |
|
613 | 610 | // The leading slash is an invalid representation of an `HgPath`, but |
|
614 | 611 | // it can happen. This creates another invalid representation of |
|
615 | 612 | // consecutive bytes. |
|
616 | 613 | // TODO What should be done in this case? Should we silently remove |
|
617 | 614 | // the extra slash? Should we change the signature to a problematic |
|
618 | 615 | // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and |
|
619 | 616 | // let the error happen upon filesystem interaction? |
|
620 | 617 | let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b")); |
|
621 | 618 | assert_eq!(b"a//b", path.as_bytes()); |
|
622 | 619 | let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b")); |
|
623 | 620 | assert_eq!(b"a//b", path.as_bytes()); |
|
624 | 621 | } |
|
625 | 622 | |
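A small illustration of the question raised in the TODO above (a sketch, not part of the patch): the double slash produced by joining onto a leading-slash path is not rejected at join time, but it does fail validation later.

    use hg::utils::hg_path::{HgPath, HgPathBuf};

    fn main() {
        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(joined.as_bytes(), b"a//b");
        // check_state reports ConsecutiveSlashes for the joined path.
        assert!(joined.check_state().is_err());
    }
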
|
626 | 623 | #[test] |
|
627 | 624 | fn test_relative_to() { |
|
628 | 625 | let path = HgPath::new(b""); |
|
629 | 626 | let base = HgPath::new(b""); |
|
630 | 627 | assert_eq!(Some(path), path.relative_to(base)); |
|
631 | 628 | |
|
632 | 629 | let path = HgPath::new(b"path"); |
|
633 | 630 | let base = HgPath::new(b""); |
|
634 | 631 | assert_eq!(Some(path), path.relative_to(base)); |
|
635 | 632 | |
|
636 | 633 | let path = HgPath::new(b"a"); |
|
637 | 634 | let base = HgPath::new(b"b"); |
|
638 | 635 | assert_eq!(None, path.relative_to(base)); |
|
639 | 636 | |
|
640 | 637 | let path = HgPath::new(b"a/b"); |
|
641 | 638 | let base = HgPath::new(b"a"); |
|
642 | 639 | assert_eq!(None, path.relative_to(base)); |
|
643 | 640 | |
|
644 | 641 | let path = HgPath::new(b"a/b"); |
|
645 | 642 | let base = HgPath::new(b"a/"); |
|
646 | 643 | assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base)); |
|
647 | 644 | |
|
648 | 645 | let path = HgPath::new(b"nested/path/to/b"); |
|
649 | 646 | let base = HgPath::new(b"nested/path/"); |
|
650 | 647 | assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base)); |
|
651 | 648 | |
|
652 | 649 | let path = HgPath::new(b"ends/with/dir/"); |
|
653 | 650 | let base = HgPath::new(b"ends/"); |
|
654 | 651 | assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base)); |
|
655 | 652 | } |
|
656 | 653 | |
|
657 | 654 | #[test] |
|
658 | 655 | #[cfg(unix)] |
|
659 | 656 | fn test_split_drive() { |
|
660 | 657 | // Taken from the Python stdlib's tests |
|
661 | 658 | assert_eq!( |
|
662 | 659 | HgPath::new(br"/foo/bar").split_drive(), |
|
663 | 660 | (HgPath::new(b""), HgPath::new(br"/foo/bar")) |
|
664 | 661 | ); |
|
665 | 662 | assert_eq!( |
|
666 | 663 | HgPath::new(br"foo:bar").split_drive(), |
|
667 | 664 | (HgPath::new(b""), HgPath::new(br"foo:bar")) |
|
668 | 665 | ); |
|
669 | 666 | assert_eq!( |
|
670 | 667 | HgPath::new(br":foo:bar").split_drive(), |
|
671 | 668 | (HgPath::new(b""), HgPath::new(br":foo:bar")) |
|
672 | 669 | ); |
|
673 | 670 | // Also try NT paths; should not split them |
|
674 | 671 | assert_eq!( |
|
675 | 672 | HgPath::new(br"c:\foo\bar").split_drive(), |
|
676 | 673 | (HgPath::new(b""), HgPath::new(br"c:\foo\bar")) |
|
677 | 674 | ); |
|
678 | 675 | assert_eq!( |
|
679 | 676 | HgPath::new(b"c:/foo/bar").split_drive(), |
|
680 | 677 | (HgPath::new(b""), HgPath::new(br"c:/foo/bar")) |
|
681 | 678 | ); |
|
682 | 679 | assert_eq!( |
|
683 | 680 | HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(), |
|
684 | 681 | ( |
|
685 | 682 | HgPath::new(b""), |
|
686 | 683 | HgPath::new(br"\\conky\mountpoint\foo\bar") |
|
687 | 684 | ) |
|
688 | 685 | ); |
|
689 | 686 | } |
|
690 | 687 | |
|
691 | 688 | #[test] |
|
692 | 689 | #[cfg(windows)] |
|
693 | 690 | fn test_split_drive() { |
|
694 | 691 | assert_eq!( |
|
695 | 692 | HgPath::new(br"c:\foo\bar").split_drive(), |
|
696 | 693 | (HgPath::new(br"c:"), HgPath::new(br"\foo\bar")) |
|
697 | 694 | ); |
|
698 | 695 | assert_eq!( |
|
699 | 696 | HgPath::new(b"c:/foo/bar").split_drive(), |
|
700 | 697 | (HgPath::new(br"c:"), HgPath::new(br"/foo/bar")) |
|
701 | 698 | ); |
|
702 | 699 | assert_eq!( |
|
703 | 700 | HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(), |
|
704 | 701 | ( |
|
705 | 702 | HgPath::new(br"\\conky\mountpoint"), |
|
706 | 703 | HgPath::new(br"\foo\bar") |
|
707 | 704 | ) |
|
708 | 705 | ); |
|
709 | 706 | assert_eq!( |
|
710 | 707 | HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(), |
|
711 | 708 | ( |
|
712 | 709 | HgPath::new(br"//conky/mountpoint"), |
|
713 | 710 | HgPath::new(br"/foo/bar") |
|
714 | 711 | ) |
|
715 | 712 | ); |
|
716 | 713 | assert_eq!( |
|
717 | 714 | HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(), |
|
718 | 715 | ( |
|
719 | 716 | HgPath::new(br""), |
|
720 | 717 | HgPath::new(br"\\\conky\mountpoint\foo\bar") |
|
721 | 718 | ) |
|
722 | 719 | ); |
|
723 | 720 | assert_eq!( |
|
724 | 721 | HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(), |
|
725 | 722 | ( |
|
726 | 723 | HgPath::new(br""), |
|
727 | 724 | HgPath::new(br"///conky/mountpoint/foo/bar") |
|
728 | 725 | ) |
|
729 | 726 | ); |
|
730 | 727 | assert_eq!( |
|
731 | 728 | HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(), |
|
732 | 729 | ( |
|
733 | 730 | HgPath::new(br""), |
|
734 | 731 | HgPath::new(br"\\conky\\mountpoint\foo\bar") |
|
735 | 732 | ) |
|
736 | 733 | ); |
|
737 | 734 | assert_eq!( |
|
738 | 735 | HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(), |
|
739 | 736 | ( |
|
740 | 737 | HgPath::new(br""), |
|
741 | 738 | HgPath::new(br"//conky//mountpoint/foo/bar") |
|
742 | 739 | ) |
|
743 | 740 | ); |
|
744 | 741 | // UNC part containing U+0130 |
|
745 | 742 | assert_eq!( |
|
746 | 743 | HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(), |
|
747 | 744 | ( |
|
748 | 745 | HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"), |
|
749 | 746 | HgPath::new(br"/foo/bar") |
|
750 | 747 | ) |
|
751 | 748 | ); |
|
752 | 749 | } |
|
753 | 750 | |
|
754 | 751 | #[test] |
|
755 | 752 | fn test_parent() { |
|
756 | 753 | let path = HgPath::new(b""); |
|
757 | 754 | assert_eq!(path.parent(), path); |
|
758 | 755 | |
|
759 | 756 | let path = HgPath::new(b"a"); |
|
760 | 757 | assert_eq!(path.parent(), HgPath::new(b"")); |
|
761 | 758 | |
|
762 | 759 | let path = HgPath::new(b"a/b"); |
|
763 | 760 | assert_eq!(path.parent(), HgPath::new(b"a")); |
|
764 | 761 | |
|
765 | 762 | let path = HgPath::new(b"a/other/b"); |
|
766 | 763 | assert_eq!(path.parent(), HgPath::new(b"a/other")); |
|
767 | 764 | } |
|
768 | 765 | } |
@@ -1,232 +1,232 b'' | |||
|
1 | 1 | // path_auditor.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 |
|
4 | 4 | // Raphaël Gomès <rgomes@octobus.net>, |
|
5 | 5 | // |
|
6 | 6 | // This software may be used and distributed according to the terms of the |
|
7 | 7 | // GNU General Public License version 2 or any later version. |
|
8 | 8 | |
|
9 | 9 | use crate::utils::{ |
|
10 | 10 | files::lower_clean, |
|
11 | 11 | find_slice_in_slice, |
|
12 | 12 | hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError}, |
|
13 | 13 | }; |
|
14 | 14 | use std::collections::HashSet; |
|
15 | 15 | use std::path::{Path, PathBuf}; |
|
16 | 16 | use std::sync::{Mutex, RwLock}; |
|
17 | 17 | |
|
18 | 18 | /// Ensures that a path is valid for use in the repository i.e. does not use |
|
19 | 19 | /// any banned components, does not traverse a symlink, etc. |
|
20 | 20 | #[derive(Debug, Default)] |
|
21 | 21 | pub struct PathAuditor { |
|
22 | 22 | audited: Mutex<HashSet<HgPathBuf>>, |
|
23 | 23 | audited_dirs: RwLock<HashSet<HgPathBuf>>, |
|
24 | 24 | root: PathBuf, |
|
25 | 25 | } |
|
26 | 26 | |
|
27 | 27 | impl PathAuditor { |
|
28 | 28 | pub fn new(root: impl AsRef<Path>) -> Self { |
|
29 | 29 | Self { |
|
30 | 30 | root: root.as_ref().to_owned(), |
|
31 | 31 | ..Default::default() |
|
32 | 32 | } |
|
33 | 33 | } |
|
34 | 34 | pub fn audit_path( |
|
35 | 35 | &self, |
|
36 | 36 | path: impl AsRef<HgPath>, |
|
37 | 37 | ) -> Result<(), HgPathError> { |
|
38 | 38 | // TODO windows "localpath" normalization |
|
39 | 39 | let path = path.as_ref(); |
|
40 | 40 | if path.is_empty() { |
|
41 | 41 | return Ok(()); |
|
42 | 42 | } |
|
43 | 43 | // TODO case normalization |
|
44 | 44 | if self.audited.lock().unwrap().contains(path) { |
|
45 | 45 | return Ok(()); |
|
46 | 46 | } |
|
47 | 47 | // AIX ignores "/" at end of path, others raise EISDIR. |
|
48 | 48 | let last_byte = path.as_bytes()[path.len() - 1]; |
|
49 | 49 | if last_byte == b'/' || last_byte == b'\\' { |
|
50 | 50 | return Err(HgPathError::EndsWithSlash(path.to_owned())); |
|
51 | 51 | } |
|
52 | 52 | let parts: Vec<_> = path |
|
53 | 53 | .as_bytes() |
|
54 | 54 | .split(|b| std::path::is_separator(*b as char)) |
|
55 | 55 | .collect(); |
|
56 | 56 | |
|
57 | 57 | let first_component = lower_clean(parts[0]); |
|
58 | 58 | let first_component = first_component.as_slice(); |
|
59 | 59 | if !path.split_drive().0.is_empty() |
|
60 | 60 | || (first_component == b".hg" |
|
61 | 61 | || first_component == b".hg." |
|
62 | 62 | || first_component == b"") |
|
63 | 63 | || parts.iter().any(|c| c == b"..") |
|
64 | 64 | { |
|
65 | 65 | return Err(HgPathError::InsideDotHg(path.to_owned())); |
|
66 | 66 | } |
|
67 | 67 | |
|
68 | 68 | // Windows shortname aliases |
|
69 | 69 | for part in parts.iter() { |
|
70 | 70 | if part.contains(&b'~') { |
|
71 | 71 | let mut split = part.splitn(2, |b| *b == b'~'); |
|
72 | 72 | let first = |
|
73 | 73 | split.next().unwrap().to_owned().to_ascii_uppercase(); |
|
74 | 74 | let last = split.next().unwrap(); |
|
75 | 75 | if last.iter().all(u8::is_ascii_digit) |
|
76 | 76 | && (first == b"HG" || first == b"HG8B6C") |
|
77 | 77 | { |
|
78 | 78 | return Err(HgPathError::ContainsIllegalComponent( |
|
79 | 79 | path.to_owned(), |
|
80 | 80 | )); |
|
81 | 81 | } |
|
82 | 82 | } |
|
83 | 83 | } |
|
84 | 84 | let lower_path = lower_clean(path.as_bytes()); |
|
85 | 85 | if find_slice_in_slice(&lower_path, b".hg").is_some() { |
|
86 | 86 | let lower_parts: Vec<_> = path |
|
87 | 87 | .as_bytes() |
|
88 | 88 | .split(|b| std::path::is_separator(*b as char)) |
|
89 | 89 | .collect(); |
|
90 | 90 | for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() { |
|
91 | 91 | if let Some(pos) = lower_parts[1..] |
|
92 | 92 | .iter() |
|
93 | 93 | .position(|part| part == &pattern.as_slice()) |
|
94 | 94 | { |
|
95 | 95 | let base = lower_parts[..=pos] |
|
96 | 96 | .iter() |
|
97 | 97 | .fold(HgPathBuf::new(), |acc, p| { |
|
98 | 98 | acc.join(HgPath::new(p)) |
|
99 | 99 | }); |
|
100 | 100 | return Err(HgPathError::IsInsideNestedRepo { |
|
101 | 101 | path: path.to_owned(), |
|
102 | 102 | nested_repo: base, |
|
103 | 103 | }); |
|
104 | 104 | } |
|
105 | 105 | } |
|
106 | 106 | } |
|
107 | 107 | |
|
108 | 108 | let parts = &parts[..parts.len().saturating_sub(1)]; |
|
109 | 109 | |
|
110 | 110 | // We don't want to add "foo/bar/baz" to `audited_dirs` before checking |
|
111 | 111 | // if there's a "foo/.hg" directory. This also means we won't |
|
112 | 112 | // accidentally traverse a symlink into some other filesystem (which |
|
113 | 113 | // is potentially expensive to access). |
|
114 | 114 | for index in 0..parts.len() { |
|
115 | let prefix = &parts[..index + 1].join(&b'/'); | |
|
115 | let prefix = &parts[..=index].join(&b'/'); | |
|
116 | 116 | let prefix = HgPath::new(prefix); |
|
117 | 117 | if self.audited_dirs.read().unwrap().contains(prefix) { |
|
118 | 118 | continue; |
|
119 | 119 | } |
|
120 | 120 | self.check_filesystem(&prefix, &path)?; |
|
121 | 121 | self.audited_dirs.write().unwrap().insert(prefix.to_owned()); |
|
122 | 122 | } |
|
123 | 123 | |
|
124 | 124 | self.audited.lock().unwrap().insert(path.to_owned()); |
|
125 | 125 | |
|
126 | 126 | Ok(()) |
|
127 | 127 | } |
|
128 | 128 | |
|
129 | 129 | pub fn check_filesystem( |
|
130 | 130 | &self, |
|
131 | 131 | prefix: impl AsRef<HgPath>, |
|
132 | 132 | path: impl AsRef<HgPath>, |
|
133 | 133 | ) -> Result<(), HgPathError> { |
|
134 | 134 | let prefix = prefix.as_ref(); |
|
135 | 135 | let path = path.as_ref(); |
|
136 | 136 | let current_path = self.root.join( |
|
137 | 137 | hg_path_to_path_buf(prefix) |
|
138 | 138 | .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?, |
|
139 | 139 | ); |
|
140 | 140 | match std::fs::symlink_metadata(&current_path) { |
|
141 | 141 | Err(e) => { |
|
142 | 142 | // EINVAL can be raised as invalid path syntax under win32. |
|
143 | 143 | if e.kind() != std::io::ErrorKind::NotFound |
|
144 | 144 | && e.kind() != std::io::ErrorKind::InvalidInput |
|
145 | 145 | && e.raw_os_error() != Some(20) |
|
146 | 146 | { |
|
147 | 147 | // Rust does not yet have an `ErrorKind` for |
|
148 | 148 | // `NotADirectory` (errno 20) |
|
149 | 149 | // It happens if the dirstate contains `foo/bar` and |
|
150 | 150 | // foo is not a directory |
|
151 | 151 | return Err(HgPathError::NotFsCompliant(path.to_owned())); |
|
152 | 152 | } |
|
153 | 153 | } |
|
154 | 154 | Ok(meta) => { |
|
155 | 155 | if meta.file_type().is_symlink() { |
|
156 | 156 | return Err(HgPathError::TraversesSymbolicLink { |
|
157 | 157 | path: path.to_owned(), |
|
158 | 158 | symlink: prefix.to_owned(), |
|
159 | 159 | }); |
|
160 | 160 | } |
|
161 | 161 | if meta.file_type().is_dir() |
|
162 | 162 | && current_path.join(".hg").is_dir() |
|
163 | 163 | { |
|
164 | 164 | return Err(HgPathError::IsInsideNestedRepo { |
|
165 | 165 | path: path.to_owned(), |
|
166 | 166 | nested_repo: prefix.to_owned(), |
|
167 | 167 | }); |
|
168 | 168 | } |
|
169 | 169 | } |
|
170 | 170 | }; |
|
171 | 171 | |
|
172 | 172 | Ok(()) |
|
173 | 173 | } |
|
174 | 174 | |
|
175 | 175 | pub fn check(&self, path: impl AsRef<HgPath>) -> bool { |
|
176 | 176 | self.audit_path(path).is_ok() |
|
177 | 177 | } |
|
178 | 178 | } |
|
179 | 179 | |
|
180 | 180 | #[cfg(test)] |
|
181 | 181 | mod tests { |
|
182 | 182 | use super::*; |
|
183 | 183 | use crate::utils::files::get_path_from_bytes; |
|
184 | 184 | use crate::utils::hg_path::path_to_hg_path_buf; |
|
185 | 185 | |
|
186 | 186 | #[test] |
|
187 | 187 | fn test_path_auditor() { |
|
188 | 188 | let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp")); |
|
189 | 189 | |
|
190 | 190 | let path = HgPath::new(b".hg/00changelog.i"); |
|
191 | 191 | assert_eq!( |
|
192 | 192 | auditor.audit_path(path), |
|
193 | 193 | Err(HgPathError::InsideDotHg(path.to_owned())) |
|
194 | 194 | ); |
|
195 | 195 | let path = HgPath::new(b"this/is/nested/.hg/thing.txt"); |
|
196 | 196 | assert_eq!( |
|
197 | 197 | auditor.audit_path(path), |
|
198 | 198 | Err(HgPathError::IsInsideNestedRepo { |
|
199 | 199 | path: path.to_owned(), |
|
200 | 200 | nested_repo: HgPathBuf::from_bytes(b"this/is/nested") |
|
201 | 201 | }) |
|
202 | 202 | ); |
|
203 | 203 | |
|
204 | 204 | use std::fs::{create_dir, File}; |
|
205 | 205 | use tempfile::tempdir; |
|
206 | 206 | |
|
207 | 207 | let base_dir = tempdir().unwrap(); |
|
208 | 208 | let base_dir_path = base_dir.path(); |
|
209 | 209 | let a = base_dir_path.join("a"); |
|
210 | 210 | let b = base_dir_path.join("b"); |
|
211 | 211 | create_dir(&a).unwrap(); |
|
212 | 212 | let in_a_path = a.join("in_a"); |
|
213 | 213 | File::create(in_a_path).unwrap(); |
|
214 | 214 | |
|
215 | 215 | // TODO make portable |
|
216 | 216 | std::os::unix::fs::symlink(&a, &b).unwrap(); |
|
217 | 217 | |
|
218 | 218 | let buf = b.join("in_a").components().skip(2).collect::<PathBuf>(); |
|
219 | 219 | eprintln!("buf: {}", buf.display()); |
|
220 | 220 | let path = path_to_hg_path_buf(buf).unwrap(); |
|
221 | 221 | assert_eq!( |
|
222 | 222 | auditor.audit_path(&path), |
|
223 | 223 | Err(HgPathError::TraversesSymbolicLink { |
|
224 | 224 | path: path, |
|
225 | 225 | symlink: path_to_hg_path_buf( |
|
226 | 226 | b.components().skip(2).collect::<PathBuf>() |
|
227 | 227 | ) |
|
228 | 228 | .unwrap() |
|
229 | 229 | }) |
|
230 | 230 | ); |
|
231 | 231 | } |
|
232 | 232 | } |
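A usage sketch for the auditor (not part of the patch; it assumes the type is exported as `hg::utils::path_auditor::PathAuditor`, mirroring the module layout above):

    use hg::utils::hg_path::HgPath;
    use hg::utils::path_auditor::PathAuditor;

    fn main() {
        let auditor = PathAuditor::new("/tmp/repo");
        // Anything under `.hg` (or inside a nested repository) is refused.
        assert!(!auditor.check(HgPath::new(b".hg/store/data")));
        // Ordinary repository paths pass, provided no prefix is a symlink.
        assert!(auditor.check(HgPath::new(b"src/main.rs")));
    }
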
@@ -1,179 +1,176 b'' | |||
|
1 | 1 | // cindex.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2018 Georges Racinet <gracinet@anybox.fr> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Bindings to use the Index defined by the parsers C extension |
|
9 | 9 | //! |
|
10 | 10 | //! Ideally, we should use an Index entirely implemented in Rust, |
|
11 | 11 | //! but this will take some time to get there. |
|
12 | 12 | |
|
13 | 13 | use cpython::{ |
|
14 | 14 | exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult, |
|
15 | 15 | PyTuple, Python, PythonObject, |
|
16 | 16 | }; |
|
17 | 17 | use hg::revlog::{Node, RevlogIndex}; |
|
18 | 18 | use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION}; |
|
19 | 19 | use libc::c_int; |
|
20 | 20 | |
|
21 | 21 | const REVLOG_CABI_VERSION: c_int = 2; |
|
22 | 22 | |
|
23 | 23 | #[repr(C)] |
|
24 | 24 | pub struct Revlog_CAPI { |
|
25 | 25 | abi_version: c_int, |
|
26 | 26 | index_length: |
|
27 | 27 | unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> c_int, |
|
28 | 28 | index_node: unsafe extern "C" fn( |
|
29 | 29 | index: *mut revlog_capi::RawPyObject, |
|
30 | 30 | rev: c_int, |
|
31 | 31 | ) -> *const Node, |
|
32 | 32 | index_parents: unsafe extern "C" fn( |
|
33 | 33 | index: *mut revlog_capi::RawPyObject, |
|
34 | 34 | rev: c_int, |
|
35 | 35 | ps: *mut [c_int; 2], |
|
36 | 36 | ) -> c_int, |
|
37 | 37 | } |
|
38 | 38 | |
|
39 | 39 | py_capsule!( |
|
40 | 40 | from mercurial.cext.parsers import revlog_CAPI |
|
41 | 41 | as revlog_capi for Revlog_CAPI); |
|
42 | 42 | |
|
43 | 43 | /// A `Graph` backed up by objects and functions from revlog.c |
|
44 | 44 | /// |
|
45 | 45 | /// This implementation of the `Graph` trait, relies on (pointers to) |
|
46 | 46 | /// - the C index object (`index` member) |
|
47 | 47 | /// - the `index_get_parents()` function (`parents` member) |
|
48 | 48 | /// |
|
49 | 49 | /// # Safety |
|
50 | 50 | /// |
|
51 | 51 | /// The C index itself is mutable, and this Rust exposition is **not |
|
52 | 52 | /// protected by the GIL**, meaning that this construct isn't safe with respect |
|
53 | 53 | /// to Python threads. |
|
54 | 54 | /// |
|
55 | 55 | /// All callers of this `Index` must acquire the GIL and must not release it |
|
56 | 56 | /// while working. |
|
57 | 57 | /// |
|
58 | 58 | /// # TODO find a solution to make it GIL safe again. |
|
59 | 59 | /// |
|
60 | 60 | /// This is non trivial, and can wait until we have a clearer picture with |
|
61 | 61 | /// more Rust Mercurial constructs. |
|
62 | 62 | /// |
|
63 | 63 | /// One possibility would be to a `GILProtectedIndex` wrapper enclosing |
|
64 | 64 | /// a `Python<'p>` marker and have it be the one implementing the |
|
65 | 65 | /// `Graph` trait, but this would mean the `Graph` implementor would become |
|
66 | 66 | /// likely to change between subsequent method invocations of the `hg-core` |
|
67 | 67 | /// objects (a serious change of the `hg-core` API): |
|
68 | 68 | /// either exposing ways to mutate the `Graph`, or making it a non persistent |
|
69 | 69 | /// parameter in the relevant methods that need one. |
|
70 | 70 | /// |
|
71 | 71 | /// Another possibility would be to introduce an abstract lock handle into |
|
72 | 72 | /// the core API, that would be tied to `GILGuard` / `Python<'p>` |
|
73 | 73 | /// in the case of the `cpython` crate bindings yet could leave room for other |
|
74 | 74 | /// mechanisms in other contexts. |
|
75 | 75 | pub struct Index { |
|
76 | 76 | index: PyObject, |
|
77 | 77 | capi: &'static Revlog_CAPI, |
|
78 | 78 | } |
|
79 | 79 | |
|
80 | 80 | impl Index { |
|
81 | 81 | pub fn new(py: Python, index: PyObject) -> PyResult<Self> { |
|
82 | 82 | let capi = unsafe { revlog_capi::retrieve(py)? }; |
|
83 | 83 | if capi.abi_version != REVLOG_CABI_VERSION { |
|
84 | 84 | return Err(PyErr::new::<ImportError, _>( |
|
85 | 85 | py, |
|
86 | 86 | format!( |
|
87 | 87 | "ABI version mismatch: the C ABI revlog version {} \ |
|
88 | 88 | does not match the {} expected by Rust hg-cpython", |
|
89 | 89 | capi.abi_version, REVLOG_CABI_VERSION |
|
90 | 90 | ), |
|
91 | 91 | )); |
|
92 | 92 | } |
|
93 | Ok(Index { | |
|
94 | index: index, | |
|
95 | capi: capi, | |
|
96 | }) | |
|
93 | Ok(Index { index, capi }) | |
|
97 | 94 | } |
|
98 | 95 | |
|
99 | 96 | /// return a reference to the CPython Index object in this Struct |
|
100 | 97 | pub fn inner(&self) -> &PyObject { |
|
101 | 98 | &self.index |
|
102 | 99 | } |
|
103 | 100 | |
|
104 | 101 | pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> { |
|
105 | 102 | self.index.call_method( |
|
106 | 103 | py, |
|
107 | 104 | "append", |
|
108 | 105 | PyTuple::new(py, &[tup.into_object()]), |
|
109 | 106 | None, |
|
110 | 107 | ) |
|
111 | 108 | } |
|
112 | 109 | } |
|
113 | 110 | |
|
114 | 111 | impl Clone for Index { |
|
115 | 112 | fn clone(&self) -> Self { |
|
116 | 113 | let guard = Python::acquire_gil(); |
|
117 | 114 | Index { |
|
118 | 115 | index: self.index.clone_ref(guard.python()), |
|
119 | 116 | capi: self.capi, |
|
120 | 117 | } |
|
121 | 118 | } |
|
122 | 119 | } |
|
123 | 120 | |
|
124 | 121 | impl PyClone for Index { |
|
125 | 122 | fn clone_ref(&self, py: Python) -> Self { |
|
126 | 123 | Index { |
|
127 | 124 | index: self.index.clone_ref(py), |
|
128 | 125 | capi: self.capi, |
|
129 | 126 | } |
|
130 | 127 | } |
|
131 | 128 | } |
|
132 | 129 | |
|
133 | 130 | impl Graph for Index { |
|
134 | 131 | /// wrap a call to the C extern parents function |
|
135 | 132 | fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { |
|
136 | 133 | if rev == WORKING_DIRECTORY_REVISION { |
|
137 | 134 | return Err(GraphError::WorkingDirectoryUnsupported); |
|
138 | 135 | } |
|
139 | 136 | let mut res: [c_int; 2] = [0; 2]; |
|
140 | 137 | let code = unsafe { |
|
141 | 138 | (self.capi.index_parents)( |
|
142 | 139 | self.index.as_ptr(), |
|
143 | 140 | rev as c_int, |
|
144 | 141 | &mut res as *mut [c_int; 2], |
|
145 | 142 | ) |
|
146 | 143 | }; |
|
147 | 144 | match code { |
|
148 | 145 | 0 => Ok(res), |
|
149 | 146 | _ => Err(GraphError::ParentOutOfRange(rev)), |
|
150 | 147 | } |
|
151 | 148 | } |
|
152 | 149 | } |
|
153 | 150 | |
|
154 | 151 | impl RevlogIndex for Index { |
|
155 | 152 | /// Note C return type is Py_ssize_t (hence signed), but we shall |
|
156 | 153 | /// force it to unsigned, because it's a length |
|
157 | 154 | fn len(&self) -> usize { |
|
158 | 155 | unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize } |
|
159 | 156 | } |
|
160 | 157 | |
|
161 | fn node<'a>(&'a self, rev: Revision) -> Option<&'a Node> { | |
|
158 | fn node(&self, rev: Revision) -> Option<&Node> { | |
|
162 | 159 | let raw = unsafe { |
|
163 | 160 | (self.capi.index_node)(self.index.as_ptr(), rev as c_int) |
|
164 | 161 | }; |
|
165 | 162 | if raw.is_null() { |
|
166 | 163 | None |
|
167 | 164 | } else { |
|
168 | 165 | // TODO it would be much better for the C layer to give us |
|
169 | 166 | // a length, since the hash length will change in the near |
|
170 | 167 | // future, but that's probably out of scope for the nodemap |
|
171 | 168 | // patch series. |
|
172 | 169 | // |
|
173 | 170 | // The root of that unsafety relies in the signature of |
|
174 | 171 | // `capi.index_node()` itself: returning a `Node` pointer |
|
175 | 172 | // whereas it's a `char *` in the C counterpart. |
|
176 | 173 | Some(unsafe { &*raw }) |
|
177 | 174 | } |
|
178 | 175 | } |
|
179 | 176 | } |
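Because `Index` implements the `hg::Graph` trait, hg-core code can stay agnostic of the C backing. A generic sketch (not part of the patch), assuming `NULL_REVISION` is re-exported at the crate root alongside the other graph types:

    use hg::{Graph, GraphError, Revision, NULL_REVISION};

    /// Count the non-null parents of `rev` for any Graph implementation.
    fn count_parents(graph: &impl Graph, rev: Revision) -> Result<usize, GraphError> {
        let parents = graph.parents(rev)?;
        Ok(parents.iter().filter(|&&p| p != NULL_REVISION).count())
    }
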
@@ -1,118 +1,118 b'' | |||
|
1 | 1 | // copymap.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the |
|
9 | 9 | //! `hg-core` package. |
|
10 | 10 | |
|
11 | 11 | use cpython::{ |
|
12 | 12 | PyBytes, PyClone, PyDict, PyObject, PyResult, Python, UnsafePyLeaked, |
|
13 | 13 | }; |
|
14 | 14 | use std::cell::RefCell; |
|
15 | 15 | |
|
16 | 16 | use crate::dirstate::dirstate_map::DirstateMap; |
|
17 | 17 | use hg::{utils::hg_path::HgPathBuf, CopyMapIter}; |
|
18 | 18 | |
|
19 | 19 | py_class!(pub class CopyMap |py| { |
|
20 | 20 | data dirstate_map: DirstateMap; |
|
21 | 21 | |
|
22 | 22 | def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> { |
|
23 | 23 | (*self.dirstate_map(py)).copymapgetitem(py, key) |
|
24 | 24 | } |
|
25 | 25 | |
|
26 | 26 | def __len__(&self) -> PyResult<usize> { |
|
27 | 27 | self.dirstate_map(py).copymaplen(py) |
|
28 | 28 | } |
|
29 | 29 | |
|
30 | 30 | def __contains__(&self, key: PyObject) -> PyResult<bool> { |
|
31 | 31 | self.dirstate_map(py).copymapcontains(py, key) |
|
32 | 32 | } |
|
33 | 33 | |
|
34 | 34 | def get( |
|
35 | 35 | &self, |
|
36 | 36 | key: PyObject, |
|
37 | 37 | default: Option<PyObject> = None |
|
38 | 38 | ) -> PyResult<Option<PyObject>> { |
|
39 | 39 | self.dirstate_map(py).copymapget(py, key, default) |
|
40 | 40 | } |
|
41 | 41 | |
|
42 | 42 | def pop( |
|
43 | 43 | &self, |
|
44 | 44 | key: PyObject, |
|
45 | 45 | default: Option<PyObject> = None |
|
46 | 46 | ) -> PyResult<Option<PyObject>> { |
|
47 | 47 | self.dirstate_map(py).copymappop(py, key, default) |
|
48 | 48 | } |
|
49 | 49 | |
|
50 | 50 | def __iter__(&self) -> PyResult<CopyMapKeysIterator> { |
|
51 | 51 | self.dirstate_map(py).copymapiter(py) |
|
52 | 52 | } |
|
53 | 53 | |
|
54 | 54 | // Python's `dict()` builtin works with either a subclass of dict |
|
55 | 55 | // or an abstract mapping. Said mapping needs to implement `__getitem__` |
|
56 | 56 | // and `keys`. |
|
57 | 57 | def keys(&self) -> PyResult<CopyMapKeysIterator> { |
|
58 | 58 | self.dirstate_map(py).copymapiter(py) |
|
59 | 59 | } |
|
60 | 60 | |
|
61 | 61 | def items(&self) -> PyResult<CopyMapItemsIterator> { |
|
62 | 62 | self.dirstate_map(py).copymapitemsiter(py) |
|
63 | 63 | } |
|
64 | 64 | |
|
65 | 65 | def iteritems(&self) -> PyResult<CopyMapItemsIterator> { |
|
66 | 66 | self.dirstate_map(py).copymapitemsiter(py) |
|
67 | 67 | } |
|
68 | 68 | |
|
69 | 69 | def __setitem__( |
|
70 | 70 | &self, |
|
71 | 71 | key: PyObject, |
|
72 | 72 | item: PyObject |
|
73 | 73 | ) -> PyResult<()> { |
|
74 | 74 | self.dirstate_map(py).copymapsetitem(py, key, item)?; |
|
75 | 75 | Ok(()) |
|
76 | 76 | } |
|
77 | 77 | |
|
78 | 78 | def copy(&self) -> PyResult<PyDict> { |
|
79 | 79 | self.dirstate_map(py).copymapcopy(py) |
|
80 | 80 | } |
|
81 | 81 | |
|
82 | 82 | }); |
|
83 | 83 | |
|
84 | 84 | impl CopyMap { |
|
85 | 85 | pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> { |
|
86 | 86 | Self::create_instance(py, dm) |
|
87 | 87 | } |
|
88 | 88 | fn translate_key( |
|
89 | 89 | py: Python, |
|
90 | 90 | res: (&HgPathBuf, &HgPathBuf), |
|
91 | 91 | ) -> PyResult<Option<PyBytes>> { |
|
92 | Ok(Some(PyBytes::new(py, res.0.as_ref()))) | |
|
92 | Ok(Some(PyBytes::new(py, res.0.as_bytes()))) | |
|
93 | 93 | } |
|
94 | 94 | fn translate_key_value( |
|
95 | 95 | py: Python, |
|
96 | 96 | res: (&HgPathBuf, &HgPathBuf), |
|
97 | 97 | ) -> PyResult<Option<(PyBytes, PyBytes)>> { |
|
98 | 98 | let (k, v) = res; |
|
99 | 99 | Ok(Some(( |
|
100 | PyBytes::new(py, k.as_ref()), | |
|
101 | PyBytes::new(py, v.as_ref()), | |
|
100 | PyBytes::new(py, k.as_bytes()), | |
|
101 | PyBytes::new(py, v.as_bytes()), | |
|
102 | 102 | ))) |
|
103 | 103 | } |
|
104 | 104 | } |
|
105 | 105 | |
|
106 | 106 | py_shared_iterator!( |
|
107 | 107 | CopyMapKeysIterator, |
|
108 | 108 | UnsafePyLeaked<CopyMapIter<'static>>, |
|
109 | 109 | CopyMap::translate_key, |
|
110 | 110 | Option<PyBytes> |
|
111 | 111 | ); |
|
112 | 112 | |
|
113 | 113 | py_shared_iterator!( |
|
114 | 114 | CopyMapItemsIterator, |
|
115 | 115 | UnsafePyLeaked<CopyMapIter<'static>>, |
|
116 | 116 | CopyMap::translate_key_value, |
|
117 | 117 | Option<(PyBytes, PyBytes)> |
|
118 | 118 | ); |
@@ -1,140 +1,140 b'' | |||
|
1 | 1 | // dirs_multiset.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the |
|
9 | 9 | //! `hg-core` package. |
|
10 | 10 | |
|
11 | 11 | use std::cell::RefCell; |
|
12 | 12 | use std::convert::TryInto; |
|
13 | 13 | |
|
14 | 14 | use cpython::{ |
|
15 | 15 | exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult, |
|
16 | 16 | Python, UnsafePyLeaked, |
|
17 | 17 | }; |
|
18 | 18 | |
|
19 | 19 | use crate::dirstate::extract_dirstate; |
|
20 | 20 | use hg::{ |
|
21 | 21 | utils::hg_path::{HgPath, HgPathBuf}, |
|
22 | 22 | DirsMultiset, DirsMultisetIter, DirstateMapError, DirstateParseError, |
|
23 | 23 | EntryState, |
|
24 | 24 | }; |
|
25 | 25 | |
|
26 | 26 | py_class!(pub class Dirs |py| { |
|
27 | 27 | @shared data inner: DirsMultiset; |
|
28 | 28 | |
|
29 | 29 | // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes |
|
30 | 30 | // a `list`) |
|
31 | 31 | def __new__( |
|
32 | 32 | _cls, |
|
33 | 33 | map: PyObject, |
|
34 | 34 | skip: Option<PyObject> = None |
|
35 | 35 | ) -> PyResult<Self> { |
|
36 | 36 | let mut skip_state: Option<EntryState> = None; |
|
37 | 37 | if let Some(skip) = skip { |
|
38 | 38 | skip_state = Some( |
|
39 | 39 | skip.extract::<PyBytes>(py)?.data(py)[0] |
|
40 | 40 | .try_into() |
|
41 | 41 | .map_err(|e: DirstateParseError| { |
|
42 | 42 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
43 | 43 | })?, |
|
44 | 44 | ); |
|
45 | 45 | } |
|
46 | 46 | let inner = if let Ok(map) = map.cast_as::<PyDict>(py) { |
|
47 | 47 | let dirstate = extract_dirstate(py, &map)?; |
|
48 | 48 | DirsMultiset::from_dirstate(&dirstate, skip_state) |
|
49 | 49 | .map_err(|e| { |
|
50 | 50 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
51 | 51 | })? |
|
52 | 52 | } else { |
|
53 | 53 | let map: Result<Vec<HgPathBuf>, PyErr> = map |
|
54 | 54 | .iter(py)? |
|
55 | 55 | .map(|o| { |
|
56 | 56 | Ok(HgPathBuf::from_bytes( |
|
57 | 57 | o?.extract::<PyBytes>(py)?.data(py), |
|
58 | 58 | )) |
|
59 | 59 | }) |
|
60 | 60 | .collect(); |
|
61 | 61 | DirsMultiset::from_manifest(&map?) |
|
62 | 62 | .map_err(|e| { |
|
63 | 63 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
64 | 64 | })? |
|
65 | 65 | }; |
|
66 | 66 | |
|
67 | 67 | Self::create_instance(py, inner) |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | def addpath(&self, path: PyObject) -> PyResult<PyObject> { |
|
71 | 71 | self.inner(py).borrow_mut().add_path( |
|
72 | 72 | HgPath::new(path.extract::<PyBytes>(py)?.data(py)), |
|
73 | 73 | ).and(Ok(py.None())).or_else(|e| { |
|
74 | 74 | match e { |
|
75 | 75 | DirstateMapError::EmptyPath => { |
|
76 | 76 | Ok(py.None()) |
|
77 | 77 | }, |
|
78 | 78 | e => { |
|
79 | 79 | Err(PyErr::new::<exc::ValueError, _>( |
|
80 | 80 | py, |
|
81 | 81 | e.to_string(), |
|
82 | 82 | )) |
|
83 | 83 | } |
|
84 | 84 | } |
|
85 | 85 | }) |
|
86 | 86 | } |
|
87 | 87 | |
|
88 | 88 | def delpath(&self, path: PyObject) -> PyResult<PyObject> { |
|
89 | 89 | self.inner(py).borrow_mut().delete_path( |
|
90 | 90 | HgPath::new(path.extract::<PyBytes>(py)?.data(py)), |
|
91 | 91 | ) |
|
92 | 92 | .and(Ok(py.None())) |
|
93 | 93 | .or_else(|e| { |
|
94 | 94 | match e { |
|
95 | 95 | DirstateMapError::EmptyPath => { |
|
96 | 96 | Ok(py.None()) |
|
97 | 97 | }, |
|
98 | 98 | e => { |
|
99 | 99 | Err(PyErr::new::<exc::ValueError, _>( |
|
100 | 100 | py, |
|
101 | 101 | e.to_string(), |
|
102 | 102 | )) |
|
103 | 103 | } |
|
104 | 104 | } |
|
105 | 105 | }) |
|
106 | 106 | } |
|
107 | 107 | def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> { |
|
108 | 108 | let leaked_ref = self.inner(py).leak_immutable(); |
|
109 | 109 | DirsMultisetKeysIterator::from_inner( |
|
110 | 110 | py, |
|
111 | 111 | unsafe { leaked_ref.map(py, |o| o.iter()) }, |
|
112 | 112 | ) |
|
113 | 113 | } |
|
114 | 114 | |
|
115 | 115 | def __contains__(&self, item: PyObject) -> PyResult<bool> { |
|
116 | 116 | Ok(self.inner(py).borrow().contains(HgPath::new( |
|
117 | 117 | item.extract::<PyBytes>(py)?.data(py).as_ref(), |
|
118 | 118 | ))) |
|
119 | 119 | } |
|
120 | 120 | }); |
|
121 | 121 | |
|
122 | 122 | impl Dirs { |
|
123 | 123 | pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> { |
|
124 | 124 | Self::create_instance(py, d) |
|
125 | 125 | } |
|
126 | 126 | |
|
127 | 127 | fn translate_key( |
|
128 | 128 | py: Python, |
|
129 | 129 | res: &HgPathBuf, |
|
130 | 130 | ) -> PyResult<Option<PyBytes>> { |
|
131 | Ok(Some(PyBytes::new(py, res.as_ref()))) | |
|
131 | Ok(Some(PyBytes::new(py, res.as_bytes()))) | |
|
132 | 132 | } |
|
133 | 133 | } |
|
134 | 134 | |
|
135 | 135 | py_shared_iterator!( |
|
136 | 136 | DirsMultisetKeysIterator, |
|
137 | 137 | UnsafePyLeaked<DirsMultisetIter<'static>>, |
|
138 | 138 | Dirs::translate_key, |
|
139 | 139 | Option<PyBytes> |
|
140 | 140 | ); |
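The underlying type is usable from pure Rust as well; a sketch (not part of the patch) of the `from_manifest` path taken by the flat-iterator branch above:

    use hg::utils::hg_path::{HgPath, HgPathBuf};
    use hg::DirsMultiset;

    fn main() {
        let files = vec![
            HgPathBuf::from_bytes(b"a/b/c.txt"),
            HgPathBuf::from_bytes(b"a/d.txt"),
        ];
        let dirs = DirsMultiset::from_manifest(&files).expect("valid paths");
        // Every ancestor directory of a tracked file ends up in the multiset.
        assert!(dirs.contains(HgPath::new(b"a")));
        assert!(dirs.contains(HgPath::new(b"a/b")));
    }
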
@@ -1,586 +1,590 b'' | |||
|
1 | 1 | // dirstate_map.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Bindings for the `hg::dirstate::dirstate_map` file provided by the |
|
9 | 9 | //! `hg-core` package. |
|
10 | 10 | |
|
11 | 11 | use std::cell::{Ref, RefCell}; |
|
12 | 12 | use std::convert::TryInto; |
|
13 | 13 | use std::time::Duration; |
|
14 | 14 | |
|
15 | 15 | use cpython::{ |
|
16 | 16 | exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList, |
|
17 | 17 | PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, |
|
18 | 18 | UnsafePyLeaked, |
|
19 | 19 | }; |
|
20 | 20 | |
|
21 | 21 | use crate::{ |
|
22 | 22 | dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator}, |
|
23 | 23 | dirstate::non_normal_entries::{ |
|
24 | 24 | NonNormalEntries, NonNormalEntriesIterator, |
|
25 | 25 | }, |
|
26 | 26 | dirstate::{dirs_multiset::Dirs, make_dirstate_tuple}, |
|
27 | 27 | }; |
|
28 | 28 | use hg::{ |
|
29 | 29 | utils::hg_path::{HgPath, HgPathBuf}, |
|
30 | 30 | DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap, |
|
31 | 31 | DirstateMapError, DirstateParents, DirstateParseError, EntryState, |
|
32 | 32 | StateMapIter, PARENT_SIZE, |
|
33 | 33 | }; |
|
34 | 34 | |
|
35 | 35 | // TODO |
|
36 | 36 | // This object needs to share references to multiple members of its Rust |
|
37 | 37 | // inner struct, namely `copy_map`, `dirs` and `all_dirs`. |
|
38 | 38 | // Right now `CopyMap` is done, but it needs to have an explicit reference |
|
39 | 39 | // to `RustDirstateMap` which itself needs to have an encapsulation for |
|
40 | 40 | // every method in `CopyMap` (copymapcopy, etc.). |
|
41 | 41 | // This is ugly and hard to maintain. |
|
42 | 42 | // The same logic applies to `dirs` and `all_dirs`, however the `Dirs` |
|
43 | 43 | // `py_class!` is already implemented and does not mention |
|
44 | 44 | // `RustDirstateMap`, rightfully so. |
|
45 | 45 | // All attributes also have to have a separate refcount data attribute for |
|
46 | 46 | // leaks, with all methods that go along for reference sharing. |
|
47 | 47 | py_class!(pub class DirstateMap |py| { |
|
48 | 48 | @shared data inner: RustDirstateMap; |
|
49 | 49 | |
|
50 | 50 | def __new__(_cls, _root: PyObject) -> PyResult<Self> { |
|
51 | 51 | let inner = RustDirstateMap::default(); |
|
52 | 52 | Self::create_instance(py, inner) |
|
53 | 53 | } |
|
54 | 54 | |
|
55 | 55 | def clear(&self) -> PyResult<PyObject> { |
|
56 | 56 | self.inner(py).borrow_mut().clear(); |
|
57 | 57 | Ok(py.None()) |
|
58 | 58 | } |
|
59 | 59 | |
|
60 | 60 | def get( |
|
61 | 61 | &self, |
|
62 | 62 | key: PyObject, |
|
63 | 63 | default: Option<PyObject> = None |
|
64 | 64 | ) -> PyResult<Option<PyObject>> { |
|
65 | 65 | let key = key.extract::<PyBytes>(py)?; |
|
66 | 66 | match self.inner(py).borrow().get(HgPath::new(key.data(py))) { |
|
67 | 67 | Some(entry) => { |
|
68 | 68 | Ok(Some(make_dirstate_tuple(py, entry)?)) |
|
69 | 69 | }, |
|
70 | 70 | None => Ok(default) |
|
71 | 71 | } |
|
72 | 72 | } |
|
73 | 73 | |
|
74 | 74 | def addfile( |
|
75 | 75 | &self, |
|
76 | 76 | f: PyObject, |
|
77 | 77 | oldstate: PyObject, |
|
78 | 78 | state: PyObject, |
|
79 | 79 | mode: PyObject, |
|
80 | 80 | size: PyObject, |
|
81 | 81 | mtime: PyObject |
|
82 | 82 | ) -> PyResult<PyObject> { |
|
83 | 83 | self.inner(py).borrow_mut().add_file( |
|
84 | 84 | HgPath::new(f.extract::<PyBytes>(py)?.data(py)), |
|
85 | 85 | oldstate.extract::<PyBytes>(py)?.data(py)[0] |
|
86 | 86 | .try_into() |
|
87 | 87 | .map_err(|e: DirstateParseError| { |
|
88 | 88 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
89 | 89 | })?, |
|
90 | 90 | DirstateEntry { |
|
91 | 91 | state: state.extract::<PyBytes>(py)?.data(py)[0] |
|
92 | 92 | .try_into() |
|
93 | 93 | .map_err(|e: DirstateParseError| { |
|
94 | 94 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
95 | 95 | })?, |
|
96 | 96 | mode: mode.extract(py)?, |
|
97 | 97 | size: size.extract(py)?, |
|
98 | 98 | mtime: mtime.extract(py)?, |
|
99 | 99 | }, |
|
100 | 100 | ).and(Ok(py.None())).or_else(|e: DirstateMapError| { |
|
101 | 101 | Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())) |
|
102 | 102 | }) |
|
103 | 103 | } |
|
104 | 104 | |
|
105 | 105 | def removefile( |
|
106 | 106 | &self, |
|
107 | 107 | f: PyObject, |
|
108 | 108 | oldstate: PyObject, |
|
109 | 109 | size: PyObject |
|
110 | 110 | ) -> PyResult<PyObject> { |
|
111 | 111 | self.inner(py).borrow_mut() |
|
112 | 112 | .remove_file( |
|
113 | 113 | HgPath::new(f.extract::<PyBytes>(py)?.data(py)), |
|
114 | 114 | oldstate.extract::<PyBytes>(py)?.data(py)[0] |
|
115 | 115 | .try_into() |
|
116 | 116 | .map_err(|e: DirstateParseError| { |
|
117 | 117 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
118 | 118 | })?, |
|
119 | 119 | size.extract(py)?, |
|
120 | 120 | ) |
|
121 | 121 | .or_else(|_| { |
|
122 | 122 | Err(PyErr::new::<exc::OSError, _>( |
|
123 | 123 | py, |
|
124 | 124 | "Dirstate error".to_string(), |
|
125 | 125 | )) |
|
126 | 126 | })?; |
|
127 | 127 | Ok(py.None()) |
|
128 | 128 | } |
|
129 | 129 | |
|
130 | 130 | def dropfile( |
|
131 | 131 | &self, |
|
132 | 132 | f: PyObject, |
|
133 | 133 | oldstate: PyObject |
|
134 | 134 | ) -> PyResult<PyBool> { |
|
135 | 135 | self.inner(py).borrow_mut() |
|
136 | 136 | .drop_file( |
|
137 | 137 | HgPath::new(f.extract::<PyBytes>(py)?.data(py)), |
|
138 | 138 | oldstate.extract::<PyBytes>(py)?.data(py)[0] |
|
139 | 139 | .try_into() |
|
140 | 140 | .map_err(|e: DirstateParseError| { |
|
141 | 141 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
142 | 142 | })?, |
|
143 | 143 | ) |
|
144 | 144 | .and_then(|b| Ok(b.to_py_object(py))) |
|
145 | 145 | .or_else(|_| { |
|
146 | 146 | Err(PyErr::new::<exc::OSError, _>( |
|
147 | 147 | py, |
|
148 | 148 | "Dirstate error".to_string(), |
|
149 | 149 | )) |
|
150 | 150 | }) |
|
151 | 151 | } |
|
152 | 152 | |
|
153 | 153 | def clearambiguoustimes( |
|
154 | 154 | &self, |
|
155 | 155 | files: PyObject, |
|
156 | 156 | now: PyObject |
|
157 | 157 | ) -> PyResult<PyObject> { |
|
158 | 158 | let files: PyResult<Vec<HgPathBuf>> = files |
|
159 | 159 | .iter(py)? |
|
160 | 160 | .map(|filename| { |
|
161 | 161 | Ok(HgPathBuf::from_bytes( |
|
162 | 162 | filename?.extract::<PyBytes>(py)?.data(py), |
|
163 | 163 | )) |
|
164 | 164 | }) |
|
165 | 165 | .collect(); |
|
166 | 166 | self.inner(py).borrow_mut() |
|
167 | 167 | .clear_ambiguous_times(files?, now.extract(py)?); |
|
168 | 168 | Ok(py.None()) |
|
169 | 169 | } |
|
170 | 170 | |
|
171 | 171 | def other_parent_entries(&self) -> PyResult<PyObject> { |
|
172 | 172 | let mut inner_shared = self.inner(py).borrow_mut(); |
|
173 | 173 | let (_, other_parent) = |
|
174 | 174 | inner_shared.get_non_normal_other_parent_entries(); |
|
175 | 175 | |
|
176 | 176 | let locals = PyDict::new(py); |
|
177 | 177 | locals.set_item( |
|
178 | 178 | py, |
|
179 | 179 | "other_parent", |
|
180 | 180 | other_parent |
|
181 | 181 | .iter() |
|
182 | .map(|v| PyBytes::new(py, v.as_ref())) | |
|
182 | .map(|v| PyBytes::new(py, v.as_bytes())) | |
|
183 | 183 | .collect::<Vec<PyBytes>>() |
|
184 | 184 | .to_py_object(py), |
|
185 | 185 | )?; |
|
186 | 186 | |
|
187 | 187 | py.eval("set(other_parent)", None, Some(&locals)) |
|
188 | 188 | } |
|
189 | 189 | |
|
190 | 190 | def non_normal_entries(&self) -> PyResult<NonNormalEntries> { |
|
191 | 191 | NonNormalEntries::from_inner(py, self.clone_ref(py)) |
|
192 | 192 | } |
|
193 | 193 | |
|
194 | 194 | def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> { |
|
195 | 195 | let key = key.extract::<PyBytes>(py)?; |
|
196 | 196 | Ok(self |
|
197 | 197 | .inner(py) |
|
198 | 198 | .borrow_mut() |
|
199 | 199 | .get_non_normal_other_parent_entries().0 |
|
200 | 200 | .contains(HgPath::new(key.data(py)))) |
|
201 | 201 | } |
|
202 | 202 | |
|
203 | 203 | def non_normal_entries_display(&self) -> PyResult<PyString> { |
|
204 | 204 | Ok( |
|
205 | 205 | PyString::new( |
|
206 | 206 | py, |
|
207 | 207 | &format!( |
|
208 | 208 | "NonNormalEntries: {:?}", |
|
209 | 209 | self |
|
210 | 210 | .inner(py) |
|
211 | 211 | .borrow_mut() |
|
212 | 212 | .get_non_normal_other_parent_entries().0 |
|
213 | 213 | .iter().map(|o| o)) |
|
214 | 214 | ) |
|
215 | 215 | ) |
|
216 | 216 | } |
|
217 | 217 | |
|
218 | 218 | def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> { |
|
219 | 219 | let key = key.extract::<PyBytes>(py)?; |
|
220 | 220 | self |
|
221 | 221 | .inner(py) |
|
222 | 222 | .borrow_mut() |
|
223 | 223 | .non_normal_entries_remove(HgPath::new(key.data(py))); |
|
224 | 224 | Ok(py.None()) |
|
225 | 225 | } |
|
226 | 226 | |
|
227 | 227 | def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> { |
|
228 | 228 | let other: PyResult<_> = other.iter(py)? |
|
229 | 229 | .map(|f| { |
|
230 | 230 | Ok(HgPathBuf::from_bytes( |
|
231 | 231 | f?.extract::<PyBytes>(py)?.data(py), |
|
232 | 232 | )) |
|
233 | 233 | }) |
|
234 | 234 | .collect(); |
|
235 | 235 | |
|
236 | 236 | let res = self |
|
237 | 237 | .inner(py) |
|
238 | 238 | .borrow_mut() |
|
239 | 239 | .non_normal_entries_union(other?); |
|
240 | 240 | |
|
241 | 241 | let ret = PyList::new(py, &[]); |
|
242 | 242 | for filename in res.iter() { |
|
243 | 243 | let as_pystring = PyBytes::new(py, filename.as_bytes()); |
|
244 | 244 | ret.append(py, as_pystring.into_object()); |
|
245 | 245 | } |
|
246 | 246 | Ok(ret) |
|
247 | 247 | } |
|
248 | 248 | |
|
249 | 249 | def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> { |
|
250 | 250 | // Make sure the sets are defined before we no longer have a mutable |
|
251 | 251 | // reference to the dmap. |
|
252 | 252 | self.inner(py) |
|
253 | 253 | .borrow_mut() |
|
254 | 254 | .set_non_normal_other_parent_entries(false); |
|
255 | 255 | |
|
256 | 256 | let leaked_ref = self.inner(py).leak_immutable(); |
|
257 | 257 | |
|
258 | 258 | NonNormalEntriesIterator::from_inner(py, unsafe { |
|
259 | 259 | leaked_ref.map(py, |o| { |
|
260 | 260 | o.get_non_normal_other_parent_entries_panic().0.iter() |
|
261 | 261 | }) |
|
262 | 262 | }) |
|
263 | 263 | } |
|
264 | 264 | |
|
265 | 265 | def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> { |
|
266 | 266 | let d = d.extract::<PyBytes>(py)?; |
|
267 | 267 | Ok(self.inner(py).borrow_mut() |
|
268 | 268 | .has_tracked_dir(HgPath::new(d.data(py))) |
|
269 | 269 | .map_err(|e| { |
|
270 | 270 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
271 | 271 | })? |
|
272 | 272 | .to_py_object(py)) |
|
273 | 273 | } |
|
274 | 274 | |
|
275 | 275 | def hasdir(&self, d: PyObject) -> PyResult<PyBool> { |
|
276 | 276 | let d = d.extract::<PyBytes>(py)?; |
|
277 | 277 | Ok(self.inner(py).borrow_mut() |
|
278 | 278 | .has_dir(HgPath::new(d.data(py))) |
|
279 | 279 | .map_err(|e| { |
|
280 | 280 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
281 | 281 | })? |
|
282 | 282 | .to_py_object(py)) |
|
283 | 283 | } |
|
284 | 284 | |
|
285 | 285 | def parents(&self, st: PyObject) -> PyResult<PyTuple> { |
|
286 | 286 | self.inner(py).borrow_mut() |
|
287 | 287 | .parents(st.extract::<PyBytes>(py)?.data(py)) |
|
288 | 288 | .and_then(|d| { |
|
289 | 289 | Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2)) |
|
290 | 290 | .to_py_object(py)) |
|
291 | 291 | }) |
|
292 | 292 | .or_else(|_| { |
|
293 | 293 | Err(PyErr::new::<exc::OSError, _>( |
|
294 | 294 | py, |
|
295 | 295 | "Dirstate error".to_string(), |
|
296 | 296 | )) |
|
297 | 297 | }) |
|
298 | 298 | } |
|
299 | 299 | |
|
300 | 300 | def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> { |
|
301 | 301 | let p1 = extract_node_id(py, &p1)?; |
|
302 | 302 | let p2 = extract_node_id(py, &p2)?; |
|
303 | 303 | |
|
304 | 304 | self.inner(py).borrow_mut() |
|
305 | 305 | .set_parents(&DirstateParents { p1, p2 }); |
|
306 | 306 | Ok(py.None()) |
|
307 | 307 | } |
|
308 | 308 | |
|
309 | 309 | def read(&self, st: PyObject) -> PyResult<Option<PyObject>> { |
|
310 | 310 | match self.inner(py).borrow_mut() |
|
311 | 311 | .read(st.extract::<PyBytes>(py)?.data(py)) |
|
312 | 312 | { |
|
313 | 313 | Ok(Some(parents)) => Ok(Some( |
|
314 | 314 | (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2)) |
|
315 | 315 | .to_py_object(py) |
|
316 | 316 | .into_object(), |
|
317 | 317 | )), |
|
318 | 318 | Ok(None) => Ok(Some(py.None())), |
|
319 | 319 | Err(_) => Err(PyErr::new::<exc::OSError, _>( |
|
320 | 320 | py, |
|
321 | 321 | "Dirstate error".to_string(), |
|
322 | 322 | )), |
|
323 | 323 | } |
|
324 | 324 | } |
|
325 | 325 | def write( |
|
326 | 326 | &self, |
|
327 | 327 | p1: PyObject, |
|
328 | 328 | p2: PyObject, |
|
329 | 329 | now: PyObject |
|
330 | 330 | ) -> PyResult<PyBytes> { |
|
331 | 331 | let now = Duration::new(now.extract(py)?, 0); |
|
332 | 332 | let parents = DirstateParents { |
|
333 | 333 | p1: extract_node_id(py, &p1)?, |
|
334 | 334 | p2: extract_node_id(py, &p2)?, |
|
335 | 335 | }; |
|
336 | 336 | |
|
337 | 337 | match self.inner(py).borrow_mut().pack(parents, now) { |
|
338 | 338 | Ok(packed) => Ok(PyBytes::new(py, &packed)), |
|
339 | 339 | Err(_) => Err(PyErr::new::<exc::OSError, _>( |
|
340 | 340 | py, |
|
341 | 341 | "Dirstate error".to_string(), |
|
342 | 342 | )), |
|
343 | 343 | } |
|
344 | 344 | } |
|
345 | 345 | |
|
346 | 346 | def filefoldmapasdict(&self) -> PyResult<PyDict> { |
|
347 | 347 | let dict = PyDict::new(py); |
|
348 | 348 | for (key, value) in |
|
349 | 349 | self.inner(py).borrow_mut().build_file_fold_map().iter() |
|
350 | 350 | { |
|
351 | dict.set_item(py, key.as_ref().to_vec(), value.as_ref().to_vec())?; | |
|
351 | dict.set_item( | |
|
352 | py, | |
|
353 | key.as_bytes().to_vec(), | |
|
354 | value.as_bytes().to_vec(), | |
|
355 | )?; | |
|
352 | 356 | } |
|
353 | 357 | Ok(dict) |
|
354 | 358 | } |
|
355 | 359 | |
|
356 | 360 | def __len__(&self) -> PyResult<usize> { |
|
357 | 361 | Ok(self.inner(py).borrow().len()) |
|
358 | 362 | } |
|
359 | 363 | |
|
360 | 364 | def __contains__(&self, key: PyObject) -> PyResult<bool> { |
|
361 | 365 | let key = key.extract::<PyBytes>(py)?; |
|
362 | 366 | Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py)))) |
|
363 | 367 | } |
|
364 | 368 | |
|
365 | 369 | def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { |
|
366 | 370 | let key = key.extract::<PyBytes>(py)?; |
|
367 | 371 | let key = HgPath::new(key.data(py)); |
|
368 | 372 | match self.inner(py).borrow().get(key) { |
|
369 | 373 | Some(entry) => { |
|
370 | 374 | Ok(make_dirstate_tuple(py, entry)?) |
|
371 | 375 | }, |
|
372 | 376 | None => Err(PyErr::new::<exc::KeyError, _>( |
|
373 | 377 | py, |
|
374 | 378 | String::from_utf8_lossy(key.as_bytes()), |
|
375 | 379 | )), |
|
376 | 380 | } |
|
377 | 381 | } |
|
378 | 382 | |
|
379 | 383 | def keys(&self) -> PyResult<DirstateMapKeysIterator> { |
|
380 | 384 | let leaked_ref = self.inner(py).leak_immutable(); |
|
381 | 385 | DirstateMapKeysIterator::from_inner( |
|
382 | 386 | py, |
|
383 | 387 | unsafe { leaked_ref.map(py, |o| o.iter()) }, |
|
384 | 388 | ) |
|
385 | 389 | } |
|
386 | 390 | |
|
387 | 391 | def items(&self) -> PyResult<DirstateMapItemsIterator> { |
|
388 | 392 | let leaked_ref = self.inner(py).leak_immutable(); |
|
389 | 393 | DirstateMapItemsIterator::from_inner( |
|
390 | 394 | py, |
|
391 | 395 | unsafe { leaked_ref.map(py, |o| o.iter()) }, |
|
392 | 396 | ) |
|
393 | 397 | } |
|
394 | 398 | |
|
395 | 399 | def __iter__(&self) -> PyResult<DirstateMapKeysIterator> { |
|
396 | 400 | let leaked_ref = self.inner(py).leak_immutable(); |
|
397 | 401 | DirstateMapKeysIterator::from_inner( |
|
398 | 402 | py, |
|
399 | 403 | unsafe { leaked_ref.map(py, |o| o.iter()) }, |
|
400 | 404 | ) |
|
401 | 405 | } |
|
402 | 406 | |
|
403 | 407 | def getdirs(&self) -> PyResult<Dirs> { |
|
404 | 408 | // TODO don't copy, share the reference |
|
405 | 409 | self.inner(py).borrow_mut().set_dirs() |
|
406 | 410 | .map_err(|e| { |
|
407 | 411 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
408 | 412 | })?; |
|
409 | 413 | Dirs::from_inner( |
|
410 | 414 | py, |
|
411 | 415 | DirsMultiset::from_dirstate( |
|
412 | 416 | &self.inner(py).borrow(), |
|
413 | 417 | Some(EntryState::Removed), |
|
414 | 418 | ) |
|
415 | 419 | .map_err(|e| { |
|
416 | 420 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
417 | 421 | })?, |
|
418 | 422 | ) |
|
419 | 423 | } |
|
420 | 424 | def getalldirs(&self) -> PyResult<Dirs> { |
|
421 | 425 | // TODO don't copy, share the reference |
|
422 | 426 | self.inner(py).borrow_mut().set_all_dirs() |
|
423 | 427 | .map_err(|e| { |
|
424 | 428 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
425 | 429 | })?; |
|
426 | 430 | Dirs::from_inner( |
|
427 | 431 | py, |
|
428 | 432 | DirsMultiset::from_dirstate( |
|
429 | 433 | &self.inner(py).borrow(), |
|
430 | 434 | None, |
|
431 | 435 | ).map_err(|e| { |
|
432 | 436 | PyErr::new::<exc::ValueError, _>(py, e.to_string()) |
|
433 | 437 | })?, |
|
434 | 438 | ) |
|
435 | 439 | } |
|
436 | 440 | |
|
437 | 441 | // TODO all copymap* methods, see docstring above |
|
438 | 442 | def copymapcopy(&self) -> PyResult<PyDict> { |
|
439 | 443 | let dict = PyDict::new(py); |
|
440 | 444 | for (key, value) in self.inner(py).borrow().copy_map.iter() { |
|
441 | 445 | dict.set_item( |
|
442 | 446 | py, |
|
443 | PyBytes::new(py, key.as_ref()), | |
|
444 | PyBytes::new(py, value.as_ref()), | |
|
447 | PyBytes::new(py, key.as_bytes()), | |
|
448 | PyBytes::new(py, value.as_bytes()), | |
|
445 | 449 | )?; |
|
446 | 450 | } |
|
447 | 451 | Ok(dict) |
|
448 | 452 | } |
|
449 | 453 | |
|
450 | 454 | def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> { |
|
451 | 455 | let key = key.extract::<PyBytes>(py)?; |
|
452 | 456 | match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) { |
|
453 | Some(copy) => Ok(PyBytes::new(py, copy.as_ref())), | |
|
457 | Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())), | |
|
454 | 458 | None => Err(PyErr::new::<exc::KeyError, _>( |
|
455 | 459 | py, |
|
456 | 460 | String::from_utf8_lossy(key.data(py)), |
|
457 | 461 | )), |
|
458 | 462 | } |
|
459 | 463 | } |
|
460 | 464 | def copymap(&self) -> PyResult<CopyMap> { |
|
461 | 465 | CopyMap::from_inner(py, self.clone_ref(py)) |
|
462 | 466 | } |
|
463 | 467 | |
|
464 | 468 | def copymaplen(&self) -> PyResult<usize> { |
|
465 | 469 | Ok(self.inner(py).borrow().copy_map.len()) |
|
466 | 470 | } |
|
467 | 471 | def copymapcontains(&self, key: PyObject) -> PyResult<bool> { |
|
468 | 472 | let key = key.extract::<PyBytes>(py)?; |
|
469 | 473 | Ok(self |
|
470 | 474 | .inner(py) |
|
471 | 475 | .borrow() |
|
472 | 476 | .copy_map |
|
473 | 477 | .contains_key(HgPath::new(key.data(py)))) |
|
474 | 478 | } |
|
475 | 479 | def copymapget( |
|
476 | 480 | &self, |
|
477 | 481 | key: PyObject, |
|
478 | 482 | default: Option<PyObject> |
|
479 | 483 | ) -> PyResult<Option<PyObject>> { |
|
480 | 484 | let key = key.extract::<PyBytes>(py)?; |
|
481 | 485 | match self |
|
482 | 486 | .inner(py) |
|
483 | 487 | .borrow() |
|
484 | 488 | .copy_map |
|
485 | 489 | .get(HgPath::new(key.data(py))) |
|
486 | 490 | { |
|
487 | 491 | Some(copy) => Ok(Some( |
|
488 | PyBytes::new(py, copy.as_ref()).into_object(), |
|
492 | PyBytes::new(py, copy.as_bytes()).into_object(), | |
|
489 | 493 | )), |
|
490 | 494 | None => Ok(default), |
|
491 | 495 | } |
|
492 | 496 | } |
|
493 | 497 | def copymapsetitem( |
|
494 | 498 | &self, |
|
495 | 499 | key: PyObject, |
|
496 | 500 | value: PyObject |
|
497 | 501 | ) -> PyResult<PyObject> { |
|
498 | 502 | let key = key.extract::<PyBytes>(py)?; |
|
499 | 503 | let value = value.extract::<PyBytes>(py)?; |
|
500 | 504 | self.inner(py).borrow_mut().copy_map.insert( |
|
501 | 505 | HgPathBuf::from_bytes(key.data(py)), |
|
502 | 506 | HgPathBuf::from_bytes(value.data(py)), |
|
503 | 507 | ); |
|
504 | 508 | Ok(py.None()) |
|
505 | 509 | } |
|
506 | 510 | def copymappop( |
|
507 | 511 | &self, |
|
508 | 512 | key: PyObject, |
|
509 | 513 | default: Option<PyObject> |
|
510 | 514 | ) -> PyResult<Option<PyObject>> { |
|
511 | 515 | let key = key.extract::<PyBytes>(py)?; |
|
512 | 516 | match self |
|
513 | 517 | .inner(py) |
|
514 | 518 | .borrow_mut() |
|
515 | 519 | .copy_map |
|
516 | 520 | .remove(HgPath::new(key.data(py))) |
|
517 | 521 | { |
|
518 | 522 | Some(_) => Ok(None), |
|
519 | 523 | None => Ok(default), |
|
520 | 524 | } |
|
521 | 525 | } |
|
522 | 526 | |
|
523 | 527 | def copymapiter(&self) -> PyResult<CopyMapKeysIterator> { |
|
524 | 528 | let leaked_ref = self.inner(py).leak_immutable(); |
|
525 | 529 | CopyMapKeysIterator::from_inner( |
|
526 | 530 | py, |
|
527 | 531 | unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) }, |
|
528 | 532 | ) |
|
529 | 533 | } |
|
530 | 534 | |
|
531 | 535 | def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> { |
|
532 | 536 | let leaked_ref = self.inner(py).leak_immutable(); |
|
533 | 537 | CopyMapItemsIterator::from_inner( |
|
534 | 538 | py, |
|
535 | 539 | unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) }, |
|
536 | 540 | ) |
|
537 | 541 | } |
|
538 | 542 | |
|
539 | 543 | }); |
|
540 | 544 | |
|
541 | 545 | impl DirstateMap { |
|
542 | 546 | pub fn get_inner<'a>( |
|
543 | 547 | &'a self, |
|
544 | 548 | py: Python<'a>, |
|
545 | 549 | ) -> Ref<'a, RustDirstateMap> { |
|
546 | 550 | self.inner(py).borrow() |
|
547 | 551 | } |
|
548 | 552 | fn translate_key( |
|
549 | 553 | py: Python, |
|
550 | 554 | res: (&HgPathBuf, &DirstateEntry), |
|
551 | 555 | ) -> PyResult<Option<PyBytes>> { |
|
552 | Ok(Some(PyBytes::new(py, res.0.as_ref()))) |
|
556 | Ok(Some(PyBytes::new(py, res.0.as_bytes()))) | |
|
553 | 557 | } |
|
554 | 558 | fn translate_key_value( |
|
555 | 559 | py: Python, |
|
556 | 560 | res: (&HgPathBuf, &DirstateEntry), |
|
557 | 561 | ) -> PyResult<Option<(PyBytes, PyObject)>> { |
|
558 | 562 | let (f, entry) = res; |
|
559 | 563 | Ok(Some(( |
|
560 | PyBytes::new(py, f.as_ref()), |
|
564 | PyBytes::new(py, f.as_bytes()), | |
|
561 | 565 | make_dirstate_tuple(py, entry)?, |
|
562 | 566 | ))) |
|
563 | 567 | } |
|
564 | 568 | } |
|
565 | 569 | |
|
566 | 570 | py_shared_iterator!( |
|
567 | 571 | DirstateMapKeysIterator, |
|
568 | 572 | UnsafePyLeaked<StateMapIter<'static>>, |
|
569 | 573 | DirstateMap::translate_key, |
|
570 | 574 | Option<PyBytes> |
|
571 | 575 | ); |
|
572 | 576 | |
|
573 | 577 | py_shared_iterator!( |
|
574 | 578 | DirstateMapItemsIterator, |
|
575 | 579 | UnsafePyLeaked<StateMapIter<'static>>, |
|
576 | 580 | DirstateMap::translate_key_value, |
|
577 | 581 | Option<(PyBytes, PyObject)> |
|
578 | 582 | ); |
|
579 | 583 | |
|
580 | 584 | fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<[u8; PARENT_SIZE]> { |
|
581 | 585 | let bytes = obj.extract::<PyBytes>(py)?; |
|
582 | 586 | match bytes.data(py).try_into() { |
|
583 | 587 | Ok(s) => Ok(s), |
|
584 | 588 | Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())), |
|
585 | 589 | } |
|
586 | 590 | } |
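
The `extract_node_id` helper just above turns a Python bytes object into a fixed-size `[u8; PARENT_SIZE]` node id with `try_into()`, mapping a length mismatch onto a `ValueError`. As a rough, std-only sketch of the same conversion (the `NODE_LEN` constant and the `node_from_slice` name are invented for illustration and are not part of the bindings):

    use std::convert::TryInto;

    // Invented stand-in for PARENT_SIZE used in the bindings above.
    const NODE_LEN: usize = 20;

    // Turn an arbitrary byte slice into a fixed-size node id, reporting a
    // readable error on length mismatch (the cpython wrapper maps this onto
    // a Python ValueError instead of a String).
    fn node_from_slice(data: &[u8]) -> Result<[u8; NODE_LEN], String> {
        data.try_into()
            .map_err(|_| format!("expected {} bytes, got {}", NODE_LEN, data.len()))
    }

    fn main() {
        assert!(node_from_slice(&[0u8; NODE_LEN]).is_ok());
        assert!(node_from_slice(b"too short").is_err());
    }
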
@@ -1,76 +1,76 b'' | |||
|
1 | 1 | // non_normal_other_parent_entries.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | use cpython::{ |
|
9 | 9 | exc::NotImplementedError, CompareOp, ObjectProtocol, PyBytes, PyClone, |
|
10 | 10 | PyErr, PyList, PyObject, PyResult, PyString, Python, PythonObject, |
|
11 | 11 | ToPyObject, UnsafePyLeaked, |
|
12 | 12 | }; |
|
13 | 13 | |
|
14 | 14 | use crate::dirstate::DirstateMap; |
|
15 | 15 | use hg::utils::hg_path::HgPathBuf; |
|
16 | 16 | use std::cell::RefCell; |
|
17 | 17 | use std::collections::hash_set; |
|
18 | 18 | |
|
19 | 19 | py_class!(pub class NonNormalEntries |py| { |
|
20 | 20 | data dmap: DirstateMap; |
|
21 | 21 | |
|
22 | 22 | def __contains__(&self, key: PyObject) -> PyResult<bool> { |
|
23 | 23 | self.dmap(py).non_normal_entries_contains(py, key) |
|
24 | 24 | } |
|
25 | 25 | def remove(&self, key: PyObject) -> PyResult<PyObject> { |
|
26 | 26 | self.dmap(py).non_normal_entries_remove(py, key) |
|
27 | 27 | } |
|
28 | 28 | def union(&self, other: PyObject) -> PyResult<PyList> { |
|
29 | 29 | self.dmap(py).non_normal_entries_union(py, other) |
|
30 | 30 | } |
|
31 | 31 | def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> { |
|
32 | 32 | match op { |
|
33 | 33 | CompareOp::Eq => self.is_equal_to(py, other), |
|
34 | 34 | CompareOp::Ne => Ok(!self.is_equal_to(py, other)?), |
|
35 | 35 | _ => Err(PyErr::new::<NotImplementedError, _>(py, "")) |
|
36 | 36 | } |
|
37 | 37 | } |
|
38 | 38 | def __repr__(&self) -> PyResult<PyString> { |
|
39 | 39 | self.dmap(py).non_normal_entries_display(py) |
|
40 | 40 | } |
|
41 | 41 | |
|
42 | 42 | def __iter__(&self) -> PyResult<NonNormalEntriesIterator> { |
|
43 | 43 | self.dmap(py).non_normal_entries_iter(py) |
|
44 | 44 | } |
|
45 | 45 | }); |
|
46 | 46 | |
|
47 | 47 | impl NonNormalEntries { |
|
48 | 48 | pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> { |
|
49 | 49 | Self::create_instance(py, dm) |
|
50 | 50 | } |
|
51 | 51 | |
|
52 | 52 | fn is_equal_to(&self, py: Python, other: PyObject) -> PyResult<bool> { |
|
53 | 53 | for item in other.iter(py)? { |
|
54 | 54 | if !self.dmap(py).non_normal_entries_contains(py, item?)? { |
|
55 | 55 | return Ok(false); |
|
56 | 56 | } |
|
57 | 57 | } |
|
58 | 58 | Ok(true) |
|
59 | 59 | } |
|
60 | 60 | |
|
61 | 61 | fn translate_key( |
|
62 | 62 | py: Python, |
|
63 | 63 | key: &HgPathBuf, |
|
64 | 64 | ) -> PyResult<Option<PyBytes>> { |
|
65 | Ok(Some(PyBytes::new(py, key.as_ref()))) |
|
65 | Ok(Some(PyBytes::new(py, key.as_bytes()))) | |
|
66 | 66 | } |
|
67 | 67 | } |
|
68 | 68 | |
|
69 | 69 | type NonNormalEntriesIter<'a> = hash_set::Iter<'a, HgPathBuf>; |
|
70 | 70 | |
|
71 | 71 | py_shared_iterator!( |
|
72 | 72 | NonNormalEntriesIterator, |
|
73 | 73 | UnsafePyLeaked<NonNormalEntriesIter<'static>>, |
|
74 | 74 | NonNormalEntries::translate_key, |
|
75 | 75 | Option<PyBytes> |
|
76 | 76 | ); |
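
Note that `is_equal_to` above only checks one direction: every element of `other` must be present in the non-normal set, and nothing is said about extra elements in the set itself. A minimal std-only sketch of that containment test, with a plain `HashSet` standing in for the dirstate-backed set (all names here are made up for the example):

    use std::collections::HashSet;

    // One-directional containment, mirroring the shape of is_equal_to above:
    // every item of `other` must be present in `entries`.
    fn contains_all(entries: &HashSet<Vec<u8>>, other: &[Vec<u8>]) -> bool {
        other.iter().all(|item| entries.contains(item))
    }

    fn main() {
        let entries: HashSet<Vec<u8>> =
            [b"a".to_vec(), b"b".to_vec()].iter().cloned().collect();
        assert!(contains_all(&entries, &[b"a".to_vec()]));
        assert!(!contains_all(&entries, &[b"c".to_vec()]));
    }
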
@@ -1,303 +1,301 b'' | |||
|
1 | 1 | // status.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019, Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Bindings for the `hg::status` module provided by the |
|
9 | 9 | //! `hg-core` crate. From Python, this will be seen as |
|
10 | 10 | //! `rustext.dirstate.status`. |
|
11 | 11 | |
|
12 | 12 | use crate::{dirstate::DirstateMap, exceptions::FallbackError}; |
|
13 | 13 | use cpython::{ |
|
14 | 14 | exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject, |
|
15 | 15 | PyResult, PyTuple, Python, PythonObject, ToPyObject, |
|
16 | 16 | }; |
|
17 | 17 | use hg::{ |
|
18 | 18 | matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher}, |
|
19 | 19 | parse_pattern_syntax, status, |
|
20 | 20 | utils::{ |
|
21 | 21 | files::{get_bytes_from_path, get_path_from_bytes}, |
|
22 | 22 | hg_path::{HgPath, HgPathBuf}, |
|
23 | 23 | }, |
|
24 | 24 | BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError, |
|
25 | 25 | StatusOptions, |
|
26 | 26 | }; |
|
27 | 27 | use std::borrow::{Borrow, Cow}; |
|
28 | 28 | |
|
29 | 29 | /// This will be useless once trait impls for collection are added to `PyBytes` |
|
30 | 30 | /// upstream. |
|
31 | 31 | fn collect_pybytes_list( |
|
32 | 32 | py: Python, |
|
33 | 33 | collection: &[impl AsRef<HgPath>], |
|
34 | 34 | ) -> PyList { |
|
35 | 35 | let list = PyList::new(py, &[]); |
|
36 | 36 | |
|
37 | 37 | for path in collection.iter() { |
|
38 | 38 | list.append( |
|
39 | 39 | py, |
|
40 | 40 | PyBytes::new(py, path.as_ref().as_bytes()).into_object(), |
|
41 | 41 | ) |
|
42 | 42 | } |
|
43 | 43 | |
|
44 | 44 | list |
|
45 | 45 | } |
|
46 | 46 | |
|
47 | 47 | fn collect_bad_matches( |
|
48 | 48 | py: Python, |
|
49 | 49 | collection: &[(impl AsRef<HgPath>, BadMatch)], |
|
50 | 50 | ) -> PyResult<PyList> { |
|
51 | 51 | let list = PyList::new(py, &[]); |
|
52 | 52 | |
|
53 | 53 | let os = py.import("os")?; |
|
54 | 54 | let get_error_message = |code: i32| -> PyResult<_> { |
|
55 | 55 | os.call( |
|
56 | 56 | py, |
|
57 | 57 | "strerror", |
|
58 | 58 | PyTuple::new(py, &[code.to_py_object(py).into_object()]), |
|
59 | 59 | None, |
|
60 | 60 | ) |
|
61 | 61 | }; |
|
62 | 62 | |
|
63 | 63 | for (path, bad_match) in collection.iter() { |
|
64 | 64 | let message = match bad_match { |
|
65 | 65 | BadMatch::OsError(code) => get_error_message(*code)?, |
|
66 | 66 | BadMatch::BadType(bad_type) => format!( |
|
67 | 67 | "unsupported file type (type is {})", |
|
68 | 68 | bad_type.to_string() |
|
69 | 69 | ) |
|
70 | 70 | .to_py_object(py) |
|
71 | 71 | .into_object(), |
|
72 | 72 | }; |
|
73 | 73 | list.append( |
|
74 | 74 | py, |
|
75 | 75 | (PyBytes::new(py, path.as_ref().as_bytes()), message) |
|
76 | 76 | .to_py_object(py) |
|
77 | 77 | .into_object(), |
|
78 | 78 | ) |
|
79 | 79 | } |
|
80 | 80 | |
|
81 | 81 | Ok(list) |
|
82 | 82 | } |
|
83 | 83 | |
|
84 | 84 | fn handle_fallback(py: Python, err: StatusError) -> PyErr { |
|
85 | 85 | match err { |
|
86 | 86 | StatusError::Pattern(e) => { |
|
87 | 87 | let as_string = e.to_string(); |
|
88 | 88 | log::trace!("Rust status fallback: `{}`", &as_string); |
|
89 | 89 | |
|
90 | 90 | PyErr::new::<FallbackError, _>(py, &as_string) |
|
91 | 91 | } |
|
92 | 92 | e => PyErr::new::<ValueError, _>(py, e.to_string()), |
|
93 | 93 | } |
|
94 | 94 | } |
|
95 | 95 | |
|
96 | 96 | pub fn status_wrapper( |
|
97 | 97 | py: Python, |
|
98 | 98 | dmap: DirstateMap, |
|
99 | 99 | matcher: PyObject, |
|
100 | 100 | root_dir: PyObject, |
|
101 | 101 | ignore_files: PyList, |
|
102 | 102 | check_exec: bool, |
|
103 | 103 | last_normal_time: i64, |
|
104 | 104 | list_clean: bool, |
|
105 | 105 | list_ignored: bool, |
|
106 | 106 | list_unknown: bool, |
|
107 | 107 | collect_traversed_dirs: bool, |
|
108 | 108 | ) -> PyResult<PyTuple> { |
|
109 | 109 | let bytes = root_dir.extract::<PyBytes>(py)?; |
|
110 | 110 | let root_dir = get_path_from_bytes(bytes.data(py)); |
|
111 | 111 | |
|
112 | 112 | let dmap: DirstateMap = dmap.to_py_object(py); |
|
113 | 113 | let dmap = dmap.get_inner(py); |
|
114 | 114 | |
|
115 | 115 | let ignore_files: PyResult<Vec<_>> = ignore_files |
|
116 | 116 | .iter(py) |
|
117 | 117 | .map(|b| { |
|
118 | 118 | let file = b.extract::<PyBytes>(py)?; |
|
119 | 119 | Ok(get_path_from_bytes(file.data(py)).to_owned()) |
|
120 | 120 | }) |
|
121 | 121 | .collect(); |
|
122 | 122 | let ignore_files = ignore_files?; |
|
123 | 123 | |
|
124 | 124 | match matcher.get_type(py).name(py).borrow() { |
|
125 | 125 | "alwaysmatcher" => { |
|
126 | 126 | let matcher = AlwaysMatcher; |
|
127 | 127 | let ((lookup, status_res), warnings) = status( |
|
128 | 128 | &dmap, |
|
129 | 129 | &matcher, |
|
130 | 130 | &root_dir, |
|
131 | 131 | ignore_files, |
|
132 | 132 | StatusOptions { |
|
133 | 133 | check_exec, |
|
134 | 134 | last_normal_time, |
|
135 | 135 | list_clean, |
|
136 | 136 | list_ignored, |
|
137 | 137 | list_unknown, |
|
138 | 138 | collect_traversed_dirs, |
|
139 | 139 | }, |
|
140 | 140 | ) |
|
141 | 141 | .map_err(|e| handle_fallback(py, e))?; |
|
142 | 142 | build_response(py, lookup, status_res, warnings) |
|
143 | 143 | } |
|
144 | 144 | "exactmatcher" => { |
|
145 | 145 | let files = matcher.call_method( |
|
146 | 146 | py, |
|
147 | 147 | "files", |
|
148 | 148 | PyTuple::new(py, &[]), |
|
149 | 149 | None, |
|
150 | 150 | )?; |
|
151 | 151 | let files: PyList = files.cast_into(py)?; |
|
152 | 152 | let files: PyResult<Vec<HgPathBuf>> = files |
|
153 | 153 | .iter(py) |
|
154 | 154 | .map(|f| { |
|
155 | 155 | Ok(HgPathBuf::from_bytes( |
|
156 | 156 | f.extract::<PyBytes>(py)?.data(py), |
|
157 | 157 | )) |
|
158 | 158 | }) |
|
159 | 159 | .collect(); |
|
160 | 160 | |
|
161 | 161 | let files = files?; |
|
162 | 162 | let matcher = FileMatcher::new(&files) |
|
163 | 163 | .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; |
|
164 | 164 | let ((lookup, status_res), warnings) = status( |
|
165 | 165 | &dmap, |
|
166 | 166 | &matcher, |
|
167 | 167 | &root_dir, |
|
168 | 168 | ignore_files, |
|
169 | 169 | StatusOptions { |
|
170 | 170 | check_exec, |
|
171 | 171 | last_normal_time, |
|
172 | 172 | list_clean, |
|
173 | 173 | list_ignored, |
|
174 | 174 | list_unknown, |
|
175 | 175 | collect_traversed_dirs, |
|
176 | 176 | }, |
|
177 | 177 | ) |
|
178 | 178 | .map_err(|e| handle_fallback(py, e))?; |
|
179 | 179 | build_response(py, lookup, status_res, warnings) |
|
180 | 180 | } |
|
181 | 181 | "includematcher" => { |
|
182 | 182 | // Get the patterns from Python even though most of them are |
|
183 | 183 | // redundant with those we will parse later on, as they include |
|
184 | 184 | // those passed from the command line. |
|
185 | 185 | let ignore_patterns: PyResult<Vec<_>> = matcher |
|
186 | 186 | .getattr(py, "_kindpats")? |
|
187 | 187 | .iter(py)? |
|
188 | 188 | .map(|k| { |
|
189 | 189 | let k = k?; |
|
190 | 190 | let syntax = parse_pattern_syntax( |
|
191 | 191 | &[ |
|
192 | 192 | k.get_item(py, 0)? |
|
193 | 193 | .extract::<PyBytes>(py)? |
|
194 | 194 | .data(py), |
|
195 | 195 | &b":"[..], |
|
196 | 196 | ] |
|
197 | 197 | .concat(), |
|
198 | 198 | ) |
|
199 | 199 | .map_err(|e| { |
|
200 | 200 | handle_fallback(py, StatusError::Pattern(e)) |
|
201 | 201 | })?; |
|
202 | 202 | let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?; |
|
203 | 203 | let pattern = pattern.data(py); |
|
204 | 204 | let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?; |
|
205 | 205 | let source = get_path_from_bytes(source.data(py)); |
|
206 | 206 | let new = IgnorePattern::new(syntax, pattern, source); |
|
207 | 207 | Ok(new) |
|
208 | 208 | }) |
|
209 | 209 | .collect(); |
|
210 | 210 | |
|
211 | 211 | let ignore_patterns = ignore_patterns?; |
|
212 | 212 | let mut all_warnings = vec![]; |
|
213 | 213 | |
|
214 | 214 | let (matcher, warnings) = |
|
215 | 215 | IncludeMatcher::new(ignore_patterns, &root_dir) |
|
216 | 216 | .map_err(|e| handle_fallback(py, e.into()))?; |
|
217 | 217 | all_warnings.extend(warnings); |
|
218 | 218 | |
|
219 | 219 | let ((lookup, status_res), warnings) = status( |
|
220 | 220 | &dmap, |
|
221 | 221 | &matcher, |
|
222 | 222 | &root_dir, |
|
223 | 223 | ignore_files, |
|
224 | 224 | StatusOptions { |
|
225 | 225 | check_exec, |
|
226 | 226 | last_normal_time, |
|
227 | 227 | list_clean, |
|
228 | 228 | list_ignored, |
|
229 | 229 | list_unknown, |
|
230 | 230 | collect_traversed_dirs, |
|
231 | 231 | }, |
|
232 | 232 | ) |
|
233 | 233 | .map_err(|e| handle_fallback(py, e))?; |
|
234 | 234 | |
|
235 | 235 | all_warnings.extend(warnings); |
|
236 | 236 | |
|
237 | 237 | build_response(py, lookup, status_res, all_warnings) |
|
238 | 238 | } |
|
239 | e => { | |
|
240 | return Err(PyErr::new::<ValueError, _>( | |
|
241 | py, | |
|
242 | format!("Unsupported matcher {}", e), | |
|
243 | )); | |
|
244 | } | |
|
239 | e => Err(PyErr::new::<ValueError, _>( | |
|
240 | py, | |
|
241 | format!("Unsupported matcher {}", e), | |
|
242 | )), | |
|
245 | 243 | } |
|
246 | 244 | } |
|
247 | 245 | |
|
248 | 246 | fn build_response( |
|
249 | 247 | py: Python, |
|
250 | 248 | lookup: Vec<Cow<HgPath>>, |
|
251 | 249 | status_res: DirstateStatus, |
|
252 | 250 | warnings: Vec<PatternFileWarning>, |
|
253 | 251 | ) -> PyResult<PyTuple> { |
|
254 | 252 | let modified = collect_pybytes_list(py, status_res.modified.as_ref()); |
|
255 | 253 | let added = collect_pybytes_list(py, status_res.added.as_ref()); |
|
256 | 254 | let removed = collect_pybytes_list(py, status_res.removed.as_ref()); |
|
257 | 255 | let deleted = collect_pybytes_list(py, status_res.deleted.as_ref()); |
|
258 | 256 | let clean = collect_pybytes_list(py, status_res.clean.as_ref()); |
|
259 | 257 | let ignored = collect_pybytes_list(py, status_res.ignored.as_ref()); |
|
260 | 258 | let unknown = collect_pybytes_list(py, status_res.unknown.as_ref()); |
|
261 | 259 | let lookup = collect_pybytes_list(py, lookup.as_ref()); |
|
262 | 260 | let bad = collect_bad_matches(py, status_res.bad.as_ref())?; |
|
263 | 261 | let traversed = collect_pybytes_list(py, status_res.traversed.as_ref()); |
|
264 | 262 | let py_warnings = PyList::new(py, &[]); |
|
265 | 263 | for warning in warnings.iter() { |
|
266 | 264 | // We use duck-typing on the Python side for dispatch, good enough for |
|
267 | 265 | // now. |
|
268 | 266 | match warning { |
|
269 | 267 | PatternFileWarning::InvalidSyntax(file, syn) => { |
|
270 | 268 | py_warnings.append( |
|
271 | 269 | py, |
|
272 | 270 | ( |
|
273 | 271 | PyBytes::new(py, &get_bytes_from_path(&file)), |
|
274 | 272 | PyBytes::new(py, syn), |
|
275 | 273 | ) |
|
276 | 274 | .to_py_object(py) |
|
277 | 275 | .into_object(), |
|
278 | 276 | ); |
|
279 | 277 | } |
|
280 | 278 | PatternFileWarning::NoSuchFile(file) => py_warnings.append( |
|
281 | 279 | py, |
|
282 | 280 | PyBytes::new(py, &get_bytes_from_path(&file)).into_object(), |
|
283 | 281 | ), |
|
284 | 282 | } |
|
285 | 283 | } |
|
286 | 284 | |
|
287 | 285 | Ok(PyTuple::new( |
|
288 | 286 | py, |
|
289 | 287 | &[ |
|
290 | 288 | lookup.into_object(), |
|
291 | 289 | modified.into_object(), |
|
292 | 290 | added.into_object(), |
|
293 | 291 | removed.into_object(), |
|
294 | 292 | deleted.into_object(), |
|
295 | 293 | clean.into_object(), |
|
296 | 294 | ignored.into_object(), |
|
297 | 295 | unknown.into_object(), |
|
298 | 296 | py_warnings.into_object(), |
|
299 | 297 | bad.into_object(), |
|
300 | 298 | traversed.into_object(), |
|
301 | 299 | ][..], |
|
302 | 300 | )) |
|
303 | 301 | } |
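
`status_wrapper` above converts the incoming Python lists (`ignore_files`, the exact matcher's `files`, the include matcher's `_kindpats`) by mapping each element through a fallible conversion and collecting directly into `PyResult<Vec<_>>`, so the first failure aborts the whole conversion. The same collect-into-Result idiom in a self-contained sketch (the `parse_all` helper and its inputs are invented):

    // Collecting an iterator of Result values into Result<Vec<_>, _> stops at
    // the first Err, which is how the wrapper above propagates the first
    // failed PyBytes extraction instead of building a partial Vec.
    fn parse_all(inputs: &[&str]) -> Result<Vec<u32>, std::num::ParseIntError> {
        inputs.iter().map(|s| s.parse::<u32>()).collect()
    }

    fn main() {
        assert_eq!(parse_all(&["1", "2", "3"]), Ok(vec![1, 2, 3]));
        assert!(parse_all(&["1", "oops"]).is_err());
    }
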
@@ -1,175 +1,175 b'' | |||
|
1 | 1 | // parsers.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Bindings for the `hg::dirstate::parsers` module provided by the |
|
9 | 9 | //! `hg-core` package. |
|
10 | 10 | //! |
|
11 | 11 | //! From Python, this will be seen as `mercurial.rustext.parsers` |
|
12 | 12 | use cpython::{ |
|
13 | 13 | exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python, |
|
14 | 14 | PythonObject, ToPyObject, |
|
15 | 15 | }; |
|
16 | 16 | use hg::{ |
|
17 | 17 | pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, |
|
18 | 18 | DirstatePackError, DirstateParents, DirstateParseError, FastHashMap, |
|
19 | 19 | PARENT_SIZE, |
|
20 | 20 | }; |
|
21 | 21 | use std::convert::TryInto; |
|
22 | 22 | |
|
23 | 23 | use crate::dirstate::{extract_dirstate, make_dirstate_tuple}; |
|
24 | 24 | use std::time::Duration; |
|
25 | 25 | |
|
26 | 26 | fn parse_dirstate_wrapper( |
|
27 | 27 | py: Python, |
|
28 | 28 | dmap: PyDict, |
|
29 | 29 | copymap: PyDict, |
|
30 | 30 | st: PyBytes, |
|
31 | 31 | ) -> PyResult<PyTuple> { |
|
32 | 32 | let mut dirstate_map = FastHashMap::default(); |
|
33 | 33 | let mut copies = FastHashMap::default(); |
|
34 | 34 | |
|
35 | 35 | match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) { |
|
36 | 36 | Ok(parents) => { |
|
37 | 37 | for (filename, entry) in &dirstate_map { |
|
38 | 38 | dmap.set_item( |
|
39 | 39 | py, |
|
40 | PyBytes::new(py, filename.as_ref()), |
|
40 | PyBytes::new(py, filename.as_bytes()), | |
|
41 | 41 | make_dirstate_tuple(py, entry)?, |
|
42 | 42 | )?; |
|
43 | 43 | } |
|
44 | 44 | for (path, copy_path) in copies { |
|
45 | 45 | copymap.set_item( |
|
46 | 46 | py, |
|
47 | PyBytes::new(py, path.as_ref()), |

48 | PyBytes::new(py, copy_path.as_ref()), |
|
47 | PyBytes::new(py, path.as_bytes()), | |
|
48 | PyBytes::new(py, copy_path.as_bytes()), | |
|
49 | 49 | )?; |
|
50 | 50 | } |
|
51 | 51 | Ok( |
|
52 | 52 | (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2)) |
|
53 | 53 | .to_py_object(py), |
|
54 | 54 | ) |
|
55 | 55 | } |
|
56 | 56 | Err(e) => Err(PyErr::new::<exc::ValueError, _>( |
|
57 | 57 | py, |
|
58 | 58 | match e { |
|
59 | 59 | DirstateParseError::TooLittleData => { |
|
60 | 60 | "too little data for parents".to_string() |
|
61 | 61 | } |
|
62 | 62 | DirstateParseError::Overflow => { |
|
63 | 63 | "overflow in dirstate".to_string() |
|
64 | 64 | } |
|
65 | 65 | DirstateParseError::CorruptedEntry(e) => e, |
|
66 | 66 | DirstateParseError::Damaged => { |
|
67 | 67 | "dirstate appears to be damaged".to_string() |
|
68 | 68 | } |
|
69 | 69 | }, |
|
70 | 70 | )), |
|
71 | 71 | } |
|
72 | 72 | } |
|
73 | 73 | |
|
74 | 74 | fn pack_dirstate_wrapper( |
|
75 | 75 | py: Python, |
|
76 | 76 | dmap: PyDict, |
|
77 | 77 | copymap: PyDict, |
|
78 | 78 | pl: PyTuple, |
|
79 | 79 | now: PyInt, |
|
80 | 80 | ) -> PyResult<PyBytes> { |
|
81 | 81 | let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?; |
|
82 | 82 | let p1: &[u8] = p1.data(py); |
|
83 | 83 | let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?; |
|
84 | 84 | let p2: &[u8] = p2.data(py); |
|
85 | 85 | |
|
86 | 86 | let mut dirstate_map = extract_dirstate(py, &dmap)?; |
|
87 | 87 | |
|
88 | 88 | let copies: Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr> = copymap |
|
89 | 89 | .items(py) |
|
90 | 90 | .iter() |
|
91 | 91 | .map(|(key, value)| { |
|
92 | 92 | Ok(( |
|
93 | 93 | HgPathBuf::from_bytes(key.extract::<PyBytes>(py)?.data(py)), |
|
94 | 94 | HgPathBuf::from_bytes(value.extract::<PyBytes>(py)?.data(py)), |
|
95 | 95 | )) |
|
96 | 96 | }) |
|
97 | 97 | .collect(); |
|
98 | 98 | |
|
99 | 99 | if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE { |
|
100 | 100 | return Err(PyErr::new::<exc::ValueError, _>( |
|
101 | 101 | py, |
|
102 | 102 | "expected a 20-byte hash".to_string(), |
|
103 | 103 | )); |
|
104 | 104 | } |
|
105 | 105 | |
|
106 | 106 | match pack_dirstate( |
|
107 | 107 | &mut dirstate_map, |
|
108 | 108 | &copies?, |
|
109 | 109 | DirstateParents { |
|
110 | 110 | p1: p1.try_into().unwrap(), |
|
111 | 111 | p2: p2.try_into().unwrap(), |
|
112 | 112 | }, |
|
113 | 113 | Duration::from_secs(now.as_object().extract::<u64>(py)?), |
|
114 | 114 | ) { |
|
115 | 115 | Ok(packed) => { |
|
116 | 116 | for (filename, entry) in &dirstate_map { |
|
117 | 117 | dmap.set_item( |
|
118 | 118 | py, |
|
119 | PyBytes::new(py, filename.as_ref()), |
|
119 | PyBytes::new(py, filename.as_bytes()), | |
|
120 | 120 | make_dirstate_tuple(py, entry)?, |
|
121 | 121 | )?; |
|
122 | 122 | } |
|
123 | 123 | Ok(PyBytes::new(py, &packed)) |
|
124 | 124 | } |
|
125 | 125 | Err(error) => Err(PyErr::new::<exc::ValueError, _>( |
|
126 | 126 | py, |
|
127 | 127 | match error { |
|
128 | 128 | DirstatePackError::CorruptedParent => { |
|
129 | 129 | "expected a 20-byte hash".to_string() |
|
130 | 130 | } |
|
131 | 131 | DirstatePackError::CorruptedEntry(e) => e, |
|
132 | 132 | DirstatePackError::BadSize(expected, actual) => { |
|
133 | 133 | format!("bad dirstate size: {} != {}", actual, expected) |
|
134 | 134 | } |
|
135 | 135 | }, |
|
136 | 136 | )), |
|
137 | 137 | } |
|
138 | 138 | } |
|
139 | 139 | |
|
140 | 140 | /// Create the module, with `__package__` given from parent |
|
141 | 141 | pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> { |
|
142 | 142 | let dotted_name = &format!("{}.parsers", package); |
|
143 | 143 | let m = PyModule::new(py, dotted_name)?; |
|
144 | 144 | |
|
145 | 145 | m.add(py, "__package__", package)?; |
|
146 | 146 | m.add(py, "__doc__", "Parsers - Rust implementation")?; |
|
147 | 147 | |
|
148 | 148 | m.add( |
|
149 | 149 | py, |
|
150 | 150 | "parse_dirstate", |
|
151 | 151 | py_fn!( |
|
152 | 152 | py, |
|
153 | 153 | parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes) |
|
154 | 154 | ), |
|
155 | 155 | )?; |
|
156 | 156 | m.add( |
|
157 | 157 | py, |
|
158 | 158 | "pack_dirstate", |
|
159 | 159 | py_fn!( |
|
160 | 160 | py, |
|
161 | 161 | pack_dirstate_wrapper( |
|
162 | 162 | dmap: PyDict, |
|
163 | 163 | copymap: PyDict, |
|
164 | 164 | pl: PyTuple, |
|
165 | 165 | now: PyInt |
|
166 | 166 | ) |
|
167 | 167 | ), |
|
168 | 168 | )?; |
|
169 | 169 | |
|
170 | 170 | let sys = PyModule::import(py, "sys")?; |
|
171 | 171 | let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; |
|
172 | 172 | sys_modules.set_item(py, dotted_name, &m)?; |
|
173 | 173 | |
|
174 | 174 | Ok(m) |
|
175 | 175 | } |
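
Both `parse_dirstate_wrapper` and `pack_dirstate_wrapper` above flatten the crate's error enums into a single `ValueError` message through an exhaustive `match`. A small sketch of that pattern with an invented error type (not the real `DirstateParseError`), just to show the shape of the mapping:

    // Invented error enum; the point is the exhaustive match that yields one
    // human-readable message per variant, as the wrappers above do before
    // raising ValueError on the Python side.
    enum ParseError {
        TooLittleData,
        Overflow,
        CorruptedEntry(String),
    }

    fn describe(err: &ParseError) -> String {
        match err {
            ParseError::TooLittleData => "too little data for parents".to_string(),
            ParseError::Overflow => "overflow in dirstate".to_string(),
            ParseError::CorruptedEntry(detail) => detail.clone(),
        }
    }

    fn main() {
        assert_eq!(describe(&ParseError::Overflow), "overflow in dirstate");
        assert_eq!(
            describe(&ParseError::TooLittleData),
            "too little data for parents"
        );
        assert_eq!(
            describe(&ParseError::CorruptedEntry("bad entry".to_string())),
            "bad entry"
        );
    }
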
@@ -1,47 +1,44 b'' | |||
|
1 | 1 | use cpython::exc::ValueError; |
|
2 | 2 | use cpython::{PyBytes, PyDict, PyErr, PyObject, PyResult, PyTuple, Python}; |
|
3 | 3 | use hg::revlog::Node; |
|
4 | 4 | use std::convert::TryFrom; |
|
5 | 5 | |
|
6 | 6 | #[allow(unused)] |
|
7 | 7 | pub fn print_python_trace(py: Python) -> PyResult<PyObject> { |
|
8 | 8 | eprintln!("==============================="); |
|
9 | 9 | eprintln!("Printing Python stack from Rust"); |
|
10 | 10 | eprintln!("==============================="); |
|
11 | 11 | let traceback = py.import("traceback")?; |
|
12 | 12 | let sys = py.import("sys")?; |
|
13 | 13 | let kwargs = PyDict::new(py); |
|
14 | 14 | kwargs.set_item(py, "file", sys.get(py, "stderr")?)?; |
|
15 | 15 | traceback.call(py, "print_stack", PyTuple::new(py, &[]), Some(&kwargs)) |
|
16 | 16 | } |
|
17 | 17 | |
|
18 | 18 | // Necessary evil for the time being, could maybe be moved to |
|
19 | 19 | // a TryFrom in Node itself |
|
20 | 20 | const NODE_BYTES_LENGTH: usize = 20; |
|
21 | 21 | type NodeData = [u8; NODE_BYTES_LENGTH]; |
|
22 | 22 | |
|
23 | 23 | /// Copy incoming Python bytes given as `PyObject` into `Node`, |
|
24 | 24 | /// doing the necessary checks |
|
25 | 25 | pub fn node_from_py_object<'a>( |
|
26 | 26 | py: Python, |
|
27 | 27 | bytes: &'a PyObject, |
|
28 | 28 | ) -> PyResult<Node> { |
|
29 | 29 | let as_py_bytes: &'a PyBytes = bytes.extract(py)?; |
|
30 | 30 | node_from_py_bytes(py, as_py_bytes) |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | /// Clone incoming Python bytes given as `PyBytes` as a `Node`, |
|
34 | 34 | /// doing the necessary checks. |
|
35 | pub fn node_from_py_bytes<'a>( | |
|
36 | py: Python, | |
|
37 | bytes: &'a PyBytes, | |
|
38 | ) -> PyResult<Node> { | |
|
35 | pub fn node_from_py_bytes(py: Python, bytes: &PyBytes) -> PyResult<Node> { | |
|
39 | 36 | <NodeData>::try_from(bytes.data(py)) |
|
40 | 37 | .map_err(|_| { |
|
41 | 38 | PyErr::new::<ValueError, _>( |
|
42 | 39 | py, |
|
43 | 40 | format!("{}-byte hash required", NODE_BYTES_LENGTH), |
|
44 | 41 | ) |
|
45 | 42 | }) |
|
46 | .map(|n| n.into()) |
|
43 | .map(Into::into) | |
|
47 | 44 | } |
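
The final hunk ends up calling `.map(Into::into)`; that function-path form is interchangeable with a closure such as `|n| n.into()` whenever the target type implements `From` of the source. A throwaway sketch (the `Wrapped` newtype is invented purely to demonstrate the equivalence):

    // Invented newtype: `.map(Into::into)` and `.map(|x| x.into())` behave
    // identically once From<u8> is implemented for it.
    #[derive(Debug, PartialEq)]
    struct Wrapped(u8);

    impl From<u8> for Wrapped {
        fn from(byte: u8) -> Self {
            Wrapped(byte)
        }
    }

    fn main() {
        let with_path: Option<Wrapped> = Some(7u8).map(Into::into);
        let with_closure: Option<Wrapped> = Some(7u8).map(|x| x.into());
        assert_eq!(with_path, with_closure);
    }
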