upstream/mercurial-mirror Commit - r43109:ce6797ef

rust: apply more formatting fixes...

Yuya Nishihara -

r43109:ce6797ef default

parent child

rust/hg-core/src/ancestors.rs

0 0 -1

             // ancestors.rs
             //
             // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Rust versions of generic DAG ancestors algorithms for Mercurial
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::dagops;
             use std::cmp::max;
             use std::collections::{BinaryHeap, HashSet};
             /// Iterator over the ancestors of a given list of revisions
             ///
             /// This is a generic type, defined and implemented for any Graph, so that
             /// it's easy to
             ///
             /// - unit test in pure Rust
             /// - bind to main Mercurial code, potentially in several ways and have these
             ///   bindings evolve over time
             pub struct AncestorsIterator<G: Graph> {
                 /// Source of parent information for each revision
                 graph: G,
                 /// Revisions waiting to be emitted. `BinaryHeap` is a max-heap, so the
                 /// greatest pending revision always comes out first.
                 visit: BinaryHeap<Revision>,
                 /// Revisions that already went through `visit`, so that none of them
                 /// is scheduled twice. When the iterator is not inclusive, it is also
                 /// seeded with `NULL_REVISION`.
                 seen: HashSet<Revision>,
                 /// Revisions strictly lower than this one are never scheduled
                 stoprev: Revision,
             }
             /// Lazy ancestors set, backed by AncestorsIterator
             ///
             /// Membership tests are answered by a dedicated inner iterator that is
             /// consumed only as far as needed, while `iter()` rebuilds a fresh iterator
             /// from the recorded constructor arguments.
             pub struct LazyAncestors<G: Graph + Clone> {
                 /// Graph handle kept around to build the iterators returned by `iter()`
                 graph: G,
                 /// Partially consumed iterator answering `contains()` and `is_empty()`
                 containsiter: AncestorsIterator<G>,
                 /// Initial revisions, as given to the constructor
                 initrevs: Vec<Revision>,
                 /// Lower bound, as given to the constructor
                 stoprev: Revision,
                 /// Whether the initial revisions themselves belong to the set
                 inclusive: bool,
             }
             /// Helper computing which revisions are *not* ancestors of a set of bases
             ///
             /// The set of bases can grow over time, either explicitly through
             /// `add_bases()` or as a by-product of `remove_ancestors_from()` and
             /// `missing_ancestors()`, which record the ancestors of bases they walk
             /// through.
             pub struct MissingAncestors<G: Graph> {
                 /// Source of parent information for each revision
                 graph: G,
                 /// Known bases and the ancestors of bases discovered so far.
                 /// `NULL_REVISION` is never stored in it.
                 bases: HashSet<Revision>,
                 /// Greatest revision ever passed to `add_bases()`, or `NULL_REVISION`
                 /// if there was none. Parents recorded internally don't update it.
                 max_base: Revision,
             }
             impl<G: Graph> AncestorsIterator<G> {
                 /// Constructor.
                 ///
                 /// Init revisions strictly lower than `stoprev` are discarded.
                 /// If `inclusive` is true, then the remaining init revisions are
                 /// themselves emitted, otherwise iteration starts from their parents.
                 ///
                 /// # Errors
                 ///
                 /// If `inclusive` is false, the parents of the init revisions are
                 /// looked up right away and any error from the graph is propagated.
                 pub fn new(
                     graph: G,
                     initrevs: impl IntoIterator<Item = Revision>,
                     stoprev: Revision,
                     inclusive: bool,
                 ) -> Result<Self, GraphError> {
                     let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
                     if inclusive {
                         let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
                         let seen = visit.iter().cloned().collect();
                         return Ok(AncestorsIterator {
                             graph,
                             visit,
                             seen,
                             stoprev,
                         });
                     }
                     let mut this = AncestorsIterator {
                         graph,
                         visit: BinaryHeap::new(),
                         seen: HashSet::new(),
                         stoprev,
                     };
                     // marking the null revision as seen keeps it out of `visit`
                     // whatever the value of `stoprev`
                     this.seen.insert(NULL_REVISION);
                     for rev in filtered_initrevs {
                         for parent in this.graph.parents(rev)?.iter().cloned() {
                             this.conditionally_push_rev(parent);
                         }
                     }
                     Ok(this)
                 }
                 /// Schedule `rev` for emission, unless it is lower than `stoprev` or
                 /// has already been seen.
                 #[inline]
                 fn conditionally_push_rev(&mut self, rev: Revision) {
                     if self.stoprev <= rev && self.seen.insert(rev) {
                         self.visit.push(rev);
                     }
                 }
                 /// Consumes partially the iterator to tell if the given target
                 /// revision is in the ancestors it emits.
                 ///
                 /// This is meant for iterators actually dedicated to that kind of
                 /// purpose: whatever has been consumed here is not emitted again.
                 ///
                 /// # Errors
                 ///
                 /// Any error emitted by the iteration is propagated.
                 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
                     // `seen` may hold the null revision as a mere guard (see `new()`),
                     // which doesn't make it an emitted ancestor
                     if self.seen.contains(&target) && target != NULL_REVISION {
                         return Ok(true);
                     }
                     for item in self {
                         let rev = item?;
                         if rev == target {
                             return Ok(true);
                         }
                         // revisions come out of the heap greatest first: once below
                         // the target, it can't show up anymore
                         if rev < target {
                             return Ok(false);
                         }
                     }
                     Ok(false)
                 }
                 /// Return the revision that the next call to `next()` would work on,
                 /// without consuming it, or `None` if the iterator is exhausted.
                 pub fn peek(&self) -> Option<Revision> {
                     self.visit.peek().cloned()
                 }
                 /// Tell if the iterator is about an empty set
                 ///
                 /// The result does not depend whether the iterator has been consumed
                 /// or not.
                 /// This is mostly meant for iterators backing a lazy ancestors set
                 pub fn is_empty(&self) -> bool {
                     if !self.visit.is_empty() {
                         return false;
                     }
                     if self.seen.len() > 1 {
                         return false;
                     }
                     // at this point, the seen set is at most a singleton.
                     // If the iterator is not inclusive, it's still possible that it
                     // has only the null revision
                     self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
                 }
             }
             /// Main implementation for the iterator
             ///
             /// Each call emits the greatest revision pending in the heap and schedules
             /// its parents. The algorithm is the same as in `_lazyancestorsiter()` from
             /// `ancestors.py` with a few non crucial differences:
             ///
             /// - there's no filtering of invalid parent revisions. Actually, it should be
             ///   consistent and more efficient to filter them from the end caller.
             /// - we don't have the optimization for adjacent revisions (i.e., the case
             ///   where `p1 == rev - 1`), because it amounts to update the first element of
             ///   the heap without sifting, which Rust's BinaryHeap doesn't let us do.
             /// - we save a few pushes by comparing with `stoprev` before pushing
             impl<G: Graph> Iterator for AncestorsIterator<G> {
                 type Item = Result<Revision, GraphError>;
                 fn next(&mut self) -> Option<Self::Item> {
                     // an empty heap means that we are done
                     let head = *self.visit.peek()?;
                     let parents = match self.graph.parents(head) {
                         Err(err) => return Some(Err(err)),
                         Ok(parents) => parents,
                     };
                     let (first, second) = (parents[0], parents[1]);
                     // `seen` is updated only for a first parent within bounds
                     let schedule_first =
                         first >= self.stoprev && self.seen.insert(first);
                     if schedule_first {
                         // the first parent takes the slot of the emitted revision,
                         // which is one heap operation instead of a pop and a push
                         if let Some(mut top) = self.visit.peek_mut() {
                             *top = first;
                         }
                     } else {
                         self.visit.pop();
                     }
                     self.conditionally_push_rev(second);
                     Some(Ok(head))
                 }
             }
             impl<G: Graph + Clone> LazyAncestors<G> {
                 /// Build the lazy set of ancestors of `initrevs`.
                 ///
                 /// The arguments have the same meaning as for `AncestorsIterator::new`,
                 /// whose errors are propagated.
                 pub fn new(
                     graph: G,
                     initrevs: impl IntoIterator<Item = Revision>,
                     stoprev: Revision,
                     inclusive: bool,
                 ) -> Result<Self, GraphError> {
                     let initrevs: Vec<Revision> = initrevs.into_iter().collect();
                     // one handle for the membership iterator, one kept for `iter()`
                     let kept_graph = graph.clone();
                     let containsiter = AncestorsIterator::new(
                         graph,
                         initrevs.iter().cloned(),
                         stoprev,
                         inclusive,
                     )?;
                     Ok(LazyAncestors {
                         graph: kept_graph,
                         containsiter,
                         initrevs,
                         stoprev,
                         inclusive,
                     })
                 }
                 /// Tell whether `rev` belongs to the set, consuming the inner iterator
                 /// as far as needed.
                 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
                     let inner = &mut self.containsiter;
                     inner.contains(rev)
                 }
                 /// Tell whether the set is empty, as reported by the inner iterator.
                 pub fn is_empty(&self) -> bool {
                     let inner = &self.containsiter;
                     inner.is_empty()
                 }
                 /// Return a fresh iterator over the whole set, independent from the
                 /// one used for membership tests.
                 pub fn iter(&self) -> AncestorsIterator<G> {
                     let fresh = AncestorsIterator::new(
                         self.graph.clone(),
                         self.initrevs.iter().cloned(),
                         self.stoprev,
                         self.inclusive,
                     );
                     // the arguments being the same as for self.containsiter, we know
                     // for sure that AncestorsIterator constructor can't fail
                     fresh.unwrap()
                 }
             }
             impl<G: Graph> MissingAncestors<G> {
                 /// Create an instance working on `graph`, starting with the given
                 /// `bases` (see `add_bases()` for how they are recorded).
                 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
                     let mut missing = MissingAncestors {
                         graph,
                         bases: HashSet::new(),
                         max_base: NULL_REVISION,
                     };
                     missing.add_bases(bases);
                     missing
                 }
                 /// Tell whether at least one base is currently recorded.
                 pub fn has_bases(&self) -> bool {
                     let no_bases = self.bases.is_empty();
                     !no_bases
                 }
                 /// Return a reference to current bases.
                 ///
                 /// This is useful in unit tests, but also setdiscovery.py does
                 /// read the bases attribute of an ancestor.missingancestors instance.
                 pub fn get_bases(&self) -> &HashSet<Revision> {
                     &self.bases
                 }
                 /// Compute the heads of the current bases, relative to the bases
                 /// themselves, by delegating to `dagops::heads`.
                 ///
                 /// The bases are left untouched, hence the object is still usable
                 /// after this.
                 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     let current_bases = self.bases.iter();
                     dagops::heads(&self.graph, current_bases)
                 }
                 /// Consumes the object and returns the relative heads of its bases.
                 pub fn into_bases_heads(
                     mut self,
                 ) -> Result<HashSet<Revision>, GraphError> {
                     dagops::retain_heads(&self.graph, &mut self.bases)?;
                     Ok(self.bases)
                 }
                 /// Record more revisions in `self.bases`
                 ///
                 /// The null revision is skipped, and `self.max_base` is raised
                 /// whenever a greater revision gets recorded.
                 pub fn add_bases(
                     &mut self,
                     new_bases: impl IntoIterator<Item = Revision>,
                 ) {
                     for rev in new_bases {
                         if rev == NULL_REVISION {
                             continue;
                         }
                         if rev > self.max_base {
                             self.max_base = rev;
                         }
                         self.bases.insert(rev);
                     }
                 }
                 /// Remove all ancestors of self.bases from the revs set (in place)
                 ///
                 /// The ancestors of bases met along the way are recorded in
                 /// `self.bases`. Errors from the graph are propagated.
                 pub fn remove_ancestors_from(
                     &mut self,
                     revs: &mut HashSet<Revision>,
                 ) -> Result<(), GraphError> {
                     let known_bases = &self.bases;
                     revs.retain(|rev| !known_bases.contains(rev));
                     // the null revision is always an ancestor. Logically speaking
                     // it's debatable in case bases is empty, but the Python
                     // implementation always adds NULL_REVISION to bases, making it
                     // unconditionally true.
                     revs.remove(&NULL_REVISION);
                     // Nothing left to investigate, or no base to walk down from:
                     // anything in revs > max_base is definitely not an ancestor of
                     // bases, only revs <= max_base need to be investigated
                     let max_base = self.max_base;
                     if revs.is_empty() || max_base == NULL_REVISION {
                         return Ok(());
                     }
                     // whatever happens, we'll keep at least keepcount of them
                     // knowing this gives us an earlier stop condition than
                     // going all the way to the root
                     let keepcount = revs.iter().filter(|&&rev| rev > max_base).count();
                     let mut curr = max_base;
                     loop {
                         if curr == NULL_REVISION || revs.len() <= keepcount {
                             break;
                         }
                         if self.bases.contains(&curr) {
                             revs.remove(&curr);
                             self.add_parents(curr)?;
                         }
                         curr -= 1;
                     }
                     Ok(())
                 }
                 /// Record the parents of `rev` in `self.bases`
                 ///
                 /// Nothing happens for the null revision, and null parents are
                 /// skipped. This has no effect on `self.max_base`
                 #[inline]
                 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
                     if rev != NULL_REVISION {
                         let parents = self.graph.parents(rev)?;
                         // No need to bother the set with inserting NULL_REVISION over
                         // and over
                         self.bases.extend(
                             parents.iter().cloned().filter(|&p| p != NULL_REVISION),
                         );
                     }
                     Ok(())
                 }
                 /// Return all the ancestors of revs that are not ancestors of self.bases
                 ///
                 /// This may include elements from revs.
                 ///
                 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
                 /// revision number order, which is a topological order.
                 ///
                 /// As a side effect, the ancestors of bases met during the walk are
                 /// recorded in `self.bases`. Errors from the graph are propagated.
                 pub fn missing_ancestors(
                     &mut self,
                     revs: impl IntoIterator<Item = Revision>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     // just for convenience and comparison with Python version
                     let bases_visit = &mut self.bases;
                     let mut revs: HashSet<Revision> = revs
                         .into_iter()
                         .filter(|r| !bases_visit.contains(r))
                         .collect();
                     let revs_visit = &mut revs;
                     // no maximum means that nothing is left to investigate
                     let max_revs = match revs_visit.iter().cloned().max() {
                         Some(rev) => rev,
                         None => return Ok(Vec::new()),
                     };
                     // revs_visit has just been filtered against bases_visit, hence
                     // nothing can be in both of them at this point
                     let mut both_visit: HashSet<Revision> = HashSet::new();
                     let start = max(self.max_base, max_revs);
                     // TODO heuristics for with_capacity()?
                     let mut missing: Vec<Revision> = Vec::new();
                     // parents always come before their children in this descending
                     // walk, so each revision is classified once all its visited
                     // children have been
                     for curr in (0..=start).rev() {
                         if revs_visit.is_empty() {
                             break;
                         }
                         if both_visit.remove(&curr) {
                             // curr's parents might have made it into revs_visit through
                             // another path
                             for p in self.graph.parents(curr)?.iter().cloned() {
                                 if p == NULL_REVISION {
                                     continue;
                                 }
                                 revs_visit.remove(&p);
                                 bases_visit.insert(p);
                                 both_visit.insert(p);
                             }
                         } else if revs_visit.remove(&curr) {
                             // curr is an ancestor of revs only
                             missing.push(curr);
                             for p in self.graph.parents(curr)?.iter().cloned() {
                                 if p == NULL_REVISION {
                                     continue;
                                 }
                                 if bases_visit.contains(&p) {
                                     // p is already known to be an ancestor of revs_visit
                                     revs_visit.remove(&p);
                                     both_visit.insert(p);
                                 } else if both_visit.contains(&p) {
                                     // p should have been in bases_visit
                                     revs_visit.remove(&p);
                                     bases_visit.insert(p);
                                 } else {
                                     // visit later
                                     revs_visit.insert(p);
                                 }
                             }
                         } else if bases_visit.contains(&curr) {
                             for p in self.graph.parents(curr)?.iter().cloned() {
                                 if p == NULL_REVISION {
                                     continue;
                                 }
                                 if revs_visit.remove(&p) || both_visit.contains(&p) {
                                     // p is an ancestor of bases_visit, and is implicitly
                                     // in revs_visit, which means p is ::revs & ::bases.
                                     bases_visit.insert(p);
                                     both_visit.insert(p);
                                 } else {
                                     bases_visit.insert(p);
                                 }
                             }
                         }
                     }
                     // revisions have been collected in descending order
                     missing.reverse();
                     Ok(missing)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::{SampleGraph, VecGraph};
                 use std::iter::FromIterator;
                 /// Collect everything an `AncestorsIterator` built from the given
                 /// arguments emits, panicking on any graph error.
                 fn list_ancestors<G: Graph>(
                     graph: G,
                     initrevs: Vec<Revision>,
                     stoprev: Revision,
                     inclusive: bool,
                 ) -> Vec<Revision> {
                     let iter =
                         AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
                             .unwrap();
                     let mut ancestors = Vec::new();
                     for item in iter {
                         ancestors.push(item.unwrap());
                     }
                     ancestors
                 }
                 #[test]
                 /// Same tests as test-ancestor.py, without membership
                 /// (see also test-ancestor.py.out)
                 fn test_list_ancestor() {
                     // (initrevs, stoprev, inclusive, expected ancestors)
                     let cases: Vec<(Vec<Revision>, Revision, bool, Vec<Revision>)> = vec![
                         (vec![], 0, false, vec![]),
                         (vec![11, 13], 0, false, vec![8, 7, 4, 3, 2, 1, 0]),
                         (vec![1, 3], 0, false, vec![1, 0]),
                         (vec![11, 13], 0, true, vec![13, 11, 8, 7, 4, 3, 2, 1, 0]),
                         (vec![11, 13], 6, false, vec![8, 7]),
                         (vec![11, 13], 6, true, vec![13, 11, 8, 7]),
                         (vec![11, 13], 11, true, vec![13, 11]),
                         (vec![11, 13], 12, true, vec![13]),
                         (vec![10, 1], 0, true, vec![10, 5, 4, 2, 1, 0]),
                     ];
                     for (initrevs, stoprev, inclusive, expected) in cases {
                         assert_eq!(
                             list_ancestors(SampleGraph, initrevs, stoprev, inclusive),
                             expected
                         );
                     }
                 }
                 #[test]
                 /// Corner case that's not directly in test-ancestors.py, but
                 /// that happens quite often, as demonstrated by running the whole
                 /// suite.
                 /// For instance, run tests/test-obsolete-checkheads.t
                 fn test_nullrev_input() {
                     let from_null =
                         AncestorsIterator::new(SampleGraph, vec![-1], 0, false);
                     let mut ancestors = from_null.unwrap();
                     // nothing at all must come out of the null revision
                     assert!(ancestors.next().is_none());
                 }
                 #[test]
                 /// Membership tests straight on the iterator
                 fn test_contains() {
                     let mut ancestors =
                         AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
                     assert_eq!(ancestors.contains(1), Ok(true));
                     assert_eq!(ancestors.contains(3), Ok(false));

                     // the null revision is never reported, even though a non
                     // inclusive iterator holds it in its `seen` set
                     let mut ancestors =
                         AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
                     assert_eq!(ancestors.contains(NULL_REVISION), Ok(false));
                 }
                 #[test]
                 /// `peek()` shows the upcoming revision without consuming it
                 fn test_peek() {
                     let mut ancestors =
                         AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
                     // the peeked value is what next() emits right after
                     let peeked = ancestors.peek();
                     assert_eq!(peeked, Some(10));
                     assert_eq!(ancestors.next(), Some(Ok(10)));
                     // iteration then goes on as usual
                     assert_eq!(ancestors.next(), Some(Ok(5)));
                     // exhaust the iterator to check peek() at the very end
                     for _ in ancestors.by_ref() {}
                     assert_eq!(ancestors.peek(), None);
                 }
                 #[test]
                 /// `is_empty()` is about the set, not about the consumption state
                 fn test_empty() {
                     let mut ancestors =
                         AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
                     assert_eq!(ancestors.is_empty(), false);
                     // still not empty once fully consumed
                     for _ in ancestors.by_ref() {}
                     assert_eq!(ancestors.is_empty(), false);

                     let nothing =
                         AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
                     assert_eq!(nothing.is_empty(), true);

                     // case where iter.seen == {NULL_REVISION}
                     let only_null =
                         AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
                     assert_eq!(only_null.is_empty(), true);
                 }
                 /// A corrupted Graph, supporting error handling tests
                 #[derive(Clone, Debug)]
                 struct Corrupted;
                 impl Graph for Corrupted {
                     fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                         match rev {
 => Ok([0, -1]),
                             r => Err(GraphError::ParentOutOfRange(r)),
                         }
                     }
                 }
                 #[test]
                 /// An unknown init revision makes the constructor fail
                 fn test_initrev_out_of_range() {
                     // inclusive=false looks up initrev's parents right away
                     let error = AncestorsIterator::new(SampleGraph, vec![25], 0, false)
                         .err()
                         .expect("Should have been ParentOutOfRange");
                     assert_eq!(error, GraphError::ParentOutOfRange(25));
                 }
                 #[test]
                 /// A graph error met while iterating is emitted as an item
                 fn test_next_out_of_range() {
                     // the constructor succeeds, because the parents of revision 1
                     // can be read, leaving revision 0 pending...
                     let mut ancestors =
                         AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
                     // ...and the parents of revision 0 can't
                     let first = ancestors.next();
                     assert_eq!(first, Some(Err(GraphError::ParentOutOfRange(0))));
                 }
                 #[test]
                 /// Full iteration first, membership tests afterwards
                 fn test_lazy_iter_contains() {
                     let mut lazy =
                         LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
                     let mut listed: Vec<Revision> = Vec::new();
                     for item in lazy.iter() {
                         listed.push(item.unwrap());
                     }
                     // compare with iterator tests on the same initial revisions
                     assert_eq!(listed, vec![8, 7, 4, 3, 2, 1, 0]);
                     // having entirely consumed an iterator out of lazy doesn't
                     // change what contains() answers
                     assert!(lazy.contains(2).unwrap());
                     assert!(!lazy.contains(9).unwrap());
                 }
                 #[test]
                 /// Membership tests first, full iteration afterwards
                 fn test_lazy_contains_iter() {
                     // reminder: the ancestors are [8, 7, 4, 3, 2, 1, 0]
                     let mut lazy =
                         LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
                     assert!(lazy.contains(2).unwrap());
                     assert!(!lazy.contains(6).unwrap());
                     // the inner iterator has gone past 2 by now, yet answers stay
                     // consistent
                     assert!(lazy.contains(2).unwrap());
                     assert!(!lazy.contains(5).unwrap());
                     // iter() still gives us a fresh iterator
                     let mut listed: Vec<Revision> = Vec::new();
                     for item in lazy.iter() {
                         listed.push(item.unwrap());
                     }
                     assert_eq!(listed, vec![8, 7, 4, 3, 2, 1, 0]);
                 }
                 #[test]
                 /// Test constructor, add/get bases and heads
                 fn test_missing_bases() -> Result<(), GraphError> {
                     /// Sets have no order: sort their content to compare it
                     fn sorted(revs: &HashSet<Revision>) -> Vec<Revision> {
                         let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
                         as_vec.sort();
                         as_vec
                     }

                     // duplicates are fine in the constructor input
                     let mut missing_ancestors =
                         MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
                     assert_eq!(sorted(missing_ancestors.get_bases()), [1, 3, 5]);
                     assert_eq!(missing_ancestors.max_base, 5);

                     // so are revisions that are already among the bases
                     missing_ancestors.add_bases([3, 7, 8].iter().cloned());
                     assert_eq!(sorted(missing_ancestors.get_bases()), [1, 3, 5, 7, 8]);
                     assert_eq!(missing_ancestors.max_base, 8);

                     let heads = missing_ancestors.bases_heads()?;
                     assert_eq!(sorted(&heads), [3, 5, 7, 8]);
                     Ok(())
                 }
                 /// Check that removing the ancestors of `bases` from `revs` leaves
                 /// exactly `expected`, which must be sorted.
                 fn assert_missing_remove(
                     bases: &[Revision],
                     revs: &[Revision],
                     expected: &[Revision],
                 ) {
                     let mut remaining_set: HashSet<Revision> =
                         revs.iter().cloned().collect();
                     MissingAncestors::new(SampleGraph, bases.iter().cloned())
                         .remove_ancestors_from(&mut remaining_set)
                         .unwrap();
                     let mut remaining: Vec<Revision> =
                         remaining_set.into_iter().collect();
                     remaining.sort();
                     assert_eq!(&remaining[..], expected);
                 }
                 #[test]
                 /// `remove_ancestors_from()` on the sample graph
                 fn test_missing_remove() {
                     let one_to_nine: Vec<Revision> = Vec::from_iter(1..10);
                     // (bases, revs, expected remaining revs)
                     let cases: Vec<(Vec<Revision>, Vec<Revision>, Vec<Revision>)> = vec![
                         (vec![1, 2, 3, 4, 7], one_to_nine, vec![5, 6, 8, 9]),
                         (vec![10], vec![11, 12, 13, 14], vec![11, 12, 13, 14]),
                         (vec![7], vec![1, 2, 3, 4, 5], vec![3, 5]),
                     ];
                     for (bases, revs, expected) in cases {
                         assert_missing_remove(&bases, &revs, &expected);
                     }
                 }
                 /// Check that the ancestors of `revs` missing from the ancestors of
                 /// `bases` are exactly `expected`, in the same order.
                 fn assert_missing_ancestors(
                     bases: &[Revision],
                     revs: &[Revision],
                     expected: &[Revision],
                 ) {
                     let computed =
                         MissingAncestors::new(SampleGraph, bases.iter().cloned())
                             .missing_ancestors(revs.iter().cloned())
                             .unwrap();
                     assert_eq!(&computed[..], expected);
                 }
                 #[test]
                 /// `missing_ancestors()` on the sample graph
                 fn test_missing_ancestors() {
                     // examples taken from test-ancestors.py by having it run
                     // on the same graph (both naive and fast Python algs)
                     // (bases, revs, expected missing ancestors)
                     let cases: Vec<(Vec<Revision>, Vec<Revision>, Vec<Revision>)> = vec![
                         (vec![10], vec![11], vec![3, 7, 11]),
                         (vec![11], vec![10], vec![5, 10]),
                         (vec![7], vec![9, 11], vec![3, 6, 9, 11]),
                     ];
                     for (bases, revs, expected) in cases {
                         assert_missing_ancestors(&bases, &revs, &expected);
                     }
                 }
                 /// An interesting case found by a random generator similar to
                 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
                 /// failed this, yet none of the integration tests of the whole suite
                 /// caught it.
                 #[test]
                 // Regression check for `MissingAncestors::remove_ancestors_from()`:
                 // a revision that is a direct parent of one of the bases must be
                 // removed from the set handed to it.
                 fn test_remove_ancestors_from_case1() {
                     // One `[p1, p2]` parents pair per row; the row index is used as
                     // the revision number (e.g. row 70 is `[37, 28]`, see the
                     // assertion on `problem_base` below).
                     let graph: VecGraph = vec![
                         [NULL_REVISION, NULL_REVISION],
                         [0, NULL_REVISION],
                         [1, 0],
                         [2, 1],
                         [3, NULL_REVISION],
                         [4, NULL_REVISION],
                         [5, 1],
                         [2, NULL_REVISION],
                         [7, NULL_REVISION],
                         [8, NULL_REVISION],
                         [9, NULL_REVISION],
                         [10, 1],
                         [3, NULL_REVISION],
                         [12, NULL_REVISION],
                         [13, NULL_REVISION],
                         [14, NULL_REVISION],
                         [4, NULL_REVISION],
                         [16, NULL_REVISION],
                         [17, NULL_REVISION],
                         [18, NULL_REVISION],
                         [19, 11],
                         [20, NULL_REVISION],
                         [21, NULL_REVISION],
                         [22, NULL_REVISION],
                         [23, NULL_REVISION],
                         [2, NULL_REVISION],
                         [3, NULL_REVISION],
                         [26, 24],
                         [27, NULL_REVISION],
                         [28, NULL_REVISION],
                         [12, NULL_REVISION],
                         [1, NULL_REVISION],
                         [1, 9],
                         [32, NULL_REVISION],
                         [33, NULL_REVISION],
                         [34, 31],
                         [35, NULL_REVISION],
                         [36, 26],
                         [37, NULL_REVISION],
                         [38, NULL_REVISION],
                         [39, NULL_REVISION],
                         [40, NULL_REVISION],
                         [41, NULL_REVISION],
                         [42, 26],
                         [0, NULL_REVISION],
                         [44, NULL_REVISION],
                         [45, 4],
                         [40, NULL_REVISION],
                         [47, NULL_REVISION],
                         [36, 0],
                         [49, NULL_REVISION],
                         [NULL_REVISION, NULL_REVISION],
                         [51, NULL_REVISION],
                         [52, NULL_REVISION],
                         [53, NULL_REVISION],
                         [14, NULL_REVISION],
                         [55, NULL_REVISION],
                         [15, NULL_REVISION],
                         [23, NULL_REVISION],
                         [58, NULL_REVISION],
                         [59, NULL_REVISION],
                         [2, NULL_REVISION],
                         [61, 59],
                         [62, NULL_REVISION],
                         [63, NULL_REVISION],
                         [NULL_REVISION, NULL_REVISION],
                         [65, NULL_REVISION],
                         [66, NULL_REVISION],
                         [67, NULL_REVISION],
                         [68, NULL_REVISION],
                         [37, 28],
                         [69, 25],
                         [71, NULL_REVISION],
                         [72, NULL_REVISION],
                         [50, 2],
                         [74, NULL_REVISION],
                         [12, NULL_REVISION],
                         [18, NULL_REVISION],
                         [77, NULL_REVISION],
                         [78, NULL_REVISION],
                         [79, NULL_REVISION],
                         [43, 33],
                         [81, NULL_REVISION],
                         [82, NULL_REVISION],
                         [83, NULL_REVISION],
                         [84, 45],
                         [85, NULL_REVISION],
                         [86, NULL_REVISION],
                         [NULL_REVISION, NULL_REVISION],
                         [88, NULL_REVISION],
                         [NULL_REVISION, NULL_REVISION],
                         [76, 83],
                         [44, NULL_REVISION],
                         [92, NULL_REVISION],
                         [93, NULL_REVISION],
                         [9, NULL_REVISION],
                         [95, 67],
                         [96, NULL_REVISION],
                         [97, NULL_REVISION],
                         [NULL_REVISION, NULL_REVISION],
                     ];
                     let problem_rev = 28 as Revision;
                     let problem_base = 70 as Revision;
                     // making the problem obvious: problem_rev is a parent of problem_base
                     assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
                     // `graph` is moved into the `MissingAncestors`; the second
                     // argument is the collection of bases, which includes
                     // `problem_base`.
                     let mut missing_ancestors: MissingAncestors<VecGraph> =
                         MissingAncestors::new(
                             graph,
                             [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
                                 .iter()
                                 .cloned(),
                         );
                     assert!(missing_ancestors.bases.contains(&problem_base));
                     // The set to be filtered; it initially contains `problem_rev`.
                     let mut revs: HashSet<Revision> =
                         [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
                             .iter()
                             .cloned()
                             .collect();
                     missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
                     // The actual regression check: the parent of a base is gone.
                     assert!(!revs.contains(&problem_rev));
                 }
             }

rust/hg-core/src/dagops.rs

0 +4 -5

             // dagops.rs
             //
             // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Miscellaneous DAG operations
             //!
             //! # Terminology
-            //! - By *relative heads* of a collection of revision numbers (`Revision`),
+            //! - By *relative heads* of a collection of revision numbers (`Revision`), we
-            //!   we mean those revisions that have no children among the collection.
+            //!   mean those revisions that have no children among the collection.
-            //! - Similarly *relative roots* of a collection of `Revision`, we mean
+            //! - Similarly *relative roots* of a collection of `Revision`, we mean those
-            //!   those whose parents, if any, don't belong to the collection.
+            //!   whose parents, if any, don't belong to the collection.
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::ancestors::AncestorsIterator;
             use std::collections::{BTreeSet, HashSet};
             /// Discard from `set` every parent of `rev`, as reported by `graph`.
             ///
             /// Parent slots holding `NULL_REVISION` are skipped. An error from
             /// `graph.parents()` is returned to the caller, leaving `set` untouched.
             fn remove_parents(
                 graph: &impl Graph,
                 rev: Revision,
                 set: &mut HashSet<Revision>,
             ) -> Result<(), GraphError> {
                 let parents = graph.parents(rev)?;
                 parents
                     .iter()
                     .filter(|&&parent| parent != NULL_REVISION)
                     .for_each(|parent| {
                         set.remove(parent);
                     });
                 Ok(())
             }
             /// Compute the relative heads of the revisions emitted by `iter_revs`.
             ///
             /// A revision is a relative head if none of the emitted revisions has it
             /// as a parent. `NULL_REVISION` is never part of the result.
             ///
             /// # Performance notes
             /// The iterator is cloned once: one pass builds a `HashSet` of the
             /// candidates, the other pass removes the parents of each revision.
             ///
             /// An `Iterator` is required rather than `impl IntoIterator` so that
             /// cloning it does not end up cloning the whole collection behind it.
             pub fn heads<'a>(
                 graph: &impl Graph,
                 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
             ) -> Result<HashSet<Revision>, GraphError> {
                 // every non-null input revision starts out as a candidate...
                 let mut candidates: HashSet<Revision> = iter_revs
                     .clone()
                     .filter(|&&rev| rev != NULL_REVISION)
                     .cloned()
                     .collect();
                 // ...and loses that status as soon as it is seen as a parent
                 iter_revs
                     .filter(|&&rev| rev != NULL_REVISION)
                     .try_for_each(|&rev| {
                         remove_parents(graph, rev, &mut candidates)
                     })?;
                 Ok(candidates)
             }
             /// Shrink `revs` in place so that only its relative heads remain.
             ///
             /// Working in place leaves the handling of the incoming set to the
             /// caller:
             /// - a direct Python binding would probably have to build its own
             ///   `HashSet` from an incoming iterable anyway, even if all it wants is
             ///   the heads.
             /// - a Rust caller can choose whether to clone the set beforehand.
             ///
             /// # Performance notes
             /// A full copy of `revs` is stored in a `Vec` for the duration of the
             /// call.
             pub fn retain_heads(
                 graph: &impl Graph,
                 revs: &mut HashSet<Revision>,
             ) -> Result<(), GraphError> {
                 revs.remove(&NULL_REVISION);
                 // `revs` cannot be iterated while being mutated, hence this
                 // snapshot; it cannot contain NULL_REVISION, which was just removed
                 let snapshot: Vec<Revision> = revs.iter().cloned().collect();
                 snapshot
                     .into_iter()
                     .try_for_each(|rev| remove_parents(graph, rev, revs))
             }
             /// Relative roots of `revs`, given as a `HashSet`
             ///
             /// A revision is kept when none of its non-null parents belongs to `revs`.
             /// The result comes in arbitrary order (that of the set iteration).
             pub fn roots<G: Graph>(
                 graph: &G,
                 revs: &HashSet<Revision>,
             ) -> Result<Vec<Revision>, GraphError> {
                 let mut found: Vec<Revision> = Vec::new();
                 for &candidate in revs {
                     let parents = graph.parents(candidate)?;
                     let has_parent_in_revs = parents
                         .iter()
                         .any(|p| *p != NULL_REVISION && revs.contains(p));
                     if !has_parent_in_revs {
                         found.push(candidate);
                     }
                 }
                 Ok(found)
             }
             /// Topological range between two collections of revisions
             ///
             /// The result is what the revset `<roots>::<heads>` would give.
             ///
             /// For now the given `Graph` must implement `Clone`, because a clone of
             /// it is handed over to an internal `AncestorsIterator`. When the graph
             /// comes through `rust-cpython`, that clone is merely a
             /// reference-counted Python pointer.
             ///
             /// # Algorithmic details
             ///
             /// This is a two-pass sweep, inspired by what `reachableroots2` from
             /// `mercurial.cext.parsers` does to get the same results.
             ///
             /// - First pass: iterate over the ancestors of `heads` (inclusive, not
             ///   going below the smallest root), recording them in the
             ///   `heads_ancestors` vector and putting any element of `roots` met on
             ///   the way into the resulting range.
             /// - Second pass: replay the recorded vector backwards, so that a
             ///   revision always comes after its parents, and add each revision that
             ///   has a parent already in the range.
             ///
             /// # Performance notes
             ///
             /// The C implementation works on a flat array with bit flags rather than
             /// on complex structures such as `HashSet`, which makes it faster in most
             /// scenarios. In theory, the present implementation could be more memory
             /// efficient for very large repositories with many branches.
             pub fn range(
                 graph: &(impl Graph + Clone),
                 roots: impl IntoIterator<Item = Revision>,
                 heads: impl IntoIterator<Item = Revision>,
             ) -> Result<BTreeSet<Revision>, GraphError> {
                 let mut result = BTreeSet::new();
                 let roots: HashSet<Revision> = roots.into_iter().collect();
                 // without any root, the range is empty
                 let min_root: Revision = match roots.iter().min() {
                     Some(&lowest) => lowest,
                     None => return Ok(result),
                 };

                 // AncestorsIterator already keeps a `HashSet` of all the revisions
                 // it has seen; we record the same revisions, only in an ordered way.
                 // This duplication leaves room for improvement.
                 let ancestors =
                     AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
                 let mut heads_ancestors: Vec<Revision> = Vec::new();
                 for item in ancestors {
                     let rev = item?;
                     if roots.contains(&rev) {
                         result.insert(rev);
                     }
                     heads_ancestors.push(rev);
                 }

                 for rev in heads_ancestors.into_iter().rev() {
                     let parents = graph.parents(rev)?;
                     let reachable = parents
                         .iter()
                         .any(|p| *p != NULL_REVISION && result.contains(p));
                     if reachable {
                         result.insert(rev);
                     }
                 }
                 Ok(result)
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::SampleGraph;

                 /// Run `retain_heads()` on a set made of `revs`, result sorted ascending
                 fn retain_heads_sorted(
                     graph: &impl Graph,
                     revs: &[Revision],
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut set: HashSet<Revision> = revs.iter().cloned().collect();
                     retain_heads(graph, &mut set)?;
                     let mut sorted: Vec<Revision> = set.into_iter().collect();
                     sorted.sort();
                     Ok(sorted)
                 }

                 #[test]
                 fn test_retain_heads() -> Result<(), GraphError> {
                     // (input revisions, expected sorted heads)
                     let cases: Vec<(Vec<Revision>, Vec<Revision>)> = vec![
                         (vec![4, 5, 6], vec![5, 6]),
                         (vec![4, 1, 6, 12, 0], vec![1, 6, 12]),
                         (vec![1, 2, 3, 4, 5, 6, 7, 8, 9], vec![3, 5, 8, 9]),
                     ];
                     for (input, expected) in cases {
                         assert_eq!(retain_heads_sorted(&SampleGraph, &input)?, expected);
                     }
                     Ok(())
                 }

                 /// Run `heads()` on the given slice, result sorted ascending
                 fn heads_sorted(
                     graph: &impl Graph,
                     revs: &[Revision],
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut sorted: Vec<Revision> =
                         heads(graph, revs.iter())?.into_iter().collect();
                     sorted.sort();
                     Ok(sorted)
                 }

                 #[test]
                 fn test_heads() -> Result<(), GraphError> {
                     // (input revisions, expected sorted heads)
                     let cases: Vec<(Vec<Revision>, Vec<Revision>)> = vec![
                         (vec![4, 5, 6], vec![5, 6]),
                         (vec![4, 1, 6, 12, 0], vec![1, 6, 12]),
                         (vec![1, 2, 3, 4, 5, 6, 7, 8, 9], vec![3, 5, 8, 9]),
                     ];
                     for (input, expected) in cases {
                         assert_eq!(heads_sorted(&SampleGraph, &input)?, expected);
                     }
                     Ok(())
                 }

                 /// Run `roots()` on a set made of `revs`, result sorted ascending
                 fn roots_sorted(
                     graph: &impl Graph,
                     revs: &[Revision],
                 ) -> Result<Vec<Revision>, GraphError> {
                     let set: HashSet<Revision> = revs.iter().cloned().collect();
                     let mut sorted = roots(graph, &set)?;
                     sorted.sort();
                     Ok(sorted)
                 }

                 #[test]
                 fn test_roots() -> Result<(), GraphError> {
                     // (input revisions, expected sorted roots)
                     let cases: Vec<(Vec<Revision>, Vec<Revision>)> = vec![
                         (vec![4, 5, 6], vec![4]),
                         (vec![4, 1, 6, 12, 0], vec![0, 4, 12]),
                         (vec![1, 2, 3, 4, 5, 6, 7, 8, 9], vec![1, 8]),
                     ];
                     for (input, expected) in cases {
                         assert_eq!(roots_sorted(&SampleGraph, &input)?, expected);
                     }
                     Ok(())
                 }

                 /// Run `range()` and flatten the resulting set into a `Vec`
                 fn range_vec(
                     graph: impl Graph + Clone,
                     roots: &[Revision],
                     heads: &[Revision],
                 ) -> Result<Vec<Revision>, GraphError> {
                     let set =
                         range(&graph, roots.iter().cloned(), heads.iter().cloned())?;
                     Ok(set.into_iter().collect())
                 }

                 #[test]
                 fn test_range() -> Result<(), GraphError> {
                     // (roots, heads, expected range)
                     let cases: Vec<(Vec<Revision>, Vec<Revision>, Vec<Revision>)> = vec![
                         (vec![0], vec![4], vec![0, 1, 2, 4]),
                         (vec![0], vec![8], vec![]),
                         (vec![5, 6], vec![10, 11, 13], vec![5, 10]),
                         (vec![5, 6], vec![10, 12], vec![5, 6, 9, 10, 12]),
                     ];
                     for (roots, heads, expected) in cases {
                         assert_eq!(range_vec(SampleGraph, &roots, &heads)?, expected);
                     }
                     Ok(())
                 }
             }

rust/hg-core/src/dirstate/dirs_multiset.rs

0 0 -1

             // dirs_multiset.rs
             //
             // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! A multiset of directory names.
             //!
             //! Used to count the references to directories in a manifest or dirstate.
             use crate::{
                 dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
             };
             use std::collections::hash_map::Entry;
             use std::collections::HashMap;
             /// Multiset of directory names, stored as raw bytes.
             ///
             /// Each key of `inner` is a directory path and the associated value is
             /// the number of references currently recorded for it.
             #[derive(PartialEq, Debug)]
             pub struct DirsMultiset {
                 // directory path => reference count
                 inner: HashMap<Vec<u8>, u32>,
             }
             impl DirsMultiset {
                 /// Initializes the multiset from a dirstate.
                 ///
                 /// If `skip_state` is provided, skips dirstate entries with equal state.
                 pub fn from_dirstate(
                     vec: &HashMap<Vec<u8>, DirstateEntry>,
                     skip_state: Option<EntryState>,
                 ) -> Self {
                     let mut multiset = DirsMultiset {
                         inner: HashMap::new(),
                     };
                     for (filename, DirstateEntry { state, .. }) in vec {
                         // This `if` is optimized out of the loop
                         if let Some(skip) = skip_state {
                             if skip != *state {
                                 multiset.add_path(filename);
                             }
                         } else {
                             multiset.add_path(filename);
                         }
                     }
                     multiset
                 }
                 /// Initializes the multiset from a manifest.
                 pub fn from_manifest(vec: &Vec<Vec<u8>>) -> Self {
                     let mut multiset = DirsMultiset {
                         inner: HashMap::new(),
                     };
                     for filename in vec {
                         multiset.add_path(filename);
                     }
                     multiset
                 }
                 /// Increases the count of deepest directory contained in the path.
                 ///
                 /// If the directory is not yet in the map, adds its parents.
                 pub fn add_path(&mut self, path: &[u8]) {
                     for subpath in files::find_dirs(path) {
                         if let Some(val) = self.inner.get_mut(subpath) {
                             *val += 1;
                             break;
                         }
                         self.inner.insert(subpath.to_owned(), 1);
                     }
                 }
                 /// Decreases the count of deepest directory contained in the path.
                 ///
                 /// If it is the only reference, decreases all parents until one is
                 /// removed.
                 /// If the directory is not in the map, something horrible has happened.
                 pub fn delete_path(
                     &mut self,
                     path: &[u8],
                 ) -> Result<(), DirstateMapError> {
                     for subpath in files::find_dirs(path) {
                         match self.inner.entry(subpath.to_owned()) {
                             Entry::Occupied(mut entry) => {
                                 let val = entry.get().clone();
                                 if val > 1 {
                                     entry.insert(val - 1);
                                     break;
                                 }
                                 entry.remove();
                             }
                             Entry::Vacant(_) => {
                                 return Err(DirstateMapError::PathNotFound(
                                     path.to_owned(),
                                 ))
                             }
                         };
                     }
                     Ok(())
                 }
                 pub fn contains(&self, key: &[u8]) -> bool {
                     self.inner.contains_key(key)
                 }
                 pub fn iter(&self) -> impl Iterator<Item = &Vec<u8>> {
                     self.inner.keys()
                 }
                 pub fn len(&self) -> usize {
                     self.inner.len()
                 }
             }
             // Unit tests for `DirsMultiset`; they reach into the private `inner` map
             // to check the exact reference counts.
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use std::collections::HashMap;
                 /// Deleting from an empty multiset reports `PathNotFound`.
                 #[test]
                 fn test_delete_path_path_not_found() {
                     let mut map = DirsMultiset::from_manifest(&vec![]);
                     let path = b"doesnotexist/";
                     assert_eq!(
                         Err(DirstateMapError::PathNotFound(path.to_vec())),
                         map.delete_path(path)
                     );
                 }
                 /// The empty path can be deleted once after having been added once;
                 /// a second deletion reports `PathNotFound`.
                 #[test]
                 fn test_delete_path_empty_path() {
                     let mut map = DirsMultiset::from_manifest(&vec![vec![]]);
                     let path = b"";
                     assert_eq!(Ok(()), map.delete_path(path));
                     assert_eq!(
                         Err(DirstateMapError::PathNotFound(path.to_vec())),
                         map.delete_path(path)
                     );
                 }
                 /// Deletions decrement the counts, remove exhausted directories and
                 /// then fail once a directory is gone.
                 #[test]
                 fn test_delete_path_successful() {
                     let mut map = DirsMultiset {
                         inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
                             .iter()
                             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
                             .collect(),
                     };
                     // "a/b" goes 2 -> 1, then is removed (which also decrements "a")
                     assert_eq!(Ok(()), map.delete_path(b"a/b/"));
                     assert_eq!(Ok(()), map.delete_path(b"a/b/"));
                     assert_eq!(
                         Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
                         map.delete_path(b"a/b/")
                     );
                     assert_eq!(2, *map.inner.get(&b"a".to_vec()).unwrap());
                     assert_eq!(1, *map.inner.get(&b"a/c".to_vec()).unwrap());
                     // debugging output, sent to stderr
                     eprintln!("{:?}", map);
                     assert_eq!(Ok(()), map.delete_path(b"a/"));
                     eprintln!("{:?}", map);
                     assert_eq!(Ok(()), map.delete_path(b"a/c/"));
                     assert_eq!(
                         Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
                         map.delete_path(b"a/c/")
                     );
                 }
                 /// Adding the empty path creates exactly one entry.
                 #[test]
                 fn test_add_path_empty_path() {
                     let mut map = DirsMultiset::from_manifest(&vec![]);
                     let path = b"";
                     map.add_path(path);
                     assert_eq!(1, map.len());
                 }
                 /// Step by step check of the counts after successive `add_path()`
                 /// calls, ending with a comparison against the full expected map.
                 #[test]
                 fn test_add_path_successful() {
                     let mut map = DirsMultiset::from_manifest(&vec![]);
                     map.add_path(b"a/");
                     assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
                     assert_eq!(1, *map.inner.get(&Vec::new()).unwrap());
                     assert_eq!(2, map.len());
                     // Non directory should be ignored
                     map.add_path(b"a");
                     assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
                     assert_eq!(2, map.len());
                     // Non directory will still add its base
                     map.add_path(b"a/b");
                     assert_eq!(2, *map.inner.get(&b"a".to_vec()).unwrap());
                     assert_eq!(2, map.len());
                     // Duplicate path works
                     map.add_path(b"a/");
                     assert_eq!(3, *map.inner.get(&b"a".to_vec()).unwrap());
                     // Nested dir adds to its base
                     map.add_path(b"a/b/");
                     assert_eq!(4, *map.inner.get(&b"a".to_vec()).unwrap());
                     assert_eq!(1, *map.inner.get(&b"a/b".to_vec()).unwrap());
                     // but not its base's base, because it already existed
                     map.add_path(b"a/b/c/");
                     assert_eq!(4, *map.inner.get(&b"a".to_vec()).unwrap());
                     assert_eq!(2, *map.inner.get(&b"a/b".to_vec()).unwrap());
                     map.add_path(b"a/c/");
                     assert_eq!(1, *map.inner.get(&b"a/c".to_vec()).unwrap());
                     let expected = DirsMultiset {
                         inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
                             .iter()
                             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
                             .collect(),
                     };
                     assert_eq!(map, expected);
                 }
                 /// Both constructors give an empty multiset out of empty input.
                 #[test]
                 fn test_dirsmultiset_new_empty() {
                     let new = DirsMultiset::from_manifest(&vec![]);
                     let expected = DirsMultiset {
                         inner: HashMap::new(),
                     };
                     assert_eq!(expected, new);
                     let new = DirsMultiset::from_dirstate(&HashMap::new(), None);
                     let expected = DirsMultiset {
                         inner: HashMap::new(),
                     };
                     assert_eq!(expected, new);
                 }
                 /// The same four paths give the same counts through
                 /// `from_manifest()` and through `from_dirstate()` without
                 /// `skip_state`.
                 #[test]
                 fn test_dirsmultiset_new_no_skip() {
                     // the type of `input_vec` is inferred from the `from_manifest()`
                     // parameter
                     let input_vec = ["a/", "b/", "a/c", "a/d/"]
                         .iter()
                         .map(|e| e.as_bytes().to_vec())
                         .collect();
                     let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
                         .iter()
                         .map(|(k, v)| (k.as_bytes().to_vec(), *v))
                         .collect();
                     let new = DirsMultiset::from_manifest(&input_vec);
                     let expected = DirsMultiset {
                         inner: expected_inner,
                     };
                     assert_eq!(expected, new);
                     // same paths, this time as dirstate entries in `Normal` state
                     let input_map = ["a/", "b/", "a/c", "a/d/"]
                         .iter()
                         .map(|f| {
                             (
                                 f.as_bytes().to_vec(),
                                 DirstateEntry {
                                     state: EntryState::Normal,
                                     mode: 0,
                                     mtime: 0,
                                     size: 0,
                                 },
                             )
                         })
                         .collect();
                     let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
                         .iter()
                         .map(|(k, v)| (k.as_bytes().to_vec(), *v))
                         .collect();
                     let new = DirsMultiset::from_dirstate(&input_map, None);
                     let expected = DirsMultiset {
                         inner: expected_inner,
                     };
                     assert_eq!(expected, new);
                 }
                 /// With `skip_state` set to `Normal`, only the `Removed` and `Merged`
                 /// entries are counted.
                 #[test]
                 fn test_dirsmultiset_new_skip() {
                     let input_map = [
                         ("a/", EntryState::Normal),
                         ("a/b/", EntryState::Normal),
                         ("a/c", EntryState::Removed),
                         ("a/d/", EntryState::Merged),
                     ]
                     .iter()
                     .map(|(f, state)| {
                         (
                             f.as_bytes().to_vec(),
                             DirstateEntry {
                                 state: *state,
                                 mode: 0,
                                 mtime: 0,
                                 size: 0,
                             },
                         )
                     })
                     .collect();
                     // "a" incremented with "a/c" and "a/d/"
                     let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
                         .iter()
                         .map(|(k, v)| (k.as_bytes().to_vec(), *v))
                         .collect();
                     let new =
                         DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal));
                     let expected = DirsMultiset {
                         inner: expected_inner,
                     };
                     assert_eq!(expected, new);
                 }
             }

rust/hg-core/src/discovery.rs

0 0 -1

             // discovery.rs
             //
             // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Discovery operations
             //!
             //! This is a Rust counterpart to the `partialdiscovery` class of
             //! `mercurial.setdiscovery`
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::ancestors::MissingAncestors;
             use crate::dagops;
             use rand::seq::SliceRandom;
             use rand::{thread_rng, RngCore, SeedableRng};
             use std::cmp::{max, min};
             use std::collections::{HashMap, HashSet, VecDeque};
             type Rng = rand_pcg::Pcg32;
             /// State kept for a discovery over a graph `G`.
             ///
             /// NOTE(review): presumably the Rust counterpart of the Python
             /// `partialdiscovery` class named in the module documentation; the
             /// methods using these fields are not visible from here — confirm the
             /// field notes below against them.
             pub struct PartialDiscovery<G: Graph + Clone> {
                 target_heads: Option<Vec<Revision>>,
                 graph: G, // plays the role of self._repo
                 // revisions known to be common, tracked through `MissingAncestors`
                 // (presumably used as its bases — TODO confirm)
                 common: MissingAncestors<G>,
                 // `None` presumably means "not computed yet" — TODO confirm
                 undecided: Option<HashSet<Revision>>,
                 // revision => children, presumably built lazily — TODO confirm
                 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
                 missing: HashSet<Revision>,
                 // random number generator (`rand_pcg::Pcg32`, see the `Rng` alias)
                 rng: Rng,
                 respect_size: bool,
                 randomize: bool,
             }
             /// Figures about a discovery, exposed to callers.
             pub struct DiscoveryStats {
                 /// Optional count related to the undecided set; presumably `None`
                 /// while that set has not been computed — TODO confirm against the
                 /// code filling this structure.
                 pub undecided: Option<usize>,
             }
             /// Grow an existing sample with revisions exponentially distant from
             /// each element of `heads`.
             ///
             /// The graph is walked breadth-first from `heads` (which are at distance
             /// 1) by following `parentsfn`. A running factor starts at 1 and is
             /// doubled each time a visited revision lies further than it; a revision
             /// is added to the sample when its distance is equal to the factor.
             ///
             /// If a target size is given, sampling stops as soon as an insertion
             /// makes the sample reach it. Otherwise the walk goes on until nothing
             /// is left to visit.
             ///
             /// - `revs`: set of revs we want to discover. If `Some`, parents outside
             ///   of it are not followed; if `None`, every parent returned by
             ///   `parentsfn` is followed.
             /// - `heads`: the revisions the walk starts from
             /// - `sample`: the sample to update, in place
             /// - `parentsfn`: a callable resolving the parents of a revision; its
             ///   errors are returned as is
             /// - `quicksamplesize`: optional target size of the sample
             fn update_sample<I>(
                 revs: Option<&HashSet<Revision>>,
                 heads: impl IntoIterator<Item = Revision>,
                 sample: &mut HashSet<Revision>,
                 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
                 quicksamplesize: Option<usize>,
             ) -> Result<(), GraphError>
             where
                 I: Iterator<Item = Revision>,
             {
                 let mut distances: HashMap<Revision, u32> = HashMap::new();
                 let mut queue: VecDeque<Revision> = heads.into_iter().collect();
                 let mut visited: HashSet<Revision> = HashSet::new();
                 let mut factor: u32 = 1;

                 while let Some(current) = queue.pop_front() {
                     // a revision may have been queued several times
                     if !visited.insert(current) {
                         continue;
                     }

                     // heads have no recorded distance yet: they get 1
                     let dist = *distances.entry(current).or_insert(1);
                     if dist > factor {
                         factor *= 2;
                     }
                     if dist == factor {
                         sample.insert(current);
                         let full = quicksamplesize
                             .map_or(false, |limit| sample.len() >= limit);
                         if full {
                             return Ok(());
                         }
                     }

                     for parent in parentsfn(current)? {
                         let wanted =
                             revs.map_or(true, |known| known.contains(&parent));
                         if wanted {
                             // the first recorded distance wins
                             distances.entry(parent).or_insert(dist + 1);
                             queue.push_back(parent);
                         }
                     }
                 }
                 Ok(())
             }
             /// Iterator over the parents of a revision that are not `NULL_REVISION`.
             struct ParentsIterator {
                 /// Both parent slots, as returned by `Graph::parents`
                 parents: [Revision; 2],
                 /// Index of the next slot to look at; 2 or more means exhausted
                 cur: usize,
             }
             impl ParentsIterator {
                 /// Build the iterator over the parents of `r` in `graph`.
                 ///
                 /// Any error of `graph.parents(r)` is returned as is.
                 fn graph_parents(
                     graph: &impl Graph,
                     r: Revision,
                 ) -> Result<ParentsIterator, GraphError> {
                     let parents = graph.parents(r)?;
                     Ok(ParentsIterator { parents, cur: 0 })
                 }
             }
             impl Iterator for ParentsIterator {
                 type Item = Revision;
                 /// Yield the next parent, skipping the `NULL_REVISION` slots.
                 fn next(&mut self) -> Option<Revision> {
                     while self.cur <= 1 {
                         let candidate = self.parents[self.cur];
                         self.cur += 1;
                         if candidate != NULL_REVISION {
                             return Some(candidate);
                         }
                     }
                     None
                 }
             }
             impl<G: Graph + Clone> PartialDiscovery<G> {
                 /// Create a PartialDiscovery object, with the intent
                 /// of comparing our `::<target_heads>` revset to the contents of another
                 /// repo.
                 ///
                 /// For now `target_heads` is passed as a vector, and is kept until
                 /// the first call to `ensure_undecided()`, which uses it.
                 ///
                 /// Making the signature more flexible would require turning it into
                 /// a type argument of `PartialDiscovery` or a trait object, since it
                 /// has to be stored in the meanwhile.
                 ///
                 /// The `respect_size` boolean controls how the sampling methods
                 /// interpret the size requested by the caller. If it is `false`,
                 /// they are allowed to produce a sample whose size is more
                 /// appropriate to the situation (typically bigger).
                 ///
                 /// The `randomize` boolean affects sampling, and specifically how
                 /// limiting and last-minute expanding are done:
                 ///
                 /// If `true`, both perform random picking from `self.undecided`, with
                 /// a random generator seeded from `thread_rng()`. This is currently
                 /// the best for actual discoveries.
                 ///
                 /// If `false`, a reproducible picking strategy is used and the seed
                 /// is all zeros. This is useful for integration tests.
                 pub fn new(
                     graph: G,
                     target_heads: Vec<Revision>,
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     let seed: [u8; 16] = if randomize {
                         let mut entropy = [0u8; 16];
                         thread_rng().fill_bytes(&mut entropy);
                         entropy
                     } else {
                         [0; 16]
                     };
                     Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
                 }
                 /// Same as `new()`, with the seed of the random generator given by
                 /// the caller instead of being derived from `randomize`.
                 ///
                 /// The object starts without any common base nor missing revision;
                 /// the undecided set and the children cache are left uncomputed.
                 pub fn new_with_seed(
                     graph: G,
                     target_heads: Vec<Revision>,
                     seed: [u8; 16],
                     respect_size: bool,
                     randomize: bool,
                 ) -> Self {
                     // one handle on the graph for us, the other one for `common`
                     let own_graph = graph.clone();
                     let common = MissingAncestors::new(graph, vec![]);
                     PartialDiscovery {
                         target_heads: Some(target_heads),
                         graph: own_graph,
                         common,
                         undecided: None,
                         children_cache: None,
                         missing: HashSet::new(),
                         rng: Rng::from_seed(seed),
                         respect_size,
                         randomize,
                     }
                 }
                 /// Reduce `sample` to at most `size` elements and return them as a
                 /// vector.
                 ///
                 /// Without `self.randomize`, the sample is sorted and its first
                 /// `size` elements are kept. With it, a sample that already fits is
                 /// returned untouched, otherwise `size` elements are picked with
                 /// `self.rng`.
                 fn limit_sample(
                     &mut self,
                     mut sample: Vec<Revision>,
                     size: usize,
                 ) -> Vec<Revision> {
                     if !self.randomize {
                         sample.sort();
                         sample.truncate(size);
                         return sample;
                     }
                     if sample.len() <= size {
                         return sample;
                     }
                     let excess = sample.len() - size;
                     // Shuffle whichever is smaller: the part to keep or the part
                     // to drop.
                     let keep_shuffled = size < excess;
                     let amount = if keep_shuffled { size } else { excess };
                     let (shuffled, untouched) =
                         sample.partial_shuffle(&mut self.rng, amount);
                     if keep_shuffled {
                         shuffled.to_vec()
                     } else {
                         untouched.to_vec()
                     }
                 }
                 /// Register revisions known as being common
                 ///
                 /// The revisions are added to the bases of `self.common`. If this
                 /// changed the number of bases and the undecided set has already
                 /// been computed, the ancestors of the common set are removed from
                 /// it. The undecided set is never computed by this method.
                 pub fn add_common_revisions(
                     &mut self,
                     common: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let known_before = self.common.get_bases().len();
                     self.common.add_bases(common);
                     let grew = self.common.get_bases().len() != known_before;
                     if grew {
                         if let Some(undecided) = self.undecided.as_mut() {
                             self.common.remove_ancestors_from(undecided)?;
                         }
                     }
                     Ok(())
                 }
                 /// Register revisions known as being missing
                 ///
                 /// The given revisions and all their descendants found through the
                 /// children cache are recorded in `self.missing` and removed from
                 /// the undecided set.
                 ///
                 /// # Performance note
                 ///
                 /// Except in the most trivial case (nothing to register), the first
                 /// call of this method has the side effect of computing the
                 /// `self.undecided` set for the first time, and the related caches
                 /// it might need for efficiency of its internal computation. This is
                 /// typically faster if more information is available in
                 /// `self.common`. Therefore, for good performance, the caller should
                 /// avoid calling this too early.
                 pub fn add_missing_revisions(
                     &mut self,
                     missing: impl IntoIterator<Item = Revision>,
                 ) -> Result<(), GraphError> {
                     let mut queue: VecDeque<Revision> = missing.into_iter().collect();
                     if queue.is_empty() {
                         return Ok(());
                     }
                     self.ensure_children_cache()?;
                     self.ensure_undecided()?; // for safety of possible future refactors
                     let children_of = self.children_cache.as_ref().unwrap();
                     let undecided = self.undecided.as_mut().unwrap();
                     let mut queued_children: HashSet<Revision> = HashSet::new();
                     while let Some(rev) = queue.pop_front() {
                         let newly_missing = self.missing.insert(rev);
                         if !newly_missing {
                             // either it's known to be missing from a previous
                             // invocation, and there's no need to iterate on its
                             // children (we know they are all missing)
                             // or it's from a previous iteration of this loop
                             // and its children have already been queued
                             continue;
                         }
                         undecided.remove(&rev);
                         if let Some(kids) = children_of.get(&rev) {
                             for &kid in kids {
                                 if queued_children.insert(kid) {
                                     queue.push_back(kid);
                                 }
                             }
                         }
                     }
                     Ok(())
                 }
                 /// Do we have any information about the peer?
                 ///
                 /// This only looks at the common set, by asking `self.common` whether
                 /// it has bases; revisions registered as missing are not considered.
                 pub fn has_info(&self) -> bool {
                     self.common.has_bases()
                 }
                 /// Did we acquire full knowledge of our Revisions that the peer has?
                 pub fn is_complete(&self) -> bool {
                     self.undecided.as_ref().map_or(false, |s| s.is_empty())
                 }
                 /// Return the heads of the currently known common set of revisions.
                 ///
                 /// If the discovery process is not complete (see `is_complete()`), the
                 /// caller must be aware that this is an intermediate state.
                 ///
                 /// On the other hand, if it is complete, then this is currently
                 /// the only way to retrieve the end results of the discovery process.
                 ///
                 /// We may introduce in the future an `into_common_heads` call that
                 /// would be more appropriate for normal Rust callers, dropping `self`
                 /// if it is complete.
                 ///
                 /// The result, including any `GraphError`, is the one of
                 /// `bases_heads()` called on `self.common`.
                 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     self.common.bases_heads()
                 }
                 /// Force first computation of `self.undecided`
                 ///
                 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
                 /// unwrapped to get workable immutable or mutable references without
                 /// any panic.
                 ///
                 /// This is an imperative call instead of an access with added lazyness
                 /// to reduce easily the scope of mutable borrow for the caller,
                 /// compared to undecided(&'a mut self) -> &'a… that would keep it
                 /// as long as the resulting immutable one.
                 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
                     if self.undecided.is_some() {
                         return Ok(());
                     }
                     let tgt = self.target_heads.take().unwrap();
                     self.undecided =
                         Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
                     Ok(())
                 }
                 /// Force first computation of `self.children_cache`
                 ///
                 /// This computes the undecided set if needed, then records each
                 /// undecided revision as a child of each of its parents in the
                 /// graph. Revisions without any undecided child get no entry.
                 ///
                 /// Nothing is done if the cache is already there.
                 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
                     if self.children_cache.is_none() {
                         self.ensure_undecided()?;
                         let mut by_parent: HashMap<Revision, Vec<Revision>> =
                             HashMap::new();
                         for &rev in self.undecided.as_ref().unwrap() {
                             let parents =
                                 ParentsIterator::graph_parents(&self.graph, rev)?;
                             for parent in parents {
                                 by_parent
                                     .entry(parent)
                                     .or_insert_with(Vec::new)
                                     .push(rev);
                             }
                         }
                         self.children_cache = Some(by_parent);
                     }
                     Ok(())
                 }
                 /// Provide statistics about the current state of the discovery process
                 pub fn stats(&self) -> DiscoveryStats {
                     DiscoveryStats {
                         undecided: self.undecided.as_ref().map(|s| s.len()),
                     }
                 }
                 /// Extract a sample of about `size` revisions, with little computation.
                 ///
                 /// - if there are at most `size` undecided revisions, they are all
                 ///   returned;
                 /// - otherwise, if the undecided set has at least `size` heads, these
                 ///   heads are returned after being limited to `size` elements;
                 /// - otherwise the heads of the undecided set are completed with
                 ///   `update_sample`, walking the graph from `headrevs` until `size`
                 ///   is reached. This walk is not restricted to the undecided set.
                 ///
                 /// The undecided set is computed first if needed.
                 pub fn take_quick_sample(
                     &mut self,
                     headrevs: impl IntoIterator<Item = Revision>,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     self.ensure_undecided()?;
                     let mut sample = {
                         let undecided = self.undecided.as_ref().unwrap();
                         if size >= undecided.len() {
                             return Ok(undecided.iter().cloned().collect());
                         }
                         dagops::heads(&self.graph, undecided.iter())?
                     };
                     if size <= sample.len() {
                         let undecided_heads: Vec<Revision> =
                             sample.into_iter().collect();
                         return Ok(self.limit_sample(undecided_heads, size));
                     }
                     let parents_of =
                         |r: Revision| ParentsIterator::graph_parents(&self.graph, r);
                     update_sample(None, headrevs, &mut sample, parents_of, Some(size))?;
                     Ok(sample.into_iter().collect())
                 }
                 /// Extract a sample from `self.undecided`, going from its heads and roots.
                 ///
                 /// The `size` parameter is used to avoid useless computations: if
                 /// there are at most `size` undecided Revisions, they are all
                 /// returned, together with `size` itself.
                 ///
                 /// Otherwise the sample starts as the heads of the undecided set and
                 /// is grown with `update_sample`, first from these heads, then from
                 /// the roots, working on the reverse DAG expressed by
                 /// `self.children_cache`. Both walks stay within the undecided set.
                 ///
                 /// No effort is being made to complete or limit the sample to `size`
                 /// but this method returns another interesting size, the greatest of
                 /// `size` and of the smallest among the numbers of roots and of
                 /// heads, leaving to the caller the decision to use it or not.
                 fn bidirectional_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<(HashSet<Revision>, usize), GraphError> {
                     self.ensure_undecided()?;
                     {
                         // The children cache must not be computed before this
                         // check, yet computing it needs a mutable reference to self:
                         // hence the scope, ending the shared borrow early.
                         let undecided = self.undecided.as_ref().unwrap();
                         if size >= undecided.len() {
                             return Ok((undecided.clone(), size));
                         }
                     }
                     self.ensure_children_cache()?;
                     let revs = self.undecided.as_ref().unwrap();

                     // it's possible that leveraging the children cache would be more
                     // efficient here
                     let mut sample: HashSet<Revision> = revs.clone();
                     dagops::retain_heads(&self.graph, &mut sample)?;
                     let revsheads = sample.clone(); // was again heads(revs) in python

                     // first pass: from the heads, towards the parents
                     let parents_of =
                         |r: Revision| ParentsIterator::graph_parents(&self.graph, r);
                     update_sample(
                         Some(revs),
                         revsheads.iter().cloned(),
                         &mut sample,
                         parents_of,
                         None,
                     )?;

                     // second pass: from the roots, towards the children
                     let revroots: HashSet<Revision> =
                         dagops::roots(&self.graph, revs)?.into_iter().collect();
                     let fewest_extremities = min(revroots.len(), revsheads.len());
                     let prescribed_size = max(size, fewest_extremities);
                     let children_of = self.children_cache.as_ref().unwrap();
                     let no_children: Vec<Revision> = Vec::new();
                     update_sample(
                         Some(revs),
                         revroots,
                         &mut sample,
                         |r| {
                             Ok(children_of
                                 .get(&r)
                                 .unwrap_or(&no_children)
                                 .iter()
                                 .cloned())
                         },
                         None,
                     )?;
                     Ok((sample, prescribed_size))
                 }
                 /// Fill up sample up to the wished size with random undecided Revisions.
                 ///
                 /// This is intended to be used as a last resort completion if the
                 /// regular sampling algorithm returns too few elements.
                 fn random_complete_sample(
                     &mut self,
                     sample: &mut Vec<Revision>,
                     size: usize,
                 ) {
                     let sample_len = sample.len();
                     if size <= sample_len {
                         return;
                     }
                     let take_from: Vec<Revision> = self
                         .undecided
                         .as_ref()
                         .unwrap()
                         .iter()
                         .filter(|&r| !sample.contains(r))
                         .cloned()
                         .collect();
                     sample.extend(self.limit_sample(take_from, size - sample_len));
                 }
                 /// Extract a sample of undecided Revisions, taken from both the heads
                 /// and the roots of the undecided set.
                 ///
                 /// The sample computed by `bidirectional_sample` is limited to the
                 /// target size with `limit_sample`, then completed up to it with
                 /// `random_complete_sample`. The target size is `size` if
                 /// `self.respect_size` is set, and the size prescribed by
                 /// `bidirectional_sample` otherwise.
                 pub fn take_full_sample(
                     &mut self,
                     size: usize,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
                     let target_size = if self.respect_size {
                         size
                     } else {
                         prescribed_size
                     };
                     let candidates: Vec<Revision> = sample_set.into_iter().collect();
                     let mut sample = self.limit_sample(candidates, target_size);
                     self.random_complete_sample(&mut sample, target_size);
                     Ok(sample)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::testing::SampleGraph;
                 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
                 ///
                 /// To avoid actual randomness in these tests, we give it a fixed
                 /// random seed, but by default we'll test the random version.
                 fn full_disco() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![10, 11, 12, 13],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
                 ///
                 /// To avoid actual randomness in tests, we give it a fixed random seed.
                 fn disco12() -> PartialDiscovery<SampleGraph> {
                     PartialDiscovery::new_with_seed(
                         SampleGraph,
                         vec![12],
                         [0; 16],
                         true,
                         true,
                     )
                 }
                 /// The undecided set as a sorted vector, for stable comparisons
                 ///
                 /// Panics if the undecided set has not been computed yet.
                 fn sorted_undecided(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.undecided.as_ref().unwrap().iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 /// The missing set as a sorted vector, for stable comparisons
                 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
                     let mut as_vec: Vec<Revision> =
                         disco.missing.iter().cloned().collect();
                     as_vec.sort();
                     as_vec
                 }
                 /// The common heads as a sorted vector, for stable comparisons
                 fn sorted_common_heads(
                     disco: &PartialDiscovery<SampleGraph>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     let mut as_vec: Vec<Revision> =
                         disco.common_heads()?.iter().cloned().collect();
                     as_vec.sort();
                     Ok(as_vec)
                 }
                 #[test]
                 fn test_add_common_get_undecided() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     assert_eq!(disco.undecided, None);
                     assert!(!disco.has_info());
                     assert_eq!(disco.stats().undecided, None);
                     disco.add_common_revisions(vec![11, 12])?;
                     assert!(disco.has_info());
                     assert!(!disco.is_complete());
                     assert!(disco.missing.is_empty());
                     // add_common_revisions did not trigger a premature computation
                     // of `undecided`, let's check that and ask for them
                     assert_eq!(disco.undecided, None);
                     disco.ensure_undecided()?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
                     assert_eq!(disco.stats().undecided, Some(4));
                     Ok(())
                 }
                 /// in this test, we pretend that our peer misses exactly (8+10)::
                 /// and we're comparing all our repo to it (as in a bare push)
                 #[test]
                 fn test_discovery() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![11, 12])?;
                     disco.add_missing_revisions(vec![8, 10])?;
                     assert_eq!(sorted_undecided(&disco), vec![5]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(!disco.is_complete());
                     disco.add_common_revisions(vec![5])?;
                     assert_eq!(sorted_undecided(&disco), vec![]);
                     assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
                     assert!(disco.is_complete());
                     assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
                     Ok(())
                 }
                 /// Revisions skipped by the early `continue` of
                 /// `add_missing_revisions` must still end up registered as missing.
                 #[test]
                 fn test_add_missing_early_continue() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.add_common_revisions(vec![13, 3, 4])?;
                     disco.ensure_children_cache()?;
                     // 12 is grand-child of 6 through 9
                     // passing them in this order maximizes the chances of the
                     // early continue to do the wrong thing
                     disco.add_missing_revisions(vec![6, 9, 12])?;
                     assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
                     assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
                     assert!(!disco.is_complete());
                     Ok(())
                 }
                 /// A sample smaller than the limit is returned as is
                 #[test]
                 fn test_limit_sample_no_need_to() {
                     let sample = vec![1, 2, 3, 4];
                     assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
                 }
                 /// Keeping less than half of the sample (expected values depend on
                 /// the fixed seed)
                 #[test]
                 fn test_limit_sample_less_than_half() {
                     assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
                 }
                 /// Keeping more than half of the sample (expected values depend on
                 /// the fixed seed)
                 #[test]
                 fn test_limit_sample_more_than_half() {
                     assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
                 }
                 /// Without randomization, the smallest revisions are kept
                 #[test]
                 fn test_limit_sample_no_random() {
                     let mut disco = full_disco();
                     disco.randomize = false;
                     assert_eq!(
                         disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
                         vec![1, 3, 5, 7]
                     );
                 }
                 /// With enough undecided heads, the quick sample is made of them
                 #[test]
                 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((1..=13).collect());
                     let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
                     sample_vec.sort();
                     assert_eq!(sample_vec, vec![10, 11, 12, 13]);
                     Ok(())
                 }
                 #[test]
                 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
                     let mut disco = disco12();
                     disco.ensure_undecided()?;
                     let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
                     sample_vec.sort();
                     // r12's only parent is r9, whose unique grand-parent through the
                     // diamond shape is r4. This ends there because the distance from r4
                     // to the root is only 3.
                     assert_eq!(sample_vec, vec![4, 9, 12]);
                     Ok(())
                 }
                 /// The children cache maps parents to their undecided children
                 #[test]
                 fn test_children_cache() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.ensure_children_cache()?;
                     let cache = disco.children_cache.unwrap();
                     assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
                     assert_eq!(cache.get(&10).cloned(), None);
                     let mut children_4 = cache.get(&4).cloned().unwrap();
                     children_4.sort();
                     assert_eq!(children_4, vec![5, 6, 7]);
                     let mut children_7 = cache.get(&7).cloned().unwrap();
                     children_7.sort();
                     assert_eq!(children_7, vec![9, 11]);
                     Ok(())
                 }
                 /// Completion reaches the wished size, and never shrinks a sample
                 #[test]
                 fn test_complete_sample() {
                     let mut disco = full_disco();
                     let undecided: HashSet<Revision> =
                         [4, 7, 9, 2, 3].iter().cloned().collect();
                     disco.undecided = Some(undecided);
                     let mut sample = vec![0];
                     disco.random_complete_sample(&mut sample, 3);
                     assert_eq!(sample.len(), 3);
                     let mut sample = vec![2, 4, 7];
                     disco.random_complete_sample(&mut sample, 1);
                     assert_eq!(sample.len(), 3);
                 }
                 #[test]
                 fn test_bidirectional_sample() -> Result<(), GraphError> {
                     let mut disco = full_disco();
                     disco.undecided = Some((0..=13).collect());
                     let (sample_set, size) = disco.bidirectional_sample(7)?;
                     assert_eq!(size, 7);
                     let mut sample: Vec<Revision> = sample_set.into_iter().collect();
                     sample.sort();
                     // our DAG is a bit too small for the results to be really interesting
                     // at least it shows that
                     // - we went both ways
                     // - we didn't take all Revisions (6 is not in the sample)
                     assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
                     Ok(())
                 }
             }

rust/hg-core/src/filepatterns.rs

0 +2 -1

             // filepatterns.rs
             //
             // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Handling of Mercurial-specific patterns.
             use crate::{
                 utils::{files::get_path_from_bytes, SliceExt},
                 LineNumber, PatternError, PatternFileError,
             };
             use lazy_static::lazy_static;
             use regex::bytes::{NoExpand, Regex};
             use std::collections::HashMap;
             use std::fs::File;
             use std::io::Read;
             use std::vec::Vec;
             lazy_static! {
                 /// For each of the 256 byte values, taken as index, the bytes to emit
                 /// for it: the byte itself, preceded by a backslash for the bytes
                 /// listed in `to_escape`.
                 static ref RE_ESCAPE: Vec<Vec<u8>> = {
                     let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
                     (0..=255u8)
                         .map(|byte| {
                             if to_escape.contains(&byte) {
                                 vec![b'\\', byte]
                             } else {
                                 vec![byte]
                             }
                         })
                         .collect()
                 };
             }
             /// `(source, replacement)` pairs used by `glob_to_re` for what follows
             /// a `*` in a glob.
             ///
             /// These are matched in order: the first `source` that is a prefix of
             /// the remaining input is consumed and its `replacement` is emitted.
             /// The empty `source` is a prefix of anything, which makes the last pair
             /// the fallback.
             const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
                 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
             /// The kinds of syntax a pattern can use.
             ///
             /// NOTE(review): only the glob variants are described here; the meaning
             /// of the other variants is not visible in this excerpt, to be confirmed
             /// against the code using them.
             #[derive(Debug, Copy, Clone, PartialEq, Eq)]
             pub enum PatternSyntax {
                 Regexp,
                 /// Glob that matches at the front of the path
                 RootGlob,
 -                /// Glob that matches at any suffix of the path (still anchored at slashes)
 +                /// Glob that matches at any suffix of the path (still anchored at
 +                /// slashes)
                 Glob,
                 Path,
                 RelPath,
                 RelGlob,
                 RelRegexp,
                 RootFiles,
             }
             /// Translates the glob pattern `pat` into the equivalent regex bytes.
             ///
             /// - `*`, `**` and `**/` are expanded through `GLOB_REPLACEMENTS`
             /// - `?` becomes `.`
             /// - `[...]` becomes a regex character class (a leading `!` negates it, a
             ///   leading `^` is escaped); a `[` without a closing `]` is a literal
             /// - `{a,b}` becomes the non-capturing alternation `(?:a|b)`
             /// - a backslash escapes the byte that follows it
             /// - any other byte is escaped through the `RE_ESCAPE` table
             fn glob_to_re(pat: &[u8]) -> Vec<u8> {
                 let mut regex: Vec<u8> = Vec::new();
                 let mut remaining = pat;
                 // Number of `{` groups that are currently open
                 let mut open_groups = 0;

                 while let Some((byte, tail)) = remaining.split_first() {
                     remaining = tail;
                     match *byte {
                         b'*' => {
                             // First replacement whose lookahead matches wins
                             let found = GLOB_REPLACEMENTS
                                 .iter()
                                 .find(|(lookahead, _)| remaining.starts_with(lookahead));
                             if let Some((lookahead, replacement)) = found {
                                 remaining = &remaining[lookahead.len()..];
                                 regex.extend_from_slice(replacement);
                             }
                         }
                         b'?' => regex.push(b'.'),
                         b'[' => {
                             // The first byte of the class is skipped when looking for
                             // the closing bracket, so a `]` in first position is kept
                             // as a member of the class.
                             let closing = remaining
                                 .iter()
                                 .skip(1)
                                 .position(|b| *b == b']')
                                 // Account for the one we skipped
                                 .map(|offset| offset + 1);
                             if let Some(closing) = closing {
                                 regex.push(b'[');
                                 for (index, member) in
                                     remaining[..closing].iter().enumerate()
                                 {
                                     match (index, *member) {
                                         (0, b'!') => regex.push(b'^'),
                                         (0, b'^') => regex.extend_from_slice(b"\\^"),
                                         (_, b'\\') => regex.extend_from_slice(b"\\\\"),
                                         (_, other) => regex.push(other),
                                     }
                                 }
                                 regex.push(b']');
                                 remaining = &remaining[closing + 1..];
                             } else {
                                 // Unterminated class: the bracket is a literal
                                 regex.extend_from_slice(b"\\[");
                             }
                         }
                         b'{' => {
                             open_groups += 1;
                             regex.extend_from_slice(b"(?:");
                         }
                         b'}' if open_groups > 0 => {
                             open_groups -= 1;
                             regex.push(b')');
                         }
                         b',' if open_groups > 0 => regex.push(b'|'),
                         b'\\' => {
                             // The escaped byte is consumed along with the backslash; a
                             // trailing backslash stands for itself.
                             let escaped = match remaining.split_first() {
                                 Some((next, tail)) => {
                                     remaining = tail;
                                     next
                                 }
                                 None => byte,
                             };
                             regex.extend_from_slice(&RE_ESCAPE[*escaped as usize]);
                         }
                         other => regex.extend_from_slice(&RE_ESCAPE[other as usize]),
                     }
                 }
                 regex
             }
             /// Returns `pattern` with each byte replaced by its entry in the
             /// `RE_ESCAPE` table, so that the result can be embedded in a regex.
             fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
                 // `pattern.len()` is only a starting capacity: the vector grows as
                 // needed when table entries are longer than one byte.
                 let mut escaped = Vec::with_capacity(pattern.len());
                 for byte in pattern {
                     // Borrow the table entry instead of cloning a `Vec` per input byte
                     escaped.extend_from_slice(&RE_ESCAPE[*byte as usize]);
                 }
                 escaped
             }
             /// Maps the textual name of a pattern kind (`re`, `glob`, `path`, ...) to
             /// its `PatternSyntax`.
             ///
             /// # Errors
             ///
             /// Returns `PatternError::UnsupportedSyntax`, carrying the lossily decoded
             /// name, when `kind` is not one of the known kinds.
             fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
                 let syntax = match kind {
                     b"glob" => PatternSyntax::Glob,
                     b"rootglob" => PatternSyntax::RootGlob,
                     b"relglob" => PatternSyntax::RelGlob,
                     b"re" => PatternSyntax::Regexp,
                     b"relre" => PatternSyntax::RelRegexp,
                     b"path" => PatternSyntax::Path,
                     b"relpath" => PatternSyntax::RelPath,
                     b"rootfilesin" => PatternSyntax::RootFiles,
                     unknown => {
                         let name = String::from_utf8_lossy(unknown).to_string();
                         return Err(PatternError::UnsupportedSyntax(name));
                     }
                 };
                 Ok(syntax)
             }
             /// Converts one pattern of the given `syntax` into regex bytes.
             ///
             /// Regexp patterns are returned unchanged, every other kind is translated.
             /// `globsuffix` is only appended to the regex of glob-like patterns. An
             /// empty `pattern` always gives an empty regex.
             fn _build_single_regex(
                 syntax: PatternSyntax,
                 pattern: &[u8],
                 globsuffix: &[u8],
             ) -> Vec<u8> {
                 if pattern.is_empty() {
                     return Vec::new();
                 }
                 match syntax {
                     PatternSyntax::Regexp => pattern.to_vec(),
                     PatternSyntax::RelRegexp => {
                         if pattern.starts_with(b"^") {
                             // Explicitly anchored: nothing to prepend
                             pattern.to_vec()
                         } else {
                             [&b".*"[..], pattern].concat()
                         }
                     }
                     PatternSyntax::Path | PatternSyntax::RelPath => {
                         if pattern == b"." {
                             Vec::new()
                         } else {
                             let mut regex = escape_pattern(pattern);
                             regex.extend_from_slice(b"(?:/|$)");
                             regex
                         }
                     }
                     PatternSyntax::RootFiles => {
                         let mut regex = Vec::new();
                         if pattern != b"." {
                             // Pattern is a directory name.
                             regex.extend(escape_pattern(pattern));
                             regex.push(b'/');
                         }
                         // Anything after the pattern must be a non-directory.
                         regex.extend_from_slice(b"[^/]+$");
                         regex
                     }
                     PatternSyntax::Glob
                     | PatternSyntax::RelGlob
                     | PatternSyntax::RootGlob => {
                         let prefix: &[u8] = if syntax == PatternSyntax::RelGlob {
                             b"(?:|.*/)"
                         } else {
                             b""
                         };
                         let translated = glob_to_re(pattern);
                         [prefix, translated.as_slice(), globsuffix].concat()
                     }
                 }
             }
             /// Bytes that make a pattern a real glob: `build_single_regex` only takes
             /// its literal shortcut for `rootglob` patterns containing none of them.
             const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
                 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
             /// Public entry point building the regex for `pat`, a pattern of the kind
             /// named `kind`.
             ///
             /// A `rootglob` pattern free of glob special characters is an 'exact'
             /// glob: it is escaped and suffixed with `(?:/|$)` directly, without using
             /// `globsuffix`. Everything else goes through `_build_single_regex`.
             ///
             /// # Errors
             ///
             /// Propagates the error of `parse_pattern_syntax` for an unknown `kind`.
             pub fn build_single_regex(
                 kind: &[u8],
                 pat: &[u8],
                 globsuffix: &[u8],
             ) -> Result<Vec<u8>, PatternError> {
                 let syntax = parse_pattern_syntax(kind)?;
                 let is_exact_rootglob = syntax == PatternSyntax::RootGlob
                     && pat.iter().all(|b| !GLOB_SPECIAL_CHARACTERS.contains(b));
                 if !is_exact_rootglob {
                     return Ok(_build_single_regex(syntax, pat, globsuffix));
                 }
                 let mut regex = escape_pattern(pat);
                 regex.extend_from_slice(b"(?:/|$)");
                 Ok(regex)
             }
             lazy_static! {
                 /// Maps the syntax names accepted in pattern files to the prefix (or
                 /// directive name) stored with each pattern.
                 static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
                     let table: [(&'static [u8], &'static [u8]); 6] = [
                         (b"re", b"relre:"),
                         (b"regexp", b"relre:"),
                         (b"glob", b"relglob:"),
                         (b"rootglob", b"rootglob:"),
                         (b"include", b"include"),
                         (b"subinclude", b"subinclude"),
                     ];
                     table.iter().cloned().collect()
                 };
             }
             /// One parsed pattern, as built by `parse_pattern_file_contents`:
             /// `(syntax-prefixed pattern, 1-based line number, pattern text)`.
             pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
             /// One warning, as built by `parse_pattern_file_contents`:
             /// `(path of the pattern file, unknown syntax name)`.
             type WarningTuple = (Vec<u8>, Vec<u8>);
             /// Parses the contents of a pattern file, one `\n`-separated line at a
             /// time.
             ///
             /// For each line, comments are removed (`\#` stands for a literal `#`),
             /// trailing whitespace is trimmed and empty lines are skipped. A
             /// `syntax: NAME` line changes the syntax of the following patterns; other
             /// lines are patterns, which may carry their own syntax prefix. Patterns
             /// use the `relre:` syntax until told otherwise.
             ///
             /// Returns the parsed patterns together with one `(file_path, name)`
             /// warning per unknown `syntax:` name; warnings are only collected when
             /// `warn` is true. `file_path` is only used to build the warnings.
             pub fn parse_pattern_file_contents(
                 lines: &[u8],
                 file_path: &[u8],
                 warn: bool,
             ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
                 // A `#` preceded by an even number of backslashes (possibly none)
                 // starts a comment; group 1 is what must be kept in front of it.
                 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
                 let comment_escape_regex = Regex::new(r"\\#").unwrap();
                 let mut inputs: Vec<PatternTuple> = vec![];
                 let mut warnings: Vec<WarningTuple> = vec![];
                 let mut current_syntax = b"relre:".as_ref();
                 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
                     // Reported line numbers are 1-based
                     let line_number = line_number + 1;
                     // Declared here so that the unescaped copy outlives the `if`
                     // block in which `line` starts borrowing from it.
                     let line_buf;
                     if line.contains(&b'#') {
                         if let Some(cap) = comment_regex.captures(line) {
                             line = &line[..cap.get(1).unwrap().end()]
                         }
                         // Remaining `\#` sequences are escaped literal `#`
                         line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
                         line = &line_buf;
                     }
                     let mut line = line.trim_end();
                     if line.is_empty() {
                         continue;
                     }
                     if line.starts_with(b"syntax:") {
                         let syntax = line[b"syntax:".len()..].trim();
                         if let Some(rel_syntax) = SYNTAXES.get(syntax) {
                             current_syntax = rel_syntax;
                         } else if warn {
                             warnings.push((file_path.to_owned(), syntax.to_owned()));
                         }
                         continue;
                     }
                     // A pattern can override the current syntax with its own prefix,
                     // given either in its stored form (e.g. `relglob:`) or as a syntax
                     // name followed by a colon (e.g. `glob:`).
                     let mut line_syntax: &[u8] = &current_syntax;
                     for (s, rels) in SYNTAXES.iter() {
                         if line.starts_with(rels) {
                             line_syntax = rels;
                             line = &line[rels.len()..];
                             break;
                         } else if line.starts_with(&[s, b":".as_ref()].concat()) {
                             line_syntax = rels;
                             line = &line[s.len() + 1..];
                             break;
                         }
                     }
                     inputs.push((
                         [line_syntax, line].concat(),
                         line_number,
                         line.to_owned(),
                     ));
                 }
                 (inputs, warnings)
             }
             /// Reads the whole file at `file_path` and parses it with
             /// `parse_pattern_file_contents`.
             ///
             /// # Errors
             ///
             /// Failures to open or read the file are converted into a
             /// `PatternFileError`.
             pub fn read_pattern_file(
                 file_path: &[u8],
                 warn: bool,
             ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
                 let path = get_path_from_bytes(file_path);
                 let mut contents = vec![];
                 File::open(path)?.read_to_end(&mut contents)?;
                 let parsed = parse_pattern_file_contents(&contents, file_path, warn);
                 Ok(parsed)
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 /// Bytes without an escape are copied verbatim, the others get a
                 /// backslash.
                 #[test]
                 fn escape_pattern_test() {
                     let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
                     assert_eq!(escape_pattern(untouched), untouched.to_vec());
                     // All escape codes
                     // (these are raw byte strings: `\t`, `\n`, ... below are a literal
                     // backslash followed by a letter, not control characters)
                     assert_eq!(
                         escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
                         br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                             .to_vec()
                     );
                 }
                 /// One assertion per glob construct handled by `glob_to_re`.
                 #[test]
                 fn glob_test() {
                     assert_eq!(glob_to_re(br#"?"#), br#"."#);
                     assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
                     assert_eq!(glob_to_re(br#"**"#), br#".*"#);
                     assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
                     assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
                     assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
                     assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
                     assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
                 }
                 #[test]
                 fn test_parse_pattern_file_contents() {
                     // A `syntax:` line applies to the patterns that follow it
                     let lines = b"syntax: glob\n*.elc";
                     assert_eq!(
                         vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
                         parse_pattern_file_contents(lines, b"file_path", false).0,
                     );
                     // `syntax:` lines alone do not produce any pattern
                     let lines = b"syntax: include\nsyntax: glob";
                     assert_eq!(
                         parse_pattern_file_contents(lines, b"file_path", false).0,
                         vec![]
                     );
                     // A pattern can carry its own syntax prefix
                     let lines = b"glob:**.o";
                     assert_eq!(
                         parse_pattern_file_contents(lines, b"file_path", false).0,
                         vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
                     );
                 }
                 #[test]
                 fn test_build_single_regex_shortcut() {
                     // Patterns without glob special characters take the shortcut...
                     assert_eq!(
                         br"(?:/|$)".to_vec(),
                         build_single_regex(b"rootglob", b"", b"").unwrap()
                     );
                     assert_eq!(
                         br"whatever(?:/|$)".to_vec(),
                         build_single_regex(b"rootglob", b"whatever", b"").unwrap()
                     );
                     // ... while real globs are translated
                     assert_eq!(
                         br"[^/]*\.o".to_vec(),
                         build_single_regex(b"rootglob", b"*.o", b"").unwrap()
                     );
                 }
             }

rust/hg-core/src/utils.rs

0 +1 -2

             // utils module
             //
             // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Contains useful functions, traits, structs, etc. for use in core.
             pub mod files;
             /// Replaces every non-overlapping occurrence of the `from` slice with the
             /// `to` slice inside the `buf` slice, in place, scanning from the left.
             ///
             /// Nothing happens when `from` is empty, when `from` and `to` differ in
             /// length (the buffer cannot be resized) or when `buf` is shorter than
             /// `from`.
             ///
             /// # Examples
             ///
             /// ```
             /// use crate::hg::utils::replace_slice;
             /// let mut line = b"I hate writing tests!".to_vec();
             /// replace_slice(&mut line, b"hate", b"love");
             /// assert_eq!(
             ///     line,
             ///     b"I love writing tests!".to_vec()
             /// );
             /// ```
             pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
             where
                 T: Clone + PartialEq,
             {
                 if from.is_empty() || buf.len() < from.len() || from.len() != to.len() {
                     return;
                 }
                 // Last index at which an occurrence of `from` can still start
                 let last_start = buf.len() - from.len();
                 let mut i = 0;
                 while i <= last_start {
                     if buf[i..].starts_with(from) {
                         buf[i..i + from.len()].clone_from_slice(to);
                         // Resume after the replacement, so that the elements that
                         // were just written are never matched again.
                         i += from.len();
                     } else {
                         i += 1;
                     }
                 }
             }
             /// Trimming helpers for slices, in the spirit of the `trim*` methods of
             /// `str`. This module implements it for `[u8]`.
             pub trait SliceExt {
                 /// Returns the slice without its trailing whitespace.
                 fn trim_end(&self) -> &Self;
                 /// Returns the slice without its leading whitespace.
                 fn trim_start(&self) -> &Self;
                 /// Returns the slice without its leading and trailing whitespace.
                 fn trim(&self) -> &Self;
             }
             /// Returns `true` unless `c` is an ASCII whitespace byte (space, `\t`,
             /// `\n`, vertical tab, form feed or `\r`).
             ///
             /// The test is done on the raw byte: going through `char` would also
             /// class the bytes 0x85 and 0xA0 as (Unicode) whitespace, although in a
             /// byte string they can be part of a multi-byte UTF-8 sequence (`à` is
             /// encoded as 0xC3 0xA0, for instance).
             fn is_not_whitespace(c: &u8) -> bool {
                 match *c {
                     b' ' | b'\t' | b'\n' | b'\x0b' | b'\x0c' | b'\r' => false,
                     _ => true,
                 }
             }
             impl SliceExt for [u8] {
                 /// Drops the whitespace bytes found at the end of the slice.
                 fn trim_end(&self) -> &[u8] {
                     match self.iter().rposition(is_not_whitespace) {
                         Some(last) => &self[..=last],
                         // Nothing but whitespace (or nothing at all)
                         None => &[],
                     }
                 }
                 /// Drops the whitespace bytes found at the start of the slice.
                 fn trim_start(&self) -> &[u8] {
                     match self.iter().position(is_not_whitespace) {
                         Some(first) => &self[first..],
                         // Nothing but whitespace (or nothing at all)
                         None => &[],
                     }
                 }
                 /// Drops the whitespace bytes found at both ends of the slice.
                 ///
                 /// ```
                 /// use hg::utils::SliceExt;
                 /// assert_eq!(
                 ///     b"  to trim  ".trim(),
                 ///     b"to trim"
                 /// );
                 /// assert_eq!(
                 ///     b"to trim  ".trim(),
                 ///     b"to trim"
                 /// );
                 /// assert_eq!(
                 ///     b"  to trim".trim(),
                 ///     b"to trim"
                 /// );
                 /// ```
                 fn trim(&self) -> &[u8] {
                     let without_prefix = self.trim_start();
                     without_prefix.trim_end()
                 }
             }

rust/hg-cpython/src/filepatterns.rs

0 +2 -1

             // filepatterns.rs
             //
             // Copyright 2019, Georges Racinet <gracinet@anybox.fr>,
             // Raphaël Gomès <rgomes@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Bindings for the `hg::filepatterns` module provided by the
             //! `hg-core` crate. From Python, this will be seen as `rustext.filepatterns`
-            //! and can be used as replacement for the the pure `filepatterns` Python module.
+            //! and can be used as a replacement for the pure `filepatterns` Python
+            //! module.
             //!
             use crate::exceptions::{PatternError, PatternFileError};
             use cpython::{
                 PyBytes, PyDict, PyModule, PyObject, PyResult, PyTuple, Python, ToPyObject,
             };
             use hg::{build_single_regex, read_pattern_file, LineNumber, PatternTuple};
             /// Python-facing wrapper around `hg::read_pattern_file`.
             ///
             /// Rust does not like functions with different return signatures: the
             /// hg-core function always returns the 3-tuple version of the patterns,
             /// and the (potential) conversion is done here, since it is not likely to
             /// have any measurable impact on performance. With `source_info`, each
             /// pattern is returned as a `(pattern, line number, source)` tuple,
             /// otherwise as the pattern alone.
             ///
             /// The Python implementation passes a function reference for `warn` instead
             /// of a boolean that is used to emit warnings while parsing. The Rust
             /// implementation chooses to accumulate the warnings and propagate them to
             /// Python upon completion. See the `readpatternfile` function in `match.py`
             /// for more details.
             fn read_pattern_file_wrapper(
                 py: Python,
                 file_path: PyObject,
                 warn: bool,
                 source_info: bool,
             ) -> PyResult<PyTuple> {
                 let path = file_path.extract::<PyBytes>(py)?;
                 let (patterns, warnings) = read_pattern_file(path.data(py), warn)
                     .map_err(|e| PatternFileError::pynew(py, e))?;

                 if source_info {
                     let with_source = |entry: &PatternTuple| {
                         let (pattern, line_number, source) = entry;
                         (
                             PyBytes::new(py, pattern),
                             *line_number,
                             PyBytes::new(py, source),
                         )
                     };
                     let results: Vec<(PyBytes, LineNumber, PyBytes)> =
                         patterns.iter().map(with_source).collect();
                     Ok((results, warnings_to_py_bytes(py, &warnings))
                         .to_py_object(py))
                 } else {
                     let pattern_only =
                         |entry: &PatternTuple| PyBytes::new(py, &entry.0);
                     let results: Vec<PyBytes> =
                         patterns.iter().map(pattern_only).collect();
                     Ok((results, warnings_to_py_bytes(py, &warnings))
                         .to_py_object(py))
                 }
             }
             /// Converts each `(path, syntax)` warning into a pair of Python `bytes`
             /// objects, keeping their order.
             fn warnings_to_py_bytes(
                 py: Python,
                 warnings: &[(Vec<u8>, Vec<u8>)],
             ) -> Vec<(PyBytes, PyBytes)> {
                 let mut converted = Vec::with_capacity(warnings.len());
                 for (path, syntax) in warnings {
                     converted.push((PyBytes::new(py, path), PyBytes::new(py, syntax)));
                 }
                 converted
             }
             /// Python-facing wrapper around `hg::build_single_regex`.
             ///
             /// The three arguments must be Python `bytes` objects; the resulting regex
             /// is returned as `bytes` as well. A failure of the Rust function is
             /// raised as a `PatternError`.
             fn build_single_regex_wrapper(
                 py: Python,
                 kind: PyObject,
                 pat: PyObject,
                 globsuffix: PyObject,
             ) -> PyResult<PyBytes> {
                 let kind = kind.extract::<PyBytes>(py)?;
                 let pat = pat.extract::<PyBytes>(py)?;
                 let globsuffix = globsuffix.extract::<PyBytes>(py)?;

                 build_single_regex(kind.data(py), pat.data(py), globsuffix.data(py))
                     .map(|regex| PyBytes::new(py, &regex))
                     .map_err(|e| PatternError::pynew(py, e))
             }
             /// Creates the `<package>.filepatterns` Python module, exposing
             /// `build_single_regex`, `read_pattern_file` and `PatternError`, and
             /// registers it in `sys.modules` under its dotted name.
             pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
                 let dotted_name = format!("{}.filepatterns", package);
                 let module = PyModule::new(py, &dotted_name)?;

                 module.add(py, "__package__", package)?;
                 module.add(
                     py,
                     "__doc__",
                     "Patterns files parsing - Rust implementation",
                 )?;

                 // Functions
                 module.add(
                     py,
                     "build_single_regex",
                     py_fn!(
                         py,
                         build_single_regex_wrapper(
                             kind: PyObject,
                             pat: PyObject,
                             globsuffix: PyObject
                         )
                     ),
                 )?;
                 module.add(
                     py,
                     "read_pattern_file",
                     py_fn!(
                         py,
                         read_pattern_file_wrapper(
                             file_path: PyObject,
                             warn: bool,
                             source_info: bool
                         )
                     ),
                 )?;

                 // Exception type
                 module.add(py, "PatternError", py.get_type::<PatternError>())?;

                 // Make the module importable under its dotted name
                 let sys = PyModule::import(py, "sys")?;
                 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
                 sys_modules.set_item(py, &dotted_name, &module)?;

                 Ok(module)
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages