diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -8,6 +8,7 @@ //! Rust versions of generic DAG ancestors algorithms for Mercurial use super::{Graph, GraphError, Revision, NULL_REVISION}; +use std::cmp::max; use std::collections::{BinaryHeap, HashSet}; /// Iterator over the ancestors of a given list of revisions @@ -24,6 +25,11 @@ pub struct AncestorsIterator { stoprev: Revision, } +pub struct MissingAncestors { + graph: G, + bases: HashSet, +} + impl AncestorsIterator { /// Constructor. /// @@ -131,10 +137,187 @@ impl Iterator for AncestorsIte } } +impl MissingAncestors { + pub fn new(graph: G, bases: impl IntoIterator) -> Self { + let mut bases: HashSet = bases.into_iter().collect(); + if bases.is_empty() { + bases.insert(NULL_REVISION); + } + MissingAncestors { graph, bases } + } + + pub fn has_bases(&self) -> bool { + self.bases.iter().any(|&b| b != NULL_REVISION) + } + + /// Return a reference to current bases. + /// + /// This is useful in unit tests, but also setdiscovery.py does + /// read the bases attribute of a ancestor.missingancestors instance. + pub fn get_bases<'a>(&'a self) -> &'a HashSet { + &self.bases + } + + pub fn add_bases( + &mut self, + new_bases: impl IntoIterator, + ) { + self.bases.extend(new_bases); + } + + /// Remove all ancestors of self.bases from the revs set (in place) + pub fn remove_ancestors_from( + &mut self, + revs: &mut HashSet, + ) -> Result<(), GraphError> { + revs.retain(|r| !self.bases.contains(r)); + // the null revision is always an ancestor + revs.remove(&NULL_REVISION); + if revs.is_empty() { + return Ok(()); + } + // anything in revs > start is definitely not an ancestor of bases + // revs <= start need to be investigated + // TODO optim: if a missingancestors is to be used several times, + // we shouldn't need to iterate each time on bases + let start = match self.bases.iter().cloned().max() { + Some(m) => m, + None => { + // bases is empty (shouldn't happen, but let's be safe) + return Ok(()); + } + }; + // whatever happens, we'll keep at least keepcount of them + // knowing this gives us a earlier stop condition than + // going all the way to the root + let keepcount = revs.iter().filter(|r| **r > start).count(); + + let mut curr = start; + while curr != NULL_REVISION && revs.len() > keepcount { + if self.bases.contains(&curr) { + revs.remove(&curr); + self.add_parents(curr)?; + } + curr -= 1; + } + Ok(()) + } + + /// Add rev's parents to self.bases + #[inline] + fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { + // No need to bother the set with inserting NULL_REVISION over and + // over + for p in self.graph.parents(rev)?.iter().cloned() { + if p != NULL_REVISION { + self.bases.insert(p); + } + } + Ok(()) + } + + /// Return all the ancestors of revs that are not ancestors of self.bases + /// + /// This may include elements from revs. + /// + /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in + /// revision number order, which is a topological order. + pub fn missing_ancestors( + &mut self, + revs: impl IntoIterator, + ) -> Result, GraphError> { + // just for convenience and comparison with Python version + let bases_visit = &mut self.bases; + let mut revs: HashSet = revs + .into_iter() + .filter(|r| !bases_visit.contains(r)) + .collect(); + let revs_visit = &mut revs; + let mut both_visit: HashSet = + revs_visit.intersection(&bases_visit).cloned().collect(); + if revs_visit.is_empty() { + return Ok(Vec::new()); + } + + let max_bases = + bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION); + let max_revs = + revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION); + let start = max(max_bases, max_revs); + + // TODO heuristics for with_capacity()? + let mut missing: Vec = Vec::new(); + for curr in (0..=start).map(|i| start - i) { + if revs_visit.is_empty() { + break; + } + if both_visit.contains(&curr) { + // curr's parents might have made it into revs_visit through + // another path + // TODO optim: Rust's HashSet.remove returns a boolean telling + // if it happened. This will spare us one set lookup + both_visit.remove(&curr); + for p in self.graph.parents(curr)?.iter().cloned() { + if p == NULL_REVISION { + continue; + } + revs_visit.remove(&p); + bases_visit.insert(p); + both_visit.insert(p); + } + continue; + } + // in Rust, one can't just use mutable variables assignation + // to be more straightforward. Instead of Python's + // thisvisit and othervisit, we'll differentiate with a boolean + let this_visit_is_revs = { + if revs_visit.remove(&curr) { + missing.push(curr); + true + } else if bases_visit.contains(&curr) { + false + } else { + // not an ancestor of revs or bases: ignore + continue; + } + }; + + for p in self.graph.parents(curr)?.iter().cloned() { + if p == NULL_REVISION { + continue; + } + let in_other_visit = if this_visit_is_revs { + bases_visit.contains(&p) + } else { + revs_visit.contains(&p) + }; + if in_other_visit || both_visit.contains(&p) { + // p is implicitely in this_visit. + // This means p is or should be in bothvisit + // TODO optim: hence if bothvisit, we look up twice + revs_visit.remove(&p); + bases_visit.insert(p); + both_visit.insert(p); + } else { + // visit later + if this_visit_is_revs { + revs_visit.insert(p); + } else { + bases_visit.insert(p); + } + } + } + } + missing.reverse(); + Ok(missing) + } +} + #[cfg(test)] mod tests { use super::*; + use std::iter::FromIterator; #[derive(Clone, Debug)] struct Stub; @@ -252,4 +435,211 @@ mod tests { AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap(); assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0)))); } + + #[test] + /// Test constructor, add/get bases + fn test_missing_bases() { + let mut missing_ancestors = + MissingAncestors::new(Stub, [5, 3, 1, 3].iter().cloned()); + let mut as_vec: Vec = + missing_ancestors.get_bases().iter().cloned().collect(); + as_vec.sort(); + assert_eq!(as_vec, [1, 3, 5]); + + missing_ancestors.add_bases([3, 7, 8].iter().cloned()); + as_vec = missing_ancestors.get_bases().iter().cloned().collect(); + as_vec.sort(); + assert_eq!(as_vec, [1, 3, 5, 7, 8]); + } + + fn assert_missing_remove( + bases: &[Revision], + revs: &[Revision], + expected: &[Revision], + ) { + let mut missing_ancestors = + MissingAncestors::new(Stub, bases.iter().cloned()); + let mut revset: HashSet = revs.iter().cloned().collect(); + missing_ancestors + .remove_ancestors_from(&mut revset) + .unwrap(); + let mut as_vec: Vec = revset.into_iter().collect(); + as_vec.sort(); + assert_eq!(as_vec.as_slice(), expected); + } + + #[test] + fn test_missing_remove() { + assert_missing_remove( + &[1, 2, 3, 4, 7], + Vec::from_iter(1..10).as_slice(), + &[5, 6, 8, 9], + ); + assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]); + assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]); + } + + fn assert_missing_ancestors( + bases: &[Revision], + revs: &[Revision], + expected: &[Revision], + ) { + let mut missing_ancestors = + MissingAncestors::new(Stub, bases.iter().cloned()); + let missing = missing_ancestors + .missing_ancestors(revs.iter().cloned()) + .unwrap(); + assert_eq!(missing.as_slice(), expected); + } + + #[test] + fn test_missing_ancestors() { + // examples taken from test-ancestors.py by having it run + // on the same graph (both naive and fast Python algs) + assert_missing_ancestors(&[10], &[11], &[3, 7, 11]); + assert_missing_ancestors(&[11], &[10], &[5, 10]); + assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]); + } + + // A Graph represented by a vector whose indices are revisions + // and values are parents of the revisions + type VecGraph = Vec<[Revision; 2]>; + + impl Graph for VecGraph { + fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { + Ok(self[rev as usize]) + } + } + + /// An interesting case found by a random generator similar to + /// the one in test-ancestor.py. An early version of Rust MissingAncestors + /// failed this, yet none of the integration tests of the whole suite + /// catched it. + #[test] + fn test_remove_ancestors_from_case1() { + let graph: VecGraph = vec![ + [NULL_REVISION, NULL_REVISION], + [0, NULL_REVISION], + [1, 0], + [2, 1], + [3, NULL_REVISION], + [4, NULL_REVISION], + [5, 1], + [2, NULL_REVISION], + [7, NULL_REVISION], + [8, NULL_REVISION], + [9, NULL_REVISION], + [10, 1], + [3, NULL_REVISION], + [12, NULL_REVISION], + [13, NULL_REVISION], + [14, NULL_REVISION], + [4, NULL_REVISION], + [16, NULL_REVISION], + [17, NULL_REVISION], + [18, NULL_REVISION], + [19, 11], + [20, NULL_REVISION], + [21, NULL_REVISION], + [22, NULL_REVISION], + [23, NULL_REVISION], + [2, NULL_REVISION], + [3, NULL_REVISION], + [26, 24], + [27, NULL_REVISION], + [28, NULL_REVISION], + [12, NULL_REVISION], + [1, NULL_REVISION], + [1, 9], + [32, NULL_REVISION], + [33, NULL_REVISION], + [34, 31], + [35, NULL_REVISION], + [36, 26], + [37, NULL_REVISION], + [38, NULL_REVISION], + [39, NULL_REVISION], + [40, NULL_REVISION], + [41, NULL_REVISION], + [42, 26], + [0, NULL_REVISION], + [44, NULL_REVISION], + [45, 4], + [40, NULL_REVISION], + [47, NULL_REVISION], + [36, 0], + [49, NULL_REVISION], + [NULL_REVISION, NULL_REVISION], + [51, NULL_REVISION], + [52, NULL_REVISION], + [53, NULL_REVISION], + [14, NULL_REVISION], + [55, NULL_REVISION], + [15, NULL_REVISION], + [23, NULL_REVISION], + [58, NULL_REVISION], + [59, NULL_REVISION], + [2, NULL_REVISION], + [61, 59], + [62, NULL_REVISION], + [63, NULL_REVISION], + [NULL_REVISION, NULL_REVISION], + [65, NULL_REVISION], + [66, NULL_REVISION], + [67, NULL_REVISION], + [68, NULL_REVISION], + [37, 28], + [69, 25], + [71, NULL_REVISION], + [72, NULL_REVISION], + [50, 2], + [74, NULL_REVISION], + [12, NULL_REVISION], + [18, NULL_REVISION], + [77, NULL_REVISION], + [78, NULL_REVISION], + [79, NULL_REVISION], + [43, 33], + [81, NULL_REVISION], + [82, NULL_REVISION], + [83, NULL_REVISION], + [84, 45], + [85, NULL_REVISION], + [86, NULL_REVISION], + [NULL_REVISION, NULL_REVISION], + [88, NULL_REVISION], + [NULL_REVISION, NULL_REVISION], + [76, 83], + [44, NULL_REVISION], + [92, NULL_REVISION], + [93, NULL_REVISION], + [9, NULL_REVISION], + [95, 67], + [96, NULL_REVISION], + [97, NULL_REVISION], + [NULL_REVISION, NULL_REVISION], + ]; + let problem_rev = 28 as Revision; + let problem_base = 70 as Revision; + // making the problem obvious: problem_rev is a parent of problem_base + assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev); + + let mut missing_ancestors: MissingAncestors = + MissingAncestors::new( + graph, + [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6] + .iter() + .cloned(), + ); + assert!(missing_ancestors.bases.contains(&problem_base)); + + let mut revs: HashSet = + [4, 12, 41, 28, 68, 38, 1, 30, 56, 44] + .iter() + .cloned() + .collect(); + missing_ancestors.remove_ancestors_from(&mut revs).unwrap(); + assert!(!revs.contains(&problem_rev)); + } + } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use ancestors::AncestorsIterator; +pub use ancestors::{AncestorsIterator, MissingAncestors}; /// Mercurial revision numbers /// @@ -15,6 +15,9 @@ pub const NULL_REVISION: Revision = -1; /// The simplest expression of what we need of Mercurial DAGs. pub trait Graph { + /// Return the two parents of the given `Revision`. + /// + /// Each of the parents can be independently `NULL_REVISION` fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>; } diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ graph = {0: [-1, -1], 1: [0, -1], 2: [1, 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): + """A few explicit cases, easier to check for catching errors in refactors. + + The bigger graph at the end has been produced by the random generator + above, and we have some evidence that the other tests don't cover it. + """ + for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): + print("%% removeancestorsfrom(), example %d" % (i + 1)) + missanc = ancestor.incrementalmissingancestors(graph.get, bases) + missanc.removeancestorsfrom(revs) + print("remaining (sorted): %s" % sorted(list(revs))) + + for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): + print("%% missingancestors(), example %d" % (i + 1)) + missanc = ancestor.incrementalmissingancestors(graph.get, bases) + print("return %s" % missanc.missingancestors(revs)) + + print("% removeancestorsfrom(), bigger graph") + vecgraph = [ + [-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], + [2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], + [13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], + [19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], + [3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], + [32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], + [38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], + [44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], + [-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], + [55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], + [61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], + [66, -1], [67, -1], [68, -1], [37, 28], [69, 25], + [71, -1], [72, -1], [50, 2], [74, -1], [12, -1], + [18, -1], [77, -1], [78, -1], [79, -1], [43, 33], + [81, -1], [82, -1], [83, -1], [84, 45], [85, -1], + [86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1], + [92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1], + [-1, -1]] + problem_rev = 28 + problem_base = 70 + # problem_rev is a parent of problem_base, but a faulty implementation + # could forget to remove it. + bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6} + if problem_rev not in vecgraph[problem_base] or problem_base not in bases: + print("Conditions have changed") + missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases) + revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44} + missanc.removeancestorsfrom(revs) + if 28 in revs: + print("Failed!") + else: + print("Ok") + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) @@ -276,6 +334,7 @@ def main(): seed = long(time.time() * 1000) rng = random.Random(seed) + test_missingancestors_explicit() test_missingancestors(seed, rng) test_lazyancestors() test_gca() diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: []