# HG changeset patch # User Georges Racinet # Date 2019-02-04 18:46:57 # Node ID 9060af281be7cbfc7a7a61cc3fdd4e3c8fcfe399 # Parent 97743297008062cd3eac457b616ba759bdd6b4c6 rust: itering less on MissingAncestors.bases for max() Instead of iterating on the whole `self.bases` each time to find its max, we keep the latter in a separate member attribute and keep it up to date in `add_bases()` On a perfdiscovery done on PyPy, with repos prepared with `contrib/discovery-helper.sh 50 100`, this gives a slight improvement (around 0.5% on wall time, but 10% on CPU) before: ! wall 0.172801 comb 0.180000 user 0.180000 sys 0.000000 (median of 541) after: ! wall 0.171798 comb 0.160000 user 0.160000 sys 0.000000 (median of 551) (perf command run time upped because of bigger variability during this test). Differential Revision: https://phab.mercurial-scm.org/D5945 diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -38,6 +38,7 @@ pub struct LazyAncestors { graph: G, bases: HashSet, + max_base: Revision, } impl AncestorsIterator { @@ -209,7 +210,13 @@ impl LazyAncestors impl MissingAncestors { pub fn new(graph: G, bases: impl IntoIterator) -> Self { - MissingAncestors { graph: graph, bases: bases.into_iter().collect() } + let mut created = MissingAncestors { + graph: graph, + bases: HashSet::new(), + max_base: NULL_REVISION, + }; + created.add_bases(bases); + created } pub fn has_bases(&self) -> bool { @@ -232,17 +239,33 @@ impl MissingAncestors { } /// Consumes the object and returns the relative heads of its bases. - pub fn into_bases_heads(mut self) -> Result, GraphError> { + pub fn into_bases_heads( + mut self, + ) -> Result, GraphError> { dagops::retain_heads(&self.graph, &mut self.bases)?; Ok(self.bases) } + /// Add some revisions to `self.bases` + /// + /// Takes care of keeping `self.max_base` up to date. pub fn add_bases( &mut self, new_bases: impl IntoIterator, ) { - self.bases - .extend(new_bases.into_iter().filter(|&rev| rev != NULL_REVISION)); + let mut max_base = self.max_base; + self.bases.extend( + new_bases + .into_iter() + .filter(|&rev| rev != NULL_REVISION) + .map(|r| { + if r > max_base { + max_base = r; + } + r + }), + ); + self.max_base = max_base; } /// Remove all ancestors of self.bases from the revs set (in place) @@ -261,20 +284,16 @@ impl MissingAncestors { } // anything in revs > start is definitely not an ancestor of bases // revs <= start need to be investigated - // TODO optim: if a missingancestors is to be used several times, - // we shouldn't need to iterate each time on bases - let start = match self.bases.iter().cloned().max() { - Some(m) => m, - None => { // self.bases is empty - return Ok(()); - } - }; + if self.max_base == NULL_REVISION { + return Ok(()); + } + // whatever happens, we'll keep at least keepcount of them // knowing this gives us a earlier stop condition than // going all the way to the root - let keepcount = revs.iter().filter(|r| **r > start).count(); + let keepcount = revs.iter().filter(|r| **r > self.max_base).count(); - let mut curr = start; + let mut curr = self.max_base; while curr != NULL_REVISION && revs.len() > keepcount { if self.bases.contains(&curr) { revs.remove(&curr); @@ -285,12 +304,17 @@ impl MissingAncestors { Ok(()) } - /// Add rev's parents to self.bases + /// Add the parents of `rev` to `self.bases` + /// + /// This has no effect on `self.max_base` #[inline] fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> { - // No need to bother the set with inserting NULL_REVISION over and - // over + if rev == NULL_REVISION { + return Ok(()); + } for p in self.graph.parents(rev)?.iter().cloned() { + // No need to bother the set with inserting NULL_REVISION over and + // over if p != NULL_REVISION { self.bases.insert(p); } @@ -320,12 +344,8 @@ impl MissingAncestors { if revs_visit.is_empty() { return Ok(Vec::new()); } - - let max_bases = - bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION); - let max_revs = - revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION); - let start = max(max_bases, max_revs); + let max_revs = revs_visit.iter().cloned().max().unwrap(); + let start = max(self.max_base, max_revs); // TODO heuristics for with_capacity()? let mut missing: Vec = Vec::new(); @@ -571,11 +591,13 @@ mod tests { missing_ancestors.get_bases().iter().cloned().collect(); as_vec.sort(); assert_eq!(as_vec, [1, 3, 5]); + assert_eq!(missing_ancestors.max_base, 5); missing_ancestors.add_bases([3, 7, 8].iter().cloned()); as_vec = missing_ancestors.get_bases().iter().cloned().collect(); as_vec.sort(); assert_eq!(as_vec, [1, 3, 5, 7, 8]); + assert_eq!(missing_ancestors.max_base, 8); as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect(); as_vec.sort(); diff --git a/rust/hg-core/src/dagops.rs b/rust/hg-core/src/dagops.rs --- a/rust/hg-core/src/dagops.rs +++ b/rust/hg-core/src/dagops.rs @@ -46,7 +46,9 @@ pub fn heads<'a>( let mut heads: HashSet = iter_revs.clone().cloned().collect(); heads.remove(&NULL_REVISION); for rev in iter_revs { - remove_parents(graph, *rev, &mut heads)?; + if *rev != NULL_REVISION { + remove_parents(graph, *rev, &mut heads)?; + } } Ok(heads) } @@ -71,7 +73,9 @@ pub fn retain_heads( // mutating let as_vec: Vec = revs.iter().cloned().collect(); for rev in as_vec { - remove_parents(graph, rev, revs)?; + if rev != NULL_REVISION { + remove_parents(graph, rev, revs)?; + } } Ok(()) } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -13,6 +13,11 @@ pub mod testing; // unconditionally bui /// 4 bytes, and are liberally converted to ints, whence the i32 pub type Revision = i32; + +/// Marker expressing the absence of a parent +/// +/// Independently of the actual representation, `NULL_REVISION` is guaranteed +/// to be smaller that all existing revisions. pub const NULL_REVISION: Revision = -1; /// Same as `mercurial.node.wdirrev`