dagops.rs
313 lines
| 10.1 KiB
| application/rls-services+xml
|
RustLexer
Georges Racinet on ishtar.racinet.fr
|
r41278 | // dagops.rs | ||
// | ||||
// Copyright 2019 Georges Racinet <georges.racinet@octobus.net> | ||||
// | ||||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
//! Miscellaneous DAG operations | ||||
//! | ||||
//! # Terminology | ||||
Yuya Nishihara
|
r43109 | //! - By *relative heads* of a collection of revision numbers (`Revision`), we | ||
//! mean those revisions that have no children among the collection. | ||||
//! - Similarly *relative roots* of a collection of `Revision`, we mean those | ||||
//! whose parents, if any, don't belong to the collection. | ||||
Raphaël Gomès
|
r52153 | use bitvec::slice::BitSlice; | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | use super::{Graph, GraphError, Revision, NULL_REVISION}; | ||
Raphaël Gomès
|
r52152 | use crate::{ancestors::AncestorsIterator, BaseRevision}; | ||
Georges Racinet
|
r42353 | use std::collections::{BTreeSet, HashSet}; | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | |||
Raphaël Gomès
|
r45500 | fn remove_parents<S: std::hash::BuildHasher>( | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | graph: &impl Graph, | ||
rev: Revision, | ||||
Raphaël Gomès
|
r45500 | set: &mut HashSet<Revision, S>, | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | ) -> Result<(), GraphError> { | ||
for parent in graph.parents(rev)?.iter() { | ||||
if *parent != NULL_REVISION { | ||||
set.remove(parent); | ||||
} | ||||
} | ||||
Ok(()) | ||||
} | ||||
/// Relative heads out of some revisions, passed as an iterator. | ||||
/// | ||||
/// These heads are defined as those revisions that have no children | ||||
/// among those emitted by the iterator. | ||||
/// | ||||
/// # Performance notes | ||||
/// Internally, this clones the iterator, and builds a `HashSet` out of it. | ||||
/// | ||||
/// This function takes an `Iterator` instead of `impl IntoIterator` to | ||||
/// guarantee that cloning the iterator doesn't result in cloning the full | ||||
/// construct it comes from. | ||||
pub fn heads<'a>( | ||||
graph: &impl Graph, | ||||
iter_revs: impl Clone + Iterator<Item = &'a Revision>, | ||||
) -> Result<HashSet<Revision>, GraphError> { | ||||
let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect(); | ||||
heads.remove(&NULL_REVISION); | ||||
for rev in iter_revs { | ||||
Georges Racinet
|
r41866 | if *rev != NULL_REVISION { | ||
remove_parents(graph, *rev, &mut heads)?; | ||||
} | ||||
Georges Racinet on ishtar.racinet.fr
|
r41278 | } | ||
Ok(heads) | ||||
} | ||||
/// Retain in `revs` only its relative heads. | ||||
/// | ||||
/// This is an in-place operation, so that control of the incoming | ||||
/// set is left to the caller. | ||||
/// - a direct Python binding would probably need to build its own `HashSet` | ||||
/// from an incoming iterable, even if its sole purpose is to extract the | ||||
/// heads. | ||||
/// - a Rust caller can decide whether cloning beforehand is appropriate | ||||
/// | ||||
/// # Performance notes | ||||
/// Internally, this function will store a full copy of `revs` in a `Vec`. | ||||
Raphaël Gomès
|
r45500 | pub fn retain_heads<S: std::hash::BuildHasher>( | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | graph: &impl Graph, | ||
Raphaël Gomès
|
r45500 | revs: &mut HashSet<Revision, S>, | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | ) -> Result<(), GraphError> { | ||
revs.remove(&NULL_REVISION); | ||||
// we need to construct an iterable copy of revs to avoid itering while | ||||
// mutating | ||||
let as_vec: Vec<Revision> = revs.iter().cloned().collect(); | ||||
for rev in as_vec { | ||||
Georges Racinet
|
r41866 | if rev != NULL_REVISION { | ||
remove_parents(graph, rev, revs)?; | ||||
} | ||||
Georges Racinet on ishtar.racinet.fr
|
r41278 | } | ||
Ok(()) | ||||
} | ||||
Raphaël Gomès
|
r52153 | /// Optimized version of `retain_heads` that expects an zeroed bitvec of the | ||
/// size of the graph, to act as a faster but less space-efficient `HashSet`. | ||||
Raphaël Gomès
|
r52152 | /// | ||
/// # Panics | ||||
/// | ||||
/// Can panic if `not_heads` is shorten than the length of graph. | ||||
pub fn retain_heads_fast( | ||||
graph: &impl Graph, | ||||
Raphaël Gomès
|
r52153 | not_heads: &mut BitSlice, | ||
Raphaël Gomès
|
r52152 | filtered_revs: &HashSet<Revision>, | ||
) -> Result<(), GraphError> { | ||||
for idx in (0..not_heads.len()).rev() { | ||||
let rev = Revision(idx as BaseRevision); | ||||
if !not_heads[idx] && filtered_revs.contains(&rev) { | ||||
Raphaël Gomès
|
r52153 | not_heads.get_mut(idx).unwrap().commit(true); | ||
Raphaël Gomès
|
r52152 | continue; | ||
} | ||||
for parent in graph.parents(rev)?.iter() { | ||||
if *parent != NULL_REVISION { | ||||
Raphaël Gomès
|
r52153 | not_heads.get_mut(parent.0 as usize).unwrap().commit(true); | ||
Raphaël Gomès
|
r52152 | } | ||
} | ||||
} | ||||
Ok(()) | ||||
} | ||||
Georges Racinet
|
r42354 | /// Roots of `revs`, passed as a `HashSet` | ||
/// | ||||
/// They are returned in arbitrary order | ||||
Raphaël Gomès
|
r45500 | pub fn roots<G: Graph, S: std::hash::BuildHasher>( | ||
Georges Racinet
|
r42354 | graph: &G, | ||
Raphaël Gomès
|
r45500 | revs: &HashSet<Revision, S>, | ||
Georges Racinet
|
r42354 | ) -> Result<Vec<Revision>, GraphError> { | ||
let mut roots: Vec<Revision> = Vec::new(); | ||||
for rev in revs { | ||||
if graph | ||||
.parents(*rev)? | ||||
.iter() | ||||
.filter(|p| **p != NULL_REVISION) | ||||
.all(|p| !revs.contains(p)) | ||||
{ | ||||
roots.push(*rev); | ||||
} | ||||
} | ||||
Ok(roots) | ||||
} | ||||
Georges Racinet
|
r42353 | /// Compute the topological range between two collections of revisions | ||
/// | ||||
/// This is equivalent to the revset `<roots>::<heads>`. | ||||
/// | ||||
/// Currently, the given `Graph` has to implement `Clone`, which means | ||||
/// actually cloning just a reference-counted Python pointer if | ||||
/// it's passed over through `rust-cpython`. This is due to the internal | ||||
/// use of `AncestorsIterator` | ||||
/// | ||||
/// # Algorithmic details | ||||
/// | ||||
/// This is a two-pass swipe inspired from what `reachableroots2` from | ||||
/// `mercurial.cext.parsers` does to obtain the same results. | ||||
/// | ||||
/// - first, we climb up the DAG from `heads` in topological order, keeping | ||||
/// them in the vector `heads_ancestors` vector, and adding any element of | ||||
/// `roots` we find among them to the resulting range. | ||||
/// - Then, we iterate on that recorded vector so that a revision is always | ||||
/// emitted after its parents and add all revisions whose parents are already | ||||
/// in the range to the results. | ||||
/// | ||||
/// # Performance notes | ||||
/// | ||||
/// The main difference with the C implementation is that | ||||
/// the latter uses a flat array with bit flags, instead of complex structures | ||||
/// like `HashSet`, making it faster in most scenarios. In theory, it's | ||||
/// possible that the present implementation could be more memory efficient | ||||
/// for very large repositories with many branches. | ||||
pub fn range( | ||||
graph: &(impl Graph + Clone), | ||||
roots: impl IntoIterator<Item = Revision>, | ||||
heads: impl IntoIterator<Item = Revision>, | ||||
) -> Result<BTreeSet<Revision>, GraphError> { | ||||
let mut range = BTreeSet::new(); | ||||
let roots: HashSet<Revision> = roots.into_iter().collect(); | ||||
let min_root: Revision = match roots.iter().cloned().min() { | ||||
None => { | ||||
return Ok(range); | ||||
} | ||||
Some(r) => r, | ||||
}; | ||||
// Internally, AncestorsIterator currently maintains a `HashSet` | ||||
// of all seen revision, which is also what we record, albeit in an ordered | ||||
// way. There's room for improvement on this duplication. | ||||
let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?; | ||||
let mut heads_ancestors: Vec<Revision> = Vec::new(); | ||||
for revres in ait { | ||||
let rev = revres?; | ||||
if roots.contains(&rev) { | ||||
range.insert(rev); | ||||
} | ||||
heads_ancestors.push(rev); | ||||
} | ||||
for rev in heads_ancestors.into_iter().rev() { | ||||
for parent in graph.parents(rev)?.iter() { | ||||
if *parent != NULL_REVISION && range.contains(parent) { | ||||
range.insert(rev); | ||||
} | ||||
} | ||||
} | ||||
Ok(range) | ||||
} | ||||
Georges Racinet on ishtar.racinet.fr
|
r41278 | #[cfg(test)] | ||
mod tests { | ||||
use super::*; | ||||
Raphaël Gomès
|
r51872 | use crate::{testing::SampleGraph, BaseRevision}; | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | |||
/// Apply `retain_heads()` to the given slice and return as a sorted `Vec` | ||||
fn retain_heads_sorted( | ||||
graph: &impl Graph, | ||||
Raphaël Gomès
|
r51872 | revs: &[BaseRevision], | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | ) -> Result<Vec<Revision>, GraphError> { | ||
Raphaël Gomès
|
r51872 | let mut revs: HashSet<Revision> = | ||
revs.iter().cloned().map(Revision).collect(); | ||||
Georges Racinet on ishtar.racinet.fr
|
r41278 | retain_heads(graph, &mut revs)?; | ||
let mut as_vec: Vec<Revision> = revs.iter().cloned().collect(); | ||||
Raphaël Gomès
|
r50825 | as_vec.sort_unstable(); | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | Ok(as_vec) | ||
} | ||||
#[test] | ||||
fn test_retain_heads() -> Result<(), GraphError> { | ||||
assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]); | ||||
assert_eq!( | ||||
retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?, | ||||
vec![1, 6, 12] | ||||
); | ||||
assert_eq!( | ||||
retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?, | ||||
vec![3, 5, 8, 9] | ||||
); | ||||
Ok(()) | ||||
} | ||||
/// Apply `heads()` to the given slice and return as a sorted `Vec` | ||||
fn heads_sorted( | ||||
graph: &impl Graph, | ||||
Raphaël Gomès
|
r51872 | revs: &[BaseRevision], | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | ) -> Result<Vec<Revision>, GraphError> { | ||
Raphaël Gomès
|
r52013 | let iter_revs: Vec<_> = revs.iter().cloned().map(Revision).collect(); | ||
Raphaël Gomès
|
r51872 | let heads = heads(graph, iter_revs.iter())?; | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | let mut as_vec: Vec<Revision> = heads.iter().cloned().collect(); | ||
Raphaël Gomès
|
r50825 | as_vec.sort_unstable(); | ||
Georges Racinet on ishtar.racinet.fr
|
r41278 | Ok(as_vec) | ||
} | ||||
#[test] | ||||
fn test_heads() -> Result<(), GraphError> { | ||||
assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]); | ||||
assert_eq!( | ||||
heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?, | ||||
vec![1, 6, 12] | ||||
); | ||||
assert_eq!( | ||||
heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?, | ||||
vec![3, 5, 8, 9] | ||||
); | ||||
Ok(()) | ||||
} | ||||
Georges Racinet
|
r42354 | /// Apply `roots()` and sort the result for easier comparison | ||
fn roots_sorted( | ||||
graph: &impl Graph, | ||||
Raphaël Gomès
|
r51872 | revs: &[BaseRevision], | ||
Georges Racinet
|
r42354 | ) -> Result<Vec<Revision>, GraphError> { | ||
Raphaël Gomès
|
r51872 | let set: HashSet<_> = revs.iter().cloned().map(Revision).collect(); | ||
Raphaël Gomès
|
r45500 | let mut as_vec = roots(graph, &set)?; | ||
Raphaël Gomès
|
r50825 | as_vec.sort_unstable(); | ||
Georges Racinet
|
r42354 | Ok(as_vec) | ||
} | ||||
#[test] | ||||
fn test_roots() -> Result<(), GraphError> { | ||||
assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]); | ||||
assert_eq!( | ||||
roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?, | ||||
vec![0, 4, 12] | ||||
); | ||||
assert_eq!( | ||||
roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?, | ||||
vec![1, 8] | ||||
); | ||||
Ok(()) | ||||
} | ||||
Georges Racinet
|
r42353 | /// Apply `range()` and convert the result into a Vec for easier comparison | ||
fn range_vec( | ||||
graph: impl Graph + Clone, | ||||
Raphaël Gomès
|
r51872 | roots: &[BaseRevision], | ||
heads: &[BaseRevision], | ||||
Georges Racinet
|
r42353 | ) -> Result<Vec<Revision>, GraphError> { | ||
Raphaël Gomès
|
r51872 | range( | ||
&graph, | ||||
roots.iter().cloned().map(Revision), | ||||
heads.iter().cloned().map(Revision), | ||||
) | ||||
.map(|bs| bs.into_iter().collect()) | ||||
Georges Racinet
|
r42353 | } | ||
#[test] | ||||
fn test_range() -> Result<(), GraphError> { | ||||
assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]); | ||||
Raphaël Gomès
|
r51872 | assert_eq!( | ||
range_vec(SampleGraph, &[0], &[8])?, | ||||
Vec::<Revision>::new() | ||||
); | ||||
Georges Racinet
|
r42353 | assert_eq!( | ||
range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?, | ||||
vec![5, 10] | ||||
); | ||||
assert_eq!( | ||||
range_vec(SampleGraph, &[5, 6], &[10, 12])?, | ||||
vec![5, 6, 9, 10, 12] | ||||
); | ||||
Ok(()) | ||||
} | ||||
Georges Racinet on ishtar.racinet.fr
|
r41278 | } | ||