Show More
@@ -1,140 +1,231 | |||
|
1 | 1 | // dagops.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Miscellaneous DAG operations |
|
9 | 9 | //! |
|
10 | 10 | //! # Terminology |
|
11 | 11 | //! - By *relative heads* of a collection of revision numbers (`Revision`), |
|
12 | 12 | //! we mean those revisions that have no children among the collection. |
|
13 | 13 | //! - Similarly, by *relative roots* of a collection of `Revision`, we mean |
|
14 | 14 | //! those whose parents, if any, don't belong to the collection. |
|
15 | 15 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
16 | use std::collections::HashSet; | |
|
16 | use crate::ancestors::AncestorsIterator; | |
|
17 | use std::collections::{BTreeSet, HashSet}; | |
|
17 | 18 | |
|
18 | 19 | fn remove_parents( |
|
19 | 20 | graph: &impl Graph, |
|
20 | 21 | rev: Revision, |
|
21 | 22 | set: &mut HashSet<Revision>, |
|
22 | 23 | ) -> Result<(), GraphError> { |
|
23 | 24 | for parent in graph.parents(rev)?.iter() { |
|
24 | 25 | if *parent != NULL_REVISION { |
|
25 | 26 | set.remove(parent); |
|
26 | 27 | } |
|
27 | 28 | } |
|
28 | 29 | Ok(()) |
|
29 | 30 | } |
|
30 | 31 | |
|
31 | 32 | /// Relative heads out of some revisions, passed as an iterator. |
|
32 | 33 | /// |
|
33 | 34 | /// These heads are defined as those revisions that have no children |
|
34 | 35 | /// among those emitted by the iterator. |
|
35 | 36 | /// |
|
36 | 37 | /// # Performance notes |
|
37 | 38 | /// Internally, this clones the iterator, and builds a `HashSet` out of it. |
|
38 | 39 | /// |
|
39 | 40 | /// This function takes an `Iterator` instead of `impl IntoIterator` to |
|
40 | 41 | /// guarantee that cloning the iterator doesn't result in cloning the full |
|
41 | 42 | /// construct it comes from. |
|
42 | 43 | pub fn heads<'a>( |
|
43 | 44 | graph: &impl Graph, |
|
44 | 45 | iter_revs: impl Clone + Iterator<Item = &'a Revision>, |
|
45 | 46 | ) -> Result<HashSet<Revision>, GraphError> { |
|
46 | 47 | let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect(); |
|
47 | 48 | heads.remove(&NULL_REVISION); |
|
48 | 49 | for rev in iter_revs { |
|
49 | 50 | if *rev != NULL_REVISION { |
|
50 | 51 | remove_parents(graph, *rev, &mut heads)?; |
|
51 | 52 | } |
|
52 | 53 | } |
|
53 | 54 | Ok(heads) |
|
54 | 55 | } |
|
55 | 56 | |
|
56 | 57 | /// Retain in `revs` only its relative heads. |
|
57 | 58 | /// |
|
58 | 59 | /// This is an in-place operation, so that control of the incoming |
|
59 | 60 | /// set is left to the caller. |
|
60 | 61 | /// - a direct Python binding would probably need to build its own `HashSet` |
|
61 | 62 | /// from an incoming iterable, even if its sole purpose is to extract the |
|
62 | 63 | /// heads. |
|
63 | 64 | /// - a Rust caller can decide whether cloning beforehand is appropriate |
|
64 | 65 | /// |
|
65 | 66 | /// # Performance notes |
|
66 | 67 | /// Internally, this function will store a full copy of `revs` in a `Vec`. |
|
67 | 68 | pub fn retain_heads( |
|
68 | 69 | graph: &impl Graph, |
|
69 | 70 | revs: &mut HashSet<Revision>, |
|
70 | 71 | ) -> Result<(), GraphError> { |
|
71 | 72 | revs.remove(&NULL_REVISION); |
|
72 | 73 | // we need to construct an iterable copy of revs to avoid itering while |
|
73 | 74 | // mutating |
|
74 | 75 | let as_vec: Vec<Revision> = revs.iter().cloned().collect(); |
|
75 | 76 | for rev in as_vec { |
|
76 | 77 | if rev != NULL_REVISION { |
|
77 | 78 | remove_parents(graph, rev, revs)?; |
|
78 | 79 | } |
|
79 | 80 | } |
|
80 | 81 | Ok(()) |
|
81 | 82 | } |
|
82 | 83 | |
|
84 | /// Compute the topological range between two collections of revisions | |
|
85 | /// | |
|
86 | /// This is equivalent to the revset `<roots>::<heads>`. | |
|
87 | /// | |
|
88 | /// Currently, the given `Graph` has to implement `Clone`, which means | |
|
89 | /// actually cloning just a reference-counted Python pointer if | |
|
90 | /// it's passed over through `rust-cpython`. This is due to the internal | |
|
91 | /// use of `AncestorsIterator` | |
|
92 | /// | |
|
93 | /// # Algorithmic details | |
|
94 | /// | |
|
95 | /// This is a two-pass swipe inspired from what `reachableroots2` from | |
|
96 | /// `mercurial.cext.parsers` does to obtain the same results. | |
|
97 | /// | |
|
98 | /// - first, we climb up the DAG from `heads` in topological order, keeping | |
|
99 | /// them in the vector `heads_ancestors` vector, and adding any element of | |
|
100 | /// `roots` we find among them to the resulting range. | |
|
101 | /// - Then, we iterate on that recorded vector so that a revision is always | |
|
102 | /// emitted after its parents and add all revisions whose parents are already | |
|
103 | /// in the range to the results. | |
|
104 | /// | |
|
105 | /// # Performance notes | |
|
106 | /// | |
|
107 | /// The main difference with the C implementation is that | |
|
108 | /// the latter uses a flat array with bit flags, instead of complex structures | |
|
109 | /// like `HashSet`, making it faster in most scenarios. In theory, it's | |
|
110 | /// possible that the present implementation could be more memory efficient | |
|
111 | /// for very large repositories with many branches. | |
|
112 | pub fn range( | |
|
113 | graph: &(impl Graph + Clone), | |
|
114 | roots: impl IntoIterator<Item = Revision>, | |
|
115 | heads: impl IntoIterator<Item = Revision>, | |
|
116 | ) -> Result<BTreeSet<Revision>, GraphError> { | |
|
117 | let mut range = BTreeSet::new(); | |
|
118 | let roots: HashSet<Revision> = roots.into_iter().collect(); | |
|
119 | let min_root: Revision = match roots.iter().cloned().min() { | |
|
120 | None => { | |
|
121 | return Ok(range); | |
|
122 | } | |
|
123 | Some(r) => r, | |
|
124 | }; | |
|
125 | ||
|
126 | // Internally, AncestorsIterator currently maintains a `HashSet` | |
|
127 | // of all seen revision, which is also what we record, albeit in an ordered | |
|
128 | // way. There's room for improvement on this duplication. | |
|
129 | let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?; | |
|
130 | let mut heads_ancestors: Vec<Revision> = Vec::new(); | |
|
131 | for revres in ait { | |
|
132 | let rev = revres?; | |
|
133 | if roots.contains(&rev) { | |
|
134 | range.insert(rev); | |
|
135 | } | |
|
136 | heads_ancestors.push(rev); | |
|
137 | } | |
|
138 | ||
|
139 | for rev in heads_ancestors.into_iter().rev() { | |
|
140 | for parent in graph.parents(rev)?.iter() { | |
|
141 | if *parent != NULL_REVISION && range.contains(parent) { | |
|
142 | range.insert(rev); | |
|
143 | } | |
|
144 | } | |
|
145 | } | |
|
146 | Ok(range) | |
|
147 | } | |
|
148 | ||
|
#[cfg(test)]
mod tests {

    use super::*;
    use crate::testing::SampleGraph;

    /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
    fn retain_heads_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
        retain_heads(graph, &mut revs)?;
        let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
        as_vec.sort();
        Ok(as_vec)
    }

    #[test]
    fn test_retain_heads() -> Result<(), GraphError> {
        assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
        assert_eq!(
            retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
            vec![1, 6, 12]
        );
        assert_eq!(
            retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![3, 5, 8, 9]
        );
        Ok(())
    }

    /// Apply `heads()` to the given slice and return as a sorted `Vec`
    fn heads_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let heads = heads(graph, revs.iter())?;
        let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
        as_vec.sort();
        Ok(as_vec)
    }

    #[test]
    fn test_heads() -> Result<(), GraphError> {
        assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
        assert_eq!(
            heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
            vec![1, 6, 12]
        );
        assert_eq!(
            heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![3, 5, 8, 9]
        );
        Ok(())
    }

    /// Apply `range()` and convert the result into a Vec for easier
    /// comparison. The `BTreeSet` returned by `range()` iterates in
    /// increasing order, hence so does the resulting `Vec`.
    fn range_vec(
        graph: impl Graph + Clone,
        roots: &[Revision],
        heads: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        range(&graph, roots.iter().cloned(), heads.iter().cloned())
            .map(|bs| bs.into_iter().collect())
    }

    #[test]
    fn test_range() -> Result<(), GraphError> {
        assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
        assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
        assert_eq!(
            range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
            vec![5, 10]
        );
        assert_eq!(
            range_vec(SampleGraph, &[5, 6], &[10, 12])?,
            vec![5, 6, 9, 10, 12]
        );
        // no roots at all: `range()` returns early with an empty set
        assert_eq!(range_vec(SampleGraph, &[], &[4])?, vec![]);
        Ok(())
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now