##// END OF EJS Templates
rust-discovery: use while loop instead of match + break...
Yuya Nishihara -
r43032:798b7d4b default
parent child Browse files
Show More
@@ -1,701 +1,695 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::ancestors::MissingAncestors;
14 use crate::ancestors::MissingAncestors;
15 use crate::dagops;
15 use crate::dagops;
16 use rand::seq::SliceRandom;
16 use rand::seq::SliceRandom;
17 use rand::{thread_rng, RngCore, SeedableRng};
17 use rand::{thread_rng, RngCore, SeedableRng};
18 use std::cmp::{max, min};
18 use std::cmp::{max, min};
19 use std::collections::{HashMap, HashSet, VecDeque};
19 use std::collections::{HashMap, HashSet, VecDeque};
20
20
21 type Rng = rand_pcg::Pcg32;
21 type Rng = rand_pcg::Pcg32;
22
22
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
24 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
25 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
26 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
27 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
28 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
29 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
30 rng: Rng,
31 respect_size: bool,
31 respect_size: bool,
32 randomize: bool,
32 randomize: bool,
33 }
33 }
34
34
35 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
36 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
37 }
37 }
38
38
39 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
40 ///
40 ///
41 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
42 /// element of `heads`.
42 /// element of `heads`.
43 ///
43 ///
44 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 /// reached.
46 /// reached.
47 ///
47 ///
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 /// represented by `parentfn`
49 /// represented by `parentfn`
50 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
51 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
53 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
54 fn update_sample<I>(
54 fn update_sample<I>(
55 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
56 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
57 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
60 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
61 where
61 where
62 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
63 {
63 {
64 let mut distances: HashMap<Revision, u32> = HashMap::new();
64 let mut distances: HashMap<Revision, u32> = HashMap::new();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
67 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
68 loop {
68 while let Some(current) = visit.pop_front() {
69 let current = match visit.pop_front() {
70 None => {
71 break;
72 }
73 Some(r) => r,
74 };
75 if !seen.insert(current) {
69 if !seen.insert(current) {
76 continue;
70 continue;
77 }
71 }
78
72
79 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
80 if d > factor {
74 if d > factor {
81 factor *= 2;
75 factor *= 2;
82 }
76 }
83 if d == factor {
77 if d == factor {
84 sample.insert(current);
78 sample.insert(current);
85 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
86 if sample.len() >= sz {
80 if sample.len() >= sz {
87 return Ok(());
81 return Ok(());
88 }
82 }
89 }
83 }
90 }
84 }
91 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
92 if let Some(revs) = revs {
86 if let Some(revs) = revs {
93 if !revs.contains(&p) {
87 if !revs.contains(&p) {
94 continue;
88 continue;
95 }
89 }
96 }
90 }
97 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
98 visit.push_back(p);
92 visit.push_back(p);
99 }
93 }
100 }
94 }
101 Ok(())
95 Ok(())
102 }
96 }
103
97
104 struct ParentsIterator {
98 struct ParentsIterator {
105 parents: [Revision; 2],
99 parents: [Revision; 2],
106 cur: usize,
100 cur: usize,
107 }
101 }
108
102
109 impl ParentsIterator {
103 impl ParentsIterator {
110 fn graph_parents(
104 fn graph_parents(
111 graph: &impl Graph,
105 graph: &impl Graph,
112 r: Revision,
106 r: Revision,
113 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
114 Ok(ParentsIterator {
108 Ok(ParentsIterator {
115 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
116 cur: 0,
110 cur: 0,
117 })
111 })
118 }
112 }
119 }
113 }
120
114
121 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
122 type Item = Revision;
116 type Item = Revision;
123
117
124 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
125 if self.cur > 1 {
119 if self.cur > 1 {
126 return None;
120 return None;
127 }
121 }
128 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
129 self.cur += 1;
123 self.cur += 1;
130 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
131 return self.next();
125 return self.next();
132 }
126 }
133 Some(rev)
127 Some(rev)
134 }
128 }
135 }
129 }
136
130
137 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
138 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
139 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
140 /// repo.
134 /// repo.
141 ///
135 ///
142 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
143 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
144 ///
138 ///
145 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
146 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
147 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
148 ///
142 ///
149 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
150 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
151 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
152 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
153 ///
147 ///
154 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
155 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
156 ///
150 ///
157 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
158 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
159 ///
153 ///
160 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
161 /// useful for integration tests.
155 /// useful for integration tests.
162 pub fn new(
156 pub fn new(
163 graph: G,
157 graph: G,
164 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
165 respect_size: bool,
159 respect_size: bool,
166 randomize: bool,
160 randomize: bool,
167 ) -> Self {
161 ) -> Self {
168 let mut seed: [u8; 16] = [0; 16];
162 let mut seed: [u8; 16] = [0; 16];
169 if randomize {
163 if randomize {
170 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
171 }
165 }
172 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
173 }
167 }
174
168
175 pub fn new_with_seed(
169 pub fn new_with_seed(
176 graph: G,
170 graph: G,
177 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
178 seed: [u8; 16],
172 seed: [u8; 16],
179 respect_size: bool,
173 respect_size: bool,
180 randomize: bool,
174 randomize: bool,
181 ) -> Self {
175 ) -> Self {
182 PartialDiscovery {
176 PartialDiscovery {
183 undecided: None,
177 undecided: None,
184 children_cache: None,
178 children_cache: None,
185 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
186 graph: graph.clone(),
180 graph: graph.clone(),
187 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
188 missing: HashSet::new(),
182 missing: HashSet::new(),
189 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
190 respect_size: respect_size,
184 respect_size: respect_size,
191 randomize: randomize,
185 randomize: randomize,
192 }
186 }
193 }
187 }
194
188
195 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
196 /// as a vector
190 /// as a vector
197 fn limit_sample(
191 fn limit_sample(
198 &mut self,
192 &mut self,
199 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
200 size: usize,
194 size: usize,
201 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
202 if !self.randomize {
196 if !self.randomize {
203 sample.sort();
197 sample.sort();
204 sample.truncate(size);
198 sample.truncate(size);
205 return sample;
199 return sample;
206 }
200 }
207 let sample_len = sample.len();
201 let sample_len = sample.len();
208 if sample_len <= size {
202 if sample_len <= size {
209 return sample;
203 return sample;
210 }
204 }
211 let rng = &mut self.rng;
205 let rng = &mut self.rng;
212 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
213 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
214 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
215 } else {
209 } else {
216 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
217 };
211 };
218 limited_slice.to_owned()
212 limited_slice.to_owned()
219 }
213 }
220
214
221 /// Register revisions known as being common
215 /// Register revisions known as being common
222 pub fn add_common_revisions(
216 pub fn add_common_revisions(
223 &mut self,
217 &mut self,
224 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
225 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
226 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
227 self.common.add_bases(common);
221 self.common.add_bases(common);
228 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
229 return Ok(());
223 return Ok(());
230 }
224 }
231 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
232 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
233 }
227 }
234 Ok(())
228 Ok(())
235 }
229 }
236
230
237 /// Register revisions known as being missing
231 /// Register revisions known as being missing
238 ///
232 ///
239 /// # Performance note
233 /// # Performance note
240 ///
234 ///
241 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
242 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
243 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
244 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
245 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
246 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
247 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
248 &mut self,
242 &mut self,
249 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
250 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
251 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
252 if tovisit.is_empty() {
246 if tovisit.is_empty() {
253 return Ok(());
247 return Ok(());
254 }
248 }
255 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
256 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
257 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
258 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
259 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
260 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
261 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
262 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
263 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
264 // children (we now they are all missing)
258 // children (we now they are all missing)
265 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
266 // and its children have already been queued
260 // and its children have already been queued
267 continue;
261 continue;
268 }
262 }
269 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
270 match children.get(&rev) {
264 match children.get(&rev) {
271 None => {
265 None => {
272 continue;
266 continue;
273 }
267 }
274 Some(this_children) => {
268 Some(this_children) => {
275 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
276 if seen.insert(child) {
270 if seen.insert(child) {
277 tovisit.push_back(child);
271 tovisit.push_back(child);
278 }
272 }
279 }
273 }
280 }
274 }
281 }
275 }
282 }
276 }
283 Ok(())
277 Ok(())
284 }
278 }
285
279
286 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
287 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
288 self.common.has_bases()
282 self.common.has_bases()
289 }
283 }
290
284
291 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
292 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
293 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
294 }
288 }
295
289
296 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
297 ///
291 ///
298 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
299 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
300 ///
294 ///
301 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
302 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
303 ///
297 ///
304 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
305 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
306 /// if it is complete.
300 /// if it is complete.
307 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
308 self.common.bases_heads()
302 self.common.bases_heads()
309 }
303 }
310
304
311 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
312 ///
306 ///
313 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
314 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
315 /// any panic.
309 /// any panic.
316 ///
310 ///
317 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
318 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
319 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
320 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
321 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
322 if self.undecided.is_some() {
316 if self.undecided.is_some() {
323 return Ok(());
317 return Ok(());
324 }
318 }
325 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
326 self.undecided =
320 self.undecided =
327 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
328 Ok(())
322 Ok(())
329 }
323 }
330
324
331 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
332 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
333 return Ok(());
327 return Ok(());
334 }
328 }
335 self.ensure_undecided()?;
329 self.ensure_undecided()?;
336
330
337 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
331 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
338 for &rev in self.undecided.as_ref().unwrap() {
332 for &rev in self.undecided.as_ref().unwrap() {
339 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
333 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
340 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
334 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
341 }
335 }
342 }
336 }
343 self.children_cache = Some(children);
337 self.children_cache = Some(children);
344 Ok(())
338 Ok(())
345 }
339 }
346
340
347 /// Provide statistics about the current state of the discovery process
341 /// Provide statistics about the current state of the discovery process
348 pub fn stats(&self) -> DiscoveryStats {
342 pub fn stats(&self) -> DiscoveryStats {
349 DiscoveryStats {
343 DiscoveryStats {
350 undecided: self.undecided.as_ref().map(|s| s.len()),
344 undecided: self.undecided.as_ref().map(|s| s.len()),
351 }
345 }
352 }
346 }
353
347
354 pub fn take_quick_sample(
348 pub fn take_quick_sample(
355 &mut self,
349 &mut self,
356 headrevs: impl IntoIterator<Item = Revision>,
350 headrevs: impl IntoIterator<Item = Revision>,
357 size: usize,
351 size: usize,
358 ) -> Result<Vec<Revision>, GraphError> {
352 ) -> Result<Vec<Revision>, GraphError> {
359 self.ensure_undecided()?;
353 self.ensure_undecided()?;
360 let mut sample = {
354 let mut sample = {
361 let undecided = self.undecided.as_ref().unwrap();
355 let undecided = self.undecided.as_ref().unwrap();
362 if undecided.len() <= size {
356 if undecided.len() <= size {
363 return Ok(undecided.iter().cloned().collect());
357 return Ok(undecided.iter().cloned().collect());
364 }
358 }
365 dagops::heads(&self.graph, undecided.iter())?
359 dagops::heads(&self.graph, undecided.iter())?
366 };
360 };
367 if sample.len() >= size {
361 if sample.len() >= size {
368 return Ok(self.limit_sample(sample.into_iter().collect(), size));
362 return Ok(self.limit_sample(sample.into_iter().collect(), size));
369 }
363 }
370 update_sample(
364 update_sample(
371 None,
365 None,
372 headrevs,
366 headrevs,
373 &mut sample,
367 &mut sample,
374 |r| ParentsIterator::graph_parents(&self.graph, r),
368 |r| ParentsIterator::graph_parents(&self.graph, r),
375 Some(size),
369 Some(size),
376 )?;
370 )?;
377 Ok(sample.into_iter().collect())
371 Ok(sample.into_iter().collect())
378 }
372 }
379
373
380 /// Extract a sample from `self.undecided`, going from its heads and roots.
374 /// Extract a sample from `self.undecided`, going from its heads and roots.
381 ///
375 ///
382 /// The `size` parameter is used to avoid useless computations if
376 /// The `size` parameter is used to avoid useless computations if
383 /// it turns out to be bigger than the whole set of undecided Revisions.
377 /// it turns out to be bigger than the whole set of undecided Revisions.
384 ///
378 ///
385 /// The sample is taken by using `update_sample` from the heads, then
379 /// The sample is taken by using `update_sample` from the heads, then
386 /// from the roots, working on the reverse DAG,
380 /// from the roots, working on the reverse DAG,
387 /// expressed by `self.children_cache`.
381 /// expressed by `self.children_cache`.
388 ///
382 ///
389 /// No effort is being made to complete or limit the sample to `size`
383 /// No effort is being made to complete or limit the sample to `size`
390 /// but this method returns another interesting size that it derives
384 /// but this method returns another interesting size that it derives
391 /// from its knowledge of the structure of the various sets, leaving
385 /// from its knowledge of the structure of the various sets, leaving
392 /// to the caller the decision to use it or not.
386 /// to the caller the decision to use it or not.
393 fn bidirectional_sample(
387 fn bidirectional_sample(
394 &mut self,
388 &mut self,
395 size: usize,
389 size: usize,
396 ) -> Result<(HashSet<Revision>, usize), GraphError> {
390 ) -> Result<(HashSet<Revision>, usize), GraphError> {
397 self.ensure_undecided()?;
391 self.ensure_undecided()?;
398 {
392 {
399 // we don't want to compute children_cache before this
393 // we don't want to compute children_cache before this
400 // but doing it after extracting self.undecided takes a mutable
394 // but doing it after extracting self.undecided takes a mutable
401 // ref to self while a shareable one is still active.
395 // ref to self while a shareable one is still active.
402 let undecided = self.undecided.as_ref().unwrap();
396 let undecided = self.undecided.as_ref().unwrap();
403 if undecided.len() <= size {
397 if undecided.len() <= size {
404 return Ok((undecided.clone(), size));
398 return Ok((undecided.clone(), size));
405 }
399 }
406 }
400 }
407
401
408 self.ensure_children_cache()?;
402 self.ensure_children_cache()?;
409 let revs = self.undecided.as_ref().unwrap();
403 let revs = self.undecided.as_ref().unwrap();
410 let mut sample: HashSet<Revision> = revs.clone();
404 let mut sample: HashSet<Revision> = revs.clone();
411
405
412 // it's possible that leveraging the children cache would be more
406 // it's possible that leveraging the children cache would be more
413 // efficient here
407 // efficient here
414 dagops::retain_heads(&self.graph, &mut sample)?;
408 dagops::retain_heads(&self.graph, &mut sample)?;
415 let revsheads = sample.clone(); // was again heads(revs) in python
409 let revsheads = sample.clone(); // was again heads(revs) in python
416
410
417 // update from heads
411 // update from heads
418 update_sample(
412 update_sample(
419 Some(revs),
413 Some(revs),
420 revsheads.iter().cloned(),
414 revsheads.iter().cloned(),
421 &mut sample,
415 &mut sample,
422 |r| ParentsIterator::graph_parents(&self.graph, r),
416 |r| ParentsIterator::graph_parents(&self.graph, r),
423 None,
417 None,
424 )?;
418 )?;
425
419
426 // update from roots
420 // update from roots
427 let revroots: HashSet<Revision> =
421 let revroots: HashSet<Revision> =
428 dagops::roots(&self.graph, revs)?.into_iter().collect();
422 dagops::roots(&self.graph, revs)?.into_iter().collect();
429 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
423 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
430
424
431 let children = self.children_cache.as_ref().unwrap();
425 let children = self.children_cache.as_ref().unwrap();
432 let empty_vec: Vec<Revision> = Vec::new();
426 let empty_vec: Vec<Revision> = Vec::new();
433 update_sample(
427 update_sample(
434 Some(revs),
428 Some(revs),
435 revroots,
429 revroots,
436 &mut sample,
430 &mut sample,
437 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
431 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
438 None,
432 None,
439 )?;
433 )?;
440 Ok((sample, prescribed_size))
434 Ok((sample, prescribed_size))
441 }
435 }
442
436
443 /// Fill up sample up to the wished size with random undecided Revisions.
437 /// Fill up sample up to the wished size with random undecided Revisions.
444 ///
438 ///
445 /// This is intended to be used as a last resort completion if the
439 /// This is intended to be used as a last resort completion if the
446 /// regular sampling algorithm returns too few elements.
440 /// regular sampling algorithm returns too few elements.
447 fn random_complete_sample(
441 fn random_complete_sample(
448 &mut self,
442 &mut self,
449 sample: &mut Vec<Revision>,
443 sample: &mut Vec<Revision>,
450 size: usize,
444 size: usize,
451 ) {
445 ) {
452 let sample_len = sample.len();
446 let sample_len = sample.len();
453 if size <= sample_len {
447 if size <= sample_len {
454 return;
448 return;
455 }
449 }
456 let take_from: Vec<Revision> = self
450 let take_from: Vec<Revision> = self
457 .undecided
451 .undecided
458 .as_ref()
452 .as_ref()
459 .unwrap()
453 .unwrap()
460 .iter()
454 .iter()
461 .filter(|&r| !sample.contains(r))
455 .filter(|&r| !sample.contains(r))
462 .cloned()
456 .cloned()
463 .collect();
457 .collect();
464 sample.extend(self.limit_sample(take_from, size - sample_len));
458 sample.extend(self.limit_sample(take_from, size - sample_len));
465 }
459 }
466
460
467 pub fn take_full_sample(
461 pub fn take_full_sample(
468 &mut self,
462 &mut self,
469 size: usize,
463 size: usize,
470 ) -> Result<Vec<Revision>, GraphError> {
464 ) -> Result<Vec<Revision>, GraphError> {
471 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
465 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
472 let size = if self.respect_size {
466 let size = if self.respect_size {
473 size
467 size
474 } else {
468 } else {
475 prescribed_size
469 prescribed_size
476 };
470 };
477 let mut sample =
471 let mut sample =
478 self.limit_sample(sample_set.into_iter().collect(), size);
472 self.limit_sample(sample_set.into_iter().collect(), size);
479 self.random_complete_sample(&mut sample, size);
473 self.random_complete_sample(&mut sample, size);
480 Ok(sample)
474 Ok(sample)
481 }
475 }
482 }
476 }
483
477
484 #[cfg(test)]
478 #[cfg(test)]
485 mod tests {
479 mod tests {
486 use super::*;
480 use super::*;
487 use crate::testing::SampleGraph;
481 use crate::testing::SampleGraph;
488
482
489 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
483 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
490 ///
484 ///
491 /// To avoid actual randomness in these tests, we give it a fixed
485 /// To avoid actual randomness in these tests, we give it a fixed
492 /// random seed, but by default we'll test the random version.
486 /// random seed, but by default we'll test the random version.
493 fn full_disco() -> PartialDiscovery<SampleGraph> {
487 fn full_disco() -> PartialDiscovery<SampleGraph> {
494 PartialDiscovery::new_with_seed(
488 PartialDiscovery::new_with_seed(
495 SampleGraph,
489 SampleGraph,
496 vec![10, 11, 12, 13],
490 vec![10, 11, 12, 13],
497 [0; 16],
491 [0; 16],
498 true,
492 true,
499 true,
493 true,
500 )
494 )
501 }
495 }
502
496
503 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
497 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
504 ///
498 ///
505 /// To avoid actual randomness in tests, we give it a fixed random seed.
499 /// To avoid actual randomness in tests, we give it a fixed random seed.
506 fn disco12() -> PartialDiscovery<SampleGraph> {
500 fn disco12() -> PartialDiscovery<SampleGraph> {
507 PartialDiscovery::new_with_seed(
501 PartialDiscovery::new_with_seed(
508 SampleGraph,
502 SampleGraph,
509 vec![12],
503 vec![12],
510 [0; 16],
504 [0; 16],
511 true,
505 true,
512 true,
506 true,
513 )
507 )
514 }
508 }
515
509
516 fn sorted_undecided(
510 fn sorted_undecided(
517 disco: &PartialDiscovery<SampleGraph>,
511 disco: &PartialDiscovery<SampleGraph>,
518 ) -> Vec<Revision> {
512 ) -> Vec<Revision> {
519 let mut as_vec: Vec<Revision> =
513 let mut as_vec: Vec<Revision> =
520 disco.undecided.as_ref().unwrap().iter().cloned().collect();
514 disco.undecided.as_ref().unwrap().iter().cloned().collect();
521 as_vec.sort();
515 as_vec.sort();
522 as_vec
516 as_vec
523 }
517 }
524
518
525 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
519 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
526 let mut as_vec: Vec<Revision> =
520 let mut as_vec: Vec<Revision> =
527 disco.missing.iter().cloned().collect();
521 disco.missing.iter().cloned().collect();
528 as_vec.sort();
522 as_vec.sort();
529 as_vec
523 as_vec
530 }
524 }
531
525
532 fn sorted_common_heads(
526 fn sorted_common_heads(
533 disco: &PartialDiscovery<SampleGraph>,
527 disco: &PartialDiscovery<SampleGraph>,
534 ) -> Result<Vec<Revision>, GraphError> {
528 ) -> Result<Vec<Revision>, GraphError> {
535 let mut as_vec: Vec<Revision> =
529 let mut as_vec: Vec<Revision> =
536 disco.common_heads()?.iter().cloned().collect();
530 disco.common_heads()?.iter().cloned().collect();
537 as_vec.sort();
531 as_vec.sort();
538 Ok(as_vec)
532 Ok(as_vec)
539 }
533 }
540
534
541 #[test]
535 #[test]
542 fn test_add_common_get_undecided() -> Result<(), GraphError> {
536 fn test_add_common_get_undecided() -> Result<(), GraphError> {
543 let mut disco = full_disco();
537 let mut disco = full_disco();
544 assert_eq!(disco.undecided, None);
538 assert_eq!(disco.undecided, None);
545 assert!(!disco.has_info());
539 assert!(!disco.has_info());
546 assert_eq!(disco.stats().undecided, None);
540 assert_eq!(disco.stats().undecided, None);
547
541
548 disco.add_common_revisions(vec![11, 12])?;
542 disco.add_common_revisions(vec![11, 12])?;
549 assert!(disco.has_info());
543 assert!(disco.has_info());
550 assert!(!disco.is_complete());
544 assert!(!disco.is_complete());
551 assert!(disco.missing.is_empty());
545 assert!(disco.missing.is_empty());
552
546
553 // add_common_revisions did not trigger a premature computation
547 // add_common_revisions did not trigger a premature computation
554 // of `undecided`, let's check that and ask for them
548 // of `undecided`, let's check that and ask for them
555 assert_eq!(disco.undecided, None);
549 assert_eq!(disco.undecided, None);
556 disco.ensure_undecided()?;
550 disco.ensure_undecided()?;
557 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
551 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
558 assert_eq!(disco.stats().undecided, Some(4));
552 assert_eq!(disco.stats().undecided, Some(4));
559 Ok(())
553 Ok(())
560 }
554 }
561
555
562 /// in this test, we pretend that our peer misses exactly (8+10)::
556 /// in this test, we pretend that our peer misses exactly (8+10)::
563 /// and we're comparing all our repo to it (as in a bare push)
557 /// and we're comparing all our repo to it (as in a bare push)
564 #[test]
558 #[test]
565 fn test_discovery() -> Result<(), GraphError> {
559 fn test_discovery() -> Result<(), GraphError> {
566 let mut disco = full_disco();
560 let mut disco = full_disco();
567 disco.add_common_revisions(vec![11, 12])?;
561 disco.add_common_revisions(vec![11, 12])?;
568 disco.add_missing_revisions(vec![8, 10])?;
562 disco.add_missing_revisions(vec![8, 10])?;
569 assert_eq!(sorted_undecided(&disco), vec![5]);
563 assert_eq!(sorted_undecided(&disco), vec![5]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
564 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
571 assert!(!disco.is_complete());
565 assert!(!disco.is_complete());
572
566
573 disco.add_common_revisions(vec![5])?;
567 disco.add_common_revisions(vec![5])?;
574 assert_eq!(sorted_undecided(&disco), vec![]);
568 assert_eq!(sorted_undecided(&disco), vec![]);
575 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
569 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
576 assert!(disco.is_complete());
570 assert!(disco.is_complete());
577 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
571 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
578 Ok(())
572 Ok(())
579 }
573 }
580
574
581 #[test]
575 #[test]
582 fn test_add_missing_early_continue() -> Result<(), GraphError> {
576 fn test_add_missing_early_continue() -> Result<(), GraphError> {
583 eprintln!("test_add_missing_early_stop");
577 eprintln!("test_add_missing_early_stop");
584 let mut disco = full_disco();
578 let mut disco = full_disco();
585 disco.add_common_revisions(vec![13, 3, 4])?;
579 disco.add_common_revisions(vec![13, 3, 4])?;
586 disco.ensure_children_cache()?;
580 disco.ensure_children_cache()?;
587 // 12 is grand-child of 6 through 9
581 // 12 is grand-child of 6 through 9
588 // passing them in this order maximizes the chances of the
582 // passing them in this order maximizes the chances of the
589 // early continue to do the wrong thing
583 // early continue to do the wrong thing
590 disco.add_missing_revisions(vec![6, 9, 12])?;
584 disco.add_missing_revisions(vec![6, 9, 12])?;
591 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
585 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
592 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
586 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
593 assert!(!disco.is_complete());
587 assert!(!disco.is_complete());
594 Ok(())
588 Ok(())
595 }
589 }
596
590
597 #[test]
591 #[test]
598 fn test_limit_sample_no_need_to() {
592 fn test_limit_sample_no_need_to() {
599 let sample = vec![1, 2, 3, 4];
593 let sample = vec![1, 2, 3, 4];
600 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
594 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
601 }
595 }
602
596
603 #[test]
597 #[test]
604 fn test_limit_sample_less_than_half() {
598 fn test_limit_sample_less_than_half() {
605 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
599 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
606 }
600 }
607
601
608 #[test]
602 #[test]
609 fn test_limit_sample_more_than_half() {
603 fn test_limit_sample_more_than_half() {
610 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
604 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
611 }
605 }
612
606
613 #[test]
607 #[test]
614 fn test_limit_sample_no_random() {
608 fn test_limit_sample_no_random() {
615 let mut disco = full_disco();
609 let mut disco = full_disco();
616 disco.randomize = false;
610 disco.randomize = false;
617 assert_eq!(
611 assert_eq!(
618 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
612 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
619 vec![1, 3, 5, 7]
613 vec![1, 3, 5, 7]
620 );
614 );
621 }
615 }
622
616
623 #[test]
617 #[test]
624 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
618 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
625 let mut disco = full_disco();
619 let mut disco = full_disco();
626 disco.undecided = Some((1..=13).collect());
620 disco.undecided = Some((1..=13).collect());
627
621
628 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
622 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
629 sample_vec.sort();
623 sample_vec.sort();
630 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
624 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
631 Ok(())
625 Ok(())
632 }
626 }
633
627
634 #[test]
628 #[test]
635 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
629 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
636 let mut disco = disco12();
630 let mut disco = disco12();
637 disco.ensure_undecided()?;
631 disco.ensure_undecided()?;
638
632
639 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
633 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
640 sample_vec.sort();
634 sample_vec.sort();
641 // r12's only parent is r9, whose unique grand-parent through the
635 // r12's only parent is r9, whose unique grand-parent through the
642 // diamond shape is r4. This ends there because the distance from r4
636 // diamond shape is r4. This ends there because the distance from r4
643 // to the root is only 3.
637 // to the root is only 3.
644 assert_eq!(sample_vec, vec![4, 9, 12]);
638 assert_eq!(sample_vec, vec![4, 9, 12]);
645 Ok(())
639 Ok(())
646 }
640 }
647
641
648 #[test]
642 #[test]
649 fn test_children_cache() -> Result<(), GraphError> {
643 fn test_children_cache() -> Result<(), GraphError> {
650 let mut disco = full_disco();
644 let mut disco = full_disco();
651 disco.ensure_children_cache()?;
645 disco.ensure_children_cache()?;
652
646
653 let cache = disco.children_cache.unwrap();
647 let cache = disco.children_cache.unwrap();
654 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
648 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
655 assert_eq!(cache.get(&10).cloned(), None);
649 assert_eq!(cache.get(&10).cloned(), None);
656
650
657 let mut children_4 = cache.get(&4).cloned().unwrap();
651 let mut children_4 = cache.get(&4).cloned().unwrap();
658 children_4.sort();
652 children_4.sort();
659 assert_eq!(children_4, vec![5, 6, 7]);
653 assert_eq!(children_4, vec![5, 6, 7]);
660
654
661 let mut children_7 = cache.get(&7).cloned().unwrap();
655 let mut children_7 = cache.get(&7).cloned().unwrap();
662 children_7.sort();
656 children_7.sort();
663 assert_eq!(children_7, vec![9, 11]);
657 assert_eq!(children_7, vec![9, 11]);
664
658
665 Ok(())
659 Ok(())
666 }
660 }
667
661
668 #[test]
662 #[test]
669 fn test_complete_sample() {
663 fn test_complete_sample() {
670 let mut disco = full_disco();
664 let mut disco = full_disco();
671 let undecided: HashSet<Revision> =
665 let undecided: HashSet<Revision> =
672 [4, 7, 9, 2, 3].iter().cloned().collect();
666 [4, 7, 9, 2, 3].iter().cloned().collect();
673 disco.undecided = Some(undecided);
667 disco.undecided = Some(undecided);
674
668
675 let mut sample = vec![0];
669 let mut sample = vec![0];
676 disco.random_complete_sample(&mut sample, 3);
670 disco.random_complete_sample(&mut sample, 3);
677 assert_eq!(sample.len(), 3);
671 assert_eq!(sample.len(), 3);
678
672
679 let mut sample = vec![2, 4, 7];
673 let mut sample = vec![2, 4, 7];
680 disco.random_complete_sample(&mut sample, 1);
674 disco.random_complete_sample(&mut sample, 1);
681 assert_eq!(sample.len(), 3);
675 assert_eq!(sample.len(), 3);
682 }
676 }
683
677
684 #[test]
678 #[test]
685 fn test_bidirectional_sample() -> Result<(), GraphError> {
679 fn test_bidirectional_sample() -> Result<(), GraphError> {
686 let mut disco = full_disco();
680 let mut disco = full_disco();
687 disco.undecided = Some((0..=13).into_iter().collect());
681 disco.undecided = Some((0..=13).into_iter().collect());
688
682
689 let (sample_set, size) = disco.bidirectional_sample(7)?;
683 let (sample_set, size) = disco.bidirectional_sample(7)?;
690 assert_eq!(size, 7);
684 assert_eq!(size, 7);
691 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
685 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
692 sample.sort();
686 sample.sort();
693 // our DAG is a bit too small for the results to be really interesting
687 // our DAG is a bit too small for the results to be really interesting
694 // at least it shows that
688 // at least it shows that
695 // - we went both ways
689 // - we went both ways
696 // - we didn't take all Revisions (6 is not in the sample)
690 // - we didn't take all Revisions (6 is not in the sample)
697 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
691 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
698 Ok(())
692 Ok(())
699 }
693 }
700
694
701 }
695 }
General Comments 0
You need to be logged in to leave comments. Login now