##// END OF EJS Templates
rust-discovery: type alias for random generator seed...
Georges Racinet -
r44494:2abffea4 default
parent child Browse files
Show More
@@ -1,694 +1,695
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 use rand::seq::SliceRandom;
15 use rand::seq::SliceRandom;
16 use rand::{thread_rng, RngCore, SeedableRng};
16 use rand::{thread_rng, RngCore, SeedableRng};
17 use std::cmp::{max, min};
17 use std::cmp::{max, min};
18 use std::collections::{HashSet, VecDeque};
18 use std::collections::{HashSet, VecDeque};
19
19
20 type Rng = rand_pcg::Pcg32;
20 type Rng = rand_pcg::Pcg32;
21 type Seed = [u8; 16];
21
22
22 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
23 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
24 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
25 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
26 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
27 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
28 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
29 rng: Rng,
30 rng: Rng,
30 respect_size: bool,
31 respect_size: bool,
31 randomize: bool,
32 randomize: bool,
32 }
33 }
33
34
34 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
35 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
36 }
37 }
37
38
38 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
39 ///
40 ///
40 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
41 /// element of `heads`.
42 /// element of `heads`.
42 ///
43 ///
43 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
44 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached.
46 /// reached.
46 ///
47 ///
47 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// represented by `parentfn`
49 /// represented by `parentfn`
49 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
50 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
51 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
53 fn update_sample<I>(
54 fn update_sample<I>(
54 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
55 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
56 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
57 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
59 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
60 where
61 where
61 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
62 {
63 {
63 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
64 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
66 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
67 while let Some(current) = visit.pop_front() {
68 while let Some(current) = visit.pop_front() {
68 if !seen.insert(current) {
69 if !seen.insert(current) {
69 continue;
70 continue;
70 }
71 }
71
72
72 let d = *distances.entry(current).or_insert(1);
73 let d = *distances.entry(current).or_insert(1);
73 if d > factor {
74 if d > factor {
74 factor *= 2;
75 factor *= 2;
75 }
76 }
76 if d == factor {
77 if d == factor {
77 sample.insert(current);
78 sample.insert(current);
78 if let Some(sz) = quicksamplesize {
79 if let Some(sz) = quicksamplesize {
79 if sample.len() >= sz {
80 if sample.len() >= sz {
80 return Ok(());
81 return Ok(());
81 }
82 }
82 }
83 }
83 }
84 }
84 for p in parentsfn(current)? {
85 for p in parentsfn(current)? {
85 if let Some(revs) = revs {
86 if let Some(revs) = revs {
86 if !revs.contains(&p) {
87 if !revs.contains(&p) {
87 continue;
88 continue;
88 }
89 }
89 }
90 }
90 distances.entry(p).or_insert(d + 1);
91 distances.entry(p).or_insert(d + 1);
91 visit.push_back(p);
92 visit.push_back(p);
92 }
93 }
93 }
94 }
94 Ok(())
95 Ok(())
95 }
96 }
96
97
97 struct ParentsIterator {
98 struct ParentsIterator {
98 parents: [Revision; 2],
99 parents: [Revision; 2],
99 cur: usize,
100 cur: usize,
100 }
101 }
101
102
102 impl ParentsIterator {
103 impl ParentsIterator {
103 fn graph_parents(
104 fn graph_parents(
104 graph: &impl Graph,
105 graph: &impl Graph,
105 r: Revision,
106 r: Revision,
106 ) -> Result<ParentsIterator, GraphError> {
107 ) -> Result<ParentsIterator, GraphError> {
107 Ok(ParentsIterator {
108 Ok(ParentsIterator {
108 parents: graph.parents(r)?,
109 parents: graph.parents(r)?,
109 cur: 0,
110 cur: 0,
110 })
111 })
111 }
112 }
112 }
113 }
113
114
114 impl Iterator for ParentsIterator {
115 impl Iterator for ParentsIterator {
115 type Item = Revision;
116 type Item = Revision;
116
117
117 fn next(&mut self) -> Option<Revision> {
118 fn next(&mut self) -> Option<Revision> {
118 if self.cur > 1 {
119 if self.cur > 1 {
119 return None;
120 return None;
120 }
121 }
121 let rev = self.parents[self.cur];
122 let rev = self.parents[self.cur];
122 self.cur += 1;
123 self.cur += 1;
123 if rev == NULL_REVISION {
124 if rev == NULL_REVISION {
124 return self.next();
125 return self.next();
125 }
126 }
126 Some(rev)
127 Some(rev)
127 }
128 }
128 }
129 }
129
130
130 impl<G: Graph + Clone> PartialDiscovery<G> {
131 impl<G: Graph + Clone> PartialDiscovery<G> {
131 /// Create a PartialDiscovery object, with the intent
132 /// Create a PartialDiscovery object, with the intent
132 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// of comparing our `::<target_heads>` revset to the contents of another
133 /// repo.
134 /// repo.
134 ///
135 ///
135 /// For now `target_heads` is passed as a vector, and will be used
136 /// For now `target_heads` is passed as a vector, and will be used
136 /// at the first call to `ensure_undecided()`.
137 /// at the first call to `ensure_undecided()`.
137 ///
138 ///
138 /// If we want to make the signature more flexible,
139 /// If we want to make the signature more flexible,
139 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
140 /// object since we'll keep it in the meanwhile
141 /// object since we'll keep it in the meanwhile
141 ///
142 ///
142 /// The `respect_size` boolean controls how the sampling methods
143 /// The `respect_size` boolean controls how the sampling methods
143 /// will interpret the size argument requested by the caller. If it's
144 /// will interpret the size argument requested by the caller. If it's
144 /// `false`, they are allowed to produce a sample whose size is more
145 /// `false`, they are allowed to produce a sample whose size is more
145 /// appropriate to the situation (typically bigger).
146 /// appropriate to the situation (typically bigger).
146 ///
147 ///
147 /// The `randomize` boolean affects sampling, and specifically how
148 /// The `randomize` boolean affects sampling, and specifically how
148 /// limiting or last-minute expanding is been done:
149 /// limiting or last-minute expanding is been done:
149 ///
150 ///
150 /// If `true`, both will perform random picking from `self.undecided`.
151 /// If `true`, both will perform random picking from `self.undecided`.
151 /// This is currently the best for actual discoveries.
152 /// This is currently the best for actual discoveries.
152 ///
153 ///
153 /// If `false`, a reproductible picking strategy is performed. This is
154 /// If `false`, a reproductible picking strategy is performed. This is
154 /// useful for integration tests.
155 /// useful for integration tests.
155 pub fn new(
156 pub fn new(
156 graph: G,
157 graph: G,
157 target_heads: Vec<Revision>,
158 target_heads: Vec<Revision>,
158 respect_size: bool,
159 respect_size: bool,
159 randomize: bool,
160 randomize: bool,
160 ) -> Self {
161 ) -> Self {
161 let mut seed: [u8; 16] = [0; 16];
162 let mut seed = [0; 16];
162 if randomize {
163 if randomize {
163 thread_rng().fill_bytes(&mut seed);
164 thread_rng().fill_bytes(&mut seed);
164 }
165 }
165 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
166 }
167 }
167
168
168 pub fn new_with_seed(
169 pub fn new_with_seed(
169 graph: G,
170 graph: G,
170 target_heads: Vec<Revision>,
171 target_heads: Vec<Revision>,
171 seed: [u8; 16],
172 seed: Seed,
172 respect_size: bool,
173 respect_size: bool,
173 randomize: bool,
174 randomize: bool,
174 ) -> Self {
175 ) -> Self {
175 PartialDiscovery {
176 PartialDiscovery {
176 undecided: None,
177 undecided: None,
177 children_cache: None,
178 children_cache: None,
178 target_heads: Some(target_heads),
179 target_heads: Some(target_heads),
179 graph: graph.clone(),
180 graph: graph.clone(),
180 common: MissingAncestors::new(graph, vec![]),
181 common: MissingAncestors::new(graph, vec![]),
181 missing: HashSet::new(),
182 missing: HashSet::new(),
182 rng: Rng::from_seed(seed),
183 rng: Rng::from_seed(seed),
183 respect_size: respect_size,
184 respect_size: respect_size,
184 randomize: randomize,
185 randomize: randomize,
185 }
186 }
186 }
187 }
187
188
188 /// Extract at most `size` random elements from sample and return them
189 /// Extract at most `size` random elements from sample and return them
189 /// as a vector
190 /// as a vector
190 fn limit_sample(
191 fn limit_sample(
191 &mut self,
192 &mut self,
192 mut sample: Vec<Revision>,
193 mut sample: Vec<Revision>,
193 size: usize,
194 size: usize,
194 ) -> Vec<Revision> {
195 ) -> Vec<Revision> {
195 if !self.randomize {
196 if !self.randomize {
196 sample.sort();
197 sample.sort();
197 sample.truncate(size);
198 sample.truncate(size);
198 return sample;
199 return sample;
199 }
200 }
200 let sample_len = sample.len();
201 let sample_len = sample.len();
201 if sample_len <= size {
202 if sample_len <= size {
202 return sample;
203 return sample;
203 }
204 }
204 let rng = &mut self.rng;
205 let rng = &mut self.rng;
205 let dropped_size = sample_len - size;
206 let dropped_size = sample_len - size;
206 let limited_slice = if size < dropped_size {
207 let limited_slice = if size < dropped_size {
207 sample.partial_shuffle(rng, size).0
208 sample.partial_shuffle(rng, size).0
208 } else {
209 } else {
209 sample.partial_shuffle(rng, dropped_size).1
210 sample.partial_shuffle(rng, dropped_size).1
210 };
211 };
211 limited_slice.to_owned()
212 limited_slice.to_owned()
212 }
213 }
213
214
214 /// Register revisions known as being common
215 /// Register revisions known as being common
215 pub fn add_common_revisions(
216 pub fn add_common_revisions(
216 &mut self,
217 &mut self,
217 common: impl IntoIterator<Item = Revision>,
218 common: impl IntoIterator<Item = Revision>,
218 ) -> Result<(), GraphError> {
219 ) -> Result<(), GraphError> {
219 let before_len = self.common.get_bases().len();
220 let before_len = self.common.get_bases().len();
220 self.common.add_bases(common);
221 self.common.add_bases(common);
221 if self.common.get_bases().len() == before_len {
222 if self.common.get_bases().len() == before_len {
222 return Ok(());
223 return Ok(());
223 }
224 }
224 if let Some(ref mut undecided) = self.undecided {
225 if let Some(ref mut undecided) = self.undecided {
225 self.common.remove_ancestors_from(undecided)?;
226 self.common.remove_ancestors_from(undecided)?;
226 }
227 }
227 Ok(())
228 Ok(())
228 }
229 }
229
230
230 /// Register revisions known as being missing
231 /// Register revisions known as being missing
231 ///
232 ///
232 /// # Performance note
233 /// # Performance note
233 ///
234 ///
234 /// Except in the most trivial case, the first call of this method has
235 /// Except in the most trivial case, the first call of this method has
235 /// the side effect of computing `self.undecided` set for the first time,
236 /// the side effect of computing `self.undecided` set for the first time,
236 /// and the related caches it might need for efficiency of its internal
237 /// and the related caches it might need for efficiency of its internal
237 /// computation. This is typically faster if more information is
238 /// computation. This is typically faster if more information is
238 /// available in `self.common`. Therefore, for good performance, the
239 /// available in `self.common`. Therefore, for good performance, the
239 /// caller should avoid calling this too early.
240 /// caller should avoid calling this too early.
240 pub fn add_missing_revisions(
241 pub fn add_missing_revisions(
241 &mut self,
242 &mut self,
242 missing: impl IntoIterator<Item = Revision>,
243 missing: impl IntoIterator<Item = Revision>,
243 ) -> Result<(), GraphError> {
244 ) -> Result<(), GraphError> {
244 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 if tovisit.is_empty() {
246 if tovisit.is_empty() {
246 return Ok(());
247 return Ok(());
247 }
248 }
248 self.ensure_children_cache()?;
249 self.ensure_children_cache()?;
249 self.ensure_undecided()?; // for safety of possible future refactors
250 self.ensure_undecided()?; // for safety of possible future refactors
250 let children = self.children_cache.as_ref().unwrap();
251 let children = self.children_cache.as_ref().unwrap();
251 let mut seen: HashSet<Revision> = HashSet::new();
252 let mut seen: HashSet<Revision> = HashSet::new();
252 let undecided_mut = self.undecided.as_mut().unwrap();
253 let undecided_mut = self.undecided.as_mut().unwrap();
253 while let Some(rev) = tovisit.pop_front() {
254 while let Some(rev) = tovisit.pop_front() {
254 if !self.missing.insert(rev) {
255 if !self.missing.insert(rev) {
255 // either it's known to be missing from a previous
256 // either it's known to be missing from a previous
256 // invocation, and there's no need to iterate on its
257 // invocation, and there's no need to iterate on its
257 // children (we now they are all missing)
258 // children (we now they are all missing)
258 // or it's from a previous iteration of this loop
259 // or it's from a previous iteration of this loop
259 // and its children have already been queued
260 // and its children have already been queued
260 continue;
261 continue;
261 }
262 }
262 undecided_mut.remove(&rev);
263 undecided_mut.remove(&rev);
263 match children.get(&rev) {
264 match children.get(&rev) {
264 None => {
265 None => {
265 continue;
266 continue;
266 }
267 }
267 Some(this_children) => {
268 Some(this_children) => {
268 for child in this_children.iter().cloned() {
269 for child in this_children.iter().cloned() {
269 if seen.insert(child) {
270 if seen.insert(child) {
270 tovisit.push_back(child);
271 tovisit.push_back(child);
271 }
272 }
272 }
273 }
273 }
274 }
274 }
275 }
275 }
276 }
276 Ok(())
277 Ok(())
277 }
278 }
278
279
279 /// Do we have any information about the peer?
280 /// Do we have any information about the peer?
280 pub fn has_info(&self) -> bool {
281 pub fn has_info(&self) -> bool {
281 self.common.has_bases()
282 self.common.has_bases()
282 }
283 }
283
284
284 /// Did we acquire full knowledge of our Revisions that the peer has?
285 /// Did we acquire full knowledge of our Revisions that the peer has?
285 pub fn is_complete(&self) -> bool {
286 pub fn is_complete(&self) -> bool {
286 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 }
288 }
288
289
289 /// Return the heads of the currently known common set of revisions.
290 /// Return the heads of the currently known common set of revisions.
290 ///
291 ///
291 /// If the discovery process is not complete (see `is_complete()`), the
292 /// If the discovery process is not complete (see `is_complete()`), the
292 /// caller must be aware that this is an intermediate state.
293 /// caller must be aware that this is an intermediate state.
293 ///
294 ///
294 /// On the other hand, if it is complete, then this is currently
295 /// On the other hand, if it is complete, then this is currently
295 /// the only way to retrieve the end results of the discovery process.
296 /// the only way to retrieve the end results of the discovery process.
296 ///
297 ///
297 /// We may introduce in the future an `into_common_heads` call that
298 /// We may introduce in the future an `into_common_heads` call that
298 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// would be more appropriate for normal Rust callers, dropping `self`
299 /// if it is complete.
300 /// if it is complete.
300 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
301 self.common.bases_heads()
302 self.common.bases_heads()
302 }
303 }
303
304
304 /// Force first computation of `self.undecided`
305 /// Force first computation of `self.undecided`
305 ///
306 ///
306 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
307 /// unwrapped to get workable immutable or mutable references without
308 /// unwrapped to get workable immutable or mutable references without
308 /// any panic.
309 /// any panic.
309 ///
310 ///
310 /// This is an imperative call instead of an access with added lazyness
311 /// This is an imperative call instead of an access with added lazyness
311 /// to reduce easily the scope of mutable borrow for the caller,
312 /// to reduce easily the scope of mutable borrow for the caller,
312 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
313 /// as long as the resulting immutable one.
314 /// as long as the resulting immutable one.
314 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
315 if self.undecided.is_some() {
316 if self.undecided.is_some() {
316 return Ok(());
317 return Ok(());
317 }
318 }
318 let tgt = self.target_heads.take().unwrap();
319 let tgt = self.target_heads.take().unwrap();
319 self.undecided =
320 self.undecided =
320 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
321 Ok(())
322 Ok(())
322 }
323 }
323
324
324 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
325 if self.children_cache.is_some() {
326 if self.children_cache.is_some() {
326 return Ok(());
327 return Ok(());
327 }
328 }
328 self.ensure_undecided()?;
329 self.ensure_undecided()?;
329
330
330 let mut children: FastHashMap<Revision, Vec<Revision>> =
331 let mut children: FastHashMap<Revision, Vec<Revision>> =
331 FastHashMap::default();
332 FastHashMap::default();
332 for &rev in self.undecided.as_ref().unwrap() {
333 for &rev in self.undecided.as_ref().unwrap() {
333 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 }
336 }
336 }
337 }
337 self.children_cache = Some(children);
338 self.children_cache = Some(children);
338 Ok(())
339 Ok(())
339 }
340 }
340
341
341 /// Provide statistics about the current state of the discovery process
342 /// Provide statistics about the current state of the discovery process
342 pub fn stats(&self) -> DiscoveryStats {
343 pub fn stats(&self) -> DiscoveryStats {
343 DiscoveryStats {
344 DiscoveryStats {
344 undecided: self.undecided.as_ref().map(|s| s.len()),
345 undecided: self.undecided.as_ref().map(|s| s.len()),
345 }
346 }
346 }
347 }
347
348
348 pub fn take_quick_sample(
349 pub fn take_quick_sample(
349 &mut self,
350 &mut self,
350 headrevs: impl IntoIterator<Item = Revision>,
351 headrevs: impl IntoIterator<Item = Revision>,
351 size: usize,
352 size: usize,
352 ) -> Result<Vec<Revision>, GraphError> {
353 ) -> Result<Vec<Revision>, GraphError> {
353 self.ensure_undecided()?;
354 self.ensure_undecided()?;
354 let mut sample = {
355 let mut sample = {
355 let undecided = self.undecided.as_ref().unwrap();
356 let undecided = self.undecided.as_ref().unwrap();
356 if undecided.len() <= size {
357 if undecided.len() <= size {
357 return Ok(undecided.iter().cloned().collect());
358 return Ok(undecided.iter().cloned().collect());
358 }
359 }
359 dagops::heads(&self.graph, undecided.iter())?
360 dagops::heads(&self.graph, undecided.iter())?
360 };
361 };
361 if sample.len() >= size {
362 if sample.len() >= size {
362 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 }
364 }
364 update_sample(
365 update_sample(
365 None,
366 None,
366 headrevs,
367 headrevs,
367 &mut sample,
368 &mut sample,
368 |r| ParentsIterator::graph_parents(&self.graph, r),
369 |r| ParentsIterator::graph_parents(&self.graph, r),
369 Some(size),
370 Some(size),
370 )?;
371 )?;
371 Ok(sample.into_iter().collect())
372 Ok(sample.into_iter().collect())
372 }
373 }
373
374
374 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 ///
376 ///
376 /// The `size` parameter is used to avoid useless computations if
377 /// The `size` parameter is used to avoid useless computations if
377 /// it turns out to be bigger than the whole set of undecided Revisions.
378 /// it turns out to be bigger than the whole set of undecided Revisions.
378 ///
379 ///
379 /// The sample is taken by using `update_sample` from the heads, then
380 /// The sample is taken by using `update_sample` from the heads, then
380 /// from the roots, working on the reverse DAG,
381 /// from the roots, working on the reverse DAG,
381 /// expressed by `self.children_cache`.
382 /// expressed by `self.children_cache`.
382 ///
383 ///
383 /// No effort is being made to complete or limit the sample to `size`
384 /// No effort is being made to complete or limit the sample to `size`
384 /// but this method returns another interesting size that it derives
385 /// but this method returns another interesting size that it derives
385 /// from its knowledge of the structure of the various sets, leaving
386 /// from its knowledge of the structure of the various sets, leaving
386 /// to the caller the decision to use it or not.
387 /// to the caller the decision to use it or not.
387 fn bidirectional_sample(
388 fn bidirectional_sample(
388 &mut self,
389 &mut self,
389 size: usize,
390 size: usize,
390 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 self.ensure_undecided()?;
392 self.ensure_undecided()?;
392 {
393 {
393 // we don't want to compute children_cache before this
394 // we don't want to compute children_cache before this
394 // but doing it after extracting self.undecided takes a mutable
395 // but doing it after extracting self.undecided takes a mutable
395 // ref to self while a shareable one is still active.
396 // ref to self while a shareable one is still active.
396 let undecided = self.undecided.as_ref().unwrap();
397 let undecided = self.undecided.as_ref().unwrap();
397 if undecided.len() <= size {
398 if undecided.len() <= size {
398 return Ok((undecided.clone(), size));
399 return Ok((undecided.clone(), size));
399 }
400 }
400 }
401 }
401
402
402 self.ensure_children_cache()?;
403 self.ensure_children_cache()?;
403 let revs = self.undecided.as_ref().unwrap();
404 let revs = self.undecided.as_ref().unwrap();
404 let mut sample: HashSet<Revision> = revs.clone();
405 let mut sample: HashSet<Revision> = revs.clone();
405
406
406 // it's possible that leveraging the children cache would be more
407 // it's possible that leveraging the children cache would be more
407 // efficient here
408 // efficient here
408 dagops::retain_heads(&self.graph, &mut sample)?;
409 dagops::retain_heads(&self.graph, &mut sample)?;
409 let revsheads = sample.clone(); // was again heads(revs) in python
410 let revsheads = sample.clone(); // was again heads(revs) in python
410
411
411 // update from heads
412 // update from heads
412 update_sample(
413 update_sample(
413 Some(revs),
414 Some(revs),
414 revsheads.iter().cloned(),
415 revsheads.iter().cloned(),
415 &mut sample,
416 &mut sample,
416 |r| ParentsIterator::graph_parents(&self.graph, r),
417 |r| ParentsIterator::graph_parents(&self.graph, r),
417 None,
418 None,
418 )?;
419 )?;
419
420
420 // update from roots
421 // update from roots
421 let revroots: HashSet<Revision> =
422 let revroots: HashSet<Revision> =
422 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424
425
425 let children = self.children_cache.as_ref().unwrap();
426 let children = self.children_cache.as_ref().unwrap();
426 let empty_vec: Vec<Revision> = Vec::new();
427 let empty_vec: Vec<Revision> = Vec::new();
427 update_sample(
428 update_sample(
428 Some(revs),
429 Some(revs),
429 revroots,
430 revroots,
430 &mut sample,
431 &mut sample,
431 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 None,
433 None,
433 )?;
434 )?;
434 Ok((sample, prescribed_size))
435 Ok((sample, prescribed_size))
435 }
436 }
436
437
437 /// Fill up sample up to the wished size with random undecided Revisions.
438 /// Fill up sample up to the wished size with random undecided Revisions.
438 ///
439 ///
439 /// This is intended to be used as a last resort completion if the
440 /// This is intended to be used as a last resort completion if the
440 /// regular sampling algorithm returns too few elements.
441 /// regular sampling algorithm returns too few elements.
441 fn random_complete_sample(
442 fn random_complete_sample(
442 &mut self,
443 &mut self,
443 sample: &mut Vec<Revision>,
444 sample: &mut Vec<Revision>,
444 size: usize,
445 size: usize,
445 ) {
446 ) {
446 let sample_len = sample.len();
447 let sample_len = sample.len();
447 if size <= sample_len {
448 if size <= sample_len {
448 return;
449 return;
449 }
450 }
450 let take_from: Vec<Revision> = self
451 let take_from: Vec<Revision> = self
451 .undecided
452 .undecided
452 .as_ref()
453 .as_ref()
453 .unwrap()
454 .unwrap()
454 .iter()
455 .iter()
455 .filter(|&r| !sample.contains(r))
456 .filter(|&r| !sample.contains(r))
456 .cloned()
457 .cloned()
457 .collect();
458 .collect();
458 sample.extend(self.limit_sample(take_from, size - sample_len));
459 sample.extend(self.limit_sample(take_from, size - sample_len));
459 }
460 }
460
461
461 pub fn take_full_sample(
462 pub fn take_full_sample(
462 &mut self,
463 &mut self,
463 size: usize,
464 size: usize,
464 ) -> Result<Vec<Revision>, GraphError> {
465 ) -> Result<Vec<Revision>, GraphError> {
465 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let size = if self.respect_size {
467 let size = if self.respect_size {
467 size
468 size
468 } else {
469 } else {
469 prescribed_size
470 prescribed_size
470 };
471 };
471 let mut sample =
472 let mut sample =
472 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.random_complete_sample(&mut sample, size);
474 self.random_complete_sample(&mut sample, size);
474 Ok(sample)
475 Ok(sample)
475 }
476 }
476 }
477 }
477
478
478 #[cfg(test)]
479 #[cfg(test)]
479 mod tests {
480 mod tests {
480 use super::*;
481 use super::*;
481 use crate::testing::SampleGraph;
482 use crate::testing::SampleGraph;
482
483
483 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 ///
485 ///
485 /// To avoid actual randomness in these tests, we give it a fixed
486 /// To avoid actual randomness in these tests, we give it a fixed
486 /// random seed, but by default we'll test the random version.
487 /// random seed, but by default we'll test the random version.
487 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 PartialDiscovery::new_with_seed(
489 PartialDiscovery::new_with_seed(
489 SampleGraph,
490 SampleGraph,
490 vec![10, 11, 12, 13],
491 vec![10, 11, 12, 13],
491 [0; 16],
492 [0; 16],
492 true,
493 true,
493 true,
494 true,
494 )
495 )
495 }
496 }
496
497
497 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 ///
499 ///
499 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 fn disco12() -> PartialDiscovery<SampleGraph> {
501 fn disco12() -> PartialDiscovery<SampleGraph> {
501 PartialDiscovery::new_with_seed(
502 PartialDiscovery::new_with_seed(
502 SampleGraph,
503 SampleGraph,
503 vec![12],
504 vec![12],
504 [0; 16],
505 [0; 16],
505 true,
506 true,
506 true,
507 true,
507 )
508 )
508 }
509 }
509
510
510 fn sorted_undecided(
511 fn sorted_undecided(
511 disco: &PartialDiscovery<SampleGraph>,
512 disco: &PartialDiscovery<SampleGraph>,
512 ) -> Vec<Revision> {
513 ) -> Vec<Revision> {
513 let mut as_vec: Vec<Revision> =
514 let mut as_vec: Vec<Revision> =
514 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 as_vec.sort();
516 as_vec.sort();
516 as_vec
517 as_vec
517 }
518 }
518
519
519 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 let mut as_vec: Vec<Revision> =
521 let mut as_vec: Vec<Revision> =
521 disco.missing.iter().cloned().collect();
522 disco.missing.iter().cloned().collect();
522 as_vec.sort();
523 as_vec.sort();
523 as_vec
524 as_vec
524 }
525 }
525
526
526 fn sorted_common_heads(
527 fn sorted_common_heads(
527 disco: &PartialDiscovery<SampleGraph>,
528 disco: &PartialDiscovery<SampleGraph>,
528 ) -> Result<Vec<Revision>, GraphError> {
529 ) -> Result<Vec<Revision>, GraphError> {
529 let mut as_vec: Vec<Revision> =
530 let mut as_vec: Vec<Revision> =
530 disco.common_heads()?.iter().cloned().collect();
531 disco.common_heads()?.iter().cloned().collect();
531 as_vec.sort();
532 as_vec.sort();
532 Ok(as_vec)
533 Ok(as_vec)
533 }
534 }
534
535
535 #[test]
536 #[test]
536 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 let mut disco = full_disco();
538 let mut disco = full_disco();
538 assert_eq!(disco.undecided, None);
539 assert_eq!(disco.undecided, None);
539 assert!(!disco.has_info());
540 assert!(!disco.has_info());
540 assert_eq!(disco.stats().undecided, None);
541 assert_eq!(disco.stats().undecided, None);
541
542
542 disco.add_common_revisions(vec![11, 12])?;
543 disco.add_common_revisions(vec![11, 12])?;
543 assert!(disco.has_info());
544 assert!(disco.has_info());
544 assert!(!disco.is_complete());
545 assert!(!disco.is_complete());
545 assert!(disco.missing.is_empty());
546 assert!(disco.missing.is_empty());
546
547
547 // add_common_revisions did not trigger a premature computation
548 // add_common_revisions did not trigger a premature computation
548 // of `undecided`, let's check that and ask for them
549 // of `undecided`, let's check that and ask for them
549 assert_eq!(disco.undecided, None);
550 assert_eq!(disco.undecided, None);
550 disco.ensure_undecided()?;
551 disco.ensure_undecided()?;
551 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(disco.stats().undecided, Some(4));
553 assert_eq!(disco.stats().undecided, Some(4));
553 Ok(())
554 Ok(())
554 }
555 }
555
556
556 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// and we're comparing all our repo to it (as in a bare push)
558 /// and we're comparing all our repo to it (as in a bare push)
558 #[test]
559 #[test]
559 fn test_discovery() -> Result<(), GraphError> {
560 fn test_discovery() -> Result<(), GraphError> {
560 let mut disco = full_disco();
561 let mut disco = full_disco();
561 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_missing_revisions(vec![8, 10])?;
563 disco.add_missing_revisions(vec![8, 10])?;
563 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert!(!disco.is_complete());
566 assert!(!disco.is_complete());
566
567
567 disco.add_common_revisions(vec![5])?;
568 disco.add_common_revisions(vec![5])?;
568 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert!(disco.is_complete());
571 assert!(disco.is_complete());
571 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 Ok(())
573 Ok(())
573 }
574 }
574
575
575 #[test]
576 #[test]
576 fn test_add_missing_early_continue() -> Result<(), GraphError> {
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
577 eprintln!("test_add_missing_early_stop");
578 eprintln!("test_add_missing_early_stop");
578 let mut disco = full_disco();
579 let mut disco = full_disco();
579 disco.add_common_revisions(vec![13, 3, 4])?;
580 disco.add_common_revisions(vec![13, 3, 4])?;
580 disco.ensure_children_cache()?;
581 disco.ensure_children_cache()?;
581 // 12 is grand-child of 6 through 9
582 // 12 is grand-child of 6 through 9
582 // passing them in this order maximizes the chances of the
583 // passing them in this order maximizes the chances of the
583 // early continue to do the wrong thing
584 // early continue to do the wrong thing
584 disco.add_missing_revisions(vec![6, 9, 12])?;
585 disco.add_missing_revisions(vec![6, 9, 12])?;
585 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
586 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
587 assert!(!disco.is_complete());
588 assert!(!disco.is_complete());
588 Ok(())
589 Ok(())
589 }
590 }
590
591
591 #[test]
592 #[test]
592 fn test_limit_sample_no_need_to() {
593 fn test_limit_sample_no_need_to() {
593 let sample = vec![1, 2, 3, 4];
594 let sample = vec![1, 2, 3, 4];
594 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 }
596 }
596
597
597 #[test]
598 #[test]
598 fn test_limit_sample_less_than_half() {
599 fn test_limit_sample_less_than_half() {
599 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
600 }
601 }
601
602
602 #[test]
603 #[test]
603 fn test_limit_sample_more_than_half() {
604 fn test_limit_sample_more_than_half() {
604 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
605 }
606 }
606
607
607 #[test]
608 #[test]
608 fn test_limit_sample_no_random() {
609 fn test_limit_sample_no_random() {
609 let mut disco = full_disco();
610 let mut disco = full_disco();
610 disco.randomize = false;
611 disco.randomize = false;
611 assert_eq!(
612 assert_eq!(
612 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 vec![1, 3, 5, 7]
614 vec![1, 3, 5, 7]
614 );
615 );
615 }
616 }
616
617
617 #[test]
618 #[test]
618 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 let mut disco = full_disco();
620 let mut disco = full_disco();
620 disco.undecided = Some((1..=13).collect());
621 disco.undecided = Some((1..=13).collect());
621
622
622 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 sample_vec.sort();
624 sample_vec.sort();
624 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 Ok(())
626 Ok(())
626 }
627 }
627
628
628 #[test]
629 #[test]
629 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 let mut disco = disco12();
631 let mut disco = disco12();
631 disco.ensure_undecided()?;
632 disco.ensure_undecided()?;
632
633
633 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 sample_vec.sort();
635 sample_vec.sort();
635 // r12's only parent is r9, whose unique grand-parent through the
636 // r12's only parent is r9, whose unique grand-parent through the
636 // diamond shape is r4. This ends there because the distance from r4
637 // diamond shape is r4. This ends there because the distance from r4
637 // to the root is only 3.
638 // to the root is only 3.
638 assert_eq!(sample_vec, vec![4, 9, 12]);
639 assert_eq!(sample_vec, vec![4, 9, 12]);
639 Ok(())
640 Ok(())
640 }
641 }
641
642
642 #[test]
643 #[test]
643 fn test_children_cache() -> Result<(), GraphError> {
644 fn test_children_cache() -> Result<(), GraphError> {
644 let mut disco = full_disco();
645 let mut disco = full_disco();
645 disco.ensure_children_cache()?;
646 disco.ensure_children_cache()?;
646
647
647 let cache = disco.children_cache.unwrap();
648 let cache = disco.children_cache.unwrap();
648 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&10).cloned(), None);
650 assert_eq!(cache.get(&10).cloned(), None);
650
651
651 let mut children_4 = cache.get(&4).cloned().unwrap();
652 let mut children_4 = cache.get(&4).cloned().unwrap();
652 children_4.sort();
653 children_4.sort();
653 assert_eq!(children_4, vec![5, 6, 7]);
654 assert_eq!(children_4, vec![5, 6, 7]);
654
655
655 let mut children_7 = cache.get(&7).cloned().unwrap();
656 let mut children_7 = cache.get(&7).cloned().unwrap();
656 children_7.sort();
657 children_7.sort();
657 assert_eq!(children_7, vec![9, 11]);
658 assert_eq!(children_7, vec![9, 11]);
658
659
659 Ok(())
660 Ok(())
660 }
661 }
661
662
662 #[test]
663 #[test]
663 fn test_complete_sample() {
664 fn test_complete_sample() {
664 let mut disco = full_disco();
665 let mut disco = full_disco();
665 let undecided: HashSet<Revision> =
666 let undecided: HashSet<Revision> =
666 [4, 7, 9, 2, 3].iter().cloned().collect();
667 [4, 7, 9, 2, 3].iter().cloned().collect();
667 disco.undecided = Some(undecided);
668 disco.undecided = Some(undecided);
668
669
669 let mut sample = vec![0];
670 let mut sample = vec![0];
670 disco.random_complete_sample(&mut sample, 3);
671 disco.random_complete_sample(&mut sample, 3);
671 assert_eq!(sample.len(), 3);
672 assert_eq!(sample.len(), 3);
672
673
673 let mut sample = vec![2, 4, 7];
674 let mut sample = vec![2, 4, 7];
674 disco.random_complete_sample(&mut sample, 1);
675 disco.random_complete_sample(&mut sample, 1);
675 assert_eq!(sample.len(), 3);
676 assert_eq!(sample.len(), 3);
676 }
677 }
677
678
678 #[test]
679 #[test]
679 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 let mut disco = full_disco();
681 let mut disco = full_disco();
681 disco.undecided = Some((0..=13).into_iter().collect());
682 disco.undecided = Some((0..=13).into_iter().collect());
682
683
683 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 assert_eq!(size, 7);
685 assert_eq!(size, 7);
685 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 sample.sort();
687 sample.sort();
687 // our DAG is a bit too small for the results to be really interesting
688 // our DAG is a bit too small for the results to be really interesting
688 // at least it shows that
689 // at least it shows that
689 // - we went both ways
690 // - we went both ways
690 // - we didn't take all Revisions (6 is not in the sample)
691 // - we didn't take all Revisions (6 is not in the sample)
691 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 Ok(())
693 Ok(())
693 }
694 }
694 }
695 }
General Comments 0
You need to be logged in to leave comments. Login now