##// END OF EJS Templates
rust-discovery: remove useless extern crate
Yuya Nishihara -
r43031:04c3b76f default
parent child Browse files
Show More
@@ -1,703 +1,701 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 extern crate rand;
14 extern crate rand_pcg;
15 use self::rand::seq::SliceRandom;
16 use self::rand::{thread_rng, RngCore, SeedableRng};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
13 use super::{Graph, GraphError, Revision, NULL_REVISION};
18 use crate::ancestors::MissingAncestors;
14 use crate::ancestors::MissingAncestors;
19 use crate::dagops;
15 use crate::dagops;
16 use rand::seq::SliceRandom;
17 use rand::{thread_rng, RngCore, SeedableRng};
20 use std::cmp::{max, min};
18 use std::cmp::{max, min};
21 use std::collections::{HashMap, HashSet, VecDeque};
19 use std::collections::{HashMap, HashSet, VecDeque};
22
20
23 type Rng = self::rand_pcg::Pcg32;
21 type Rng = rand_pcg::Pcg32;
24
22
25 pub struct PartialDiscovery<G: Graph + Clone> {
23 pub struct PartialDiscovery<G: Graph + Clone> {
26 target_heads: Option<Vec<Revision>>,
24 target_heads: Option<Vec<Revision>>,
27 graph: G, // plays the role of self._repo
25 graph: G, // plays the role of self._repo
28 common: MissingAncestors<G>,
26 common: MissingAncestors<G>,
29 undecided: Option<HashSet<Revision>>,
27 undecided: Option<HashSet<Revision>>,
30 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
28 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
31 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
32 rng: Rng,
30 rng: Rng,
33 respect_size: bool,
31 respect_size: bool,
34 randomize: bool,
32 randomize: bool,
35 }
33 }
36
34
37 pub struct DiscoveryStats {
35 pub struct DiscoveryStats {
38 pub undecided: Option<usize>,
36 pub undecided: Option<usize>,
39 }
37 }
40
38
41 /// Update an existing sample to match the expected size
39 /// Update an existing sample to match the expected size
42 ///
40 ///
43 /// The sample is updated with revisions exponentially distant from each
41 /// The sample is updated with revisions exponentially distant from each
44 /// element of `heads`.
42 /// element of `heads`.
45 ///
43 ///
46 /// If a target size is specified, the sampling will stop once this size is
44 /// If a target size is specified, the sampling will stop once this size is
47 /// reached. Otherwise sampling will happen until roots of the <revs> set are
45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
48 /// reached.
46 /// reached.
49 ///
47 ///
50 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
51 /// represented by `parentfn`
49 /// represented by `parentfn`
52 /// - `heads`: set of DAG head revs
50 /// - `heads`: set of DAG head revs
53 /// - `sample`: a sample to update
51 /// - `sample`: a sample to update
54 /// - `parentfn`: a callable to resolve parents for a revision
52 /// - `parentfn`: a callable to resolve parents for a revision
55 /// - `quicksamplesize`: optional target size of the sample
53 /// - `quicksamplesize`: optional target size of the sample
56 fn update_sample<I>(
54 fn update_sample<I>(
57 revs: Option<&HashSet<Revision>>,
55 revs: Option<&HashSet<Revision>>,
58 heads: impl IntoIterator<Item = Revision>,
56 heads: impl IntoIterator<Item = Revision>,
59 sample: &mut HashSet<Revision>,
57 sample: &mut HashSet<Revision>,
60 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
61 quicksamplesize: Option<usize>,
59 quicksamplesize: Option<usize>,
62 ) -> Result<(), GraphError>
60 ) -> Result<(), GraphError>
63 where
61 where
64 I: Iterator<Item = Revision>,
62 I: Iterator<Item = Revision>,
65 {
63 {
66 let mut distances: HashMap<Revision, u32> = HashMap::new();
64 let mut distances: HashMap<Revision, u32> = HashMap::new();
67 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
68 let mut factor: u32 = 1;
66 let mut factor: u32 = 1;
69 let mut seen: HashSet<Revision> = HashSet::new();
67 let mut seen: HashSet<Revision> = HashSet::new();
70 loop {
68 loop {
71 let current = match visit.pop_front() {
69 let current = match visit.pop_front() {
72 None => {
70 None => {
73 break;
71 break;
74 }
72 }
75 Some(r) => r,
73 Some(r) => r,
76 };
74 };
77 if !seen.insert(current) {
75 if !seen.insert(current) {
78 continue;
76 continue;
79 }
77 }
80
78
81 let d = *distances.entry(current).or_insert(1);
79 let d = *distances.entry(current).or_insert(1);
82 if d > factor {
80 if d > factor {
83 factor *= 2;
81 factor *= 2;
84 }
82 }
85 if d == factor {
83 if d == factor {
86 sample.insert(current);
84 sample.insert(current);
87 if let Some(sz) = quicksamplesize {
85 if let Some(sz) = quicksamplesize {
88 if sample.len() >= sz {
86 if sample.len() >= sz {
89 return Ok(());
87 return Ok(());
90 }
88 }
91 }
89 }
92 }
90 }
93 for p in parentsfn(current)? {
91 for p in parentsfn(current)? {
94 if let Some(revs) = revs {
92 if let Some(revs) = revs {
95 if !revs.contains(&p) {
93 if !revs.contains(&p) {
96 continue;
94 continue;
97 }
95 }
98 }
96 }
99 distances.entry(p).or_insert(d + 1);
97 distances.entry(p).or_insert(d + 1);
100 visit.push_back(p);
98 visit.push_back(p);
101 }
99 }
102 }
100 }
103 Ok(())
101 Ok(())
104 }
102 }
105
103
106 struct ParentsIterator {
104 struct ParentsIterator {
107 parents: [Revision; 2],
105 parents: [Revision; 2],
108 cur: usize,
106 cur: usize,
109 }
107 }
110
108
111 impl ParentsIterator {
109 impl ParentsIterator {
112 fn graph_parents(
110 fn graph_parents(
113 graph: &impl Graph,
111 graph: &impl Graph,
114 r: Revision,
112 r: Revision,
115 ) -> Result<ParentsIterator, GraphError> {
113 ) -> Result<ParentsIterator, GraphError> {
116 Ok(ParentsIterator {
114 Ok(ParentsIterator {
117 parents: graph.parents(r)?,
115 parents: graph.parents(r)?,
118 cur: 0,
116 cur: 0,
119 })
117 })
120 }
118 }
121 }
119 }
122
120
123 impl Iterator for ParentsIterator {
121 impl Iterator for ParentsIterator {
124 type Item = Revision;
122 type Item = Revision;
125
123
126 fn next(&mut self) -> Option<Revision> {
124 fn next(&mut self) -> Option<Revision> {
127 if self.cur > 1 {
125 if self.cur > 1 {
128 return None;
126 return None;
129 }
127 }
130 let rev = self.parents[self.cur];
128 let rev = self.parents[self.cur];
131 self.cur += 1;
129 self.cur += 1;
132 if rev == NULL_REVISION {
130 if rev == NULL_REVISION {
133 return self.next();
131 return self.next();
134 }
132 }
135 Some(rev)
133 Some(rev)
136 }
134 }
137 }
135 }
138
136
139 impl<G: Graph + Clone> PartialDiscovery<G> {
137 impl<G: Graph + Clone> PartialDiscovery<G> {
140 /// Create a PartialDiscovery object, with the intent
138 /// Create a PartialDiscovery object, with the intent
141 /// of comparing our `::<target_heads>` revset to the contents of another
139 /// of comparing our `::<target_heads>` revset to the contents of another
142 /// repo.
140 /// repo.
143 ///
141 ///
144 /// For now `target_heads` is passed as a vector, and will be used
142 /// For now `target_heads` is passed as a vector, and will be used
145 /// at the first call to `ensure_undecided()`.
143 /// at the first call to `ensure_undecided()`.
146 ///
144 ///
147 /// If we want to make the signature more flexible,
145 /// If we want to make the signature more flexible,
148 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
146 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
149 /// object since we'll keep it in the meanwhile
147 /// object since we'll keep it in the meanwhile
150 ///
148 ///
151 /// The `respect_size` boolean controls how the sampling methods
149 /// The `respect_size` boolean controls how the sampling methods
152 /// will interpret the size argument requested by the caller. If it's
150 /// will interpret the size argument requested by the caller. If it's
153 /// `false`, they are allowed to produce a sample whose size is more
151 /// `false`, they are allowed to produce a sample whose size is more
154 /// appropriate to the situation (typically bigger).
152 /// appropriate to the situation (typically bigger).
155 ///
153 ///
156 /// The `randomize` boolean affects sampling, and specifically how
154 /// The `randomize` boolean affects sampling, and specifically how
157 /// limiting or last-minute expanding is been done:
155 /// limiting or last-minute expanding is been done:
158 ///
156 ///
159 /// If `true`, both will perform random picking from `self.undecided`.
157 /// If `true`, both will perform random picking from `self.undecided`.
160 /// This is currently the best for actual discoveries.
158 /// This is currently the best for actual discoveries.
161 ///
159 ///
162 /// If `false`, a reproductible picking strategy is performed. This is
160 /// If `false`, a reproductible picking strategy is performed. This is
163 /// useful for integration tests.
161 /// useful for integration tests.
164 pub fn new(
162 pub fn new(
165 graph: G,
163 graph: G,
166 target_heads: Vec<Revision>,
164 target_heads: Vec<Revision>,
167 respect_size: bool,
165 respect_size: bool,
168 randomize: bool,
166 randomize: bool,
169 ) -> Self {
167 ) -> Self {
170 let mut seed: [u8; 16] = [0; 16];
168 let mut seed: [u8; 16] = [0; 16];
171 if randomize {
169 if randomize {
172 thread_rng().fill_bytes(&mut seed);
170 thread_rng().fill_bytes(&mut seed);
173 }
171 }
174 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
172 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
175 }
173 }
176
174
177 pub fn new_with_seed(
175 pub fn new_with_seed(
178 graph: G,
176 graph: G,
179 target_heads: Vec<Revision>,
177 target_heads: Vec<Revision>,
180 seed: [u8; 16],
178 seed: [u8; 16],
181 respect_size: bool,
179 respect_size: bool,
182 randomize: bool,
180 randomize: bool,
183 ) -> Self {
181 ) -> Self {
184 PartialDiscovery {
182 PartialDiscovery {
185 undecided: None,
183 undecided: None,
186 children_cache: None,
184 children_cache: None,
187 target_heads: Some(target_heads),
185 target_heads: Some(target_heads),
188 graph: graph.clone(),
186 graph: graph.clone(),
189 common: MissingAncestors::new(graph, vec![]),
187 common: MissingAncestors::new(graph, vec![]),
190 missing: HashSet::new(),
188 missing: HashSet::new(),
191 rng: Rng::from_seed(seed),
189 rng: Rng::from_seed(seed),
192 respect_size: respect_size,
190 respect_size: respect_size,
193 randomize: randomize,
191 randomize: randomize,
194 }
192 }
195 }
193 }
196
194
197 /// Extract at most `size` random elements from sample and return them
195 /// Extract at most `size` random elements from sample and return them
198 /// as a vector
196 /// as a vector
199 fn limit_sample(
197 fn limit_sample(
200 &mut self,
198 &mut self,
201 mut sample: Vec<Revision>,
199 mut sample: Vec<Revision>,
202 size: usize,
200 size: usize,
203 ) -> Vec<Revision> {
201 ) -> Vec<Revision> {
204 if !self.randomize {
202 if !self.randomize {
205 sample.sort();
203 sample.sort();
206 sample.truncate(size);
204 sample.truncate(size);
207 return sample;
205 return sample;
208 }
206 }
209 let sample_len = sample.len();
207 let sample_len = sample.len();
210 if sample_len <= size {
208 if sample_len <= size {
211 return sample;
209 return sample;
212 }
210 }
213 let rng = &mut self.rng;
211 let rng = &mut self.rng;
214 let dropped_size = sample_len - size;
212 let dropped_size = sample_len - size;
215 let limited_slice = if size < dropped_size {
213 let limited_slice = if size < dropped_size {
216 sample.partial_shuffle(rng, size).0
214 sample.partial_shuffle(rng, size).0
217 } else {
215 } else {
218 sample.partial_shuffle(rng, dropped_size).1
216 sample.partial_shuffle(rng, dropped_size).1
219 };
217 };
220 limited_slice.to_owned()
218 limited_slice.to_owned()
221 }
219 }
222
220
223 /// Register revisions known as being common
221 /// Register revisions known as being common
224 pub fn add_common_revisions(
222 pub fn add_common_revisions(
225 &mut self,
223 &mut self,
226 common: impl IntoIterator<Item = Revision>,
224 common: impl IntoIterator<Item = Revision>,
227 ) -> Result<(), GraphError> {
225 ) -> Result<(), GraphError> {
228 let before_len = self.common.get_bases().len();
226 let before_len = self.common.get_bases().len();
229 self.common.add_bases(common);
227 self.common.add_bases(common);
230 if self.common.get_bases().len() == before_len {
228 if self.common.get_bases().len() == before_len {
231 return Ok(());
229 return Ok(());
232 }
230 }
233 if let Some(ref mut undecided) = self.undecided {
231 if let Some(ref mut undecided) = self.undecided {
234 self.common.remove_ancestors_from(undecided)?;
232 self.common.remove_ancestors_from(undecided)?;
235 }
233 }
236 Ok(())
234 Ok(())
237 }
235 }
238
236
239 /// Register revisions known as being missing
237 /// Register revisions known as being missing
240 ///
238 ///
241 /// # Performance note
239 /// # Performance note
242 ///
240 ///
243 /// Except in the most trivial case, the first call of this method has
241 /// Except in the most trivial case, the first call of this method has
244 /// the side effect of computing `self.undecided` set for the first time,
242 /// the side effect of computing `self.undecided` set for the first time,
245 /// and the related caches it might need for efficiency of its internal
243 /// and the related caches it might need for efficiency of its internal
246 /// computation. This is typically faster if more information is
244 /// computation. This is typically faster if more information is
247 /// available in `self.common`. Therefore, for good performance, the
245 /// available in `self.common`. Therefore, for good performance, the
248 /// caller should avoid calling this too early.
246 /// caller should avoid calling this too early.
249 pub fn add_missing_revisions(
247 pub fn add_missing_revisions(
250 &mut self,
248 &mut self,
251 missing: impl IntoIterator<Item = Revision>,
249 missing: impl IntoIterator<Item = Revision>,
252 ) -> Result<(), GraphError> {
250 ) -> Result<(), GraphError> {
253 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
251 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
254 if tovisit.is_empty() {
252 if tovisit.is_empty() {
255 return Ok(());
253 return Ok(());
256 }
254 }
257 self.ensure_children_cache()?;
255 self.ensure_children_cache()?;
258 self.ensure_undecided()?; // for safety of possible future refactors
256 self.ensure_undecided()?; // for safety of possible future refactors
259 let children = self.children_cache.as_ref().unwrap();
257 let children = self.children_cache.as_ref().unwrap();
260 let mut seen: HashSet<Revision> = HashSet::new();
258 let mut seen: HashSet<Revision> = HashSet::new();
261 let undecided_mut = self.undecided.as_mut().unwrap();
259 let undecided_mut = self.undecided.as_mut().unwrap();
262 while let Some(rev) = tovisit.pop_front() {
260 while let Some(rev) = tovisit.pop_front() {
263 if !self.missing.insert(rev) {
261 if !self.missing.insert(rev) {
264 // either it's known to be missing from a previous
262 // either it's known to be missing from a previous
265 // invocation, and there's no need to iterate on its
263 // invocation, and there's no need to iterate on its
266 // children (we now they are all missing)
264 // children (we now they are all missing)
267 // or it's from a previous iteration of this loop
265 // or it's from a previous iteration of this loop
268 // and its children have already been queued
266 // and its children have already been queued
269 continue;
267 continue;
270 }
268 }
271 undecided_mut.remove(&rev);
269 undecided_mut.remove(&rev);
272 match children.get(&rev) {
270 match children.get(&rev) {
273 None => {
271 None => {
274 continue;
272 continue;
275 }
273 }
276 Some(this_children) => {
274 Some(this_children) => {
277 for child in this_children.iter().cloned() {
275 for child in this_children.iter().cloned() {
278 if seen.insert(child) {
276 if seen.insert(child) {
279 tovisit.push_back(child);
277 tovisit.push_back(child);
280 }
278 }
281 }
279 }
282 }
280 }
283 }
281 }
284 }
282 }
285 Ok(())
283 Ok(())
286 }
284 }
287
285
288 /// Do we have any information about the peer?
286 /// Do we have any information about the peer?
289 pub fn has_info(&self) -> bool {
287 pub fn has_info(&self) -> bool {
290 self.common.has_bases()
288 self.common.has_bases()
291 }
289 }
292
290
293 /// Did we acquire full knowledge of our Revisions that the peer has?
291 /// Did we acquire full knowledge of our Revisions that the peer has?
294 pub fn is_complete(&self) -> bool {
292 pub fn is_complete(&self) -> bool {
295 self.undecided.as_ref().map_or(false, |s| s.is_empty())
293 self.undecided.as_ref().map_or(false, |s| s.is_empty())
296 }
294 }
297
295
298 /// Return the heads of the currently known common set of revisions.
296 /// Return the heads of the currently known common set of revisions.
299 ///
297 ///
300 /// If the discovery process is not complete (see `is_complete()`), the
298 /// If the discovery process is not complete (see `is_complete()`), the
301 /// caller must be aware that this is an intermediate state.
299 /// caller must be aware that this is an intermediate state.
302 ///
300 ///
303 /// On the other hand, if it is complete, then this is currently
301 /// On the other hand, if it is complete, then this is currently
304 /// the only way to retrieve the end results of the discovery process.
302 /// the only way to retrieve the end results of the discovery process.
305 ///
303 ///
306 /// We may introduce in the future an `into_common_heads` call that
304 /// We may introduce in the future an `into_common_heads` call that
307 /// would be more appropriate for normal Rust callers, dropping `self`
305 /// would be more appropriate for normal Rust callers, dropping `self`
308 /// if it is complete.
306 /// if it is complete.
309 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
307 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
310 self.common.bases_heads()
308 self.common.bases_heads()
311 }
309 }
312
310
313 /// Force first computation of `self.undecided`
311 /// Force first computation of `self.undecided`
314 ///
312 ///
315 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
313 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
316 /// unwrapped to get workable immutable or mutable references without
314 /// unwrapped to get workable immutable or mutable references without
317 /// any panic.
315 /// any panic.
318 ///
316 ///
319 /// This is an imperative call instead of an access with added lazyness
317 /// This is an imperative call instead of an access with added lazyness
320 /// to reduce easily the scope of mutable borrow for the caller,
318 /// to reduce easily the scope of mutable borrow for the caller,
321 /// compared to undecided(&'a mut self) -> &'a… that would keep it
319 /// compared to undecided(&'a mut self) -> &'a… that would keep it
322 /// as long as the resulting immutable one.
320 /// as long as the resulting immutable one.
323 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
321 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
324 if self.undecided.is_some() {
322 if self.undecided.is_some() {
325 return Ok(());
323 return Ok(());
326 }
324 }
327 let tgt = self.target_heads.take().unwrap();
325 let tgt = self.target_heads.take().unwrap();
328 self.undecided =
326 self.undecided =
329 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
327 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
330 Ok(())
328 Ok(())
331 }
329 }
332
330
333 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
331 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
334 if self.children_cache.is_some() {
332 if self.children_cache.is_some() {
335 return Ok(());
333 return Ok(());
336 }
334 }
337 self.ensure_undecided()?;
335 self.ensure_undecided()?;
338
336
339 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
337 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
340 for &rev in self.undecided.as_ref().unwrap() {
338 for &rev in self.undecided.as_ref().unwrap() {
341 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
339 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
342 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
340 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
343 }
341 }
344 }
342 }
345 self.children_cache = Some(children);
343 self.children_cache = Some(children);
346 Ok(())
344 Ok(())
347 }
345 }
348
346
349 /// Provide statistics about the current state of the discovery process
347 /// Provide statistics about the current state of the discovery process
350 pub fn stats(&self) -> DiscoveryStats {
348 pub fn stats(&self) -> DiscoveryStats {
351 DiscoveryStats {
349 DiscoveryStats {
352 undecided: self.undecided.as_ref().map(|s| s.len()),
350 undecided: self.undecided.as_ref().map(|s| s.len()),
353 }
351 }
354 }
352 }
355
353
356 pub fn take_quick_sample(
354 pub fn take_quick_sample(
357 &mut self,
355 &mut self,
358 headrevs: impl IntoIterator<Item = Revision>,
356 headrevs: impl IntoIterator<Item = Revision>,
359 size: usize,
357 size: usize,
360 ) -> Result<Vec<Revision>, GraphError> {
358 ) -> Result<Vec<Revision>, GraphError> {
361 self.ensure_undecided()?;
359 self.ensure_undecided()?;
362 let mut sample = {
360 let mut sample = {
363 let undecided = self.undecided.as_ref().unwrap();
361 let undecided = self.undecided.as_ref().unwrap();
364 if undecided.len() <= size {
362 if undecided.len() <= size {
365 return Ok(undecided.iter().cloned().collect());
363 return Ok(undecided.iter().cloned().collect());
366 }
364 }
367 dagops::heads(&self.graph, undecided.iter())?
365 dagops::heads(&self.graph, undecided.iter())?
368 };
366 };
369 if sample.len() >= size {
367 if sample.len() >= size {
370 return Ok(self.limit_sample(sample.into_iter().collect(), size));
368 return Ok(self.limit_sample(sample.into_iter().collect(), size));
371 }
369 }
372 update_sample(
370 update_sample(
373 None,
371 None,
374 headrevs,
372 headrevs,
375 &mut sample,
373 &mut sample,
376 |r| ParentsIterator::graph_parents(&self.graph, r),
374 |r| ParentsIterator::graph_parents(&self.graph, r),
377 Some(size),
375 Some(size),
378 )?;
376 )?;
379 Ok(sample.into_iter().collect())
377 Ok(sample.into_iter().collect())
380 }
378 }
381
379
382 /// Extract a sample from `self.undecided`, going from its heads and roots.
380 /// Extract a sample from `self.undecided`, going from its heads and roots.
383 ///
381 ///
384 /// The `size` parameter is used to avoid useless computations if
382 /// The `size` parameter is used to avoid useless computations if
385 /// it turns out to be bigger than the whole set of undecided Revisions.
383 /// it turns out to be bigger than the whole set of undecided Revisions.
386 ///
384 ///
387 /// The sample is taken by using `update_sample` from the heads, then
385 /// The sample is taken by using `update_sample` from the heads, then
388 /// from the roots, working on the reverse DAG,
386 /// from the roots, working on the reverse DAG,
389 /// expressed by `self.children_cache`.
387 /// expressed by `self.children_cache`.
390 ///
388 ///
391 /// No effort is being made to complete or limit the sample to `size`
389 /// No effort is being made to complete or limit the sample to `size`
392 /// but this method returns another interesting size that it derives
390 /// but this method returns another interesting size that it derives
393 /// from its knowledge of the structure of the various sets, leaving
391 /// from its knowledge of the structure of the various sets, leaving
394 /// to the caller the decision to use it or not.
392 /// to the caller the decision to use it or not.
395 fn bidirectional_sample(
393 fn bidirectional_sample(
396 &mut self,
394 &mut self,
397 size: usize,
395 size: usize,
398 ) -> Result<(HashSet<Revision>, usize), GraphError> {
396 ) -> Result<(HashSet<Revision>, usize), GraphError> {
399 self.ensure_undecided()?;
397 self.ensure_undecided()?;
400 {
398 {
401 // we don't want to compute children_cache before this
399 // we don't want to compute children_cache before this
402 // but doing it after extracting self.undecided takes a mutable
400 // but doing it after extracting self.undecided takes a mutable
403 // ref to self while a shareable one is still active.
401 // ref to self while a shareable one is still active.
404 let undecided = self.undecided.as_ref().unwrap();
402 let undecided = self.undecided.as_ref().unwrap();
405 if undecided.len() <= size {
403 if undecided.len() <= size {
406 return Ok((undecided.clone(), size));
404 return Ok((undecided.clone(), size));
407 }
405 }
408 }
406 }
409
407
410 self.ensure_children_cache()?;
408 self.ensure_children_cache()?;
411 let revs = self.undecided.as_ref().unwrap();
409 let revs = self.undecided.as_ref().unwrap();
412 let mut sample: HashSet<Revision> = revs.clone();
410 let mut sample: HashSet<Revision> = revs.clone();
413
411
414 // it's possible that leveraging the children cache would be more
412 // it's possible that leveraging the children cache would be more
415 // efficient here
413 // efficient here
416 dagops::retain_heads(&self.graph, &mut sample)?;
414 dagops::retain_heads(&self.graph, &mut sample)?;
417 let revsheads = sample.clone(); // was again heads(revs) in python
415 let revsheads = sample.clone(); // was again heads(revs) in python
418
416
419 // update from heads
417 // update from heads
420 update_sample(
418 update_sample(
421 Some(revs),
419 Some(revs),
422 revsheads.iter().cloned(),
420 revsheads.iter().cloned(),
423 &mut sample,
421 &mut sample,
424 |r| ParentsIterator::graph_parents(&self.graph, r),
422 |r| ParentsIterator::graph_parents(&self.graph, r),
425 None,
423 None,
426 )?;
424 )?;
427
425
428 // update from roots
426 // update from roots
429 let revroots: HashSet<Revision> =
427 let revroots: HashSet<Revision> =
430 dagops::roots(&self.graph, revs)?.into_iter().collect();
428 dagops::roots(&self.graph, revs)?.into_iter().collect();
431 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
429 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
432
430
433 let children = self.children_cache.as_ref().unwrap();
431 let children = self.children_cache.as_ref().unwrap();
434 let empty_vec: Vec<Revision> = Vec::new();
432 let empty_vec: Vec<Revision> = Vec::new();
435 update_sample(
433 update_sample(
436 Some(revs),
434 Some(revs),
437 revroots,
435 revroots,
438 &mut sample,
436 &mut sample,
439 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
437 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
440 None,
438 None,
441 )?;
439 )?;
442 Ok((sample, prescribed_size))
440 Ok((sample, prescribed_size))
443 }
441 }
444
442
445 /// Fill up sample up to the wished size with random undecided Revisions.
443 /// Fill up sample up to the wished size with random undecided Revisions.
446 ///
444 ///
447 /// This is intended to be used as a last resort completion if the
445 /// This is intended to be used as a last resort completion if the
448 /// regular sampling algorithm returns too few elements.
446 /// regular sampling algorithm returns too few elements.
449 fn random_complete_sample(
447 fn random_complete_sample(
450 &mut self,
448 &mut self,
451 sample: &mut Vec<Revision>,
449 sample: &mut Vec<Revision>,
452 size: usize,
450 size: usize,
453 ) {
451 ) {
454 let sample_len = sample.len();
452 let sample_len = sample.len();
455 if size <= sample_len {
453 if size <= sample_len {
456 return;
454 return;
457 }
455 }
458 let take_from: Vec<Revision> = self
456 let take_from: Vec<Revision> = self
459 .undecided
457 .undecided
460 .as_ref()
458 .as_ref()
461 .unwrap()
459 .unwrap()
462 .iter()
460 .iter()
463 .filter(|&r| !sample.contains(r))
461 .filter(|&r| !sample.contains(r))
464 .cloned()
462 .cloned()
465 .collect();
463 .collect();
466 sample.extend(self.limit_sample(take_from, size - sample_len));
464 sample.extend(self.limit_sample(take_from, size - sample_len));
467 }
465 }
468
466
469 pub fn take_full_sample(
467 pub fn take_full_sample(
470 &mut self,
468 &mut self,
471 size: usize,
469 size: usize,
472 ) -> Result<Vec<Revision>, GraphError> {
470 ) -> Result<Vec<Revision>, GraphError> {
473 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
471 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
474 let size = if self.respect_size {
472 let size = if self.respect_size {
475 size
473 size
476 } else {
474 } else {
477 prescribed_size
475 prescribed_size
478 };
476 };
479 let mut sample =
477 let mut sample =
480 self.limit_sample(sample_set.into_iter().collect(), size);
478 self.limit_sample(sample_set.into_iter().collect(), size);
481 self.random_complete_sample(&mut sample, size);
479 self.random_complete_sample(&mut sample, size);
482 Ok(sample)
480 Ok(sample)
483 }
481 }
484 }
482 }
485
483
486 #[cfg(test)]
484 #[cfg(test)]
487 mod tests {
485 mod tests {
488 use super::*;
486 use super::*;
489 use crate::testing::SampleGraph;
487 use crate::testing::SampleGraph;
490
488
491 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
489 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
492 ///
490 ///
493 /// To avoid actual randomness in these tests, we give it a fixed
491 /// To avoid actual randomness in these tests, we give it a fixed
494 /// random seed, but by default we'll test the random version.
492 /// random seed, but by default we'll test the random version.
495 fn full_disco() -> PartialDiscovery<SampleGraph> {
493 fn full_disco() -> PartialDiscovery<SampleGraph> {
496 PartialDiscovery::new_with_seed(
494 PartialDiscovery::new_with_seed(
497 SampleGraph,
495 SampleGraph,
498 vec![10, 11, 12, 13],
496 vec![10, 11, 12, 13],
499 [0; 16],
497 [0; 16],
500 true,
498 true,
501 true,
499 true,
502 )
500 )
503 }
501 }
504
502
505 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
503 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
506 ///
504 ///
507 /// To avoid actual randomness in tests, we give it a fixed random seed.
505 /// To avoid actual randomness in tests, we give it a fixed random seed.
508 fn disco12() -> PartialDiscovery<SampleGraph> {
506 fn disco12() -> PartialDiscovery<SampleGraph> {
509 PartialDiscovery::new_with_seed(
507 PartialDiscovery::new_with_seed(
510 SampleGraph,
508 SampleGraph,
511 vec![12],
509 vec![12],
512 [0; 16],
510 [0; 16],
513 true,
511 true,
514 true,
512 true,
515 )
513 )
516 }
514 }
517
515
518 fn sorted_undecided(
516 fn sorted_undecided(
519 disco: &PartialDiscovery<SampleGraph>,
517 disco: &PartialDiscovery<SampleGraph>,
520 ) -> Vec<Revision> {
518 ) -> Vec<Revision> {
521 let mut as_vec: Vec<Revision> =
519 let mut as_vec: Vec<Revision> =
522 disco.undecided.as_ref().unwrap().iter().cloned().collect();
520 disco.undecided.as_ref().unwrap().iter().cloned().collect();
523 as_vec.sort();
521 as_vec.sort();
524 as_vec
522 as_vec
525 }
523 }
526
524
527 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
525 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
528 let mut as_vec: Vec<Revision> =
526 let mut as_vec: Vec<Revision> =
529 disco.missing.iter().cloned().collect();
527 disco.missing.iter().cloned().collect();
530 as_vec.sort();
528 as_vec.sort();
531 as_vec
529 as_vec
532 }
530 }
533
531
534 fn sorted_common_heads(
532 fn sorted_common_heads(
535 disco: &PartialDiscovery<SampleGraph>,
533 disco: &PartialDiscovery<SampleGraph>,
536 ) -> Result<Vec<Revision>, GraphError> {
534 ) -> Result<Vec<Revision>, GraphError> {
537 let mut as_vec: Vec<Revision> =
535 let mut as_vec: Vec<Revision> =
538 disco.common_heads()?.iter().cloned().collect();
536 disco.common_heads()?.iter().cloned().collect();
539 as_vec.sort();
537 as_vec.sort();
540 Ok(as_vec)
538 Ok(as_vec)
541 }
539 }
542
540
543 #[test]
541 #[test]
544 fn test_add_common_get_undecided() -> Result<(), GraphError> {
542 fn test_add_common_get_undecided() -> Result<(), GraphError> {
545 let mut disco = full_disco();
543 let mut disco = full_disco();
546 assert_eq!(disco.undecided, None);
544 assert_eq!(disco.undecided, None);
547 assert!(!disco.has_info());
545 assert!(!disco.has_info());
548 assert_eq!(disco.stats().undecided, None);
546 assert_eq!(disco.stats().undecided, None);
549
547
550 disco.add_common_revisions(vec![11, 12])?;
548 disco.add_common_revisions(vec![11, 12])?;
551 assert!(disco.has_info());
549 assert!(disco.has_info());
552 assert!(!disco.is_complete());
550 assert!(!disco.is_complete());
553 assert!(disco.missing.is_empty());
551 assert!(disco.missing.is_empty());
554
552
555 // add_common_revisions did not trigger a premature computation
553 // add_common_revisions did not trigger a premature computation
556 // of `undecided`, let's check that and ask for them
554 // of `undecided`, let's check that and ask for them
557 assert_eq!(disco.undecided, None);
555 assert_eq!(disco.undecided, None);
558 disco.ensure_undecided()?;
556 disco.ensure_undecided()?;
559 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
557 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
560 assert_eq!(disco.stats().undecided, Some(4));
558 assert_eq!(disco.stats().undecided, Some(4));
561 Ok(())
559 Ok(())
562 }
560 }
563
561
564 /// in this test, we pretend that our peer misses exactly (8+10)::
562 /// in this test, we pretend that our peer misses exactly (8+10)::
565 /// and we're comparing all our repo to it (as in a bare push)
563 /// and we're comparing all our repo to it (as in a bare push)
566 #[test]
564 #[test]
567 fn test_discovery() -> Result<(), GraphError> {
565 fn test_discovery() -> Result<(), GraphError> {
568 let mut disco = full_disco();
566 let mut disco = full_disco();
569 disco.add_common_revisions(vec![11, 12])?;
567 disco.add_common_revisions(vec![11, 12])?;
570 disco.add_missing_revisions(vec![8, 10])?;
568 disco.add_missing_revisions(vec![8, 10])?;
571 assert_eq!(sorted_undecided(&disco), vec![5]);
569 assert_eq!(sorted_undecided(&disco), vec![5]);
572 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
573 assert!(!disco.is_complete());
571 assert!(!disco.is_complete());
574
572
575 disco.add_common_revisions(vec![5])?;
573 disco.add_common_revisions(vec![5])?;
576 assert_eq!(sorted_undecided(&disco), vec![]);
574 assert_eq!(sorted_undecided(&disco), vec![]);
577 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
575 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
578 assert!(disco.is_complete());
576 assert!(disco.is_complete());
579 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
577 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
580 Ok(())
578 Ok(())
581 }
579 }
582
580
583 #[test]
581 #[test]
584 fn test_add_missing_early_continue() -> Result<(), GraphError> {
582 fn test_add_missing_early_continue() -> Result<(), GraphError> {
585 eprintln!("test_add_missing_early_stop");
583 eprintln!("test_add_missing_early_stop");
586 let mut disco = full_disco();
584 let mut disco = full_disco();
587 disco.add_common_revisions(vec![13, 3, 4])?;
585 disco.add_common_revisions(vec![13, 3, 4])?;
588 disco.ensure_children_cache()?;
586 disco.ensure_children_cache()?;
589 // 12 is grand-child of 6 through 9
587 // 12 is grand-child of 6 through 9
590 // passing them in this order maximizes the chances of the
588 // passing them in this order maximizes the chances of the
591 // early continue to do the wrong thing
589 // early continue to do the wrong thing
592 disco.add_missing_revisions(vec![6, 9, 12])?;
590 disco.add_missing_revisions(vec![6, 9, 12])?;
593 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
591 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
594 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
592 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
595 assert!(!disco.is_complete());
593 assert!(!disco.is_complete());
596 Ok(())
594 Ok(())
597 }
595 }
598
596
599 #[test]
597 #[test]
600 fn test_limit_sample_no_need_to() {
598 fn test_limit_sample_no_need_to() {
601 let sample = vec![1, 2, 3, 4];
599 let sample = vec![1, 2, 3, 4];
602 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
600 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
603 }
601 }
604
602
605 #[test]
603 #[test]
606 fn test_limit_sample_less_than_half() {
604 fn test_limit_sample_less_than_half() {
607 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
605 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
608 }
606 }
609
607
610 #[test]
608 #[test]
611 fn test_limit_sample_more_than_half() {
609 fn test_limit_sample_more_than_half() {
612 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
610 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
613 }
611 }
614
612
615 #[test]
613 #[test]
616 fn test_limit_sample_no_random() {
614 fn test_limit_sample_no_random() {
617 let mut disco = full_disco();
615 let mut disco = full_disco();
618 disco.randomize = false;
616 disco.randomize = false;
619 assert_eq!(
617 assert_eq!(
620 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
618 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
621 vec![1, 3, 5, 7]
619 vec![1, 3, 5, 7]
622 );
620 );
623 }
621 }
624
622
625 #[test]
623 #[test]
626 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
624 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
627 let mut disco = full_disco();
625 let mut disco = full_disco();
628 disco.undecided = Some((1..=13).collect());
626 disco.undecided = Some((1..=13).collect());
629
627
630 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
628 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
631 sample_vec.sort();
629 sample_vec.sort();
632 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
630 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
633 Ok(())
631 Ok(())
634 }
632 }
635
633
636 #[test]
634 #[test]
637 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
635 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
638 let mut disco = disco12();
636 let mut disco = disco12();
639 disco.ensure_undecided()?;
637 disco.ensure_undecided()?;
640
638
641 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
639 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
642 sample_vec.sort();
640 sample_vec.sort();
643 // r12's only parent is r9, whose unique grand-parent through the
641 // r12's only parent is r9, whose unique grand-parent through the
644 // diamond shape is r4. This ends there because the distance from r4
642 // diamond shape is r4. This ends there because the distance from r4
645 // to the root is only 3.
643 // to the root is only 3.
646 assert_eq!(sample_vec, vec![4, 9, 12]);
644 assert_eq!(sample_vec, vec![4, 9, 12]);
647 Ok(())
645 Ok(())
648 }
646 }
649
647
650 #[test]
648 #[test]
651 fn test_children_cache() -> Result<(), GraphError> {
649 fn test_children_cache() -> Result<(), GraphError> {
652 let mut disco = full_disco();
650 let mut disco = full_disco();
653 disco.ensure_children_cache()?;
651 disco.ensure_children_cache()?;
654
652
655 let cache = disco.children_cache.unwrap();
653 let cache = disco.children_cache.unwrap();
656 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
654 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
657 assert_eq!(cache.get(&10).cloned(), None);
655 assert_eq!(cache.get(&10).cloned(), None);
658
656
659 let mut children_4 = cache.get(&4).cloned().unwrap();
657 let mut children_4 = cache.get(&4).cloned().unwrap();
660 children_4.sort();
658 children_4.sort();
661 assert_eq!(children_4, vec![5, 6, 7]);
659 assert_eq!(children_4, vec![5, 6, 7]);
662
660
663 let mut children_7 = cache.get(&7).cloned().unwrap();
661 let mut children_7 = cache.get(&7).cloned().unwrap();
664 children_7.sort();
662 children_7.sort();
665 assert_eq!(children_7, vec![9, 11]);
663 assert_eq!(children_7, vec![9, 11]);
666
664
667 Ok(())
665 Ok(())
668 }
666 }
669
667
670 #[test]
668 #[test]
671 fn test_complete_sample() {
669 fn test_complete_sample() {
672 let mut disco = full_disco();
670 let mut disco = full_disco();
673 let undecided: HashSet<Revision> =
671 let undecided: HashSet<Revision> =
674 [4, 7, 9, 2, 3].iter().cloned().collect();
672 [4, 7, 9, 2, 3].iter().cloned().collect();
675 disco.undecided = Some(undecided);
673 disco.undecided = Some(undecided);
676
674
677 let mut sample = vec![0];
675 let mut sample = vec![0];
678 disco.random_complete_sample(&mut sample, 3);
676 disco.random_complete_sample(&mut sample, 3);
679 assert_eq!(sample.len(), 3);
677 assert_eq!(sample.len(), 3);
680
678
681 let mut sample = vec![2, 4, 7];
679 let mut sample = vec![2, 4, 7];
682 disco.random_complete_sample(&mut sample, 1);
680 disco.random_complete_sample(&mut sample, 1);
683 assert_eq!(sample.len(), 3);
681 assert_eq!(sample.len(), 3);
684 }
682 }
685
683
686 #[test]
684 #[test]
687 fn test_bidirectional_sample() -> Result<(), GraphError> {
685 fn test_bidirectional_sample() -> Result<(), GraphError> {
688 let mut disco = full_disco();
686 let mut disco = full_disco();
689 disco.undecided = Some((0..=13).into_iter().collect());
687 disco.undecided = Some((0..=13).into_iter().collect());
690
688
691 let (sample_set, size) = disco.bidirectional_sample(7)?;
689 let (sample_set, size) = disco.bidirectional_sample(7)?;
692 assert_eq!(size, 7);
690 assert_eq!(size, 7);
693 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
691 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
694 sample.sort();
692 sample.sort();
695 // our DAG is a bit too small for the results to be really interesting
693 // our DAG is a bit too small for the results to be really interesting
696 // at least it shows that
694 // at least it shows that
697 // - we went both ways
695 // - we went both ways
698 // - we didn't take all Revisions (6 is not in the sample)
696 // - we didn't take all Revisions (6 is not in the sample)
699 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
697 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
700 Ok(())
698 Ok(())
701 }
699 }
702
700
703 }
701 }
General Comments 0
You need to be logged in to leave comments. Login now