##// END OF EJS Templates
rust-discovery: using the children cache in add_missing...
Georges Racinet -
r42970:8c9a6ade default
parent child Browse files
Show More
@@ -1,653 +1,696 b''
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 extern crate rand;
13 extern crate rand;
14 extern crate rand_pcg;
14 extern crate rand_pcg;
15 use self::rand::seq::SliceRandom;
15 use self::rand::seq::SliceRandom;
16 use self::rand::{thread_rng, RngCore, SeedableRng};
16 use self::rand::{thread_rng, RngCore, SeedableRng};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
18 use crate::ancestors::MissingAncestors;
18 use crate::ancestors::MissingAncestors;
19 use crate::dagops;
19 use crate::dagops;
20 use std::cmp::{max, min};
20 use std::cmp::{max, min};
21 use std::collections::{HashMap, HashSet, VecDeque};
21 use std::collections::{HashMap, HashSet, VecDeque};
22
22
23 type Rng = self::rand_pcg::Pcg32;
23 type Rng = self::rand_pcg::Pcg32;
24
24
25 pub struct PartialDiscovery<G: Graph + Clone> {
25 pub struct PartialDiscovery<G: Graph + Clone> {
26 target_heads: Option<Vec<Revision>>,
26 target_heads: Option<Vec<Revision>>,
27 graph: G, // plays the role of self._repo
27 graph: G, // plays the role of self._repo
28 common: MissingAncestors<G>,
28 common: MissingAncestors<G>,
29 undecided: Option<HashSet<Revision>>,
29 undecided: Option<HashSet<Revision>>,
30 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
30 children_cache: Option<HashMap<Revision, Vec<Revision>>>,
31 missing: HashSet<Revision>,
31 missing: HashSet<Revision>,
32 rng: Rng,
32 rng: Rng,
33 respect_size: bool,
33 respect_size: bool,
34 randomize: bool,
34 randomize: bool,
35 }
35 }
36
36
37 pub struct DiscoveryStats {
37 pub struct DiscoveryStats {
38 pub undecided: Option<usize>,
38 pub undecided: Option<usize>,
39 }
39 }
40
40
41 /// Update an existing sample to match the expected size
41 /// Update an existing sample to match the expected size
42 ///
42 ///
43 /// The sample is updated with revisions exponentially distant from each
43 /// The sample is updated with revisions exponentially distant from each
44 /// element of `heads`.
44 /// element of `heads`.
45 ///
45 ///
46 /// If a target size is specified, the sampling will stop once this size is
46 /// If a target size is specified, the sampling will stop once this size is
47 /// reached. Otherwise sampling will happen until roots of the <revs> set are
47 /// reached. Otherwise sampling will happen until roots of the <revs> set are
48 /// reached.
48 /// reached.
49 ///
49 ///
50 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
50 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
51 /// represented by `parentfn`
51 /// represented by `parentfn`
52 /// - `heads`: set of DAG head revs
52 /// - `heads`: set of DAG head revs
53 /// - `sample`: a sample to update
53 /// - `sample`: a sample to update
54 /// - `parentfn`: a callable to resolve parents for a revision
54 /// - `parentfn`: a callable to resolve parents for a revision
55 /// - `quicksamplesize`: optional target size of the sample
55 /// - `quicksamplesize`: optional target size of the sample
56 fn update_sample<I>(
56 fn update_sample<I>(
57 revs: Option<&HashSet<Revision>>,
57 revs: Option<&HashSet<Revision>>,
58 heads: impl IntoIterator<Item = Revision>,
58 heads: impl IntoIterator<Item = Revision>,
59 sample: &mut HashSet<Revision>,
59 sample: &mut HashSet<Revision>,
60 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
60 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
61 quicksamplesize: Option<usize>,
61 quicksamplesize: Option<usize>,
62 ) -> Result<(), GraphError>
62 ) -> Result<(), GraphError>
63 where
63 where
64 I: Iterator<Item = Revision>,
64 I: Iterator<Item = Revision>,
65 {
65 {
66 let mut distances: HashMap<Revision, u32> = HashMap::new();
66 let mut distances: HashMap<Revision, u32> = HashMap::new();
67 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
67 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
68 let mut factor: u32 = 1;
68 let mut factor: u32 = 1;
69 let mut seen: HashSet<Revision> = HashSet::new();
69 let mut seen: HashSet<Revision> = HashSet::new();
70 loop {
70 loop {
71 let current = match visit.pop_front() {
71 let current = match visit.pop_front() {
72 None => {
72 None => {
73 break;
73 break;
74 }
74 }
75 Some(r) => r,
75 Some(r) => r,
76 };
76 };
77 if !seen.insert(current) {
77 if !seen.insert(current) {
78 continue;
78 continue;
79 }
79 }
80
80
81 let d = *distances.entry(current).or_insert(1);
81 let d = *distances.entry(current).or_insert(1);
82 if d > factor {
82 if d > factor {
83 factor *= 2;
83 factor *= 2;
84 }
84 }
85 if d == factor {
85 if d == factor {
86 sample.insert(current);
86 sample.insert(current);
87 if let Some(sz) = quicksamplesize {
87 if let Some(sz) = quicksamplesize {
88 if sample.len() >= sz {
88 if sample.len() >= sz {
89 return Ok(());
89 return Ok(());
90 }
90 }
91 }
91 }
92 }
92 }
93 for p in parentsfn(current)? {
93 for p in parentsfn(current)? {
94 if let Some(revs) = revs {
94 if let Some(revs) = revs {
95 if !revs.contains(&p) {
95 if !revs.contains(&p) {
96 continue;
96 continue;
97 }
97 }
98 }
98 }
99 distances.entry(p).or_insert(d + 1);
99 distances.entry(p).or_insert(d + 1);
100 visit.push_back(p);
100 visit.push_back(p);
101 }
101 }
102 }
102 }
103 Ok(())
103 Ok(())
104 }
104 }
105
105
106 struct ParentsIterator {
106 struct ParentsIterator {
107 parents: [Revision; 2],
107 parents: [Revision; 2],
108 cur: usize,
108 cur: usize,
109 }
109 }
110
110
111 impl ParentsIterator {
111 impl ParentsIterator {
112 fn graph_parents(
112 fn graph_parents(
113 graph: &impl Graph,
113 graph: &impl Graph,
114 r: Revision,
114 r: Revision,
115 ) -> Result<ParentsIterator, GraphError> {
115 ) -> Result<ParentsIterator, GraphError> {
116 Ok(ParentsIterator {
116 Ok(ParentsIterator {
117 parents: graph.parents(r)?,
117 parents: graph.parents(r)?,
118 cur: 0,
118 cur: 0,
119 })
119 })
120 }
120 }
121 }
121 }
122
122
123 impl Iterator for ParentsIterator {
123 impl Iterator for ParentsIterator {
124 type Item = Revision;
124 type Item = Revision;
125
125
126 fn next(&mut self) -> Option<Revision> {
126 fn next(&mut self) -> Option<Revision> {
127 if self.cur > 1 {
127 if self.cur > 1 {
128 return None;
128 return None;
129 }
129 }
130 let rev = self.parents[self.cur];
130 let rev = self.parents[self.cur];
131 self.cur += 1;
131 self.cur += 1;
132 if rev == NULL_REVISION {
132 if rev == NULL_REVISION {
133 return self.next();
133 return self.next();
134 }
134 }
135 Some(rev)
135 Some(rev)
136 }
136 }
137 }
137 }
138
138
139 impl<G: Graph + Clone> PartialDiscovery<G> {
139 impl<G: Graph + Clone> PartialDiscovery<G> {
140 /// Create a PartialDiscovery object, with the intent
140 /// Create a PartialDiscovery object, with the intent
141 /// of comparing our `::<target_heads>` revset to the contents of another
141 /// of comparing our `::<target_heads>` revset to the contents of another
142 /// repo.
142 /// repo.
143 ///
143 ///
144 /// For now `target_heads` is passed as a vector, and will be used
144 /// For now `target_heads` is passed as a vector, and will be used
145 /// at the first call to `ensure_undecided()`.
145 /// at the first call to `ensure_undecided()`.
146 ///
146 ///
147 /// If we want to make the signature more flexible,
147 /// If we want to make the signature more flexible,
148 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
148 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
149 /// object since we'll keep it in the meanwhile
149 /// object since we'll keep it in the meanwhile
150 ///
150 ///
151 /// The `respect_size` boolean controls how the sampling methods
151 /// The `respect_size` boolean controls how the sampling methods
152 /// will interpret the size argument requested by the caller. If it's
152 /// will interpret the size argument requested by the caller. If it's
153 /// `false`, they are allowed to produce a sample whose size is more
153 /// `false`, they are allowed to produce a sample whose size is more
154 /// appropriate to the situation (typically bigger).
154 /// appropriate to the situation (typically bigger).
155 ///
155 ///
156 /// The `randomize` boolean affects sampling, and specifically how
156 /// The `randomize` boolean affects sampling, and specifically how
157 /// limiting or last-minute expanding is been done:
157 /// limiting or last-minute expanding is been done:
158 ///
158 ///
159 /// If `true`, both will perform random picking from `self.undecided`.
159 /// If `true`, both will perform random picking from `self.undecided`.
160 /// This is currently the best for actual discoveries.
160 /// This is currently the best for actual discoveries.
161 ///
161 ///
162 /// If `false`, a reproductible picking strategy is performed. This is
162 /// If `false`, a reproductible picking strategy is performed. This is
163 /// useful for integration tests.
163 /// useful for integration tests.
164 pub fn new(
164 pub fn new(
165 graph: G,
165 graph: G,
166 target_heads: Vec<Revision>,
166 target_heads: Vec<Revision>,
167 respect_size: bool,
167 respect_size: bool,
168 randomize: bool,
168 randomize: bool,
169 ) -> Self {
169 ) -> Self {
170 let mut seed: [u8; 16] = [0; 16];
170 let mut seed: [u8; 16] = [0; 16];
171 if randomize {
171 if randomize {
172 thread_rng().fill_bytes(&mut seed);
172 thread_rng().fill_bytes(&mut seed);
173 }
173 }
174 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
174 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
175 }
175 }
176
176
177 pub fn new_with_seed(
177 pub fn new_with_seed(
178 graph: G,
178 graph: G,
179 target_heads: Vec<Revision>,
179 target_heads: Vec<Revision>,
180 seed: [u8; 16],
180 seed: [u8; 16],
181 respect_size: bool,
181 respect_size: bool,
182 randomize: bool,
182 randomize: bool,
183 ) -> Self {
183 ) -> Self {
184 PartialDiscovery {
184 PartialDiscovery {
185 undecided: None,
185 undecided: None,
186 children_cache: None,
186 children_cache: None,
187 target_heads: Some(target_heads),
187 target_heads: Some(target_heads),
188 graph: graph.clone(),
188 graph: graph.clone(),
189 common: MissingAncestors::new(graph, vec![]),
189 common: MissingAncestors::new(graph, vec![]),
190 missing: HashSet::new(),
190 missing: HashSet::new(),
191 rng: Rng::from_seed(seed),
191 rng: Rng::from_seed(seed),
192 respect_size: respect_size,
192 respect_size: respect_size,
193 randomize: randomize,
193 randomize: randomize,
194 }
194 }
195 }
195 }
196
196
197 /// Extract at most `size` random elements from sample and return them
197 /// Extract at most `size` random elements from sample and return them
198 /// as a vector
198 /// as a vector
199 fn limit_sample(
199 fn limit_sample(
200 &mut self,
200 &mut self,
201 mut sample: Vec<Revision>,
201 mut sample: Vec<Revision>,
202 size: usize,
202 size: usize,
203 ) -> Vec<Revision> {
203 ) -> Vec<Revision> {
204 if !self.randomize {
204 if !self.randomize {
205 sample.sort();
205 sample.sort();
206 sample.truncate(size);
206 sample.truncate(size);
207 return sample;
207 return sample;
208 }
208 }
209 let sample_len = sample.len();
209 let sample_len = sample.len();
210 if sample_len <= size {
210 if sample_len <= size {
211 return sample;
211 return sample;
212 }
212 }
213 let rng = &mut self.rng;
213 let rng = &mut self.rng;
214 let dropped_size = sample_len - size;
214 let dropped_size = sample_len - size;
215 let limited_slice = if size < dropped_size {
215 let limited_slice = if size < dropped_size {
216 sample.partial_shuffle(rng, size).0
216 sample.partial_shuffle(rng, size).0
217 } else {
217 } else {
218 sample.partial_shuffle(rng, dropped_size).1
218 sample.partial_shuffle(rng, dropped_size).1
219 };
219 };
220 limited_slice.to_owned()
220 limited_slice.to_owned()
221 }
221 }
222
222
223 /// Register revisions known as being common
223 /// Register revisions known as being common
224 pub fn add_common_revisions(
224 pub fn add_common_revisions(
225 &mut self,
225 &mut self,
226 common: impl IntoIterator<Item = Revision>,
226 common: impl IntoIterator<Item = Revision>,
227 ) -> Result<(), GraphError> {
227 ) -> Result<(), GraphError> {
228 self.common.add_bases(common);
228 self.common.add_bases(common);
229 if let Some(ref mut undecided) = self.undecided {
229 if let Some(ref mut undecided) = self.undecided {
230 self.common.remove_ancestors_from(undecided)?;
230 self.common.remove_ancestors_from(undecided)?;
231 }
231 }
232 Ok(())
232 Ok(())
233 }
233 }
234
234
235 /// Register revisions known as being missing
235 /// Register revisions known as being missing
236 ///
237 /// # Performance note
238 ///
239 /// Except in the most trivial case, the first call of this method has
240 /// the side effect of computing `self.undecided` set for the first time,
241 /// and the related caches it might need for efficiency of its internal
242 /// computation. This is typically faster if more information is
243 /// available in `self.common`. Therefore, for good performance, the
244 /// caller should avoid calling this too early.
236 pub fn add_missing_revisions(
245 pub fn add_missing_revisions(
237 &mut self,
246 &mut self,
238 missing: impl IntoIterator<Item = Revision>,
247 missing: impl IntoIterator<Item = Revision>,
239 ) -> Result<(), GraphError> {
248 ) -> Result<(), GraphError> {
240 self.ensure_undecided()?;
249 self.ensure_children_cache()?;
241 let range = dagops::range(
250 self.ensure_undecided()?; // for safety of possible future refactors
242 &self.graph,
251 let children = self.children_cache.as_ref().unwrap();
243 missing,
252 let mut seen: HashSet<Revision> = HashSet::new();
244 self.undecided.as_ref().unwrap().iter().cloned(),
253 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
245 )?;
246 let undecided_mut = self.undecided.as_mut().unwrap();
254 let undecided_mut = self.undecided.as_mut().unwrap();
247 for missrev in range {
255 while let Some(rev) = tovisit.pop_front() {
248 self.missing.insert(missrev);
256 if !self.missing.insert(rev) {
249 undecided_mut.remove(&missrev);
257 // either it's known to be missing from a previous
258 // invocation, and there's no need to iterate on its
259 // children (we now they are all missing)
260 // or it's from a previous iteration of this loop
261 // and its children have already been queued
262 continue;
263 }
264 undecided_mut.remove(&rev);
265 match children.get(&rev) {
266 None => {
267 continue;
268 }
269 Some(this_children) => {
270 for child in this_children.iter().cloned() {
271 if seen.insert(child) {
272 tovisit.push_back(child);
273 }
274 }
275 }
276 }
250 }
277 }
251 Ok(())
278 Ok(())
252 }
279 }
253
280
254 /// Do we have any information about the peer?
281 /// Do we have any information about the peer?
255 pub fn has_info(&self) -> bool {
282 pub fn has_info(&self) -> bool {
256 self.common.has_bases()
283 self.common.has_bases()
257 }
284 }
258
285
259 /// Did we acquire full knowledge of our Revisions that the peer has?
286 /// Did we acquire full knowledge of our Revisions that the peer has?
260 pub fn is_complete(&self) -> bool {
287 pub fn is_complete(&self) -> bool {
261 self.undecided.as_ref().map_or(false, |s| s.is_empty())
288 self.undecided.as_ref().map_or(false, |s| s.is_empty())
262 }
289 }
263
290
264 /// Return the heads of the currently known common set of revisions.
291 /// Return the heads of the currently known common set of revisions.
265 ///
292 ///
266 /// If the discovery process is not complete (see `is_complete()`), the
293 /// If the discovery process is not complete (see `is_complete()`), the
267 /// caller must be aware that this is an intermediate state.
294 /// caller must be aware that this is an intermediate state.
268 ///
295 ///
269 /// On the other hand, if it is complete, then this is currently
296 /// On the other hand, if it is complete, then this is currently
270 /// the only way to retrieve the end results of the discovery process.
297 /// the only way to retrieve the end results of the discovery process.
271 ///
298 ///
272 /// We may introduce in the future an `into_common_heads` call that
299 /// We may introduce in the future an `into_common_heads` call that
273 /// would be more appropriate for normal Rust callers, dropping `self`
300 /// would be more appropriate for normal Rust callers, dropping `self`
274 /// if it is complete.
301 /// if it is complete.
275 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
276 self.common.bases_heads()
303 self.common.bases_heads()
277 }
304 }
278
305
279 /// Force first computation of `self.undecided`
306 /// Force first computation of `self.undecided`
280 ///
307 ///
281 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
282 /// unwrapped to get workable immutable or mutable references without
309 /// unwrapped to get workable immutable or mutable references without
283 /// any panic.
310 /// any panic.
284 ///
311 ///
285 /// This is an imperative call instead of an access with added lazyness
312 /// This is an imperative call instead of an access with added lazyness
286 /// to reduce easily the scope of mutable borrow for the caller,
313 /// to reduce easily the scope of mutable borrow for the caller,
287 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 /// compared to undecided(&'a mut self) -> &'a… that would keep it
288 /// as long as the resulting immutable one.
315 /// as long as the resulting immutable one.
289 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
290 if self.undecided.is_some() {
317 if self.undecided.is_some() {
291 return Ok(());
318 return Ok(());
292 }
319 }
293 let tgt = self.target_heads.take().unwrap();
320 let tgt = self.target_heads.take().unwrap();
294 self.undecided =
321 self.undecided =
295 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
296 Ok(())
323 Ok(())
297 }
324 }
298
325
299 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
300 if self.children_cache.is_some() {
327 if self.children_cache.is_some() {
301 return Ok(());
328 return Ok(());
302 }
329 }
303 self.ensure_undecided()?;
330 self.ensure_undecided()?;
304
331
305 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
332 let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
306 for &rev in self.undecided.as_ref().unwrap() {
333 for &rev in self.undecided.as_ref().unwrap() {
307 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
308 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
309 }
336 }
310 }
337 }
311 self.children_cache = Some(children);
338 self.children_cache = Some(children);
312 Ok(())
339 Ok(())
313 }
340 }
314
341
315 /// Provide statistics about the current state of the discovery process
342 /// Provide statistics about the current state of the discovery process
316 pub fn stats(&self) -> DiscoveryStats {
343 pub fn stats(&self) -> DiscoveryStats {
317 DiscoveryStats {
344 DiscoveryStats {
318 undecided: self.undecided.as_ref().map(|s| s.len()),
345 undecided: self.undecided.as_ref().map(|s| s.len()),
319 }
346 }
320 }
347 }
321
348
322 pub fn take_quick_sample(
349 pub fn take_quick_sample(
323 &mut self,
350 &mut self,
324 headrevs: impl IntoIterator<Item = Revision>,
351 headrevs: impl IntoIterator<Item = Revision>,
325 size: usize,
352 size: usize,
326 ) -> Result<Vec<Revision>, GraphError> {
353 ) -> Result<Vec<Revision>, GraphError> {
327 self.ensure_undecided()?;
354 self.ensure_undecided()?;
328 let mut sample = {
355 let mut sample = {
329 let undecided = self.undecided.as_ref().unwrap();
356 let undecided = self.undecided.as_ref().unwrap();
330 if undecided.len() <= size {
357 if undecided.len() <= size {
331 return Ok(undecided.iter().cloned().collect());
358 return Ok(undecided.iter().cloned().collect());
332 }
359 }
333 dagops::heads(&self.graph, undecided.iter())?
360 dagops::heads(&self.graph, undecided.iter())?
334 };
361 };
335 if sample.len() >= size {
362 if sample.len() >= size {
336 return Ok(self.limit_sample(sample.into_iter().collect(), size));
363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
337 }
364 }
338 update_sample(
365 update_sample(
339 None,
366 None,
340 headrevs,
367 headrevs,
341 &mut sample,
368 &mut sample,
342 |r| ParentsIterator::graph_parents(&self.graph, r),
369 |r| ParentsIterator::graph_parents(&self.graph, r),
343 Some(size),
370 Some(size),
344 )?;
371 )?;
345 Ok(sample.into_iter().collect())
372 Ok(sample.into_iter().collect())
346 }
373 }
347
374
348 /// Extract a sample from `self.undecided`, going from its heads and roots.
375 /// Extract a sample from `self.undecided`, going from its heads and roots.
349 ///
376 ///
350 /// The `size` parameter is used to avoid useless computations if
377 /// The `size` parameter is used to avoid useless computations if
351 /// it turns out to be bigger than the whole set of undecided Revisions.
378 /// it turns out to be bigger than the whole set of undecided Revisions.
352 ///
379 ///
353 /// The sample is taken by using `update_sample` from the heads, then
380 /// The sample is taken by using `update_sample` from the heads, then
354 /// from the roots, working on the reverse DAG,
381 /// from the roots, working on the reverse DAG,
355 /// expressed by `self.children_cache`.
382 /// expressed by `self.children_cache`.
356 ///
383 ///
357 /// No effort is being made to complete or limit the sample to `size`
384 /// No effort is being made to complete or limit the sample to `size`
358 /// but this method returns another interesting size that it derives
385 /// but this method returns another interesting size that it derives
359 /// from its knowledge of the structure of the various sets, leaving
386 /// from its knowledge of the structure of the various sets, leaving
360 /// to the caller the decision to use it or not.
387 /// to the caller the decision to use it or not.
361 fn bidirectional_sample(
388 fn bidirectional_sample(
362 &mut self,
389 &mut self,
363 size: usize,
390 size: usize,
364 ) -> Result<(HashSet<Revision>, usize), GraphError> {
391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
365 self.ensure_undecided()?;
392 self.ensure_undecided()?;
366 {
393 {
367 // we don't want to compute children_cache before this
394 // we don't want to compute children_cache before this
368 // but doing it after extracting self.undecided takes a mutable
395 // but doing it after extracting self.undecided takes a mutable
369 // ref to self while a shareable one is still active.
396 // ref to self while a shareable one is still active.
370 let undecided = self.undecided.as_ref().unwrap();
397 let undecided = self.undecided.as_ref().unwrap();
371 if undecided.len() <= size {
398 if undecided.len() <= size {
372 return Ok((undecided.clone(), size));
399 return Ok((undecided.clone(), size));
373 }
400 }
374 }
401 }
375
402
376 self.ensure_children_cache()?;
403 self.ensure_children_cache()?;
377 let revs = self.undecided.as_ref().unwrap();
404 let revs = self.undecided.as_ref().unwrap();
378 let mut sample: HashSet<Revision> = revs.clone();
405 let mut sample: HashSet<Revision> = revs.clone();
379
406
380 // it's possible that leveraging the children cache would be more
407 // it's possible that leveraging the children cache would be more
381 // efficient here
408 // efficient here
382 dagops::retain_heads(&self.graph, &mut sample)?;
409 dagops::retain_heads(&self.graph, &mut sample)?;
383 let revsheads = sample.clone(); // was again heads(revs) in python
410 let revsheads = sample.clone(); // was again heads(revs) in python
384
411
385 // update from heads
412 // update from heads
386 update_sample(
413 update_sample(
387 Some(revs),
414 Some(revs),
388 revsheads.iter().cloned(),
415 revsheads.iter().cloned(),
389 &mut sample,
416 &mut sample,
390 |r| ParentsIterator::graph_parents(&self.graph, r),
417 |r| ParentsIterator::graph_parents(&self.graph, r),
391 None,
418 None,
392 )?;
419 )?;
393
420
394 // update from roots
421 // update from roots
395 let revroots: HashSet<Revision> =
422 let revroots: HashSet<Revision> =
396 dagops::roots(&self.graph, revs)?.into_iter().collect();
423 dagops::roots(&self.graph, revs)?.into_iter().collect();
397 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
398
425
399 let children = self.children_cache.as_ref().unwrap();
426 let children = self.children_cache.as_ref().unwrap();
400 let empty_vec: Vec<Revision> = Vec::new();
427 let empty_vec: Vec<Revision> = Vec::new();
401 update_sample(
428 update_sample(
402 Some(revs),
429 Some(revs),
403 revroots,
430 revroots,
404 &mut sample,
431 &mut sample,
405 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
406 None,
433 None,
407 )?;
434 )?;
408 Ok((sample, prescribed_size))
435 Ok((sample, prescribed_size))
409 }
436 }
410
437
411 /// Fill up sample up to the wished size with random undecided Revisions.
438 /// Fill up sample up to the wished size with random undecided Revisions.
412 ///
439 ///
413 /// This is intended to be used as a last resort completion if the
440 /// This is intended to be used as a last resort completion if the
414 /// regular sampling algorithm returns too few elements.
441 /// regular sampling algorithm returns too few elements.
415 fn random_complete_sample(
442 fn random_complete_sample(
416 &mut self,
443 &mut self,
417 sample: &mut Vec<Revision>,
444 sample: &mut Vec<Revision>,
418 size: usize,
445 size: usize,
419 ) {
446 ) {
420 let sample_len = sample.len();
447 let sample_len = sample.len();
421 if size <= sample_len {
448 if size <= sample_len {
422 return;
449 return;
423 }
450 }
424 let take_from: Vec<Revision> = self
451 let take_from: Vec<Revision> = self
425 .undecided
452 .undecided
426 .as_ref()
453 .as_ref()
427 .unwrap()
454 .unwrap()
428 .iter()
455 .iter()
429 .filter(|&r| !sample.contains(r))
456 .filter(|&r| !sample.contains(r))
430 .cloned()
457 .cloned()
431 .collect();
458 .collect();
432 sample.extend(self.limit_sample(take_from, size - sample_len));
459 sample.extend(self.limit_sample(take_from, size - sample_len));
433 }
460 }
434
461
435 pub fn take_full_sample(
462 pub fn take_full_sample(
436 &mut self,
463 &mut self,
437 size: usize,
464 size: usize,
438 ) -> Result<Vec<Revision>, GraphError> {
465 ) -> Result<Vec<Revision>, GraphError> {
439 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
440 let size = if self.respect_size {
467 let size = if self.respect_size {
441 size
468 size
442 } else {
469 } else {
443 prescribed_size
470 prescribed_size
444 };
471 };
445 let mut sample =
472 let mut sample =
446 self.limit_sample(sample_set.into_iter().collect(), size);
473 self.limit_sample(sample_set.into_iter().collect(), size);
447 self.random_complete_sample(&mut sample, size);
474 self.random_complete_sample(&mut sample, size);
448 Ok(sample)
475 Ok(sample)
449 }
476 }
450 }
477 }
451
478
452 #[cfg(test)]
479 #[cfg(test)]
453 mod tests {
480 mod tests {
454 use super::*;
481 use super::*;
455 use crate::testing::SampleGraph;
482 use crate::testing::SampleGraph;
456
483
457 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
484 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
458 ///
485 ///
459 /// To avoid actual randomness in these tests, we give it a fixed
486 /// To avoid actual randomness in these tests, we give it a fixed
460 /// random seed, but by default we'll test the random version.
487 /// random seed, but by default we'll test the random version.
461 fn full_disco() -> PartialDiscovery<SampleGraph> {
488 fn full_disco() -> PartialDiscovery<SampleGraph> {
462 PartialDiscovery::new_with_seed(
489 PartialDiscovery::new_with_seed(
463 SampleGraph,
490 SampleGraph,
464 vec![10, 11, 12, 13],
491 vec![10, 11, 12, 13],
465 [0; 16],
492 [0; 16],
466 true,
493 true,
467 true,
494 true,
468 )
495 )
469 }
496 }
470
497
471 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
498 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
472 ///
499 ///
473 /// To avoid actual randomness in tests, we give it a fixed random seed.
500 /// To avoid actual randomness in tests, we give it a fixed random seed.
474 fn disco12() -> PartialDiscovery<SampleGraph> {
501 fn disco12() -> PartialDiscovery<SampleGraph> {
475 PartialDiscovery::new_with_seed(
502 PartialDiscovery::new_with_seed(
476 SampleGraph,
503 SampleGraph,
477 vec![12],
504 vec![12],
478 [0; 16],
505 [0; 16],
479 true,
506 true,
480 true,
507 true,
481 )
508 )
482 }
509 }
483
510
484 fn sorted_undecided(
511 fn sorted_undecided(
485 disco: &PartialDiscovery<SampleGraph>,
512 disco: &PartialDiscovery<SampleGraph>,
486 ) -> Vec<Revision> {
513 ) -> Vec<Revision> {
487 let mut as_vec: Vec<Revision> =
514 let mut as_vec: Vec<Revision> =
488 disco.undecided.as_ref().unwrap().iter().cloned().collect();
515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
489 as_vec.sort();
516 as_vec.sort();
490 as_vec
517 as_vec
491 }
518 }
492
519
493 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
494 let mut as_vec: Vec<Revision> =
521 let mut as_vec: Vec<Revision> =
495 disco.missing.iter().cloned().collect();
522 disco.missing.iter().cloned().collect();
496 as_vec.sort();
523 as_vec.sort();
497 as_vec
524 as_vec
498 }
525 }
499
526
500 fn sorted_common_heads(
527 fn sorted_common_heads(
501 disco: &PartialDiscovery<SampleGraph>,
528 disco: &PartialDiscovery<SampleGraph>,
502 ) -> Result<Vec<Revision>, GraphError> {
529 ) -> Result<Vec<Revision>, GraphError> {
503 let mut as_vec: Vec<Revision> =
530 let mut as_vec: Vec<Revision> =
504 disco.common_heads()?.iter().cloned().collect();
531 disco.common_heads()?.iter().cloned().collect();
505 as_vec.sort();
532 as_vec.sort();
506 Ok(as_vec)
533 Ok(as_vec)
507 }
534 }
508
535
509 #[test]
536 #[test]
510 fn test_add_common_get_undecided() -> Result<(), GraphError> {
537 fn test_add_common_get_undecided() -> Result<(), GraphError> {
511 let mut disco = full_disco();
538 let mut disco = full_disco();
512 assert_eq!(disco.undecided, None);
539 assert_eq!(disco.undecided, None);
513 assert!(!disco.has_info());
540 assert!(!disco.has_info());
514 assert_eq!(disco.stats().undecided, None);
541 assert_eq!(disco.stats().undecided, None);
515
542
516 disco.add_common_revisions(vec![11, 12])?;
543 disco.add_common_revisions(vec![11, 12])?;
517 assert!(disco.has_info());
544 assert!(disco.has_info());
518 assert!(!disco.is_complete());
545 assert!(!disco.is_complete());
519 assert!(disco.missing.is_empty());
546 assert!(disco.missing.is_empty());
520
547
521 // add_common_revisions did not trigger a premature computation
548 // add_common_revisions did not trigger a premature computation
522 // of `undecided`, let's check that and ask for them
549 // of `undecided`, let's check that and ask for them
523 assert_eq!(disco.undecided, None);
550 assert_eq!(disco.undecided, None);
524 disco.ensure_undecided()?;
551 disco.ensure_undecided()?;
525 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
552 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
526 assert_eq!(disco.stats().undecided, Some(4));
553 assert_eq!(disco.stats().undecided, Some(4));
527 Ok(())
554 Ok(())
528 }
555 }
529
556
530 /// in this test, we pretend that our peer misses exactly (8+10)::
557 /// in this test, we pretend that our peer misses exactly (8+10)::
531 /// and we're comparing all our repo to it (as in a bare push)
558 /// and we're comparing all our repo to it (as in a bare push)
532 #[test]
559 #[test]
533 fn test_discovery() -> Result<(), GraphError> {
560 fn test_discovery() -> Result<(), GraphError> {
534 let mut disco = full_disco();
561 let mut disco = full_disco();
535 disco.add_common_revisions(vec![11, 12])?;
562 disco.add_common_revisions(vec![11, 12])?;
536 disco.add_missing_revisions(vec![8, 10])?;
563 disco.add_missing_revisions(vec![8, 10])?;
537 assert_eq!(sorted_undecided(&disco), vec![5]);
564 assert_eq!(sorted_undecided(&disco), vec![5]);
538 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
565 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
539 assert!(!disco.is_complete());
566 assert!(!disco.is_complete());
540
567
541 disco.add_common_revisions(vec![5])?;
568 disco.add_common_revisions(vec![5])?;
542 assert_eq!(sorted_undecided(&disco), vec![]);
569 assert_eq!(sorted_undecided(&disco), vec![]);
543 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
570 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
544 assert!(disco.is_complete());
571 assert!(disco.is_complete());
545 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
572 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
546 Ok(())
573 Ok(())
547 }
574 }
548
575
549 #[test]
576 #[test]
577 fn test_add_missing_early_continue() -> Result<(), GraphError> {
578 eprintln!("test_add_missing_early_stop");
579 let mut disco = full_disco();
580 disco.add_common_revisions(vec![13, 3, 4])?;
581 disco.ensure_children_cache()?;
582 // 12 is grand-child of 6 through 9
583 // passing them in this order maximizes the chances of the
584 // early continue to do the wrong thing
585 disco.add_missing_revisions(vec![6, 9, 12])?;
586 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
587 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
588 assert!(!disco.is_complete());
589 Ok(())
590 }
591
592 #[test]
550 fn test_limit_sample_no_need_to() {
593 fn test_limit_sample_no_need_to() {
551 let sample = vec![1, 2, 3, 4];
594 let sample = vec![1, 2, 3, 4];
552 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
595 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
553 }
596 }
554
597
555 #[test]
598 #[test]
556 fn test_limit_sample_less_than_half() {
599 fn test_limit_sample_less_than_half() {
557 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
600 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
558 }
601 }
559
602
560 #[test]
603 #[test]
561 fn test_limit_sample_more_than_half() {
604 fn test_limit_sample_more_than_half() {
562 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
605 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
563 }
606 }
564
607
565 #[test]
608 #[test]
566 fn test_limit_sample_no_random() {
609 fn test_limit_sample_no_random() {
567 let mut disco = full_disco();
610 let mut disco = full_disco();
568 disco.randomize = false;
611 disco.randomize = false;
569 assert_eq!(
612 assert_eq!(
570 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
613 disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
571 vec![1, 3, 5, 7]
614 vec![1, 3, 5, 7]
572 );
615 );
573 }
616 }
574
617
575 #[test]
618 #[test]
576 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
619 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
577 let mut disco = full_disco();
620 let mut disco = full_disco();
578 disco.undecided = Some((1..=13).collect());
621 disco.undecided = Some((1..=13).collect());
579
622
580 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
623 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
581 sample_vec.sort();
624 sample_vec.sort();
582 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
625 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
583 Ok(())
626 Ok(())
584 }
627 }
585
628
586 #[test]
629 #[test]
587 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
630 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
588 let mut disco = disco12();
631 let mut disco = disco12();
589 disco.ensure_undecided()?;
632 disco.ensure_undecided()?;
590
633
591 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
634 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
592 sample_vec.sort();
635 sample_vec.sort();
593 // r12's only parent is r9, whose unique grand-parent through the
636 // r12's only parent is r9, whose unique grand-parent through the
594 // diamond shape is r4. This ends there because the distance from r4
637 // diamond shape is r4. This ends there because the distance from r4
595 // to the root is only 3.
638 // to the root is only 3.
596 assert_eq!(sample_vec, vec![4, 9, 12]);
639 assert_eq!(sample_vec, vec![4, 9, 12]);
597 Ok(())
640 Ok(())
598 }
641 }
599
642
600 #[test]
643 #[test]
601 fn test_children_cache() -> Result<(), GraphError> {
644 fn test_children_cache() -> Result<(), GraphError> {
602 let mut disco = full_disco();
645 let mut disco = full_disco();
603 disco.ensure_children_cache()?;
646 disco.ensure_children_cache()?;
604
647
605 let cache = disco.children_cache.unwrap();
648 let cache = disco.children_cache.unwrap();
606 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
649 assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
607 assert_eq!(cache.get(&10).cloned(), None);
650 assert_eq!(cache.get(&10).cloned(), None);
608
651
609 let mut children_4 = cache.get(&4).cloned().unwrap();
652 let mut children_4 = cache.get(&4).cloned().unwrap();
610 children_4.sort();
653 children_4.sort();
611 assert_eq!(children_4, vec![5, 6, 7]);
654 assert_eq!(children_4, vec![5, 6, 7]);
612
655
613 let mut children_7 = cache.get(&7).cloned().unwrap();
656 let mut children_7 = cache.get(&7).cloned().unwrap();
614 children_7.sort();
657 children_7.sort();
615 assert_eq!(children_7, vec![9, 11]);
658 assert_eq!(children_7, vec![9, 11]);
616
659
617 Ok(())
660 Ok(())
618 }
661 }
619
662
620 #[test]
663 #[test]
621 fn test_complete_sample() {
664 fn test_complete_sample() {
622 let mut disco = full_disco();
665 let mut disco = full_disco();
623 let undecided: HashSet<Revision> =
666 let undecided: HashSet<Revision> =
624 [4, 7, 9, 2, 3].iter().cloned().collect();
667 [4, 7, 9, 2, 3].iter().cloned().collect();
625 disco.undecided = Some(undecided);
668 disco.undecided = Some(undecided);
626
669
627 let mut sample = vec![0];
670 let mut sample = vec![0];
628 disco.random_complete_sample(&mut sample, 3);
671 disco.random_complete_sample(&mut sample, 3);
629 assert_eq!(sample.len(), 3);
672 assert_eq!(sample.len(), 3);
630
673
631 let mut sample = vec![2, 4, 7];
674 let mut sample = vec![2, 4, 7];
632 disco.random_complete_sample(&mut sample, 1);
675 disco.random_complete_sample(&mut sample, 1);
633 assert_eq!(sample.len(), 3);
676 assert_eq!(sample.len(), 3);
634 }
677 }
635
678
636 #[test]
679 #[test]
637 fn test_bidirectional_sample() -> Result<(), GraphError> {
680 fn test_bidirectional_sample() -> Result<(), GraphError> {
638 let mut disco = full_disco();
681 let mut disco = full_disco();
639 disco.undecided = Some((0..=13).into_iter().collect());
682 disco.undecided = Some((0..=13).into_iter().collect());
640
683
641 let (sample_set, size) = disco.bidirectional_sample(7)?;
684 let (sample_set, size) = disco.bidirectional_sample(7)?;
642 assert_eq!(size, 7);
685 assert_eq!(size, 7);
643 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
686 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
644 sample.sort();
687 sample.sort();
645 // our DAG is a bit too small for the results to be really interesting
688 // our DAG is a bit too small for the results to be really interesting
646 // at least it shows that
689 // at least it shows that
647 // - we went both ways
690 // - we went both ways
648 // - we didn't take all Revisions (6 is not in the sample)
691 // - we didn't take all Revisions (6 is not in the sample)
649 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
692 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
650 Ok(())
693 Ok(())
651 }
694 }
652
695
653 }
696 }
General Comments 0
You need to be logged in to leave comments. Login now