Show More
@@ -17,6 +17,7 b' use self::rand::{thread_rng, RngCore, Se' | |||||
17 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
17 | use super::{Graph, GraphError, Revision, NULL_REVISION}; | |
18 | use crate::ancestors::MissingAncestors; |
|
18 | use crate::ancestors::MissingAncestors; | |
19 | use crate::dagops; |
|
19 | use crate::dagops; | |
|
20 | use std::cmp::{max, min}; | |||
20 | use std::collections::{HashMap, HashSet, VecDeque}; |
|
21 | use std::collections::{HashMap, HashSet, VecDeque}; | |
21 |
|
22 | |||
22 | type Rng = self::rand_pcg::Pcg32; |
|
23 | type Rng = self::rand_pcg::Pcg32; | |
@@ -26,8 +27,10 b' pub struct PartialDiscovery<G: Graph + C' | |||||
26 | graph: G, // plays the role of self._repo |
|
27 | graph: G, // plays the role of self._repo | |
27 | common: MissingAncestors<G>, |
|
28 | common: MissingAncestors<G>, | |
28 | undecided: Option<HashSet<Revision>>, |
|
29 | undecided: Option<HashSet<Revision>>, | |
|
30 | children_cache: Option<HashMap<Revision, Vec<Revision>>>, | |||
29 | missing: HashSet<Revision>, |
|
31 | missing: HashSet<Revision>, | |
30 | rng: Rng, |
|
32 | rng: Rng, | |
|
33 | respect_size: bool, | |||
31 | } |
|
34 | } | |
32 |
|
35 | |||
33 | pub struct DiscoveryStats { |
|
36 | pub struct DiscoveryStats { | |
@@ -49,13 +52,16 b' pub struct DiscoveryStats {' | |||||
49 | /// - `sample`: a sample to update |
|
52 | /// - `sample`: a sample to update | |
50 | /// - `parentfn`: a callable to resolve parents for a revision |
|
53 | /// - `parentfn`: a callable to resolve parents for a revision | |
51 | /// - `quicksamplesize`: optional target size of the sample |
|
54 | /// - `quicksamplesize`: optional target size of the sample | |
52 | fn update_sample( |
|
55 | fn update_sample<I>( | |
53 | revs: Option<&HashSet<Revision>>, |
|
56 | revs: Option<&HashSet<Revision>>, | |
54 | heads: impl IntoIterator<Item = Revision>, |
|
57 | heads: impl IntoIterator<Item = Revision>, | |
55 | sample: &mut HashSet<Revision>, |
|
58 | sample: &mut HashSet<Revision>, | |
56 |
parentsfn: impl Fn(Revision) -> Result< |
|
59 | parentsfn: impl Fn(Revision) -> Result<I, GraphError>, | |
57 | quicksamplesize: Option<usize>, |
|
60 | quicksamplesize: Option<usize>, | |
58 |
) -> Result<(), GraphError> |
|
61 | ) -> Result<(), GraphError> | |
|
62 | where | |||
|
63 | I: Iterator<Item = Revision>, | |||
|
64 | { | |||
59 | let mut distances: HashMap<Revision, u32> = HashMap::new(); |
|
65 | let mut distances: HashMap<Revision, u32> = HashMap::new(); | |
60 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); |
|
66 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); | |
61 | let mut factor: u32 = 1; |
|
67 | let mut factor: u32 = 1; | |
@@ -83,10 +89,7 b' fn update_sample(' | |||||
83 | } |
|
89 | } | |
84 | } |
|
90 | } | |
85 | } |
|
91 | } | |
86 |
for |
|
92 | for p in parentsfn(current)? { | |
87 | if p == NULL_REVISION { |
|
|||
88 | continue; |
|
|||
89 | } |
|
|||
90 | if let Some(revs) = revs { |
|
93 | if let Some(revs) = revs { | |
91 | if !revs.contains(&p) { |
|
94 | if !revs.contains(&p) { | |
92 | continue; |
|
95 | continue; | |
@@ -99,6 +102,39 b' fn update_sample(' | |||||
99 | Ok(()) |
|
102 | Ok(()) | |
100 | } |
|
103 | } | |
101 |
|
104 | |||
|
105 | struct ParentsIterator { | |||
|
106 | parents: [Revision; 2], | |||
|
107 | cur: usize, | |||
|
108 | } | |||
|
109 | ||||
|
110 | impl ParentsIterator { | |||
|
111 | fn graph_parents( | |||
|
112 | graph: &impl Graph, | |||
|
113 | r: Revision, | |||
|
114 | ) -> Result<ParentsIterator, GraphError> { | |||
|
115 | Ok(ParentsIterator { | |||
|
116 | parents: graph.parents(r)?, | |||
|
117 | cur: 0, | |||
|
118 | }) | |||
|
119 | } | |||
|
120 | } | |||
|
121 | ||||
|
122 | impl Iterator for ParentsIterator { | |||
|
123 | type Item = Revision; | |||
|
124 | ||||
|
125 | fn next(&mut self) -> Option<Revision> { | |||
|
126 | if self.cur > 1 { | |||
|
127 | return None; | |||
|
128 | } | |||
|
129 | let rev = self.parents[self.cur]; | |||
|
130 | self.cur += 1; | |||
|
131 | if rev == NULL_REVISION { | |||
|
132 | return self.next(); | |||
|
133 | } | |||
|
134 | Some(rev) | |||
|
135 | } | |||
|
136 | } | |||
|
137 | ||||
102 | impl<G: Graph + Clone> PartialDiscovery<G> { |
|
138 | impl<G: Graph + Clone> PartialDiscovery<G> { | |
103 | /// Create a PartialDiscovery object, with the intent |
|
139 | /// Create a PartialDiscovery object, with the intent | |
104 | /// of comparing our `::<target_heads>` revset to the contents of another |
|
140 | /// of comparing our `::<target_heads>` revset to the contents of another | |
@@ -110,24 +146,36 b' impl<G: Graph + Clone> PartialDiscovery<' | |||||
110 | /// If we want to make the signature more flexible, |
|
146 | /// If we want to make the signature more flexible, | |
111 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait |
|
147 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait | |
112 | /// object since we'll keep it in the meanwhile |
|
148 | /// object since we'll keep it in the meanwhile | |
113 | pub fn new(graph: G, target_heads: Vec<Revision>) -> Self { |
|
149 | /// | |
|
150 | /// The `respect_size` boolean controls how the sampling methods | |||
|
151 | /// will interpret the size argument requested by the caller. If it's | |||
|
152 | /// `false`, they are allowed to produce a sample whose size is more | |||
|
153 | /// appropriate to the situation (typically bigger). | |||
|
154 | pub fn new( | |||
|
155 | graph: G, | |||
|
156 | target_heads: Vec<Revision>, | |||
|
157 | respect_size: bool, | |||
|
158 | ) -> Self { | |||
114 | let mut seed: [u8; 16] = [0; 16]; |
|
159 | let mut seed: [u8; 16] = [0; 16]; | |
115 | thread_rng().fill_bytes(&mut seed); |
|
160 | thread_rng().fill_bytes(&mut seed); | |
116 | Self::new_with_seed(graph, target_heads, seed) |
|
161 | Self::new_with_seed(graph, target_heads, seed, respect_size) | |
117 | } |
|
162 | } | |
118 |
|
163 | |||
119 | pub fn new_with_seed( |
|
164 | pub fn new_with_seed( | |
120 | graph: G, |
|
165 | graph: G, | |
121 | target_heads: Vec<Revision>, |
|
166 | target_heads: Vec<Revision>, | |
122 | seed: [u8; 16], |
|
167 | seed: [u8; 16], | |
|
168 | respect_size: bool, | |||
123 | ) -> Self { |
|
169 | ) -> Self { | |
124 | PartialDiscovery { |
|
170 | PartialDiscovery { | |
125 | undecided: None, |
|
171 | undecided: None, | |
|
172 | children_cache: None, | |||
126 | target_heads: Some(target_heads), |
|
173 | target_heads: Some(target_heads), | |
127 | graph: graph.clone(), |
|
174 | graph: graph.clone(), | |
128 | common: MissingAncestors::new(graph, vec![]), |
|
175 | common: MissingAncestors::new(graph, vec![]), | |
129 | missing: HashSet::new(), |
|
176 | missing: HashSet::new(), | |
130 | rng: Rng::from_seed(seed), |
|
177 | rng: Rng::from_seed(seed), | |
|
178 | respect_size: respect_size, | |||
131 | } |
|
179 | } | |
132 | } |
|
180 | } | |
133 |
|
181 | |||
@@ -228,6 +276,22 b' impl<G: Graph + Clone> PartialDiscovery<' | |||||
228 | Ok(()) |
|
276 | Ok(()) | |
229 | } |
|
277 | } | |
230 |
|
278 | |||
|
279 | fn ensure_children_cache(&mut self) -> Result<(), GraphError> { | |||
|
280 | if self.children_cache.is_some() { | |||
|
281 | return Ok(()); | |||
|
282 | } | |||
|
283 | self.ensure_undecided()?; | |||
|
284 | ||||
|
285 | let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new(); | |||
|
286 | for &rev in self.undecided.as_ref().unwrap() { | |||
|
287 | for p in ParentsIterator::graph_parents(&self.graph, rev)? { | |||
|
288 | children.entry(p).or_insert_with(|| Vec::new()).push(rev); | |||
|
289 | } | |||
|
290 | } | |||
|
291 | self.children_cache = Some(children); | |||
|
292 | Ok(()) | |||
|
293 | } | |||
|
294 | ||||
231 | /// Provide statistics about the current state of the discovery process |
|
295 | /// Provide statistics about the current state of the discovery process | |
232 | pub fn stats(&self) -> DiscoveryStats { |
|
296 | pub fn stats(&self) -> DiscoveryStats { | |
233 | DiscoveryStats { |
|
297 | DiscoveryStats { | |
@@ -255,11 +319,114 b' impl<G: Graph + Clone> PartialDiscovery<' | |||||
255 | None, |
|
319 | None, | |
256 | headrevs, |
|
320 | headrevs, | |
257 | &mut sample, |
|
321 | &mut sample, | |
258 | |r| self.graph.parents(r), |
|
322 | |r| ParentsIterator::graph_parents(&self.graph, r), | |
259 | Some(size), |
|
323 | Some(size), | |
260 | )?; |
|
324 | )?; | |
261 | Ok(sample.into_iter().collect()) |
|
325 | Ok(sample.into_iter().collect()) | |
262 | } |
|
326 | } | |
|
327 | ||||
|
328 | /// Extract a sample from `self.undecided`, going from its heads and roots. | |||
|
329 | /// | |||
|
330 | /// The `size` parameter is used to avoid useless computations if | |||
|
331 | /// it turns out to be bigger than the whole set of undecided Revisions. | |||
|
332 | /// | |||
|
333 | /// The sample is taken by using `update_sample` from the heads, then | |||
|
334 | /// from the roots, working on the reverse DAG, | |||
|
335 | /// expressed by `self.children_cache`. | |||
|
336 | /// | |||
|
337 | /// No effort is being made to complete or limit the sample to `size` | |||
|
338 | /// but this method returns another interesting size that it derives | |||
|
339 | /// from its knowledge of the structure of the various sets, leaving | |||
|
340 | /// to the caller the decision to use it or not. | |||
|
341 | fn bidirectional_sample( | |||
|
342 | &mut self, | |||
|
343 | size: usize, | |||
|
344 | ) -> Result<(HashSet<Revision>, usize), GraphError> { | |||
|
345 | self.ensure_undecided()?; | |||
|
346 | { | |||
|
347 | // we don't want to compute children_cache before this | |||
|
348 | // but doing it after extracting self.undecided takes a mutable | |||
|
349 | // ref to self while a shareable one is still active. | |||
|
350 | let undecided = self.undecided.as_ref().unwrap(); | |||
|
351 | if undecided.len() <= size { | |||
|
352 | return Ok((undecided.clone(), size)); | |||
|
353 | } | |||
|
354 | } | |||
|
355 | ||||
|
356 | self.ensure_children_cache()?; | |||
|
357 | let revs = self.undecided.as_ref().unwrap(); | |||
|
358 | let mut sample: HashSet<Revision> = revs.clone(); | |||
|
359 | ||||
|
360 | // it's possible that leveraging the children cache would be more | |||
|
361 | // efficient here | |||
|
362 | dagops::retain_heads(&self.graph, &mut sample)?; | |||
|
363 | let revsheads = sample.clone(); // was again heads(revs) in python | |||
|
364 | ||||
|
365 | // update from heads | |||
|
366 | update_sample( | |||
|
367 | Some(revs), | |||
|
368 | revsheads.iter().cloned(), | |||
|
369 | &mut sample, | |||
|
370 | |r| ParentsIterator::graph_parents(&self.graph, r), | |||
|
371 | None, | |||
|
372 | )?; | |||
|
373 | ||||
|
374 | // update from roots | |||
|
375 | let revroots: HashSet<Revision> = | |||
|
376 | dagops::roots(&self.graph, revs)?.into_iter().collect(); | |||
|
377 | let prescribed_size = max(size, min(revroots.len(), revsheads.len())); | |||
|
378 | ||||
|
379 | let children = self.children_cache.as_ref().unwrap(); | |||
|
380 | let empty_vec: Vec<Revision> = Vec::new(); | |||
|
381 | update_sample( | |||
|
382 | Some(revs), | |||
|
383 | revroots, | |||
|
384 | &mut sample, | |||
|
385 | |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()), | |||
|
386 | None, | |||
|
387 | )?; | |||
|
388 | Ok((sample, prescribed_size)) | |||
|
389 | } | |||
|
390 | ||||
|
391 | /// Fill up sample up to the wished size with random undecided Revisions. | |||
|
392 | /// | |||
|
393 | /// This is intended to be used as a last resort completion if the | |||
|
394 | /// regular sampling algorithm returns too few elements. | |||
|
395 | fn random_complete_sample( | |||
|
396 | &mut self, | |||
|
397 | sample: &mut Vec<Revision>, | |||
|
398 | size: usize, | |||
|
399 | ) { | |||
|
400 | let sample_len = sample.len(); | |||
|
401 | if size <= sample_len { | |||
|
402 | return; | |||
|
403 | } | |||
|
404 | let take_from: Vec<Revision> = self | |||
|
405 | .undecided | |||
|
406 | .as_ref() | |||
|
407 | .unwrap() | |||
|
408 | .iter() | |||
|
409 | .filter(|&r| !sample.contains(r)) | |||
|
410 | .cloned() | |||
|
411 | .collect(); | |||
|
412 | sample.extend(self.limit_sample(take_from, size - sample_len)); | |||
|
413 | } | |||
|
414 | ||||
|
415 | pub fn take_full_sample( | |||
|
416 | &mut self, | |||
|
417 | size: usize, | |||
|
418 | ) -> Result<Vec<Revision>, GraphError> { | |||
|
419 | let (sample_set, prescribed_size) = self.bidirectional_sample(size)?; | |||
|
420 | let size = if self.respect_size { | |||
|
421 | size | |||
|
422 | } else { | |||
|
423 | prescribed_size | |||
|
424 | }; | |||
|
425 | let mut sample = | |||
|
426 | self.limit_sample(sample_set.into_iter().collect(), size); | |||
|
427 | self.random_complete_sample(&mut sample, size); | |||
|
428 | Ok(sample) | |||
|
429 | } | |||
263 | } |
|
430 | } | |
264 |
|
431 | |||
265 | #[cfg(test)] |
|
432 | #[cfg(test)] | |
@@ -275,6 +442,7 b' mod tests {' | |||||
275 | SampleGraph, |
|
442 | SampleGraph, | |
276 | vec![10, 11, 12, 13], |
|
443 | vec![10, 11, 12, 13], | |
277 | [0; 16], |
|
444 | [0; 16], | |
|
445 | true, | |||
278 | ) |
|
446 | ) | |
279 | } |
|
447 | } | |
280 |
|
448 | |||
@@ -282,7 +450,7 b' mod tests {' | |||||
282 | /// |
|
450 | /// | |
283 | /// To avoid actual randomness in tests, we give it a fixed random seed. |
|
451 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |
284 | fn disco12() -> PartialDiscovery<SampleGraph> { |
|
452 | fn disco12() -> PartialDiscovery<SampleGraph> { | |
285 | PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16]) |
|
453 | PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true) | |
286 | } |
|
454 | } | |
287 |
|
455 | |||
288 | fn sorted_undecided( |
|
456 | fn sorted_undecided( | |
@@ -390,4 +558,58 b' mod tests {' | |||||
390 | assert_eq!(sample_vec, vec![4, 9, 12]); |
|
558 | assert_eq!(sample_vec, vec![4, 9, 12]); | |
391 | Ok(()) |
|
559 | Ok(()) | |
392 | } |
|
560 | } | |
|
561 | ||||
|
562 | #[test] | |||
|
563 | fn test_children_cache() -> Result<(), GraphError> { | |||
|
564 | let mut disco = full_disco(); | |||
|
565 | disco.ensure_children_cache()?; | |||
|
566 | ||||
|
567 | let cache = disco.children_cache.unwrap(); | |||
|
568 | assert_eq!(cache.get(&2).cloned(), Some(vec![4])); | |||
|
569 | assert_eq!(cache.get(&10).cloned(), None); | |||
|
570 | ||||
|
571 | let mut children_4 = cache.get(&4).cloned().unwrap(); | |||
|
572 | children_4.sort(); | |||
|
573 | assert_eq!(children_4, vec![5, 6, 7]); | |||
|
574 | ||||
|
575 | let mut children_7 = cache.get(&7).cloned().unwrap(); | |||
|
576 | children_7.sort(); | |||
|
577 | assert_eq!(children_7, vec![9, 11]); | |||
|
578 | ||||
|
579 | Ok(()) | |||
|
580 | } | |||
|
581 | ||||
|
582 | #[test] | |||
|
583 | fn test_complete_sample() { | |||
|
584 | let mut disco = full_disco(); | |||
|
585 | let undecided: HashSet<Revision> = | |||
|
586 | [4, 7, 9, 2, 3].iter().cloned().collect(); | |||
|
587 | disco.undecided = Some(undecided); | |||
|
588 | ||||
|
589 | let mut sample = vec![0]; | |||
|
590 | disco.random_complete_sample(&mut sample, 3); | |||
|
591 | assert_eq!(sample.len(), 3); | |||
|
592 | ||||
|
593 | let mut sample = vec![2, 4, 7]; | |||
|
594 | disco.random_complete_sample(&mut sample, 1); | |||
|
595 | assert_eq!(sample.len(), 3); | |||
|
596 | } | |||
|
597 | ||||
|
598 | #[test] | |||
|
599 | fn test_bidirectional_sample() -> Result<(), GraphError> { | |||
|
600 | let mut disco = full_disco(); | |||
|
601 | disco.undecided = Some((0..=13).into_iter().collect()); | |||
|
602 | ||||
|
603 | let (sample_set, size) = disco.bidirectional_sample(7)?; | |||
|
604 | assert_eq!(size, 7); | |||
|
605 | let mut sample: Vec<Revision> = sample_set.into_iter().collect(); | |||
|
606 | sample.sort(); | |||
|
607 | // our DAG is a bit too small for the results to be really interesting | |||
|
608 | // at least it shows that | |||
|
609 | // - we went both ways | |||
|
610 | // - we didn't take all Revisions (6 is not in the sample) | |||
|
611 | assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]); | |||
|
612 | Ok(()) | |||
|
613 | } | |||
|
614 | ||||
393 | } |
|
615 | } |
@@ -36,7 +36,7 b' py_class!(pub class PartialDiscovery |py' | |||||
36 | _cls, |
|
36 | _cls, | |
37 | repo: PyObject, |
|
37 | repo: PyObject, | |
38 | targetheads: PyObject, |
|
38 | targetheads: PyObject, | |
39 |
|
|
39 | respectsize: bool | |
40 | ) -> PyResult<PartialDiscovery> { |
|
40 | ) -> PyResult<PartialDiscovery> { | |
41 | let index = repo.getattr(py, "changelog")?.getattr(py, "index")?; |
|
41 | let index = repo.getattr(py, "changelog")?.getattr(py, "index")?; | |
42 | Self::create_instance( |
|
42 | Self::create_instance( | |
@@ -44,6 +44,7 b' py_class!(pub class PartialDiscovery |py' | |||||
44 | RefCell::new(Box::new(CorePartialDiscovery::new( |
|
44 | RefCell::new(Box::new(CorePartialDiscovery::new( | |
45 | Index::new(py, index)?, |
|
45 | Index::new(py, index)?, | |
46 | rev_pyiter_collect(py, &targetheads)?, |
|
46 | rev_pyiter_collect(py, &targetheads)?, | |
|
47 | respectsize | |||
47 | ))) |
|
48 | ))) | |
48 | ) |
|
49 | ) | |
49 | } |
|
50 | } |
General Comments 0
You need to be logged in to leave comments.
Login now