Show More
@@ -8,12 +8,10 edition = "2018" | |||
|
8 | 8 | [lib] |
|
9 | 9 | name = "hg" |
|
10 | 10 | |
|
11 | [dev-dependencies] | |
|
12 | rand = "*" | |
|
13 | rand_pcg = "*" | |
|
14 | ||
|
15 | 11 | [dependencies] |
|
16 | 12 | byteorder = "1.3.1" |
|
17 | 13 | lazy_static = "1.3.0" |
|
18 | 14 | memchr = "2.2.0" |
|
15 | rand = "> 0.6.4" | |
|
16 | rand_pcg = "> 0.1.0" | |
|
19 | 17 | regex = "^1.1" |
@@ -10,10 +10,16 | |||
|
10 | 10 | //! This is a Rust counterpart to the `partialdiscovery` class of |
|
11 | 11 | //! `mercurial.setdiscovery` |
|
12 | 12 | |
|
13 | use super::{Graph, GraphError, Revision}; | |
|
13 | extern crate rand; | |
|
14 | extern crate rand_pcg; | |
|
15 | use self::rand::seq::SliceRandom; | |
|
16 | use self::rand::{thread_rng, RngCore, SeedableRng}; | |
|
17 | use super::{Graph, GraphError, Revision, NULL_REVISION}; | |
|
14 | 18 | use crate::ancestors::MissingAncestors; |
|
15 | 19 | use crate::dagops; |
|
16 | use std::collections::HashSet; | |
|
20 | use std::collections::{HashMap, HashSet, VecDeque}; | |
|
21 | ||
|
22 | type Rng = self::rand_pcg::Pcg32; | |
|
17 | 23 | |
|
18 | 24 | pub struct PartialDiscovery<G: Graph + Clone> { |
|
19 | 25 | target_heads: Option<Vec<Revision>>, |
@@ -21,12 +27,78 pub struct PartialDiscovery<G: Graph + C | |||
|
21 | 27 | common: MissingAncestors<G>, |
|
22 | 28 | undecided: Option<HashSet<Revision>>, |
|
23 | 29 | missing: HashSet<Revision>, |
|
30 | rng: Rng, | |
|
24 | 31 | } |
|
25 | 32 | |
|
26 | 33 | pub struct DiscoveryStats { |
|
27 | 34 | pub undecided: Option<usize>, |
|
28 | 35 | } |
|
29 | 36 | |
|
37 | /// Update an existing sample to match the expected size | |
|
38 | /// | |
|
39 | /// The sample is updated with revisions exponentially distant from each | |
|
40 | /// element of `heads`. | |
|
41 | /// | |
|
42 | /// If a target size is specified, the sampling will stop once this size is | |
|
43 | /// reached. Otherwise sampling will happen until roots of the <revs> set are | |
|
44 | /// reached. | |
|
45 | /// | |
|
46 | /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag | |
|
47 | /// represented by `parentfn` | |
|
48 | /// - `heads`: set of DAG head revs | |
|
49 | /// - `sample`: a sample to update | |
|
50 | /// - `parentfn`: a callable to resolve parents for a revision | |
|
51 | /// - `quicksamplesize`: optional target size of the sample | |
|
52 | fn update_sample( | |
|
53 | revs: Option<&HashSet<Revision>>, | |
|
54 | heads: impl IntoIterator<Item = Revision>, | |
|
55 | sample: &mut HashSet<Revision>, | |
|
56 | parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, | |
|
57 | quicksamplesize: Option<usize>, | |
|
58 | ) -> Result<(), GraphError> { | |
|
59 | let mut distances: HashMap<Revision, u32> = HashMap::new(); | |
|
60 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); | |
|
61 | let mut factor: u32 = 1; | |
|
62 | let mut seen: HashSet<Revision> = HashSet::new(); | |
|
63 | loop { | |
|
64 | let current = match visit.pop_front() { | |
|
65 | None => { | |
|
66 | break; | |
|
67 | } | |
|
68 | Some(r) => r, | |
|
69 | }; | |
|
70 | if !seen.insert(current) { | |
|
71 | continue; | |
|
72 | } | |
|
73 | ||
|
74 | let d = *distances.entry(current).or_insert(1); | |
|
75 | if d > factor { | |
|
76 | factor *= 2; | |
|
77 | } | |
|
78 | if d == factor { | |
|
79 | sample.insert(current); | |
|
80 | if let Some(sz) = quicksamplesize { | |
|
81 | if sample.len() >= sz { | |
|
82 | return Ok(()); | |
|
83 | } | |
|
84 | } | |
|
85 | } | |
|
86 | for &p in &parentsfn(current)? { | |
|
87 | if p == NULL_REVISION { | |
|
88 | continue; | |
|
89 | } | |
|
90 | if let Some(revs) = revs { | |
|
91 | if !revs.contains(&p) { | |
|
92 | continue; | |
|
93 | } | |
|
94 | } | |
|
95 | distances.entry(p).or_insert(d + 1); | |
|
96 | visit.push_back(p); | |
|
97 | } | |
|
98 | } | |
|
99 | Ok(()) | |
|
100 | } | |
|
101 | ||
|
30 | 102 | impl<G: Graph + Clone> PartialDiscovery<G> { |
|
31 | 103 | /// Create a PartialDiscovery object, with the intent |
|
32 | 104 | /// of comparing our `::<target_heads>` revset to the contents of another |
@@ -39,15 +111,47 impl<G: Graph + Clone> PartialDiscovery< | |||
|
39 | 111 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait |
|
40 | 112 | /// object since we'll keep it in the meanwhile |
|
41 | 113 | pub fn new(graph: G, target_heads: Vec<Revision>) -> Self { |
|
114 | let mut seed: [u8; 16] = [0; 16]; | |
|
115 | thread_rng().fill_bytes(&mut seed); | |
|
116 | Self::new_with_seed(graph, target_heads, seed) | |
|
117 | } | |
|
118 | ||
|
119 | pub fn new_with_seed( | |
|
120 | graph: G, | |
|
121 | target_heads: Vec<Revision>, | |
|
122 | seed: [u8; 16], | |
|
123 | ) -> Self { | |
|
42 | 124 | PartialDiscovery { |
|
43 | 125 | undecided: None, |
|
44 | 126 | target_heads: Some(target_heads), |
|
45 | 127 | graph: graph.clone(), |
|
46 | 128 | common: MissingAncestors::new(graph, vec![]), |
|
47 | 129 | missing: HashSet::new(), |
|
130 | rng: Rng::from_seed(seed), | |
|
48 | 131 | } |
|
49 | 132 | } |
|
50 | 133 | |
|
134 | /// Extract at most `size` random elements from sample and return them | |
|
135 | /// as a vector | |
|
136 | fn limit_sample( | |
|
137 | &mut self, | |
|
138 | mut sample: Vec<Revision>, | |
|
139 | size: usize, | |
|
140 | ) -> Vec<Revision> { | |
|
141 | let sample_len = sample.len(); | |
|
142 | if sample_len <= size { | |
|
143 | return sample; | |
|
144 | } | |
|
145 | let rng = &mut self.rng; | |
|
146 | let dropped_size = sample_len - size; | |
|
147 | let limited_slice = if size < dropped_size { | |
|
148 | sample.partial_shuffle(rng, size).0 | |
|
149 | } else { | |
|
150 | sample.partial_shuffle(rng, dropped_size).1 | |
|
151 | }; | |
|
152 | limited_slice.to_owned() | |
|
153 | } | |
|
154 | ||
|
51 | 155 | /// Register revisions known as being common |
|
52 | 156 | pub fn add_common_revisions( |
|
53 | 157 | &mut self, |
@@ -130,6 +234,32 impl<G: Graph + Clone> PartialDiscovery< | |||
|
130 | 234 | undecided: self.undecided.as_ref().map(|s| s.len()), |
|
131 | 235 | } |
|
132 | 236 | } |
|
237 | ||
|
238 | pub fn take_quick_sample( | |
|
239 | &mut self, | |
|
240 | headrevs: impl IntoIterator<Item = Revision>, | |
|
241 | size: usize, | |
|
242 | ) -> Result<Vec<Revision>, GraphError> { | |
|
243 | self.ensure_undecided()?; | |
|
244 | let mut sample = { | |
|
245 | let undecided = self.undecided.as_ref().unwrap(); | |
|
246 | if undecided.len() <= size { | |
|
247 | return Ok(undecided.iter().cloned().collect()); | |
|
248 | } | |
|
249 | dagops::heads(&self.graph, undecided.iter())? | |
|
250 | }; | |
|
251 | if sample.len() >= size { | |
|
252 | return Ok(self.limit_sample(sample.into_iter().collect(), size)); | |
|
253 | } | |
|
254 | update_sample( | |
|
255 | None, | |
|
256 | headrevs, | |
|
257 | &mut sample, | |
|
258 | |r| self.graph.parents(r), | |
|
259 | Some(size), | |
|
260 | )?; | |
|
261 | Ok(sample.into_iter().collect()) | |
|
262 | } | |
|
133 | 263 | } |
|
134 | 264 | |
|
135 | 265 | #[cfg(test)] |
@@ -138,8 +268,21 mod tests { | |||
|
138 | 268 | use crate::testing::SampleGraph; |
|
139 | 269 | |
|
140 | 270 | /// A PartialDiscovery as for pushing all the heads of `SampleGraph` |
|
271 | /// | |
|
272 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |
|
141 | 273 | fn full_disco() -> PartialDiscovery<SampleGraph> { |
|
142 | PartialDiscovery::new(SampleGraph, vec![10, 11, 12, 13]) | |
|
274 | PartialDiscovery::new_with_seed( | |
|
275 | SampleGraph, | |
|
276 | vec![10, 11, 12, 13], | |
|
277 | [0; 16], | |
|
278 | ) | |
|
279 | } | |
|
280 | ||
|
281 | /// A PartialDiscovery as for pushing the 12 head of `SampleGraph` | |
|
282 | /// | |
|
283 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |
|
284 | fn disco12() -> PartialDiscovery<SampleGraph> { | |
|
285 | PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16]) | |
|
143 | 286 | } |
|
144 | 287 | |
|
145 | 288 | fn sorted_undecided( |
@@ -206,4 +349,45 mod tests { | |||
|
206 | 349 | assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]); |
|
207 | 350 | Ok(()) |
|
208 | 351 | } |
|
352 | ||
|
353 | #[test] | |
|
354 | fn test_limit_sample_no_need_to() { | |
|
355 | let sample = vec![1, 2, 3, 4]; | |
|
356 | assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]); | |
|
209 | 357 | } |
|
358 | ||
|
359 | #[test] | |
|
360 | fn test_limit_sample_less_than_half() { | |
|
361 | assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]); | |
|
362 | } | |
|
363 | ||
|
364 | #[test] | |
|
365 | fn test_limit_sample_more_than_half() { | |
|
366 | assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]); | |
|
367 | } | |
|
368 | ||
|
369 | #[test] | |
|
370 | fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { | |
|
371 | let mut disco = full_disco(); | |
|
372 | disco.undecided = Some((1..=13).collect()); | |
|
373 | ||
|
374 | let mut sample_vec = disco.take_quick_sample(vec![], 4)?; | |
|
375 | sample_vec.sort(); | |
|
376 | assert_eq!(sample_vec, vec![10, 11, 12, 13]); | |
|
377 | Ok(()) | |
|
378 | } | |
|
379 | ||
|
380 | #[test] | |
|
381 | fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> { | |
|
382 | let mut disco = disco12(); | |
|
383 | disco.ensure_undecided()?; | |
|
384 | ||
|
385 | let mut sample_vec = disco.take_quick_sample(vec![12], 4)?; | |
|
386 | sample_vec.sort(); | |
|
387 | // r12's only parent is r9, whose unique grand-parent through the | |
|
388 | // diamond shape is r4. This ends there because the distance from r4 | |
|
389 | // to the root is only 3. | |
|
390 | assert_eq!(sample_vec, vec![4, 9, 12]); | |
|
391 | Ok(()) | |
|
392 | } | |
|
393 | } |
General Comments 0
You need to be logged in to leave comments.
Login now