Show More
@@ -8,12 +8,10 edition = "2018" | |||||
8 | [lib] |
|
8 | [lib] | |
9 | name = "hg" |
|
9 | name = "hg" | |
10 |
|
10 | |||
11 | [dev-dependencies] |
|
|||
12 | rand = "*" |
|
|||
13 | rand_pcg = "*" |
|
|||
14 |
|
||||
15 | [dependencies] |
|
11 | [dependencies] | |
16 | byteorder = "1.3.1" |
|
12 | byteorder = "1.3.1" | |
17 | lazy_static = "1.3.0" |
|
13 | lazy_static = "1.3.0" | |
18 | memchr = "2.2.0" |
|
14 | memchr = "2.2.0" | |
|
15 | rand = "> 0.6.4" | |||
|
16 | rand_pcg = "> 0.1.0" | |||
19 | regex = "^1.1" |
|
17 | regex = "^1.1" |
@@ -10,10 +10,16 | |||||
10 | //! This is a Rust counterpart to the `partialdiscovery` class of |
|
10 | //! This is a Rust counterpart to the `partialdiscovery` class of | |
11 | //! `mercurial.setdiscovery` |
|
11 | //! `mercurial.setdiscovery` | |
12 |
|
12 | |||
13 | use super::{Graph, GraphError, Revision}; |
|
13 | extern crate rand; | |
|
14 | extern crate rand_pcg; | |||
|
15 | use self::rand::seq::SliceRandom; | |||
|
16 | use self::rand::{thread_rng, RngCore, SeedableRng}; | |||
|
17 | use super::{Graph, GraphError, Revision, NULL_REVISION}; | |||
14 | use crate::ancestors::MissingAncestors; |
|
18 | use crate::ancestors::MissingAncestors; | |
15 | use crate::dagops; |
|
19 | use crate::dagops; | |
16 | use std::collections::HashSet; |
|
20 | use std::collections::{HashMap, HashSet, VecDeque}; | |
|
21 | ||||
|
22 | type Rng = self::rand_pcg::Pcg32; | |||
17 |
|
23 | |||
18 | pub struct PartialDiscovery<G: Graph + Clone> { |
|
24 | pub struct PartialDiscovery<G: Graph + Clone> { | |
19 | target_heads: Option<Vec<Revision>>, |
|
25 | target_heads: Option<Vec<Revision>>, | |
@@ -21,12 +27,78 pub struct PartialDiscovery<G: Graph + C | |||||
21 | common: MissingAncestors<G>, |
|
27 | common: MissingAncestors<G>, | |
22 | undecided: Option<HashSet<Revision>>, |
|
28 | undecided: Option<HashSet<Revision>>, | |
23 | missing: HashSet<Revision>, |
|
29 | missing: HashSet<Revision>, | |
|
30 | rng: Rng, | |||
24 | } |
|
31 | } | |
25 |
|
32 | |||
26 | pub struct DiscoveryStats { |
|
33 | pub struct DiscoveryStats { | |
27 | pub undecided: Option<usize>, |
|
34 | pub undecided: Option<usize>, | |
28 | } |
|
35 | } | |
29 |
|
36 | |||
|
37 | /// Update an existing sample to match the expected size | |||
|
38 | /// | |||
|
39 | /// The sample is updated with revisions exponentially distant from each | |||
|
40 | /// element of `heads`. | |||
|
41 | /// | |||
|
42 | /// If a target size is specified, the sampling will stop once this size is | |||
|
43 | /// reached. Otherwise sampling will happen until roots of the <revs> set are | |||
|
44 | /// reached. | |||
|
45 | /// | |||
|
46 | /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag | |||
|
47 | /// represented by `parentfn` | |||
|
48 | /// - `heads`: set of DAG head revs | |||
|
49 | /// - `sample`: a sample to update | |||
|
50 | /// - `parentfn`: a callable to resolve parents for a revision | |||
|
51 | /// - `quicksamplesize`: optional target size of the sample | |||
|
52 | fn update_sample( | |||
|
53 | revs: Option<&HashSet<Revision>>, | |||
|
54 | heads: impl IntoIterator<Item = Revision>, | |||
|
55 | sample: &mut HashSet<Revision>, | |||
|
56 | parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, | |||
|
57 | quicksamplesize: Option<usize>, | |||
|
58 | ) -> Result<(), GraphError> { | |||
|
59 | let mut distances: HashMap<Revision, u32> = HashMap::new(); | |||
|
60 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); | |||
|
61 | let mut factor: u32 = 1; | |||
|
62 | let mut seen: HashSet<Revision> = HashSet::new(); | |||
|
63 | loop { | |||
|
64 | let current = match visit.pop_front() { | |||
|
65 | None => { | |||
|
66 | break; | |||
|
67 | } | |||
|
68 | Some(r) => r, | |||
|
69 | }; | |||
|
70 | if !seen.insert(current) { | |||
|
71 | continue; | |||
|
72 | } | |||
|
73 | ||||
|
74 | let d = *distances.entry(current).or_insert(1); | |||
|
75 | if d > factor { | |||
|
76 | factor *= 2; | |||
|
77 | } | |||
|
78 | if d == factor { | |||
|
79 | sample.insert(current); | |||
|
80 | if let Some(sz) = quicksamplesize { | |||
|
81 | if sample.len() >= sz { | |||
|
82 | return Ok(()); | |||
|
83 | } | |||
|
84 | } | |||
|
85 | } | |||
|
86 | for &p in &parentsfn(current)? { | |||
|
87 | if p == NULL_REVISION { | |||
|
88 | continue; | |||
|
89 | } | |||
|
90 | if let Some(revs) = revs { | |||
|
91 | if !revs.contains(&p) { | |||
|
92 | continue; | |||
|
93 | } | |||
|
94 | } | |||
|
95 | distances.entry(p).or_insert(d + 1); | |||
|
96 | visit.push_back(p); | |||
|
97 | } | |||
|
98 | } | |||
|
99 | Ok(()) | |||
|
100 | } | |||
|
101 | ||||
30 | impl<G: Graph + Clone> PartialDiscovery<G> { |
|
102 | impl<G: Graph + Clone> PartialDiscovery<G> { | |
31 | /// Create a PartialDiscovery object, with the intent |
|
103 | /// Create a PartialDiscovery object, with the intent | |
32 | /// of comparing our `::<target_heads>` revset to the contents of another |
|
104 | /// of comparing our `::<target_heads>` revset to the contents of another | |
@@ -39,15 +111,47 impl<G: Graph + Clone> PartialDiscovery< | |||||
39 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait |
|
111 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait | |
40 | /// object since we'll keep it in the meanwhile |
|
112 | /// object since we'll keep it in the meanwhile | |
41 | pub fn new(graph: G, target_heads: Vec<Revision>) -> Self { |
|
113 | pub fn new(graph: G, target_heads: Vec<Revision>) -> Self { | |
|
114 | let mut seed: [u8; 16] = [0; 16]; | |||
|
115 | thread_rng().fill_bytes(&mut seed); | |||
|
116 | Self::new_with_seed(graph, target_heads, seed) | |||
|
117 | } | |||
|
118 | ||||
|
119 | pub fn new_with_seed( | |||
|
120 | graph: G, | |||
|
121 | target_heads: Vec<Revision>, | |||
|
122 | seed: [u8; 16], | |||
|
123 | ) -> Self { | |||
42 | PartialDiscovery { |
|
124 | PartialDiscovery { | |
43 | undecided: None, |
|
125 | undecided: None, | |
44 | target_heads: Some(target_heads), |
|
126 | target_heads: Some(target_heads), | |
45 | graph: graph.clone(), |
|
127 | graph: graph.clone(), | |
46 | common: MissingAncestors::new(graph, vec![]), |
|
128 | common: MissingAncestors::new(graph, vec![]), | |
47 | missing: HashSet::new(), |
|
129 | missing: HashSet::new(), | |
|
130 | rng: Rng::from_seed(seed), | |||
48 | } |
|
131 | } | |
49 | } |
|
132 | } | |
50 |
|
133 | |||
|
134 | /// Extract at most `size` random elements from sample and return them | |||
|
135 | /// as a vector | |||
|
136 | fn limit_sample( | |||
|
137 | &mut self, | |||
|
138 | mut sample: Vec<Revision>, | |||
|
139 | size: usize, | |||
|
140 | ) -> Vec<Revision> { | |||
|
141 | let sample_len = sample.len(); | |||
|
142 | if sample_len <= size { | |||
|
143 | return sample; | |||
|
144 | } | |||
|
145 | let rng = &mut self.rng; | |||
|
146 | let dropped_size = sample_len - size; | |||
|
147 | let limited_slice = if size < dropped_size { | |||
|
148 | sample.partial_shuffle(rng, size).0 | |||
|
149 | } else { | |||
|
150 | sample.partial_shuffle(rng, dropped_size).1 | |||
|
151 | }; | |||
|
152 | limited_slice.to_owned() | |||
|
153 | } | |||
|
154 | ||||
51 | /// Register revisions known as being common |
|
155 | /// Register revisions known as being common | |
52 | pub fn add_common_revisions( |
|
156 | pub fn add_common_revisions( | |
53 | &mut self, |
|
157 | &mut self, | |
@@ -130,6 +234,32 impl<G: Graph + Clone> PartialDiscovery< | |||||
130 | undecided: self.undecided.as_ref().map(|s| s.len()), |
|
234 | undecided: self.undecided.as_ref().map(|s| s.len()), | |
131 | } |
|
235 | } | |
132 | } |
|
236 | } | |
|
237 | ||||
|
238 | pub fn take_quick_sample( | |||
|
239 | &mut self, | |||
|
240 | headrevs: impl IntoIterator<Item = Revision>, | |||
|
241 | size: usize, | |||
|
242 | ) -> Result<Vec<Revision>, GraphError> { | |||
|
243 | self.ensure_undecided()?; | |||
|
244 | let mut sample = { | |||
|
245 | let undecided = self.undecided.as_ref().unwrap(); | |||
|
246 | if undecided.len() <= size { | |||
|
247 | return Ok(undecided.iter().cloned().collect()); | |||
|
248 | } | |||
|
249 | dagops::heads(&self.graph, undecided.iter())? | |||
|
250 | }; | |||
|
251 | if sample.len() >= size { | |||
|
252 | return Ok(self.limit_sample(sample.into_iter().collect(), size)); | |||
|
253 | } | |||
|
254 | update_sample( | |||
|
255 | None, | |||
|
256 | headrevs, | |||
|
257 | &mut sample, | |||
|
258 | |r| self.graph.parents(r), | |||
|
259 | Some(size), | |||
|
260 | )?; | |||
|
261 | Ok(sample.into_iter().collect()) | |||
|
262 | } | |||
133 | } |
|
263 | } | |
134 |
|
264 | |||
135 | #[cfg(test)] |
|
265 | #[cfg(test)] | |
@@ -138,8 +268,21 mod tests { | |||||
138 | use crate::testing::SampleGraph; |
|
268 | use crate::testing::SampleGraph; | |
139 |
|
269 | |||
140 | /// A PartialDiscovery as for pushing all the heads of `SampleGraph` |
|
270 | /// A PartialDiscovery as for pushing all the heads of `SampleGraph` | |
|
271 | /// | |||
|
272 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |||
141 | fn full_disco() -> PartialDiscovery<SampleGraph> { |
|
273 | fn full_disco() -> PartialDiscovery<SampleGraph> { | |
142 | PartialDiscovery::new(SampleGraph, vec![10, 11, 12, 13]) |
|
274 | PartialDiscovery::new_with_seed( | |
|
275 | SampleGraph, | |||
|
276 | vec![10, 11, 12, 13], | |||
|
277 | [0; 16], | |||
|
278 | ) | |||
|
279 | } | |||
|
280 | ||||
|
281 | /// A PartialDiscovery as for pushing the 12 head of `SampleGraph` | |||
|
282 | /// | |||
|
283 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |||
|
284 | fn disco12() -> PartialDiscovery<SampleGraph> { | |||
|
285 | PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16]) | |||
143 | } |
|
286 | } | |
144 |
|
287 | |||
145 | fn sorted_undecided( |
|
288 | fn sorted_undecided( | |
@@ -206,4 +349,45 mod tests { | |||||
206 | assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]); |
|
349 | assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]); | |
207 | Ok(()) |
|
350 | Ok(()) | |
208 | } |
|
351 | } | |
|
352 | ||||
|
353 | #[test] | |||
|
354 | fn test_limit_sample_no_need_to() { | |||
|
355 | let sample = vec![1, 2, 3, 4]; | |||
|
356 | assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]); | |||
209 | } |
|
357 | } | |
|
358 | ||||
|
359 | #[test] | |||
|
360 | fn test_limit_sample_less_than_half() { | |||
|
361 | assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]); | |||
|
362 | } | |||
|
363 | ||||
|
364 | #[test] | |||
|
365 | fn test_limit_sample_more_than_half() { | |||
|
366 | assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]); | |||
|
367 | } | |||
|
368 | ||||
|
369 | #[test] | |||
|
370 | fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { | |||
|
371 | let mut disco = full_disco(); | |||
|
372 | disco.undecided = Some((1..=13).collect()); | |||
|
373 | ||||
|
374 | let mut sample_vec = disco.take_quick_sample(vec![], 4)?; | |||
|
375 | sample_vec.sort(); | |||
|
376 | assert_eq!(sample_vec, vec![10, 11, 12, 13]); | |||
|
377 | Ok(()) | |||
|
378 | } | |||
|
379 | ||||
|
380 | #[test] | |||
|
381 | fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> { | |||
|
382 | let mut disco = disco12(); | |||
|
383 | disco.ensure_undecided()?; | |||
|
384 | ||||
|
385 | let mut sample_vec = disco.take_quick_sample(vec![12], 4)?; | |||
|
386 | sample_vec.sort(); | |||
|
387 | // r12's only parent is r9, whose unique grand-parent through the | |||
|
388 | // diamond shape is r4. This ends there because the distance from r4 | |||
|
389 | // to the root is only 3. | |||
|
390 | assert_eq!(sample_vec, vec![4, 9, 12]); | |||
|
391 | Ok(()) | |||
|
392 | } | |||
|
393 | } |
General Comments 0
You need to be logged in to leave comments.
Login now