##// END OF EJS Templates
rust-discovery: core implementation for take_quick_sample()...
Georges Racinet -
r42965:388622cb default
parent child Browse files
Show More
@@ -8,12 +8,10 edition = "2018"
8 [lib]
8 [lib]
9 name = "hg"
9 name = "hg"
10
10
11 [dev-dependencies]
12 rand = "*"
13 rand_pcg = "*"
14
15 [dependencies]
11 [dependencies]
16 byteorder = "1.3.1"
12 byteorder = "1.3.1"
17 lazy_static = "1.3.0"
13 lazy_static = "1.3.0"
18 memchr = "2.2.0"
14 memchr = "2.2.0"
15 rand = "> 0.6.4"
16 rand_pcg = "> 0.1.0"
19 regex = "^1.1"
17 regex = "^1.1"
@@ -10,10 +10,16
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision};
13 extern crate rand;
14 extern crate rand_pcg;
15 use self::rand::seq::SliceRandom;
16 use self::rand::{thread_rng, RngCore, SeedableRng};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::ancestors::MissingAncestors;
18 use crate::ancestors::MissingAncestors;
15 use crate::dagops;
19 use crate::dagops;
16 use std::collections::HashSet;
20 use std::collections::{HashMap, HashSet, VecDeque};
21
22 type Rng = self::rand_pcg::Pcg32;
17
23
18 pub struct PartialDiscovery<G: Graph + Clone> {
24 pub struct PartialDiscovery<G: Graph + Clone> {
19 target_heads: Option<Vec<Revision>>,
25 target_heads: Option<Vec<Revision>>,
@@ -21,12 +27,78 pub struct PartialDiscovery<G: Graph + C
21 common: MissingAncestors<G>,
27 common: MissingAncestors<G>,
22 undecided: Option<HashSet<Revision>>,
28 undecided: Option<HashSet<Revision>>,
23 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
24 }
31 }
25
32
26 pub struct DiscoveryStats {
33 pub struct DiscoveryStats {
27 pub undecided: Option<usize>,
34 pub undecided: Option<usize>,
28 }
35 }
29
36
37 /// Update an existing sample to match the expected size
38 ///
39 /// The sample is updated with revisions exponentially distant from each
40 /// element of `heads`.
41 ///
42 /// If a target size is specified, the sampling will stop once this size is
43 /// reached. Otherwise sampling will happen until roots of the <revs> set are
44 /// reached.
45 ///
46 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
47 /// represented by `parentfn`
48 /// - `heads`: set of DAG head revs
49 /// - `sample`: a sample to update
50 /// - `parentfn`: a callable to resolve parents for a revision
51 /// - `quicksamplesize`: optional target size of the sample
52 fn update_sample(
53 revs: Option<&HashSet<Revision>>,
54 heads: impl IntoIterator<Item = Revision>,
55 sample: &mut HashSet<Revision>,
56 parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
57 quicksamplesize: Option<usize>,
58 ) -> Result<(), GraphError> {
59 let mut distances: HashMap<Revision, u32> = HashMap::new();
60 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
61 let mut factor: u32 = 1;
62 let mut seen: HashSet<Revision> = HashSet::new();
63 loop {
64 let current = match visit.pop_front() {
65 None => {
66 break;
67 }
68 Some(r) => r,
69 };
70 if !seen.insert(current) {
71 continue;
72 }
73
74 let d = *distances.entry(current).or_insert(1);
75 if d > factor {
76 factor *= 2;
77 }
78 if d == factor {
79 sample.insert(current);
80 if let Some(sz) = quicksamplesize {
81 if sample.len() >= sz {
82 return Ok(());
83 }
84 }
85 }
86 for &p in &parentsfn(current)? {
87 if p == NULL_REVISION {
88 continue;
89 }
90 if let Some(revs) = revs {
91 if !revs.contains(&p) {
92 continue;
93 }
94 }
95 distances.entry(p).or_insert(d + 1);
96 visit.push_back(p);
97 }
98 }
99 Ok(())
100 }
101
30 impl<G: Graph + Clone> PartialDiscovery<G> {
102 impl<G: Graph + Clone> PartialDiscovery<G> {
31 /// Create a PartialDiscovery object, with the intent
103 /// Create a PartialDiscovery object, with the intent
32 /// of comparing our `::<target_heads>` revset to the contents of another
104 /// of comparing our `::<target_heads>` revset to the contents of another
@@ -39,15 +111,47 impl<G: Graph + Clone> PartialDiscovery<
39 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
111 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
40 /// object since we'll keep it in the meanwhile
112 /// object since we'll keep it in the meanwhile
41 pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
113 pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
114 let mut seed: [u8; 16] = [0; 16];
115 thread_rng().fill_bytes(&mut seed);
116 Self::new_with_seed(graph, target_heads, seed)
117 }
118
119 pub fn new_with_seed(
120 graph: G,
121 target_heads: Vec<Revision>,
122 seed: [u8; 16],
123 ) -> Self {
42 PartialDiscovery {
124 PartialDiscovery {
43 undecided: None,
125 undecided: None,
44 target_heads: Some(target_heads),
126 target_heads: Some(target_heads),
45 graph: graph.clone(),
127 graph: graph.clone(),
46 common: MissingAncestors::new(graph, vec![]),
128 common: MissingAncestors::new(graph, vec![]),
47 missing: HashSet::new(),
129 missing: HashSet::new(),
130 rng: Rng::from_seed(seed),
48 }
131 }
49 }
132 }
50
133
134 /// Extract at most `size` random elements from sample and return them
135 /// as a vector
136 fn limit_sample(
137 &mut self,
138 mut sample: Vec<Revision>,
139 size: usize,
140 ) -> Vec<Revision> {
141 let sample_len = sample.len();
142 if sample_len <= size {
143 return sample;
144 }
145 let rng = &mut self.rng;
146 let dropped_size = sample_len - size;
147 let limited_slice = if size < dropped_size {
148 sample.partial_shuffle(rng, size).0
149 } else {
150 sample.partial_shuffle(rng, dropped_size).1
151 };
152 limited_slice.to_owned()
153 }
154
51 /// Register revisions known as being common
155 /// Register revisions known as being common
52 pub fn add_common_revisions(
156 pub fn add_common_revisions(
53 &mut self,
157 &mut self,
@@ -130,6 +234,32 impl<G: Graph + Clone> PartialDiscovery<
130 undecided: self.undecided.as_ref().map(|s| s.len()),
234 undecided: self.undecided.as_ref().map(|s| s.len()),
131 }
235 }
132 }
236 }
237
238 pub fn take_quick_sample(
239 &mut self,
240 headrevs: impl IntoIterator<Item = Revision>,
241 size: usize,
242 ) -> Result<Vec<Revision>, GraphError> {
243 self.ensure_undecided()?;
244 let mut sample = {
245 let undecided = self.undecided.as_ref().unwrap();
246 if undecided.len() <= size {
247 return Ok(undecided.iter().cloned().collect());
248 }
249 dagops::heads(&self.graph, undecided.iter())?
250 };
251 if sample.len() >= size {
252 return Ok(self.limit_sample(sample.into_iter().collect(), size));
253 }
254 update_sample(
255 None,
256 headrevs,
257 &mut sample,
258 |r| self.graph.parents(r),
259 Some(size),
260 )?;
261 Ok(sample.into_iter().collect())
262 }
133 }
263 }
134
264
135 #[cfg(test)]
265 #[cfg(test)]
@@ -138,8 +268,21 mod tests {
138 use crate::testing::SampleGraph;
268 use crate::testing::SampleGraph;
139
269
140 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
270 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
271 ///
272 /// To avoid actual randomness in tests, we give it a fixed random seed.
141 fn full_disco() -> PartialDiscovery<SampleGraph> {
273 fn full_disco() -> PartialDiscovery<SampleGraph> {
142 PartialDiscovery::new(SampleGraph, vec![10, 11, 12, 13])
274 PartialDiscovery::new_with_seed(
275 SampleGraph,
276 vec![10, 11, 12, 13],
277 [0; 16],
278 )
279 }
280
281 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
282 ///
283 /// To avoid actual randomness in tests, we give it a fixed random seed.
284 fn disco12() -> PartialDiscovery<SampleGraph> {
285 PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16])
143 }
286 }
144
287
145 fn sorted_undecided(
288 fn sorted_undecided(
@@ -206,4 +349,45 mod tests {
206 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
349 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
207 Ok(())
350 Ok(())
208 }
351 }
352
353 #[test]
354 fn test_limit_sample_no_need_to() {
355 let sample = vec![1, 2, 3, 4];
356 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
209 }
357 }
358
359 #[test]
360 fn test_limit_sample_less_than_half() {
361 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
362 }
363
364 #[test]
365 fn test_limit_sample_more_than_half() {
366 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
367 }
368
369 #[test]
370 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
371 let mut disco = full_disco();
372 disco.undecided = Some((1..=13).collect());
373
374 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
375 sample_vec.sort();
376 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
377 Ok(())
378 }
379
380 #[test]
381 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
382 let mut disco = disco12();
383 disco.ensure_undecided()?;
384
385 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
386 sample_vec.sort();
387 // r12's only parent is r9, whose unique grand-parent through the
388 // diamond shape is r4. This ends there because the distance from r4
389 // to the root is only 3.
390 assert_eq!(sample_vec, vec![4, 9, 12]);
391 Ok(())
392 }
393 }
General Comments 0
You need to be logged in to leave comments. Login now