##// END OF EJS Templates
rust-discovery: core implementation for take_quick_sample()...
Georges Racinet -
r42965:388622cb default
parent child Browse files
Show More
@@ -1,19 +1,17
1 [package]
1 [package]
2 name = "hg-core"
2 name = "hg-core"
3 version = "0.1.0"
3 version = "0.1.0"
4 authors = ["Georges Racinet <gracinet@anybox.fr>"]
4 authors = ["Georges Racinet <gracinet@anybox.fr>"]
5 description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
5 description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
6 edition = "2018"
6 edition = "2018"
7
7
8 [lib]
8 [lib]
9 name = "hg"
9 name = "hg"
10
10
11 [dev-dependencies]
12 rand = "*"
13 rand_pcg = "*"
14
15 [dependencies]
11 [dependencies]
16 byteorder = "1.3.1"
12 byteorder = "1.3.1"
17 lazy_static = "1.3.0"
13 lazy_static = "1.3.0"
18 memchr = "2.2.0"
14 memchr = "2.2.0"
15 rand = "> 0.6.4"
16 rand_pcg = "> 0.1.0"
19 regex = "^1.1"
17 regex = "^1.1"
@@ -1,209 +1,393
1 // discovery.rs
1 // discovery.rs
2 //
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Discovery operations
8 //! Discovery operations
9 //!
9 //!
10 //! This is a Rust counterpart to the `partialdiscovery` class of
10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 //! `mercurial.setdiscovery`
11 //! `mercurial.setdiscovery`
12
12
13 use super::{Graph, GraphError, Revision};
13 extern crate rand;
14 extern crate rand_pcg;
15 use self::rand::seq::SliceRandom;
16 use self::rand::{thread_rng, RngCore, SeedableRng};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 use crate::ancestors::MissingAncestors;
18 use crate::ancestors::MissingAncestors;
15 use crate::dagops;
19 use crate::dagops;
16 use std::collections::HashSet;
20 use std::collections::{HashMap, HashSet, VecDeque};
21
22 type Rng = self::rand_pcg::Pcg32;
17
23
18 pub struct PartialDiscovery<G: Graph + Clone> {
24 pub struct PartialDiscovery<G: Graph + Clone> {
19 target_heads: Option<Vec<Revision>>,
25 target_heads: Option<Vec<Revision>>,
20 graph: G, // plays the role of self._repo
26 graph: G, // plays the role of self._repo
21 common: MissingAncestors<G>,
27 common: MissingAncestors<G>,
22 undecided: Option<HashSet<Revision>>,
28 undecided: Option<HashSet<Revision>>,
23 missing: HashSet<Revision>,
29 missing: HashSet<Revision>,
30 rng: Rng,
24 }
31 }
25
32
26 pub struct DiscoveryStats {
33 pub struct DiscoveryStats {
27 pub undecided: Option<usize>,
34 pub undecided: Option<usize>,
28 }
35 }
29
36
37 /// Update an existing sample to match the expected size
38 ///
39 /// The sample is updated with revisions exponentially distant from each
40 /// element of `heads`.
41 ///
42 /// If a target size is specified, the sampling will stop once this size is
43 /// reached. Otherwise sampling will happen until roots of the <revs> set are
44 /// reached.
45 ///
46 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
47 /// represented by `parentfn`
48 /// - `heads`: set of DAG head revs
49 /// - `sample`: a sample to update
50 /// - `parentfn`: a callable to resolve parents for a revision
51 /// - `quicksamplesize`: optional target size of the sample
52 fn update_sample(
53 revs: Option<&HashSet<Revision>>,
54 heads: impl IntoIterator<Item = Revision>,
55 sample: &mut HashSet<Revision>,
56 parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
57 quicksamplesize: Option<usize>,
58 ) -> Result<(), GraphError> {
59 let mut distances: HashMap<Revision, u32> = HashMap::new();
60 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
61 let mut factor: u32 = 1;
62 let mut seen: HashSet<Revision> = HashSet::new();
63 loop {
64 let current = match visit.pop_front() {
65 None => {
66 break;
67 }
68 Some(r) => r,
69 };
70 if !seen.insert(current) {
71 continue;
72 }
73
74 let d = *distances.entry(current).or_insert(1);
75 if d > factor {
76 factor *= 2;
77 }
78 if d == factor {
79 sample.insert(current);
80 if let Some(sz) = quicksamplesize {
81 if sample.len() >= sz {
82 return Ok(());
83 }
84 }
85 }
86 for &p in &parentsfn(current)? {
87 if p == NULL_REVISION {
88 continue;
89 }
90 if let Some(revs) = revs {
91 if !revs.contains(&p) {
92 continue;
93 }
94 }
95 distances.entry(p).or_insert(d + 1);
96 visit.push_back(p);
97 }
98 }
99 Ok(())
100 }
101
30 impl<G: Graph + Clone> PartialDiscovery<G> {
102 impl<G: Graph + Clone> PartialDiscovery<G> {
31 /// Create a PartialDiscovery object, with the intent
103 /// Create a PartialDiscovery object, with the intent
32 /// of comparing our `::<target_heads>` revset to the contents of another
104 /// of comparing our `::<target_heads>` revset to the contents of another
33 /// repo.
105 /// repo.
34 ///
106 ///
35 /// For now `target_heads` is passed as a vector, and will be used
107 /// For now `target_heads` is passed as a vector, and will be used
36 /// at the first call to `ensure_undecided()`.
108 /// at the first call to `ensure_undecided()`.
37 ///
109 ///
38 /// If we want to make the signature more flexible,
110 /// If we want to make the signature more flexible,
39 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
111 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
40 /// object since we'll keep it in the meanwhile
112 /// object since we'll keep it in the meanwhile
41 pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
113 pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
114 let mut seed: [u8; 16] = [0; 16];
115 thread_rng().fill_bytes(&mut seed);
116 Self::new_with_seed(graph, target_heads, seed)
117 }
118
119 pub fn new_with_seed(
120 graph: G,
121 target_heads: Vec<Revision>,
122 seed: [u8; 16],
123 ) -> Self {
42 PartialDiscovery {
124 PartialDiscovery {
43 undecided: None,
125 undecided: None,
44 target_heads: Some(target_heads),
126 target_heads: Some(target_heads),
45 graph: graph.clone(),
127 graph: graph.clone(),
46 common: MissingAncestors::new(graph, vec![]),
128 common: MissingAncestors::new(graph, vec![]),
47 missing: HashSet::new(),
129 missing: HashSet::new(),
130 rng: Rng::from_seed(seed),
48 }
131 }
49 }
132 }
50
133
134 /// Extract at most `size` random elements from sample and return them
135 /// as a vector
136 fn limit_sample(
137 &mut self,
138 mut sample: Vec<Revision>,
139 size: usize,
140 ) -> Vec<Revision> {
141 let sample_len = sample.len();
142 if sample_len <= size {
143 return sample;
144 }
145 let rng = &mut self.rng;
146 let dropped_size = sample_len - size;
147 let limited_slice = if size < dropped_size {
148 sample.partial_shuffle(rng, size).0
149 } else {
150 sample.partial_shuffle(rng, dropped_size).1
151 };
152 limited_slice.to_owned()
153 }
154
51 /// Register revisions known as being common
155 /// Register revisions known as being common
52 pub fn add_common_revisions(
156 pub fn add_common_revisions(
53 &mut self,
157 &mut self,
54 common: impl IntoIterator<Item = Revision>,
158 common: impl IntoIterator<Item = Revision>,
55 ) -> Result<(), GraphError> {
159 ) -> Result<(), GraphError> {
56 self.common.add_bases(common);
160 self.common.add_bases(common);
57 if let Some(ref mut undecided) = self.undecided {
161 if let Some(ref mut undecided) = self.undecided {
58 self.common.remove_ancestors_from(undecided)?;
162 self.common.remove_ancestors_from(undecided)?;
59 }
163 }
60 Ok(())
164 Ok(())
61 }
165 }
62
166
63 /// Register revisions known as being missing
167 /// Register revisions known as being missing
64 pub fn add_missing_revisions(
168 pub fn add_missing_revisions(
65 &mut self,
169 &mut self,
66 missing: impl IntoIterator<Item = Revision>,
170 missing: impl IntoIterator<Item = Revision>,
67 ) -> Result<(), GraphError> {
171 ) -> Result<(), GraphError> {
68 self.ensure_undecided()?;
172 self.ensure_undecided()?;
69 let range = dagops::range(
173 let range = dagops::range(
70 &self.graph,
174 &self.graph,
71 missing,
175 missing,
72 self.undecided.as_ref().unwrap().iter().cloned(),
176 self.undecided.as_ref().unwrap().iter().cloned(),
73 )?;
177 )?;
74 let undecided_mut = self.undecided.as_mut().unwrap();
178 let undecided_mut = self.undecided.as_mut().unwrap();
75 for missrev in range {
179 for missrev in range {
76 self.missing.insert(missrev);
180 self.missing.insert(missrev);
77 undecided_mut.remove(&missrev);
181 undecided_mut.remove(&missrev);
78 }
182 }
79 Ok(())
183 Ok(())
80 }
184 }
81
185
82 /// Do we have any information about the peer?
186 /// Do we have any information about the peer?
83 pub fn has_info(&self) -> bool {
187 pub fn has_info(&self) -> bool {
84 self.common.has_bases()
188 self.common.has_bases()
85 }
189 }
86
190
87 /// Did we acquire full knowledge of our Revisions that the peer has?
191 /// Did we acquire full knowledge of our Revisions that the peer has?
88 pub fn is_complete(&self) -> bool {
192 pub fn is_complete(&self) -> bool {
89 self.undecided.as_ref().map_or(false, |s| s.is_empty())
193 self.undecided.as_ref().map_or(false, |s| s.is_empty())
90 }
194 }
91
195
92 /// Return the heads of the currently known common set of revisions.
196 /// Return the heads of the currently known common set of revisions.
93 ///
197 ///
94 /// If the discovery process is not complete (see `is_complete()`), the
198 /// If the discovery process is not complete (see `is_complete()`), the
95 /// caller must be aware that this is an intermediate state.
199 /// caller must be aware that this is an intermediate state.
96 ///
200 ///
97 /// On the other hand, if it is complete, then this is currently
201 /// On the other hand, if it is complete, then this is currently
98 /// the only way to retrieve the end results of the discovery process.
202 /// the only way to retrieve the end results of the discovery process.
99 ///
203 ///
100 /// We may introduce in the future an `into_common_heads` call that
204 /// We may introduce in the future an `into_common_heads` call that
101 /// would be more appropriate for normal Rust callers, dropping `self`
205 /// would be more appropriate for normal Rust callers, dropping `self`
102 /// if it is complete.
206 /// if it is complete.
103 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
207 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
104 self.common.bases_heads()
208 self.common.bases_heads()
105 }
209 }
106
210
107 /// Force first computation of `self.undecided`
211 /// Force first computation of `self.undecided`
108 ///
212 ///
109 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
213 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
110 /// unwrapped to get workable immutable or mutable references without
214 /// unwrapped to get workable immutable or mutable references without
111 /// any panic.
215 /// any panic.
112 ///
216 ///
113 /// This is an imperative call instead of an access with added lazyness
217 /// This is an imperative call instead of an access with added lazyness
114 /// to reduce easily the scope of mutable borrow for the caller,
218 /// to reduce easily the scope of mutable borrow for the caller,
115 /// compared to undecided(&'a mut self) -> &'a… that would keep it
219 /// compared to undecided(&'a mut self) -> &'a… that would keep it
116 /// as long as the resulting immutable one.
220 /// as long as the resulting immutable one.
117 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
221 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
118 if self.undecided.is_some() {
222 if self.undecided.is_some() {
119 return Ok(());
223 return Ok(());
120 }
224 }
121 let tgt = self.target_heads.take().unwrap();
225 let tgt = self.target_heads.take().unwrap();
122 self.undecided =
226 self.undecided =
123 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
227 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
124 Ok(())
228 Ok(())
125 }
229 }
126
230
127 /// Provide statistics about the current state of the discovery process
231 /// Provide statistics about the current state of the discovery process
128 pub fn stats(&self) -> DiscoveryStats {
232 pub fn stats(&self) -> DiscoveryStats {
129 DiscoveryStats {
233 DiscoveryStats {
130 undecided: self.undecided.as_ref().map(|s| s.len()),
234 undecided: self.undecided.as_ref().map(|s| s.len()),
131 }
235 }
132 }
236 }
237
238 pub fn take_quick_sample(
239 &mut self,
240 headrevs: impl IntoIterator<Item = Revision>,
241 size: usize,
242 ) -> Result<Vec<Revision>, GraphError> {
243 self.ensure_undecided()?;
244 let mut sample = {
245 let undecided = self.undecided.as_ref().unwrap();
246 if undecided.len() <= size {
247 return Ok(undecided.iter().cloned().collect());
248 }
249 dagops::heads(&self.graph, undecided.iter())?
250 };
251 if sample.len() >= size {
252 return Ok(self.limit_sample(sample.into_iter().collect(), size));
253 }
254 update_sample(
255 None,
256 headrevs,
257 &mut sample,
258 |r| self.graph.parents(r),
259 Some(size),
260 )?;
261 Ok(sample.into_iter().collect())
262 }
133 }
263 }
134
264
135 #[cfg(test)]
265 #[cfg(test)]
136 mod tests {
266 mod tests {
137 use super::*;
267 use super::*;
138 use crate::testing::SampleGraph;
268 use crate::testing::SampleGraph;
139
269
140 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
270 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
271 ///
272 /// To avoid actual randomness in tests, we give it a fixed random seed.
141 fn full_disco() -> PartialDiscovery<SampleGraph> {
273 fn full_disco() -> PartialDiscovery<SampleGraph> {
142 PartialDiscovery::new(SampleGraph, vec![10, 11, 12, 13])
274 PartialDiscovery::new_with_seed(
275 SampleGraph,
276 vec![10, 11, 12, 13],
277 [0; 16],
278 )
279 }
280
281 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
282 ///
283 /// To avoid actual randomness in tests, we give it a fixed random seed.
284 fn disco12() -> PartialDiscovery<SampleGraph> {
285 PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16])
143 }
286 }
144
287
145 fn sorted_undecided(
288 fn sorted_undecided(
146 disco: &PartialDiscovery<SampleGraph>,
289 disco: &PartialDiscovery<SampleGraph>,
147 ) -> Vec<Revision> {
290 ) -> Vec<Revision> {
148 let mut as_vec: Vec<Revision> =
291 let mut as_vec: Vec<Revision> =
149 disco.undecided.as_ref().unwrap().iter().cloned().collect();
292 disco.undecided.as_ref().unwrap().iter().cloned().collect();
150 as_vec.sort();
293 as_vec.sort();
151 as_vec
294 as_vec
152 }
295 }
153
296
154 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
297 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
155 let mut as_vec: Vec<Revision> =
298 let mut as_vec: Vec<Revision> =
156 disco.missing.iter().cloned().collect();
299 disco.missing.iter().cloned().collect();
157 as_vec.sort();
300 as_vec.sort();
158 as_vec
301 as_vec
159 }
302 }
160
303
161 fn sorted_common_heads(
304 fn sorted_common_heads(
162 disco: &PartialDiscovery<SampleGraph>,
305 disco: &PartialDiscovery<SampleGraph>,
163 ) -> Result<Vec<Revision>, GraphError> {
306 ) -> Result<Vec<Revision>, GraphError> {
164 let mut as_vec: Vec<Revision> =
307 let mut as_vec: Vec<Revision> =
165 disco.common_heads()?.iter().cloned().collect();
308 disco.common_heads()?.iter().cloned().collect();
166 as_vec.sort();
309 as_vec.sort();
167 Ok(as_vec)
310 Ok(as_vec)
168 }
311 }
169
312
170 #[test]
313 #[test]
171 fn test_add_common_get_undecided() -> Result<(), GraphError> {
314 fn test_add_common_get_undecided() -> Result<(), GraphError> {
172 let mut disco = full_disco();
315 let mut disco = full_disco();
173 assert_eq!(disco.undecided, None);
316 assert_eq!(disco.undecided, None);
174 assert!(!disco.has_info());
317 assert!(!disco.has_info());
175 assert_eq!(disco.stats().undecided, None);
318 assert_eq!(disco.stats().undecided, None);
176
319
177 disco.add_common_revisions(vec![11, 12])?;
320 disco.add_common_revisions(vec![11, 12])?;
178 assert!(disco.has_info());
321 assert!(disco.has_info());
179 assert!(!disco.is_complete());
322 assert!(!disco.is_complete());
180 assert!(disco.missing.is_empty());
323 assert!(disco.missing.is_empty());
181
324
182 // add_common_revisions did not trigger a premature computation
325 // add_common_revisions did not trigger a premature computation
183 // of `undecided`, let's check that and ask for them
326 // of `undecided`, let's check that and ask for them
184 assert_eq!(disco.undecided, None);
327 assert_eq!(disco.undecided, None);
185 disco.ensure_undecided()?;
328 disco.ensure_undecided()?;
186 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
329 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
187 assert_eq!(disco.stats().undecided, Some(4));
330 assert_eq!(disco.stats().undecided, Some(4));
188 Ok(())
331 Ok(())
189 }
332 }
190
333
191 /// in this test, we pretend that our peer misses exactly (8+10)::
334 /// in this test, we pretend that our peer misses exactly (8+10)::
192 /// and we're comparing all our repo to it (as in a bare push)
335 /// and we're comparing all our repo to it (as in a bare push)
193 #[test]
336 #[test]
194 fn test_discovery() -> Result<(), GraphError> {
337 fn test_discovery() -> Result<(), GraphError> {
195 let mut disco = full_disco();
338 let mut disco = full_disco();
196 disco.add_common_revisions(vec![11, 12])?;
339 disco.add_common_revisions(vec![11, 12])?;
197 disco.add_missing_revisions(vec![8, 10])?;
340 disco.add_missing_revisions(vec![8, 10])?;
198 assert_eq!(sorted_undecided(&disco), vec![5]);
341 assert_eq!(sorted_undecided(&disco), vec![5]);
199 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
342 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
200 assert!(!disco.is_complete());
343 assert!(!disco.is_complete());
201
344
202 disco.add_common_revisions(vec![5])?;
345 disco.add_common_revisions(vec![5])?;
203 assert_eq!(sorted_undecided(&disco), vec![]);
346 assert_eq!(sorted_undecided(&disco), vec![]);
204 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
347 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
205 assert!(disco.is_complete());
348 assert!(disco.is_complete());
206 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
349 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
207 Ok(())
350 Ok(())
208 }
351 }
352
353 #[test]
354 fn test_limit_sample_no_need_to() {
355 let sample = vec![1, 2, 3, 4];
356 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
209 }
357 }
358
359 #[test]
360 fn test_limit_sample_less_than_half() {
361 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
362 }
363
364 #[test]
365 fn test_limit_sample_more_than_half() {
366 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
367 }
368
369 #[test]
370 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
371 let mut disco = full_disco();
372 disco.undecided = Some((1..=13).collect());
373
374 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
375 sample_vec.sort();
376 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
377 Ok(())
378 }
379
380 #[test]
381 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
382 let mut disco = disco12();
383 disco.ensure_undecided()?;
384
385 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
386 sample_vec.sort();
387 // r12's only parent is r9, whose unique grand-parent through the
388 // diamond shape is r4. This ends there because the distance from r4
389 // to the root is only 3.
390 assert_eq!(sample_vec, vec![4, 9, 12]);
391 Ok(())
392 }
393 }
General Comments 0
You need to be logged in to leave comments. Login now