##// END OF EJS Templates
rust-discovery: core implementation for take_quick_sample()...
Georges Racinet -
r42965:388622cb default
parent child Browse files
Show More
@@ -1,19 +1,17
1 1 [package]
2 2 name = "hg-core"
3 3 version = "0.1.0"
4 4 authors = ["Georges Racinet <gracinet@anybox.fr>"]
5 5 description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
6 6 edition = "2018"
7 7
8 8 [lib]
9 9 name = "hg"
10 10
11 [dev-dependencies]
12 rand = "*"
13 rand_pcg = "*"
14
15 11 [dependencies]
16 12 byteorder = "1.3.1"
17 13 lazy_static = "1.3.0"
18 14 memchr = "2.2.0"
15 rand = "> 0.6.4"
16 rand_pcg = "> 0.1.0"
19 17 regex = "^1.1"
@@ -1,209 +1,393
1 1 // discovery.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Discovery operations
9 9 //!
10 10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 11 //! `mercurial.setdiscovery`
12 12
13 use super::{Graph, GraphError, Revision};
13 extern crate rand;
14 extern crate rand_pcg;
15 use self::rand::seq::SliceRandom;
16 use self::rand::{thread_rng, RngCore, SeedableRng};
17 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 18 use crate::ancestors::MissingAncestors;
15 19 use crate::dagops;
16 use std::collections::HashSet;
20 use std::collections::{HashMap, HashSet, VecDeque};
21
22 type Rng = self::rand_pcg::Pcg32;
17 23
18 24 pub struct PartialDiscovery<G: Graph + Clone> {
19 25 target_heads: Option<Vec<Revision>>,
20 26 graph: G, // plays the role of self._repo
21 27 common: MissingAncestors<G>,
22 28 undecided: Option<HashSet<Revision>>,
23 29 missing: HashSet<Revision>,
30 rng: Rng,
24 31 }
25 32
26 33 pub struct DiscoveryStats {
27 34 pub undecided: Option<usize>,
28 35 }
29 36
37 /// Update an existing sample to match the expected size
38 ///
39 /// The sample is updated with revisions exponentially distant from each
40 /// element of `heads`.
41 ///
42 /// If a target size is specified, the sampling will stop once this size is
43 /// reached. Otherwise sampling will happen until roots of the <revs> set are
44 /// reached.
45 ///
46 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
47 /// represented by `parentfn`
48 /// - `heads`: set of DAG head revs
49 /// - `sample`: a sample to update
50 /// - `parentfn`: a callable to resolve parents for a revision
51 /// - `quicksamplesize`: optional target size of the sample
52 fn update_sample(
53 revs: Option<&HashSet<Revision>>,
54 heads: impl IntoIterator<Item = Revision>,
55 sample: &mut HashSet<Revision>,
56 parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
57 quicksamplesize: Option<usize>,
58 ) -> Result<(), GraphError> {
59 let mut distances: HashMap<Revision, u32> = HashMap::new();
60 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
61 let mut factor: u32 = 1;
62 let mut seen: HashSet<Revision> = HashSet::new();
63 loop {
64 let current = match visit.pop_front() {
65 None => {
66 break;
67 }
68 Some(r) => r,
69 };
70 if !seen.insert(current) {
71 continue;
72 }
73
74 let d = *distances.entry(current).or_insert(1);
75 if d > factor {
76 factor *= 2;
77 }
78 if d == factor {
79 sample.insert(current);
80 if let Some(sz) = quicksamplesize {
81 if sample.len() >= sz {
82 return Ok(());
83 }
84 }
85 }
86 for &p in &parentsfn(current)? {
87 if p == NULL_REVISION {
88 continue;
89 }
90 if let Some(revs) = revs {
91 if !revs.contains(&p) {
92 continue;
93 }
94 }
95 distances.entry(p).or_insert(d + 1);
96 visit.push_back(p);
97 }
98 }
99 Ok(())
100 }
101
30 102 impl<G: Graph + Clone> PartialDiscovery<G> {
31 103 /// Create a PartialDiscovery object, with the intent
32 104 /// of comparing our `::<target_heads>` revset to the contents of another
33 105 /// repo.
34 106 ///
35 107 /// For now `target_heads` is passed as a vector, and will be used
36 108 /// at the first call to `ensure_undecided()`.
37 109 ///
38 110 /// If we want to make the signature more flexible,
39 111 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
40 112 /// object since we'll keep it in the meanwhile
41 113 pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
114 let mut seed: [u8; 16] = [0; 16];
115 thread_rng().fill_bytes(&mut seed);
116 Self::new_with_seed(graph, target_heads, seed)
117 }
118
119 pub fn new_with_seed(
120 graph: G,
121 target_heads: Vec<Revision>,
122 seed: [u8; 16],
123 ) -> Self {
42 124 PartialDiscovery {
43 125 undecided: None,
44 126 target_heads: Some(target_heads),
45 127 graph: graph.clone(),
46 128 common: MissingAncestors::new(graph, vec![]),
47 129 missing: HashSet::new(),
130 rng: Rng::from_seed(seed),
48 131 }
49 132 }
50 133
134 /// Extract at most `size` random elements from sample and return them
135 /// as a vector
136 fn limit_sample(
137 &mut self,
138 mut sample: Vec<Revision>,
139 size: usize,
140 ) -> Vec<Revision> {
141 let sample_len = sample.len();
142 if sample_len <= size {
143 return sample;
144 }
145 let rng = &mut self.rng;
146 let dropped_size = sample_len - size;
147 let limited_slice = if size < dropped_size {
148 sample.partial_shuffle(rng, size).0
149 } else {
150 sample.partial_shuffle(rng, dropped_size).1
151 };
152 limited_slice.to_owned()
153 }
154
51 155 /// Register revisions known as being common
52 156 pub fn add_common_revisions(
53 157 &mut self,
54 158 common: impl IntoIterator<Item = Revision>,
55 159 ) -> Result<(), GraphError> {
56 160 self.common.add_bases(common);
57 161 if let Some(ref mut undecided) = self.undecided {
58 162 self.common.remove_ancestors_from(undecided)?;
59 163 }
60 164 Ok(())
61 165 }
62 166
63 167 /// Register revisions known as being missing
64 168 pub fn add_missing_revisions(
65 169 &mut self,
66 170 missing: impl IntoIterator<Item = Revision>,
67 171 ) -> Result<(), GraphError> {
68 172 self.ensure_undecided()?;
69 173 let range = dagops::range(
70 174 &self.graph,
71 175 missing,
72 176 self.undecided.as_ref().unwrap().iter().cloned(),
73 177 )?;
74 178 let undecided_mut = self.undecided.as_mut().unwrap();
75 179 for missrev in range {
76 180 self.missing.insert(missrev);
77 181 undecided_mut.remove(&missrev);
78 182 }
79 183 Ok(())
80 184 }
81 185
82 186 /// Do we have any information about the peer?
83 187 pub fn has_info(&self) -> bool {
84 188 self.common.has_bases()
85 189 }
86 190
87 191 /// Did we acquire full knowledge of our Revisions that the peer has?
88 192 pub fn is_complete(&self) -> bool {
89 193 self.undecided.as_ref().map_or(false, |s| s.is_empty())
90 194 }
91 195
92 196 /// Return the heads of the currently known common set of revisions.
93 197 ///
94 198 /// If the discovery process is not complete (see `is_complete()`), the
95 199 /// caller must be aware that this is an intermediate state.
96 200 ///
97 201 /// On the other hand, if it is complete, then this is currently
98 202 /// the only way to retrieve the end results of the discovery process.
99 203 ///
100 204 /// We may introduce in the future an `into_common_heads` call that
101 205 /// would be more appropriate for normal Rust callers, dropping `self`
102 206 /// if it is complete.
103 207 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
104 208 self.common.bases_heads()
105 209 }
106 210
107 211 /// Force first computation of `self.undecided`
108 212 ///
109 213 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
110 214 /// unwrapped to get workable immutable or mutable references without
111 215 /// any panic.
112 216 ///
113 217 /// This is an imperative call instead of an access with added lazyness
114 218 /// to reduce easily the scope of mutable borrow for the caller,
115 219 /// compared to undecided(&'a mut self) -> &'a… that would keep it
116 220 /// as long as the resulting immutable one.
117 221 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
118 222 if self.undecided.is_some() {
119 223 return Ok(());
120 224 }
121 225 let tgt = self.target_heads.take().unwrap();
122 226 self.undecided =
123 227 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
124 228 Ok(())
125 229 }
126 230
127 231 /// Provide statistics about the current state of the discovery process
128 232 pub fn stats(&self) -> DiscoveryStats {
129 233 DiscoveryStats {
130 234 undecided: self.undecided.as_ref().map(|s| s.len()),
131 235 }
132 236 }
237
238 pub fn take_quick_sample(
239 &mut self,
240 headrevs: impl IntoIterator<Item = Revision>,
241 size: usize,
242 ) -> Result<Vec<Revision>, GraphError> {
243 self.ensure_undecided()?;
244 let mut sample = {
245 let undecided = self.undecided.as_ref().unwrap();
246 if undecided.len() <= size {
247 return Ok(undecided.iter().cloned().collect());
248 }
249 dagops::heads(&self.graph, undecided.iter())?
250 };
251 if sample.len() >= size {
252 return Ok(self.limit_sample(sample.into_iter().collect(), size));
253 }
254 update_sample(
255 None,
256 headrevs,
257 &mut sample,
258 |r| self.graph.parents(r),
259 Some(size),
260 )?;
261 Ok(sample.into_iter().collect())
262 }
133 263 }
134 264
135 265 #[cfg(test)]
136 266 mod tests {
137 267 use super::*;
138 268 use crate::testing::SampleGraph;
139 269
140 270 /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
271 ///
272 /// To avoid actual randomness in tests, we give it a fixed random seed.
141 273 fn full_disco() -> PartialDiscovery<SampleGraph> {
142 PartialDiscovery::new(SampleGraph, vec![10, 11, 12, 13])
274 PartialDiscovery::new_with_seed(
275 SampleGraph,
276 vec![10, 11, 12, 13],
277 [0; 16],
278 )
279 }
280
281 /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
282 ///
283 /// To avoid actual randomness in tests, we give it a fixed random seed.
284 fn disco12() -> PartialDiscovery<SampleGraph> {
285 PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16])
143 286 }
144 287
145 288 fn sorted_undecided(
146 289 disco: &PartialDiscovery<SampleGraph>,
147 290 ) -> Vec<Revision> {
148 291 let mut as_vec: Vec<Revision> =
149 292 disco.undecided.as_ref().unwrap().iter().cloned().collect();
150 293 as_vec.sort();
151 294 as_vec
152 295 }
153 296
154 297 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
155 298 let mut as_vec: Vec<Revision> =
156 299 disco.missing.iter().cloned().collect();
157 300 as_vec.sort();
158 301 as_vec
159 302 }
160 303
161 304 fn sorted_common_heads(
162 305 disco: &PartialDiscovery<SampleGraph>,
163 306 ) -> Result<Vec<Revision>, GraphError> {
164 307 let mut as_vec: Vec<Revision> =
165 308 disco.common_heads()?.iter().cloned().collect();
166 309 as_vec.sort();
167 310 Ok(as_vec)
168 311 }
169 312
170 313 #[test]
171 314 fn test_add_common_get_undecided() -> Result<(), GraphError> {
172 315 let mut disco = full_disco();
173 316 assert_eq!(disco.undecided, None);
174 317 assert!(!disco.has_info());
175 318 assert_eq!(disco.stats().undecided, None);
176 319
177 320 disco.add_common_revisions(vec![11, 12])?;
178 321 assert!(disco.has_info());
179 322 assert!(!disco.is_complete());
180 323 assert!(disco.missing.is_empty());
181 324
182 325 // add_common_revisions did not trigger a premature computation
183 326 // of `undecided`, let's check that and ask for them
184 327 assert_eq!(disco.undecided, None);
185 328 disco.ensure_undecided()?;
186 329 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
187 330 assert_eq!(disco.stats().undecided, Some(4));
188 331 Ok(())
189 332 }
190 333
191 334 /// in this test, we pretend that our peer misses exactly (8+10)::
192 335 /// and we're comparing all our repo to it (as in a bare push)
193 336 #[test]
194 337 fn test_discovery() -> Result<(), GraphError> {
195 338 let mut disco = full_disco();
196 339 disco.add_common_revisions(vec![11, 12])?;
197 340 disco.add_missing_revisions(vec![8, 10])?;
198 341 assert_eq!(sorted_undecided(&disco), vec![5]);
199 342 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
200 343 assert!(!disco.is_complete());
201 344
202 345 disco.add_common_revisions(vec![5])?;
203 346 assert_eq!(sorted_undecided(&disco), vec![]);
204 347 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
205 348 assert!(disco.is_complete());
206 349 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
207 350 Ok(())
208 351 }
352
353 #[test]
354 fn test_limit_sample_no_need_to() {
355 let sample = vec![1, 2, 3, 4];
356 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
357 }
358
359 #[test]
360 fn test_limit_sample_less_than_half() {
361 assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]);
362 }
363
364 #[test]
365 fn test_limit_sample_more_than_half() {
366 assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]);
367 }
368
369 #[test]
370 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
371 let mut disco = full_disco();
372 disco.undecided = Some((1..=13).collect());
373
374 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
375 sample_vec.sort();
376 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
377 Ok(())
378 }
379
380 #[test]
381 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
382 let mut disco = disco12();
383 disco.ensure_undecided()?;
384
385 let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
386 sample_vec.sort();
387 // r12's only parent is r9, whose unique grand-parent through the
388 // diamond shape is r4. This ends there because the distance from r4
389 // to the root is only 3.
390 assert_eq!(sample_vec, vec![4, 9, 12]);
391 Ok(())
392 }
209 393 }
General Comments 0
You need to be logged in to leave comments. Login now