Show More
@@ -1,694 +1,695 | |||
|
1 | 1 | // discovery.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Discovery operations |
|
9 | 9 | //! |
|
10 | 10 | //! This is a Rust counterpart to the `partialdiscovery` class of |
|
11 | 11 | //! `mercurial.setdiscovery` |
|
12 | 12 | |
|
13 | 13 | use super::{Graph, GraphError, Revision, NULL_REVISION}; |
|
14 | 14 | use crate::{ancestors::MissingAncestors, dagops, FastHashMap}; |
|
15 | 15 | use rand::seq::SliceRandom; |
|
16 | 16 | use rand::{thread_rng, RngCore, SeedableRng}; |
|
17 | 17 | use std::cmp::{max, min}; |
|
18 | 18 | use std::collections::{HashSet, VecDeque}; |
|
19 | 19 | |
|
20 | 20 | type Rng = rand_pcg::Pcg32; |
|
21 | type Seed = [u8; 16]; | |
|
21 | 22 | |
|
22 | 23 | pub struct PartialDiscovery<G: Graph + Clone> { |
|
23 | 24 | target_heads: Option<Vec<Revision>>, |
|
24 | 25 | graph: G, // plays the role of self._repo |
|
25 | 26 | common: MissingAncestors<G>, |
|
26 | 27 | undecided: Option<HashSet<Revision>>, |
|
27 | 28 | children_cache: Option<FastHashMap<Revision, Vec<Revision>>>, |
|
28 | 29 | missing: HashSet<Revision>, |
|
29 | 30 | rng: Rng, |
|
30 | 31 | respect_size: bool, |
|
31 | 32 | randomize: bool, |
|
32 | 33 | } |
|
33 | 34 | |
|
34 | 35 | pub struct DiscoveryStats { |
|
35 | 36 | pub undecided: Option<usize>, |
|
36 | 37 | } |
|
37 | 38 | |
|
38 | 39 | /// Update an existing sample to match the expected size |
|
39 | 40 | /// |
|
40 | 41 | /// The sample is updated with revisions exponentially distant from each |
|
41 | 42 | /// element of `heads`. |
|
42 | 43 | /// |
|
43 | 44 | /// If a target size is specified, the sampling will stop once this size is |
|
44 | 45 | /// reached. Otherwise sampling will happen until roots of the <revs> set are |
|
45 | 46 | /// reached. |
|
46 | 47 | /// |
|
47 | 48 | /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag |
|
48 | 49 | /// represented by `parentfn` |
|
49 | 50 | /// - `heads`: set of DAG head revs |
|
50 | 51 | /// - `sample`: a sample to update |
|
51 | 52 | /// - `parentfn`: a callable to resolve parents for a revision |
|
52 | 53 | /// - `quicksamplesize`: optional target size of the sample |
|
53 | 54 | fn update_sample<I>( |
|
54 | 55 | revs: Option<&HashSet<Revision>>, |
|
55 | 56 | heads: impl IntoIterator<Item = Revision>, |
|
56 | 57 | sample: &mut HashSet<Revision>, |
|
57 | 58 | parentsfn: impl Fn(Revision) -> Result<I, GraphError>, |
|
58 | 59 | quicksamplesize: Option<usize>, |
|
59 | 60 | ) -> Result<(), GraphError> |
|
60 | 61 | where |
|
61 | 62 | I: Iterator<Item = Revision>, |
|
62 | 63 | { |
|
63 | 64 | let mut distances: FastHashMap<Revision, u32> = FastHashMap::default(); |
|
64 | 65 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); |
|
65 | 66 | let mut factor: u32 = 1; |
|
66 | 67 | let mut seen: HashSet<Revision> = HashSet::new(); |
|
67 | 68 | while let Some(current) = visit.pop_front() { |
|
68 | 69 | if !seen.insert(current) { |
|
69 | 70 | continue; |
|
70 | 71 | } |
|
71 | 72 | |
|
72 | 73 | let d = *distances.entry(current).or_insert(1); |
|
73 | 74 | if d > factor { |
|
74 | 75 | factor *= 2; |
|
75 | 76 | } |
|
76 | 77 | if d == factor { |
|
77 | 78 | sample.insert(current); |
|
78 | 79 | if let Some(sz) = quicksamplesize { |
|
79 | 80 | if sample.len() >= sz { |
|
80 | 81 | return Ok(()); |
|
81 | 82 | } |
|
82 | 83 | } |
|
83 | 84 | } |
|
84 | 85 | for p in parentsfn(current)? { |
|
85 | 86 | if let Some(revs) = revs { |
|
86 | 87 | if !revs.contains(&p) { |
|
87 | 88 | continue; |
|
88 | 89 | } |
|
89 | 90 | } |
|
90 | 91 | distances.entry(p).or_insert(d + 1); |
|
91 | 92 | visit.push_back(p); |
|
92 | 93 | } |
|
93 | 94 | } |
|
94 | 95 | Ok(()) |
|
95 | 96 | } |
|
96 | 97 | |
|
97 | 98 | struct ParentsIterator { |
|
98 | 99 | parents: [Revision; 2], |
|
99 | 100 | cur: usize, |
|
100 | 101 | } |
|
101 | 102 | |
|
102 | 103 | impl ParentsIterator { |
|
103 | 104 | fn graph_parents( |
|
104 | 105 | graph: &impl Graph, |
|
105 | 106 | r: Revision, |
|
106 | 107 | ) -> Result<ParentsIterator, GraphError> { |
|
107 | 108 | Ok(ParentsIterator { |
|
108 | 109 | parents: graph.parents(r)?, |
|
109 | 110 | cur: 0, |
|
110 | 111 | }) |
|
111 | 112 | } |
|
112 | 113 | } |
|
113 | 114 | |
|
114 | 115 | impl Iterator for ParentsIterator { |
|
115 | 116 | type Item = Revision; |
|
116 | 117 | |
|
117 | 118 | fn next(&mut self) -> Option<Revision> { |
|
118 | 119 | if self.cur > 1 { |
|
119 | 120 | return None; |
|
120 | 121 | } |
|
121 | 122 | let rev = self.parents[self.cur]; |
|
122 | 123 | self.cur += 1; |
|
123 | 124 | if rev == NULL_REVISION { |
|
124 | 125 | return self.next(); |
|
125 | 126 | } |
|
126 | 127 | Some(rev) |
|
127 | 128 | } |
|
128 | 129 | } |
|
129 | 130 | |
|
130 | 131 | impl<G: Graph + Clone> PartialDiscovery<G> { |
|
131 | 132 | /// Create a PartialDiscovery object, with the intent |
|
132 | 133 | /// of comparing our `::<target_heads>` revset to the contents of another |
|
133 | 134 | /// repo. |
|
134 | 135 | /// |
|
135 | 136 | /// For now `target_heads` is passed as a vector, and will be used |
|
136 | 137 | /// at the first call to `ensure_undecided()`. |
|
137 | 138 | /// |
|
138 | 139 | /// If we want to make the signature more flexible, |
|
139 | 140 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait |
|
140 | 141 | /// object since we'll keep it in the meanwhile |
|
141 | 142 | /// |
|
142 | 143 | /// The `respect_size` boolean controls how the sampling methods |
|
143 | 144 | /// will interpret the size argument requested by the caller. If it's |
|
144 | 145 | /// `false`, they are allowed to produce a sample whose size is more |
|
145 | 146 | /// appropriate to the situation (typically bigger). |
|
146 | 147 | /// |
|
147 | 148 | /// The `randomize` boolean affects sampling, and specifically how |
|
148 | 149 | /// limiting or last-minute expanding is been done: |
|
149 | 150 | /// |
|
150 | 151 | /// If `true`, both will perform random picking from `self.undecided`. |
|
151 | 152 | /// This is currently the best for actual discoveries. |
|
152 | 153 | /// |
|
153 | 154 | /// If `false`, a reproductible picking strategy is performed. This is |
|
154 | 155 | /// useful for integration tests. |
|
155 | 156 | pub fn new( |
|
156 | 157 | graph: G, |
|
157 | 158 | target_heads: Vec<Revision>, |
|
158 | 159 | respect_size: bool, |
|
159 | 160 | randomize: bool, |
|
160 | 161 | ) -> Self { |
|
161 |
let mut seed |
|
|
162 | let mut seed = [0; 16]; | |
|
162 | 163 | if randomize { |
|
163 | 164 | thread_rng().fill_bytes(&mut seed); |
|
164 | 165 | } |
|
165 | 166 | Self::new_with_seed(graph, target_heads, seed, respect_size, randomize) |
|
166 | 167 | } |
|
167 | 168 | |
|
168 | 169 | pub fn new_with_seed( |
|
169 | 170 | graph: G, |
|
170 | 171 | target_heads: Vec<Revision>, |
|
171 |
seed: |
|
|
172 | seed: Seed, | |
|
172 | 173 | respect_size: bool, |
|
173 | 174 | randomize: bool, |
|
174 | 175 | ) -> Self { |
|
175 | 176 | PartialDiscovery { |
|
176 | 177 | undecided: None, |
|
177 | 178 | children_cache: None, |
|
178 | 179 | target_heads: Some(target_heads), |
|
179 | 180 | graph: graph.clone(), |
|
180 | 181 | common: MissingAncestors::new(graph, vec![]), |
|
181 | 182 | missing: HashSet::new(), |
|
182 | 183 | rng: Rng::from_seed(seed), |
|
183 | 184 | respect_size: respect_size, |
|
184 | 185 | randomize: randomize, |
|
185 | 186 | } |
|
186 | 187 | } |
|
187 | 188 | |
|
188 | 189 | /// Extract at most `size` random elements from sample and return them |
|
189 | 190 | /// as a vector |
|
190 | 191 | fn limit_sample( |
|
191 | 192 | &mut self, |
|
192 | 193 | mut sample: Vec<Revision>, |
|
193 | 194 | size: usize, |
|
194 | 195 | ) -> Vec<Revision> { |
|
195 | 196 | if !self.randomize { |
|
196 | 197 | sample.sort(); |
|
197 | 198 | sample.truncate(size); |
|
198 | 199 | return sample; |
|
199 | 200 | } |
|
200 | 201 | let sample_len = sample.len(); |
|
201 | 202 | if sample_len <= size { |
|
202 | 203 | return sample; |
|
203 | 204 | } |
|
204 | 205 | let rng = &mut self.rng; |
|
205 | 206 | let dropped_size = sample_len - size; |
|
206 | 207 | let limited_slice = if size < dropped_size { |
|
207 | 208 | sample.partial_shuffle(rng, size).0 |
|
208 | 209 | } else { |
|
209 | 210 | sample.partial_shuffle(rng, dropped_size).1 |
|
210 | 211 | }; |
|
211 | 212 | limited_slice.to_owned() |
|
212 | 213 | } |
|
213 | 214 | |
|
214 | 215 | /// Register revisions known as being common |
|
215 | 216 | pub fn add_common_revisions( |
|
216 | 217 | &mut self, |
|
217 | 218 | common: impl IntoIterator<Item = Revision>, |
|
218 | 219 | ) -> Result<(), GraphError> { |
|
219 | 220 | let before_len = self.common.get_bases().len(); |
|
220 | 221 | self.common.add_bases(common); |
|
221 | 222 | if self.common.get_bases().len() == before_len { |
|
222 | 223 | return Ok(()); |
|
223 | 224 | } |
|
224 | 225 | if let Some(ref mut undecided) = self.undecided { |
|
225 | 226 | self.common.remove_ancestors_from(undecided)?; |
|
226 | 227 | } |
|
227 | 228 | Ok(()) |
|
228 | 229 | } |
|
229 | 230 | |
|
230 | 231 | /// Register revisions known as being missing |
|
231 | 232 | /// |
|
232 | 233 | /// # Performance note |
|
233 | 234 | /// |
|
234 | 235 | /// Except in the most trivial case, the first call of this method has |
|
235 | 236 | /// the side effect of computing `self.undecided` set for the first time, |
|
236 | 237 | /// and the related caches it might need for efficiency of its internal |
|
237 | 238 | /// computation. This is typically faster if more information is |
|
238 | 239 | /// available in `self.common`. Therefore, for good performance, the |
|
239 | 240 | /// caller should avoid calling this too early. |
|
240 | 241 | pub fn add_missing_revisions( |
|
241 | 242 | &mut self, |
|
242 | 243 | missing: impl IntoIterator<Item = Revision>, |
|
243 | 244 | ) -> Result<(), GraphError> { |
|
244 | 245 | let mut tovisit: VecDeque<Revision> = missing.into_iter().collect(); |
|
245 | 246 | if tovisit.is_empty() { |
|
246 | 247 | return Ok(()); |
|
247 | 248 | } |
|
248 | 249 | self.ensure_children_cache()?; |
|
249 | 250 | self.ensure_undecided()?; // for safety of possible future refactors |
|
250 | 251 | let children = self.children_cache.as_ref().unwrap(); |
|
251 | 252 | let mut seen: HashSet<Revision> = HashSet::new(); |
|
252 | 253 | let undecided_mut = self.undecided.as_mut().unwrap(); |
|
253 | 254 | while let Some(rev) = tovisit.pop_front() { |
|
254 | 255 | if !self.missing.insert(rev) { |
|
255 | 256 | // either it's known to be missing from a previous |
|
256 | 257 | // invocation, and there's no need to iterate on its |
|
257 | 258 | // children (we now they are all missing) |
|
258 | 259 | // or it's from a previous iteration of this loop |
|
259 | 260 | // and its children have already been queued |
|
260 | 261 | continue; |
|
261 | 262 | } |
|
262 | 263 | undecided_mut.remove(&rev); |
|
263 | 264 | match children.get(&rev) { |
|
264 | 265 | None => { |
|
265 | 266 | continue; |
|
266 | 267 | } |
|
267 | 268 | Some(this_children) => { |
|
268 | 269 | for child in this_children.iter().cloned() { |
|
269 | 270 | if seen.insert(child) { |
|
270 | 271 | tovisit.push_back(child); |
|
271 | 272 | } |
|
272 | 273 | } |
|
273 | 274 | } |
|
274 | 275 | } |
|
275 | 276 | } |
|
276 | 277 | Ok(()) |
|
277 | 278 | } |
|
278 | 279 | |
|
279 | 280 | /// Do we have any information about the peer? |
|
280 | 281 | pub fn has_info(&self) -> bool { |
|
281 | 282 | self.common.has_bases() |
|
282 | 283 | } |
|
283 | 284 | |
|
284 | 285 | /// Did we acquire full knowledge of our Revisions that the peer has? |
|
285 | 286 | pub fn is_complete(&self) -> bool { |
|
286 | 287 | self.undecided.as_ref().map_or(false, |s| s.is_empty()) |
|
287 | 288 | } |
|
288 | 289 | |
|
289 | 290 | /// Return the heads of the currently known common set of revisions. |
|
290 | 291 | /// |
|
291 | 292 | /// If the discovery process is not complete (see `is_complete()`), the |
|
292 | 293 | /// caller must be aware that this is an intermediate state. |
|
293 | 294 | /// |
|
294 | 295 | /// On the other hand, if it is complete, then this is currently |
|
295 | 296 | /// the only way to retrieve the end results of the discovery process. |
|
296 | 297 | /// |
|
297 | 298 | /// We may introduce in the future an `into_common_heads` call that |
|
298 | 299 | /// would be more appropriate for normal Rust callers, dropping `self` |
|
299 | 300 | /// if it is complete. |
|
300 | 301 | pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> { |
|
301 | 302 | self.common.bases_heads() |
|
302 | 303 | } |
|
303 | 304 | |
|
304 | 305 | /// Force first computation of `self.undecided` |
|
305 | 306 | /// |
|
306 | 307 | /// After this, `self.undecided.as_ref()` and `.as_mut()` can be |
|
307 | 308 | /// unwrapped to get workable immutable or mutable references without |
|
308 | 309 | /// any panic. |
|
309 | 310 | /// |
|
310 | 311 | /// This is an imperative call instead of an access with added lazyness |
|
311 | 312 | /// to reduce easily the scope of mutable borrow for the caller, |
|
312 | 313 | /// compared to undecided(&'a mut self) -> &'a⦠that would keep it |
|
313 | 314 | /// as long as the resulting immutable one. |
|
314 | 315 | fn ensure_undecided(&mut self) -> Result<(), GraphError> { |
|
315 | 316 | if self.undecided.is_some() { |
|
316 | 317 | return Ok(()); |
|
317 | 318 | } |
|
318 | 319 | let tgt = self.target_heads.take().unwrap(); |
|
319 | 320 | self.undecided = |
|
320 | 321 | Some(self.common.missing_ancestors(tgt)?.into_iter().collect()); |
|
321 | 322 | Ok(()) |
|
322 | 323 | } |
|
323 | 324 | |
|
324 | 325 | fn ensure_children_cache(&mut self) -> Result<(), GraphError> { |
|
325 | 326 | if self.children_cache.is_some() { |
|
326 | 327 | return Ok(()); |
|
327 | 328 | } |
|
328 | 329 | self.ensure_undecided()?; |
|
329 | 330 | |
|
330 | 331 | let mut children: FastHashMap<Revision, Vec<Revision>> = |
|
331 | 332 | FastHashMap::default(); |
|
332 | 333 | for &rev in self.undecided.as_ref().unwrap() { |
|
333 | 334 | for p in ParentsIterator::graph_parents(&self.graph, rev)? { |
|
334 | 335 | children.entry(p).or_insert_with(|| Vec::new()).push(rev); |
|
335 | 336 | } |
|
336 | 337 | } |
|
337 | 338 | self.children_cache = Some(children); |
|
338 | 339 | Ok(()) |
|
339 | 340 | } |
|
340 | 341 | |
|
341 | 342 | /// Provide statistics about the current state of the discovery process |
|
342 | 343 | pub fn stats(&self) -> DiscoveryStats { |
|
343 | 344 | DiscoveryStats { |
|
344 | 345 | undecided: self.undecided.as_ref().map(|s| s.len()), |
|
345 | 346 | } |
|
346 | 347 | } |
|
347 | 348 | |
|
348 | 349 | pub fn take_quick_sample( |
|
349 | 350 | &mut self, |
|
350 | 351 | headrevs: impl IntoIterator<Item = Revision>, |
|
351 | 352 | size: usize, |
|
352 | 353 | ) -> Result<Vec<Revision>, GraphError> { |
|
353 | 354 | self.ensure_undecided()?; |
|
354 | 355 | let mut sample = { |
|
355 | 356 | let undecided = self.undecided.as_ref().unwrap(); |
|
356 | 357 | if undecided.len() <= size { |
|
357 | 358 | return Ok(undecided.iter().cloned().collect()); |
|
358 | 359 | } |
|
359 | 360 | dagops::heads(&self.graph, undecided.iter())? |
|
360 | 361 | }; |
|
361 | 362 | if sample.len() >= size { |
|
362 | 363 | return Ok(self.limit_sample(sample.into_iter().collect(), size)); |
|
363 | 364 | } |
|
364 | 365 | update_sample( |
|
365 | 366 | None, |
|
366 | 367 | headrevs, |
|
367 | 368 | &mut sample, |
|
368 | 369 | |r| ParentsIterator::graph_parents(&self.graph, r), |
|
369 | 370 | Some(size), |
|
370 | 371 | )?; |
|
371 | 372 | Ok(sample.into_iter().collect()) |
|
372 | 373 | } |
|
373 | 374 | |
|
374 | 375 | /// Extract a sample from `self.undecided`, going from its heads and roots. |
|
375 | 376 | /// |
|
376 | 377 | /// The `size` parameter is used to avoid useless computations if |
|
377 | 378 | /// it turns out to be bigger than the whole set of undecided Revisions. |
|
378 | 379 | /// |
|
379 | 380 | /// The sample is taken by using `update_sample` from the heads, then |
|
380 | 381 | /// from the roots, working on the reverse DAG, |
|
381 | 382 | /// expressed by `self.children_cache`. |
|
382 | 383 | /// |
|
383 | 384 | /// No effort is being made to complete or limit the sample to `size` |
|
384 | 385 | /// but this method returns another interesting size that it derives |
|
385 | 386 | /// from its knowledge of the structure of the various sets, leaving |
|
386 | 387 | /// to the caller the decision to use it or not. |
|
387 | 388 | fn bidirectional_sample( |
|
388 | 389 | &mut self, |
|
389 | 390 | size: usize, |
|
390 | 391 | ) -> Result<(HashSet<Revision>, usize), GraphError> { |
|
391 | 392 | self.ensure_undecided()?; |
|
392 | 393 | { |
|
393 | 394 | // we don't want to compute children_cache before this |
|
394 | 395 | // but doing it after extracting self.undecided takes a mutable |
|
395 | 396 | // ref to self while a shareable one is still active. |
|
396 | 397 | let undecided = self.undecided.as_ref().unwrap(); |
|
397 | 398 | if undecided.len() <= size { |
|
398 | 399 | return Ok((undecided.clone(), size)); |
|
399 | 400 | } |
|
400 | 401 | } |
|
401 | 402 | |
|
402 | 403 | self.ensure_children_cache()?; |
|
403 | 404 | let revs = self.undecided.as_ref().unwrap(); |
|
404 | 405 | let mut sample: HashSet<Revision> = revs.clone(); |
|
405 | 406 | |
|
406 | 407 | // it's possible that leveraging the children cache would be more |
|
407 | 408 | // efficient here |
|
408 | 409 | dagops::retain_heads(&self.graph, &mut sample)?; |
|
409 | 410 | let revsheads = sample.clone(); // was again heads(revs) in python |
|
410 | 411 | |
|
411 | 412 | // update from heads |
|
412 | 413 | update_sample( |
|
413 | 414 | Some(revs), |
|
414 | 415 | revsheads.iter().cloned(), |
|
415 | 416 | &mut sample, |
|
416 | 417 | |r| ParentsIterator::graph_parents(&self.graph, r), |
|
417 | 418 | None, |
|
418 | 419 | )?; |
|
419 | 420 | |
|
420 | 421 | // update from roots |
|
421 | 422 | let revroots: HashSet<Revision> = |
|
422 | 423 | dagops::roots(&self.graph, revs)?.into_iter().collect(); |
|
423 | 424 | let prescribed_size = max(size, min(revroots.len(), revsheads.len())); |
|
424 | 425 | |
|
425 | 426 | let children = self.children_cache.as_ref().unwrap(); |
|
426 | 427 | let empty_vec: Vec<Revision> = Vec::new(); |
|
427 | 428 | update_sample( |
|
428 | 429 | Some(revs), |
|
429 | 430 | revroots, |
|
430 | 431 | &mut sample, |
|
431 | 432 | |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()), |
|
432 | 433 | None, |
|
433 | 434 | )?; |
|
434 | 435 | Ok((sample, prescribed_size)) |
|
435 | 436 | } |
|
436 | 437 | |
|
437 | 438 | /// Fill up sample up to the wished size with random undecided Revisions. |
|
438 | 439 | /// |
|
439 | 440 | /// This is intended to be used as a last resort completion if the |
|
440 | 441 | /// regular sampling algorithm returns too few elements. |
|
441 | 442 | fn random_complete_sample( |
|
442 | 443 | &mut self, |
|
443 | 444 | sample: &mut Vec<Revision>, |
|
444 | 445 | size: usize, |
|
445 | 446 | ) { |
|
446 | 447 | let sample_len = sample.len(); |
|
447 | 448 | if size <= sample_len { |
|
448 | 449 | return; |
|
449 | 450 | } |
|
450 | 451 | let take_from: Vec<Revision> = self |
|
451 | 452 | .undecided |
|
452 | 453 | .as_ref() |
|
453 | 454 | .unwrap() |
|
454 | 455 | .iter() |
|
455 | 456 | .filter(|&r| !sample.contains(r)) |
|
456 | 457 | .cloned() |
|
457 | 458 | .collect(); |
|
458 | 459 | sample.extend(self.limit_sample(take_from, size - sample_len)); |
|
459 | 460 | } |
|
460 | 461 | |
|
461 | 462 | pub fn take_full_sample( |
|
462 | 463 | &mut self, |
|
463 | 464 | size: usize, |
|
464 | 465 | ) -> Result<Vec<Revision>, GraphError> { |
|
465 | 466 | let (sample_set, prescribed_size) = self.bidirectional_sample(size)?; |
|
466 | 467 | let size = if self.respect_size { |
|
467 | 468 | size |
|
468 | 469 | } else { |
|
469 | 470 | prescribed_size |
|
470 | 471 | }; |
|
471 | 472 | let mut sample = |
|
472 | 473 | self.limit_sample(sample_set.into_iter().collect(), size); |
|
473 | 474 | self.random_complete_sample(&mut sample, size); |
|
474 | 475 | Ok(sample) |
|
475 | 476 | } |
|
476 | 477 | } |
|
477 | 478 | |
|
478 | 479 | #[cfg(test)] |
|
479 | 480 | mod tests { |
|
480 | 481 | use super::*; |
|
481 | 482 | use crate::testing::SampleGraph; |
|
482 | 483 | |
|
483 | 484 | /// A PartialDiscovery as for pushing all the heads of `SampleGraph` |
|
484 | 485 | /// |
|
485 | 486 | /// To avoid actual randomness in these tests, we give it a fixed |
|
486 | 487 | /// random seed, but by default we'll test the random version. |
|
487 | 488 | fn full_disco() -> PartialDiscovery<SampleGraph> { |
|
488 | 489 | PartialDiscovery::new_with_seed( |
|
489 | 490 | SampleGraph, |
|
490 | 491 | vec![10, 11, 12, 13], |
|
491 | 492 | [0; 16], |
|
492 | 493 | true, |
|
493 | 494 | true, |
|
494 | 495 | ) |
|
495 | 496 | } |
|
496 | 497 | |
|
497 | 498 | /// A PartialDiscovery as for pushing the 12 head of `SampleGraph` |
|
498 | 499 | /// |
|
499 | 500 | /// To avoid actual randomness in tests, we give it a fixed random seed. |
|
500 | 501 | fn disco12() -> PartialDiscovery<SampleGraph> { |
|
501 | 502 | PartialDiscovery::new_with_seed( |
|
502 | 503 | SampleGraph, |
|
503 | 504 | vec![12], |
|
504 | 505 | [0; 16], |
|
505 | 506 | true, |
|
506 | 507 | true, |
|
507 | 508 | ) |
|
508 | 509 | } |
|
509 | 510 | |
|
510 | 511 | fn sorted_undecided( |
|
511 | 512 | disco: &PartialDiscovery<SampleGraph>, |
|
512 | 513 | ) -> Vec<Revision> { |
|
513 | 514 | let mut as_vec: Vec<Revision> = |
|
514 | 515 | disco.undecided.as_ref().unwrap().iter().cloned().collect(); |
|
515 | 516 | as_vec.sort(); |
|
516 | 517 | as_vec |
|
517 | 518 | } |
|
518 | 519 | |
|
519 | 520 | fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> { |
|
520 | 521 | let mut as_vec: Vec<Revision> = |
|
521 | 522 | disco.missing.iter().cloned().collect(); |
|
522 | 523 | as_vec.sort(); |
|
523 | 524 | as_vec |
|
524 | 525 | } |
|
525 | 526 | |
|
526 | 527 | fn sorted_common_heads( |
|
527 | 528 | disco: &PartialDiscovery<SampleGraph>, |
|
528 | 529 | ) -> Result<Vec<Revision>, GraphError> { |
|
529 | 530 | let mut as_vec: Vec<Revision> = |
|
530 | 531 | disco.common_heads()?.iter().cloned().collect(); |
|
531 | 532 | as_vec.sort(); |
|
532 | 533 | Ok(as_vec) |
|
533 | 534 | } |
|
534 | 535 | |
|
535 | 536 | #[test] |
|
536 | 537 | fn test_add_common_get_undecided() -> Result<(), GraphError> { |
|
537 | 538 | let mut disco = full_disco(); |
|
538 | 539 | assert_eq!(disco.undecided, None); |
|
539 | 540 | assert!(!disco.has_info()); |
|
540 | 541 | assert_eq!(disco.stats().undecided, None); |
|
541 | 542 | |
|
542 | 543 | disco.add_common_revisions(vec![11, 12])?; |
|
543 | 544 | assert!(disco.has_info()); |
|
544 | 545 | assert!(!disco.is_complete()); |
|
545 | 546 | assert!(disco.missing.is_empty()); |
|
546 | 547 | |
|
547 | 548 | // add_common_revisions did not trigger a premature computation |
|
548 | 549 | // of `undecided`, let's check that and ask for them |
|
549 | 550 | assert_eq!(disco.undecided, None); |
|
550 | 551 | disco.ensure_undecided()?; |
|
551 | 552 | assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]); |
|
552 | 553 | assert_eq!(disco.stats().undecided, Some(4)); |
|
553 | 554 | Ok(()) |
|
554 | 555 | } |
|
555 | 556 | |
|
556 | 557 | /// in this test, we pretend that our peer misses exactly (8+10):: |
|
557 | 558 | /// and we're comparing all our repo to it (as in a bare push) |
|
558 | 559 | #[test] |
|
559 | 560 | fn test_discovery() -> Result<(), GraphError> { |
|
560 | 561 | let mut disco = full_disco(); |
|
561 | 562 | disco.add_common_revisions(vec![11, 12])?; |
|
562 | 563 | disco.add_missing_revisions(vec![8, 10])?; |
|
563 | 564 | assert_eq!(sorted_undecided(&disco), vec![5]); |
|
564 | 565 | assert_eq!(sorted_missing(&disco), vec![8, 10, 13]); |
|
565 | 566 | assert!(!disco.is_complete()); |
|
566 | 567 | |
|
567 | 568 | disco.add_common_revisions(vec![5])?; |
|
568 | 569 | assert_eq!(sorted_undecided(&disco), vec![]); |
|
569 | 570 | assert_eq!(sorted_missing(&disco), vec![8, 10, 13]); |
|
570 | 571 | assert!(disco.is_complete()); |
|
571 | 572 | assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]); |
|
572 | 573 | Ok(()) |
|
573 | 574 | } |
|
574 | 575 | |
|
575 | 576 | #[test] |
|
576 | 577 | fn test_add_missing_early_continue() -> Result<(), GraphError> { |
|
577 | 578 | eprintln!("test_add_missing_early_stop"); |
|
578 | 579 | let mut disco = full_disco(); |
|
579 | 580 | disco.add_common_revisions(vec![13, 3, 4])?; |
|
580 | 581 | disco.ensure_children_cache()?; |
|
581 | 582 | // 12 is grand-child of 6 through 9 |
|
582 | 583 | // passing them in this order maximizes the chances of the |
|
583 | 584 | // early continue to do the wrong thing |
|
584 | 585 | disco.add_missing_revisions(vec![6, 9, 12])?; |
|
585 | 586 | assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]); |
|
586 | 587 | assert_eq!(sorted_missing(&disco), vec![6, 9, 12]); |
|
587 | 588 | assert!(!disco.is_complete()); |
|
588 | 589 | Ok(()) |
|
589 | 590 | } |
|
590 | 591 | |
|
591 | 592 | #[test] |
|
592 | 593 | fn test_limit_sample_no_need_to() { |
|
593 | 594 | let sample = vec![1, 2, 3, 4]; |
|
594 | 595 | assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]); |
|
595 | 596 | } |
|
596 | 597 | |
|
597 | 598 | #[test] |
|
598 | 599 | fn test_limit_sample_less_than_half() { |
|
599 | 600 | assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]); |
|
600 | 601 | } |
|
601 | 602 | |
|
602 | 603 | #[test] |
|
603 | 604 | fn test_limit_sample_more_than_half() { |
|
604 | 605 | assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]); |
|
605 | 606 | } |
|
606 | 607 | |
|
607 | 608 | #[test] |
|
608 | 609 | fn test_limit_sample_no_random() { |
|
609 | 610 | let mut disco = full_disco(); |
|
610 | 611 | disco.randomize = false; |
|
611 | 612 | assert_eq!( |
|
612 | 613 | disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4), |
|
613 | 614 | vec![1, 3, 5, 7] |
|
614 | 615 | ); |
|
615 | 616 | } |
|
616 | 617 | |
|
617 | 618 | #[test] |
|
618 | 619 | fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { |
|
619 | 620 | let mut disco = full_disco(); |
|
620 | 621 | disco.undecided = Some((1..=13).collect()); |
|
621 | 622 | |
|
622 | 623 | let mut sample_vec = disco.take_quick_sample(vec![], 4)?; |
|
623 | 624 | sample_vec.sort(); |
|
624 | 625 | assert_eq!(sample_vec, vec![10, 11, 12, 13]); |
|
625 | 626 | Ok(()) |
|
626 | 627 | } |
|
627 | 628 | |
|
628 | 629 | #[test] |
|
629 | 630 | fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> { |
|
630 | 631 | let mut disco = disco12(); |
|
631 | 632 | disco.ensure_undecided()?; |
|
632 | 633 | |
|
633 | 634 | let mut sample_vec = disco.take_quick_sample(vec![12], 4)?; |
|
634 | 635 | sample_vec.sort(); |
|
635 | 636 | // r12's only parent is r9, whose unique grand-parent through the |
|
636 | 637 | // diamond shape is r4. This ends there because the distance from r4 |
|
637 | 638 | // to the root is only 3. |
|
638 | 639 | assert_eq!(sample_vec, vec![4, 9, 12]); |
|
639 | 640 | Ok(()) |
|
640 | 641 | } |
|
641 | 642 | |
|
642 | 643 | #[test] |
|
643 | 644 | fn test_children_cache() -> Result<(), GraphError> { |
|
644 | 645 | let mut disco = full_disco(); |
|
645 | 646 | disco.ensure_children_cache()?; |
|
646 | 647 | |
|
647 | 648 | let cache = disco.children_cache.unwrap(); |
|
648 | 649 | assert_eq!(cache.get(&2).cloned(), Some(vec![4])); |
|
649 | 650 | assert_eq!(cache.get(&10).cloned(), None); |
|
650 | 651 | |
|
651 | 652 | let mut children_4 = cache.get(&4).cloned().unwrap(); |
|
652 | 653 | children_4.sort(); |
|
653 | 654 | assert_eq!(children_4, vec![5, 6, 7]); |
|
654 | 655 | |
|
655 | 656 | let mut children_7 = cache.get(&7).cloned().unwrap(); |
|
656 | 657 | children_7.sort(); |
|
657 | 658 | assert_eq!(children_7, vec![9, 11]); |
|
658 | 659 | |
|
659 | 660 | Ok(()) |
|
660 | 661 | } |
|
661 | 662 | |
|
662 | 663 | #[test] |
|
663 | 664 | fn test_complete_sample() { |
|
664 | 665 | let mut disco = full_disco(); |
|
665 | 666 | let undecided: HashSet<Revision> = |
|
666 | 667 | [4, 7, 9, 2, 3].iter().cloned().collect(); |
|
667 | 668 | disco.undecided = Some(undecided); |
|
668 | 669 | |
|
669 | 670 | let mut sample = vec![0]; |
|
670 | 671 | disco.random_complete_sample(&mut sample, 3); |
|
671 | 672 | assert_eq!(sample.len(), 3); |
|
672 | 673 | |
|
673 | 674 | let mut sample = vec![2, 4, 7]; |
|
674 | 675 | disco.random_complete_sample(&mut sample, 1); |
|
675 | 676 | assert_eq!(sample.len(), 3); |
|
676 | 677 | } |
|
677 | 678 | |
|
678 | 679 | #[test] |
|
679 | 680 | fn test_bidirectional_sample() -> Result<(), GraphError> { |
|
680 | 681 | let mut disco = full_disco(); |
|
681 | 682 | disco.undecided = Some((0..=13).into_iter().collect()); |
|
682 | 683 | |
|
683 | 684 | let (sample_set, size) = disco.bidirectional_sample(7)?; |
|
684 | 685 | assert_eq!(size, 7); |
|
685 | 686 | let mut sample: Vec<Revision> = sample_set.into_iter().collect(); |
|
686 | 687 | sample.sort(); |
|
687 | 688 | // our DAG is a bit too small for the results to be really interesting |
|
688 | 689 | // at least it shows that |
|
689 | 690 | // - we went both ways |
|
690 | 691 | // - we didn't take all Revisions (6 is not in the sample) |
|
691 | 692 | assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]); |
|
692 | 693 | Ok(()) |
|
693 | 694 | } |
|
694 | 695 | } |
General Comments 0
You need to be logged in to leave comments.
Login now