Show More
@@ -1,19 +1,17 | |||
|
1 | 1 | [package] |
|
2 | 2 | name = "hg-core" |
|
3 | 3 | version = "0.1.0" |
|
4 | 4 | authors = ["Georges Racinet <gracinet@anybox.fr>"] |
|
5 | 5 | description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" |
|
6 | 6 | edition = "2018" |
|
7 | 7 | |
|
8 | 8 | [lib] |
|
9 | 9 | name = "hg" |
|
10 | 10 | |
|
11 | [dev-dependencies] | |
|
12 | rand = "*" | |
|
13 | rand_pcg = "*" | |
|
14 | ||
|
15 | 11 | [dependencies] |
|
16 | 12 | byteorder = "1.3.1" |
|
17 | 13 | lazy_static = "1.3.0" |
|
18 | 14 | memchr = "2.2.0" |
|
15 | rand = "> 0.6.4" | |
|
16 | rand_pcg = "> 0.1.0" | |
|
19 | 17 | regex = "^1.1" |
@@ -1,209 +1,393 | |||
|
1 | 1 | // discovery.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Georges Racinet <georges.racinet@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Discovery operations |
|
9 | 9 | //! |
|
10 | 10 | //! This is a Rust counterpart to the `partialdiscovery` class of |
|
11 | 11 | //! `mercurial.setdiscovery` |
|
12 | 12 | |
|
13 | use super::{Graph, GraphError, Revision}; | |
|
13 | extern crate rand; | |
|
14 | extern crate rand_pcg; | |
|
15 | use self::rand::seq::SliceRandom; | |
|
16 | use self::rand::{thread_rng, RngCore, SeedableRng}; | |
|
17 | use super::{Graph, GraphError, Revision, NULL_REVISION}; | |
|
14 | 18 | use crate::ancestors::MissingAncestors; |
|
15 | 19 | use crate::dagops; |
|
16 | use std::collections::HashSet; | |
|
20 | use std::collections::{HashMap, HashSet, VecDeque}; | |
|
21 | ||
|
22 | type Rng = self::rand_pcg::Pcg32; | |
|
17 | 23 | |
|
18 | 24 | pub struct PartialDiscovery<G: Graph + Clone> { |
|
19 | 25 | target_heads: Option<Vec<Revision>>, |
|
20 | 26 | graph: G, // plays the role of self._repo |
|
21 | 27 | common: MissingAncestors<G>, |
|
22 | 28 | undecided: Option<HashSet<Revision>>, |
|
23 | 29 | missing: HashSet<Revision>, |
|
30 | rng: Rng, | |
|
24 | 31 | } |
|
25 | 32 | |
|
26 | 33 | pub struct DiscoveryStats { |
|
27 | 34 | pub undecided: Option<usize>, |
|
28 | 35 | } |
|
29 | 36 | |
|
37 | /// Update an existing sample to match the expected size | |
|
38 | /// | |
|
39 | /// The sample is updated with revisions exponentially distant from each | |
|
40 | /// element of `heads`. | |
|
41 | /// | |
|
42 | /// If a target size is specified, the sampling will stop once this size is | |
|
43 | /// reached. Otherwise sampling will happen until roots of the <revs> set are | |
|
44 | /// reached. | |
|
45 | /// | |
|
46 | /// - `revs`: set of revs we want to discover (if None, assume the whole dag | |
|
47 | /// represented by `parentsfn`) | |
|
48 | /// - `heads`: set of DAG head revs | |
|
49 | /// - `sample`: a sample to update | |
|
50 | /// - `parentsfn`: a callable to resolve parents for a revision | |
|
51 | /// - `quicksamplesize`: optional target size of the sample | |
|
52 | fn update_sample( | |
|
53 | revs: Option<&HashSet<Revision>>, | |
|
54 | heads: impl IntoIterator<Item = Revision>, | |
|
55 | sample: &mut HashSet<Revision>, | |
|
56 | parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, | |
|
57 | quicksamplesize: Option<usize>, | |
|
58 | ) -> Result<(), GraphError> { | |
|
59 | let mut distances: HashMap<Revision, u32> = HashMap::new(); | |
|
60 | let mut visit: VecDeque<Revision> = heads.into_iter().collect(); | |
|
61 | let mut factor: u32 = 1; | |
|
62 | let mut seen: HashSet<Revision> = HashSet::new(); | |
|
63 | loop { | |
|
64 | let current = match visit.pop_front() { | |
|
65 | None => { | |
|
66 | break; | |
|
67 | } | |
|
68 | Some(r) => r, | |
|
69 | }; | |
|
70 | if !seen.insert(current) { | |
|
71 | continue; | |
|
72 | } | |
|
73 | ||
|
74 | let d = *distances.entry(current).or_insert(1); | |
|
75 | if d > factor { | |
|
76 | factor *= 2; | |
|
77 | } | |
|
78 | if d == factor { | |
|
79 | sample.insert(current); | |
|
80 | if let Some(sz) = quicksamplesize { | |
|
81 | if sample.len() >= sz { | |
|
82 | return Ok(()); | |
|
83 | } | |
|
84 | } | |
|
85 | } | |
|
86 | for &p in &parentsfn(current)? { | |
|
87 | if p == NULL_REVISION { | |
|
88 | continue; | |
|
89 | } | |
|
90 | if let Some(revs) = revs { | |
|
91 | if !revs.contains(&p) { | |
|
92 | continue; | |
|
93 | } | |
|
94 | } | |
|
95 | distances.entry(p).or_insert(d + 1); | |
|
96 | visit.push_back(p); | |
|
97 | } | |
|
98 | } | |
|
99 | Ok(()) | |
|
100 | } | |
|
101 | ||
|
30 | 102 | impl<G: Graph + Clone> PartialDiscovery<G> { |
|
31 | 103 | /// Create a PartialDiscovery object, with the intent |
|
32 | 104 | /// of comparing our `::<target_heads>` revset to the contents of another |
|
33 | 105 | /// repo. |
|
34 | 106 | /// |
|
35 | 107 | /// For now `target_heads` is passed as a vector, and will be used |
|
36 | 108 | /// at the first call to `ensure_undecided()`. |
|
37 | 109 | /// |
|
38 | 110 | /// If we want to make the signature more flexible, |
|
39 | 111 | /// we'll have to make it a type argument of `PartialDiscovery` or a trait |
|
40 | 112 | /// object since we'll keep it in the meanwhile |
|
41 | 113 | pub fn new(graph: G, target_heads: Vec<Revision>) -> Self { |
|
114 | let mut seed: [u8; 16] = [0; 16]; | |
|
115 | thread_rng().fill_bytes(&mut seed); | |
|
116 | Self::new_with_seed(graph, target_heads, seed) | |
|
117 | } | |
|
118 | ||
|
119 | pub fn new_with_seed( | |
|
120 | graph: G, | |
|
121 | target_heads: Vec<Revision>, | |
|
122 | seed: [u8; 16], | |
|
123 | ) -> Self { | |
|
42 | 124 | PartialDiscovery { |
|
43 | 125 | undecided: None, |
|
44 | 126 | target_heads: Some(target_heads), |
|
45 | 127 | graph: graph.clone(), |
|
46 | 128 | common: MissingAncestors::new(graph, vec![]), |
|
47 | 129 | missing: HashSet::new(), |
|
130 | rng: Rng::from_seed(seed), | |
|
48 | 131 | } |
|
49 | 132 | } |
|
50 | 133 | |
|
134 | /// Extract at most `size` random elements from sample and return them | |
|
135 | /// as a vector | |
|
136 | fn limit_sample( | |
|
137 | &mut self, | |
|
138 | mut sample: Vec<Revision>, | |
|
139 | size: usize, | |
|
140 | ) -> Vec<Revision> { | |
|
141 | let sample_len = sample.len(); | |
|
142 | if sample_len <= size { | |
|
143 | return sample; | |
|
144 | } | |
|
145 | let rng = &mut self.rng; | |
|
146 | let dropped_size = sample_len - size; | |
|
147 | let limited_slice = if size < dropped_size { | |
|
148 | sample.partial_shuffle(rng, size).0 | |
|
149 | } else { | |
|
150 | sample.partial_shuffle(rng, dropped_size).1 | |
|
151 | }; | |
|
152 | limited_slice.to_owned() | |
|
153 | } | |
|
154 | ||
|
51 | 155 | /// Register revisions known as being common |
|
52 | 156 | pub fn add_common_revisions( |
|
53 | 157 | &mut self, |
|
54 | 158 | common: impl IntoIterator<Item = Revision>, |
|
55 | 159 | ) -> Result<(), GraphError> { |
|
56 | 160 | self.common.add_bases(common); |
|
57 | 161 | if let Some(ref mut undecided) = self.undecided { |
|
58 | 162 | self.common.remove_ancestors_from(undecided)?; |
|
59 | 163 | } |
|
60 | 164 | Ok(()) |
|
61 | 165 | } |
|
62 | 166 | |
|
63 | 167 | /// Register revisions known as being missing |
|
64 | 168 | pub fn add_missing_revisions( |
|
65 | 169 | &mut self, |
|
66 | 170 | missing: impl IntoIterator<Item = Revision>, |
|
67 | 171 | ) -> Result<(), GraphError> { |
|
68 | 172 | self.ensure_undecided()?; |
|
69 | 173 | let range = dagops::range( |
|
70 | 174 | &self.graph, |
|
71 | 175 | missing, |
|
72 | 176 | self.undecided.as_ref().unwrap().iter().cloned(), |
|
73 | 177 | )?; |
|
74 | 178 | let undecided_mut = self.undecided.as_mut().unwrap(); |
|
75 | 179 | for missrev in range { |
|
76 | 180 | self.missing.insert(missrev); |
|
77 | 181 | undecided_mut.remove(&missrev); |
|
78 | 182 | } |
|
79 | 183 | Ok(()) |
|
80 | 184 | } |
|
81 | 185 | |
|
82 | 186 | /// Do we have any information about the peer? |
|
83 | 187 | pub fn has_info(&self) -> bool { |
|
84 | 188 | self.common.has_bases() |
|
85 | 189 | } |
|
86 | 190 | |
|
87 | 191 | /// Did we acquire full knowledge of our Revisions that the peer has? |
|
88 | 192 | pub fn is_complete(&self) -> bool { |
|
89 | 193 | self.undecided.as_ref().map_or(false, |s| s.is_empty()) |
|
90 | 194 | } |
|
91 | 195 | |
|
92 | 196 | /// Return the heads of the currently known common set of revisions. |
|
93 | 197 | /// |
|
94 | 198 | /// If the discovery process is not complete (see `is_complete()`), the |
|
95 | 199 | /// caller must be aware that this is an intermediate state. |
|
96 | 200 | /// |
|
97 | 201 | /// On the other hand, if it is complete, then this is currently |
|
98 | 202 | /// the only way to retrieve the end results of the discovery process. |
|
99 | 203 | /// |
|
100 | 204 | /// We may introduce in the future an `into_common_heads` call that |
|
101 | 205 | /// would be more appropriate for normal Rust callers, dropping `self` |
|
102 | 206 | /// if it is complete. |
|
103 | 207 | pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> { |
|
104 | 208 | self.common.bases_heads() |
|
105 | 209 | } |
|
106 | 210 | |
|
107 | 211 | /// Force first computation of `self.undecided` |
|
108 | 212 | /// |
|
109 | 213 | /// After this, `self.undecided.as_ref()` and `.as_mut()` can be |
|
110 | 214 | /// unwrapped to get workable immutable or mutable references without |
|
111 | 215 | /// any panic. |
|
112 | 216 | /// |
|
113 | 217 | /// This is an imperative call instead of an access with added laziness |
|
114 | 218 | /// to reduce easily the scope of mutable borrow for the caller, |
|
115 | 219 | /// compared to undecided(&'a mut self) -> &'a… that would keep it |
|
116 | 220 | /// as long as the resulting immutable one. |
|
117 | 221 | fn ensure_undecided(&mut self) -> Result<(), GraphError> { |
|
118 | 222 | if self.undecided.is_some() { |
|
119 | 223 | return Ok(()); |
|
120 | 224 | } |
|
121 | 225 | let tgt = self.target_heads.take().unwrap(); |
|
122 | 226 | self.undecided = |
|
123 | 227 | Some(self.common.missing_ancestors(tgt)?.into_iter().collect()); |
|
124 | 228 | Ok(()) |
|
125 | 229 | } |
|
126 | 230 | |
|
127 | 231 | /// Provide statistics about the current state of the discovery process |
|
128 | 232 | pub fn stats(&self) -> DiscoveryStats { |
|
129 | 233 | DiscoveryStats { |
|
130 | 234 | undecided: self.undecided.as_ref().map(|s| s.len()), |
|
131 | 235 | } |
|
132 | 236 | } |
|
237 | ||
|
238 | pub fn take_quick_sample( | |
|
239 | &mut self, | |
|
240 | headrevs: impl IntoIterator<Item = Revision>, | |
|
241 | size: usize, | |
|
242 | ) -> Result<Vec<Revision>, GraphError> { | |
|
243 | self.ensure_undecided()?; | |
|
244 | let mut sample = { | |
|
245 | let undecided = self.undecided.as_ref().unwrap(); | |
|
246 | if undecided.len() <= size { | |
|
247 | return Ok(undecided.iter().cloned().collect()); | |
|
248 | } | |
|
249 | dagops::heads(&self.graph, undecided.iter())? | |
|
250 | }; | |
|
251 | if sample.len() >= size { | |
|
252 | return Ok(self.limit_sample(sample.into_iter().collect(), size)); | |
|
253 | } | |
|
254 | update_sample( | |
|
255 | None, | |
|
256 | headrevs, | |
|
257 | &mut sample, | |
|
258 | |r| self.graph.parents(r), | |
|
259 | Some(size), | |
|
260 | )?; | |
|
261 | Ok(sample.into_iter().collect()) | |
|
262 | } | |
|
133 | 263 | } |
|
134 | 264 | |
|
135 | 265 | #[cfg(test)] |
|
136 | 266 | mod tests { |
|
137 | 267 | use super::*; |
|
138 | 268 | use crate::testing::SampleGraph; |
|
139 | 269 | |
|
140 | 270 | /// A PartialDiscovery as for pushing all the heads of `SampleGraph` |
|
271 | /// | |
|
272 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |
|
141 | 273 | fn full_disco() -> PartialDiscovery<SampleGraph> { |
|
142 | PartialDiscovery::new(SampleGraph, vec![10, 11, 12, 13]) | |
|
274 | PartialDiscovery::new_with_seed( | |
|
275 | SampleGraph, | |
|
276 | vec![10, 11, 12, 13], | |
|
277 | [0; 16], | |
|
278 | ) | |
|
279 | } | |
|
280 | ||
|
281 | /// A PartialDiscovery as for pushing the 12 head of `SampleGraph` | |
|
282 | /// | |
|
283 | /// To avoid actual randomness in tests, we give it a fixed random seed. | |
|
284 | fn disco12() -> PartialDiscovery<SampleGraph> { | |
|
285 | PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16]) | |
|
143 | 286 | } |
|
144 | 287 | |
|
145 | 288 | fn sorted_undecided( |
|
146 | 289 | disco: &PartialDiscovery<SampleGraph>, |
|
147 | 290 | ) -> Vec<Revision> { |
|
148 | 291 | let mut as_vec: Vec<Revision> = |
|
149 | 292 | disco.undecided.as_ref().unwrap().iter().cloned().collect(); |
|
150 | 293 | as_vec.sort(); |
|
151 | 294 | as_vec |
|
152 | 295 | } |
|
153 | 296 | |
|
154 | 297 | fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> { |
|
155 | 298 | let mut as_vec: Vec<Revision> = |
|
156 | 299 | disco.missing.iter().cloned().collect(); |
|
157 | 300 | as_vec.sort(); |
|
158 | 301 | as_vec |
|
159 | 302 | } |
|
160 | 303 | |
|
161 | 304 | fn sorted_common_heads( |
|
162 | 305 | disco: &PartialDiscovery<SampleGraph>, |
|
163 | 306 | ) -> Result<Vec<Revision>, GraphError> { |
|
164 | 307 | let mut as_vec: Vec<Revision> = |
|
165 | 308 | disco.common_heads()?.iter().cloned().collect(); |
|
166 | 309 | as_vec.sort(); |
|
167 | 310 | Ok(as_vec) |
|
168 | 311 | } |
|
169 | 312 | |
|
170 | 313 | #[test] |
|
171 | 314 | fn test_add_common_get_undecided() -> Result<(), GraphError> { |
|
172 | 315 | let mut disco = full_disco(); |
|
173 | 316 | assert_eq!(disco.undecided, None); |
|
174 | 317 | assert!(!disco.has_info()); |
|
175 | 318 | assert_eq!(disco.stats().undecided, None); |
|
176 | 319 | |
|
177 | 320 | disco.add_common_revisions(vec![11, 12])?; |
|
178 | 321 | assert!(disco.has_info()); |
|
179 | 322 | assert!(!disco.is_complete()); |
|
180 | 323 | assert!(disco.missing.is_empty()); |
|
181 | 324 | |
|
182 | 325 | // add_common_revisions did not trigger a premature computation |
|
183 | 326 | // of `undecided`, let's check that and ask for them |
|
184 | 327 | assert_eq!(disco.undecided, None); |
|
185 | 328 | disco.ensure_undecided()?; |
|
186 | 329 | assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]); |
|
187 | 330 | assert_eq!(disco.stats().undecided, Some(4)); |
|
188 | 331 | Ok(()) |
|
189 | 332 | } |
|
190 | 333 | |
|
191 | 334 | /// in this test, we pretend that our peer misses exactly (8+10):: |
|
192 | 335 | /// and we're comparing all our repo to it (as in a bare push) |
|
193 | 336 | #[test] |
|
194 | 337 | fn test_discovery() -> Result<(), GraphError> { |
|
195 | 338 | let mut disco = full_disco(); |
|
196 | 339 | disco.add_common_revisions(vec![11, 12])?; |
|
197 | 340 | disco.add_missing_revisions(vec![8, 10])?; |
|
198 | 341 | assert_eq!(sorted_undecided(&disco), vec![5]); |
|
199 | 342 | assert_eq!(sorted_missing(&disco), vec![8, 10, 13]); |
|
200 | 343 | assert!(!disco.is_complete()); |
|
201 | 344 | |
|
202 | 345 | disco.add_common_revisions(vec![5])?; |
|
203 | 346 | assert_eq!(sorted_undecided(&disco), vec![]); |
|
204 | 347 | assert_eq!(sorted_missing(&disco), vec![8, 10, 13]); |
|
205 | 348 | assert!(disco.is_complete()); |
|
206 | 349 | assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]); |
|
207 | 350 | Ok(()) |
|
208 | 351 | } |
|
352 | ||
|
353 | #[test] | |
|
354 | fn test_limit_sample_no_need_to() { | |
|
355 | let sample = vec![1, 2, 3, 4]; | |
|
356 | assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]); | |
|
209 | 357 | } |
|
358 | ||
|
359 | #[test] | |
|
360 | fn test_limit_sample_less_than_half() { | |
|
361 | assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![4, 2]); | |
|
362 | } | |
|
363 | ||
|
364 | #[test] | |
|
365 | fn test_limit_sample_more_than_half() { | |
|
366 | assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![3, 2]); | |
|
367 | } | |
|
368 | ||
|
369 | #[test] | |
|
370 | fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> { | |
|
371 | let mut disco = full_disco(); | |
|
372 | disco.undecided = Some((1..=13).collect()); | |
|
373 | ||
|
374 | let mut sample_vec = disco.take_quick_sample(vec![], 4)?; | |
|
375 | sample_vec.sort(); | |
|
376 | assert_eq!(sample_vec, vec![10, 11, 12, 13]); | |
|
377 | Ok(()) | |
|
378 | } | |
|
379 | ||
|
380 | #[test] | |
|
381 | fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> { | |
|
382 | let mut disco = disco12(); | |
|
383 | disco.ensure_undecided()?; | |
|
384 | ||
|
385 | let mut sample_vec = disco.take_quick_sample(vec![12], 4)?; | |
|
386 | sample_vec.sort(); | |
|
387 | // r12's only parent is r9, whose unique grand-parent through the | |
|
388 | // diamond shape is r4. This ends there because the distance from r4 | |
|
389 | // to the root is only 3. | |
|
390 | assert_eq!(sample_vec, vec![4, 9, 12]); | |
|
391 | Ok(()) | |
|
392 | } | |
|
393 | } |
General Comments 0
You need to be logged in to leave comments.
Login now