##// END OF EJS Templates
rust: translation of missingancestors...
Georges Racinet -
r40995:d097dd0a default
parent child Browse files
Show More
@@ -8,6 +8,7 b''
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9
9
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 use std::cmp::max;
11 use std::collections::{BinaryHeap, HashSet};
12 use std::collections::{BinaryHeap, HashSet};
12
13
13 /// Iterator over the ancestors of a given list of revisions
14 /// Iterator over the ancestors of a given list of revisions
@@ -24,6 +25,11 b' pub struct AncestorsIterator<G: Graph> {'
24 stoprev: Revision,
25 stoprev: Revision,
25 }
26 }
26
27
28 pub struct MissingAncestors<G: Graph> {
29 graph: G,
30 bases: HashSet<Revision>,
31 }
32
27 impl<G: Graph> AncestorsIterator<G> {
33 impl<G: Graph> AncestorsIterator<G> {
28 /// Constructor.
34 /// Constructor.
29 ///
35 ///
@@ -131,10 +137,187 b' impl<G: Graph> Iterator for AncestorsIte'
131 }
137 }
132 }
138 }
133
139
140 impl<G: Graph> MissingAncestors<G> {
141 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
142 let mut bases: HashSet<Revision> = bases.into_iter().collect();
143 if bases.is_empty() {
144 bases.insert(NULL_REVISION);
145 }
146 MissingAncestors { graph, bases }
147 }
148
149 pub fn has_bases(&self) -> bool {
150 self.bases.iter().any(|&b| b != NULL_REVISION)
151 }
152
153 /// Return a reference to current bases.
154 ///
155 /// This is useful in unit tests, but also setdiscovery.py does
156 /// read the bases attribute of a ancestor.missingancestors instance.
157 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
158 &self.bases
159 }
160
161 pub fn add_bases(
162 &mut self,
163 new_bases: impl IntoIterator<Item = Revision>,
164 ) {
165 self.bases.extend(new_bases);
166 }
167
168 /// Remove all ancestors of self.bases from the revs set (in place)
169 pub fn remove_ancestors_from(
170 &mut self,
171 revs: &mut HashSet<Revision>,
172 ) -> Result<(), GraphError> {
173 revs.retain(|r| !self.bases.contains(r));
174 // the null revision is always an ancestor
175 revs.remove(&NULL_REVISION);
176 if revs.is_empty() {
177 return Ok(());
178 }
179 // anything in revs > start is definitely not an ancestor of bases
180 // revs <= start need to be investigated
181 // TODO optim: if a missingancestors is to be used several times,
182 // we shouldn't need to iterate each time on bases
183 let start = match self.bases.iter().cloned().max() {
184 Some(m) => m,
185 None => {
186 // bases is empty (shouldn't happen, but let's be safe)
187 return Ok(());
188 }
189 };
190 // whatever happens, we'll keep at least keepcount of them
191 // knowing this gives us a earlier stop condition than
192 // going all the way to the root
193 let keepcount = revs.iter().filter(|r| **r > start).count();
194
195 let mut curr = start;
196 while curr != NULL_REVISION && revs.len() > keepcount {
197 if self.bases.contains(&curr) {
198 revs.remove(&curr);
199 self.add_parents(curr)?;
200 }
201 curr -= 1;
202 }
203 Ok(())
204 }
205
206 /// Add rev's parents to self.bases
207 #[inline]
208 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
209 // No need to bother the set with inserting NULL_REVISION over and
210 // over
211 for p in self.graph.parents(rev)?.iter().cloned() {
212 if p != NULL_REVISION {
213 self.bases.insert(p);
214 }
215 }
216 Ok(())
217 }
218
219 /// Return all the ancestors of revs that are not ancestors of self.bases
220 ///
221 /// This may include elements from revs.
222 ///
223 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
224 /// revision number order, which is a topological order.
225 pub fn missing_ancestors(
226 &mut self,
227 revs: impl IntoIterator<Item = Revision>,
228 ) -> Result<Vec<Revision>, GraphError> {
229 // just for convenience and comparison with Python version
230 let bases_visit = &mut self.bases;
231 let mut revs: HashSet<Revision> = revs
232 .into_iter()
233 .filter(|r| !bases_visit.contains(r))
234 .collect();
235 let revs_visit = &mut revs;
236 let mut both_visit: HashSet<Revision> =
237 revs_visit.intersection(&bases_visit).cloned().collect();
238 if revs_visit.is_empty() {
239 return Ok(Vec::new());
240 }
241
242 let max_bases =
243 bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
244 let max_revs =
245 revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
246 let start = max(max_bases, max_revs);
247
248 // TODO heuristics for with_capacity()?
249 let mut missing: Vec<Revision> = Vec::new();
250 for curr in (0..=start).map(|i| start - i) {
251 if revs_visit.is_empty() {
252 break;
253 }
254 if both_visit.contains(&curr) {
255 // curr's parents might have made it into revs_visit through
256 // another path
257 // TODO optim: Rust's HashSet.remove returns a boolean telling
258 // if it happened. This will spare us one set lookup
259 both_visit.remove(&curr);
260 for p in self.graph.parents(curr)?.iter().cloned() {
261 if p == NULL_REVISION {
262 continue;
263 }
264 revs_visit.remove(&p);
265 bases_visit.insert(p);
266 both_visit.insert(p);
267 }
268 continue;
269 }
270 // in Rust, one can't just use mutable variables assignation
271 // to be more straightforward. Instead of Python's
272 // thisvisit and othervisit, we'll differentiate with a boolean
273 let this_visit_is_revs = {
274 if revs_visit.remove(&curr) {
275 missing.push(curr);
276 true
277 } else if bases_visit.contains(&curr) {
278 false
279 } else {
280 // not an ancestor of revs or bases: ignore
281 continue;
282 }
283 };
284
285 for p in self.graph.parents(curr)?.iter().cloned() {
286 if p == NULL_REVISION {
287 continue;
288 }
289 let in_other_visit = if this_visit_is_revs {
290 bases_visit.contains(&p)
291 } else {
292 revs_visit.contains(&p)
293 };
294 if in_other_visit || both_visit.contains(&p) {
295 // p is implicitely in this_visit.
296 // This means p is or should be in bothvisit
297 // TODO optim: hence if bothvisit, we look up twice
298 revs_visit.remove(&p);
299 bases_visit.insert(p);
300 both_visit.insert(p);
301 } else {
302 // visit later
303 if this_visit_is_revs {
304 revs_visit.insert(p);
305 } else {
306 bases_visit.insert(p);
307 }
308 }
309 }
310 }
311 missing.reverse();
312 Ok(missing)
313 }
314 }
315
134 #[cfg(test)]
316 #[cfg(test)]
135 mod tests {
317 mod tests {
136
318
137 use super::*;
319 use super::*;
320 use std::iter::FromIterator;
138
321
139 #[derive(Clone, Debug)]
322 #[derive(Clone, Debug)]
140 struct Stub;
323 struct Stub;
@@ -252,4 +435,211 b' mod tests {'
252 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
435 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
253 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
436 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
254 }
437 }
438
439 #[test]
440 /// Test constructor, add/get bases
441 fn test_missing_bases() {
442 let mut missing_ancestors =
443 MissingAncestors::new(Stub, [5, 3, 1, 3].iter().cloned());
444 let mut as_vec: Vec<Revision> =
445 missing_ancestors.get_bases().iter().cloned().collect();
446 as_vec.sort();
447 assert_eq!(as_vec, [1, 3, 5]);
448
449 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
450 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
451 as_vec.sort();
452 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
453 }
454
455 fn assert_missing_remove(
456 bases: &[Revision],
457 revs: &[Revision],
458 expected: &[Revision],
459 ) {
460 let mut missing_ancestors =
461 MissingAncestors::new(Stub, bases.iter().cloned());
462 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
463 missing_ancestors
464 .remove_ancestors_from(&mut revset)
465 .unwrap();
466 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
467 as_vec.sort();
468 assert_eq!(as_vec.as_slice(), expected);
469 }
470
471 #[test]
472 fn test_missing_remove() {
473 assert_missing_remove(
474 &[1, 2, 3, 4, 7],
475 Vec::from_iter(1..10).as_slice(),
476 &[5, 6, 8, 9],
477 );
478 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
479 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
480 }
481
482 fn assert_missing_ancestors(
483 bases: &[Revision],
484 revs: &[Revision],
485 expected: &[Revision],
486 ) {
487 let mut missing_ancestors =
488 MissingAncestors::new(Stub, bases.iter().cloned());
489 let missing = missing_ancestors
490 .missing_ancestors(revs.iter().cloned())
491 .unwrap();
492 assert_eq!(missing.as_slice(), expected);
493 }
494
495 #[test]
496 fn test_missing_ancestors() {
497 // examples taken from test-ancestors.py by having it run
498 // on the same graph (both naive and fast Python algs)
499 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
500 assert_missing_ancestors(&[11], &[10], &[5, 10]);
501 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
502 }
503
504 // A Graph represented by a vector whose indices are revisions
505 // and values are parents of the revisions
506 type VecGraph = Vec<[Revision; 2]>;
507
508 impl Graph for VecGraph {
509 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
510 Ok(self[rev as usize])
511 }
512 }
513
514 /// An interesting case found by a random generator similar to
515 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
516 /// failed this, yet none of the integration tests of the whole suite
517 /// catched it.
518 #[test]
519 fn test_remove_ancestors_from_case1() {
520 let graph: VecGraph = vec![
521 [NULL_REVISION, NULL_REVISION],
522 [0, NULL_REVISION],
523 [1, 0],
524 [2, 1],
525 [3, NULL_REVISION],
526 [4, NULL_REVISION],
527 [5, 1],
528 [2, NULL_REVISION],
529 [7, NULL_REVISION],
530 [8, NULL_REVISION],
531 [9, NULL_REVISION],
532 [10, 1],
533 [3, NULL_REVISION],
534 [12, NULL_REVISION],
535 [13, NULL_REVISION],
536 [14, NULL_REVISION],
537 [4, NULL_REVISION],
538 [16, NULL_REVISION],
539 [17, NULL_REVISION],
540 [18, NULL_REVISION],
541 [19, 11],
542 [20, NULL_REVISION],
543 [21, NULL_REVISION],
544 [22, NULL_REVISION],
545 [23, NULL_REVISION],
546 [2, NULL_REVISION],
547 [3, NULL_REVISION],
548 [26, 24],
549 [27, NULL_REVISION],
550 [28, NULL_REVISION],
551 [12, NULL_REVISION],
552 [1, NULL_REVISION],
553 [1, 9],
554 [32, NULL_REVISION],
555 [33, NULL_REVISION],
556 [34, 31],
557 [35, NULL_REVISION],
558 [36, 26],
559 [37, NULL_REVISION],
560 [38, NULL_REVISION],
561 [39, NULL_REVISION],
562 [40, NULL_REVISION],
563 [41, NULL_REVISION],
564 [42, 26],
565 [0, NULL_REVISION],
566 [44, NULL_REVISION],
567 [45, 4],
568 [40, NULL_REVISION],
569 [47, NULL_REVISION],
570 [36, 0],
571 [49, NULL_REVISION],
572 [NULL_REVISION, NULL_REVISION],
573 [51, NULL_REVISION],
574 [52, NULL_REVISION],
575 [53, NULL_REVISION],
576 [14, NULL_REVISION],
577 [55, NULL_REVISION],
578 [15, NULL_REVISION],
579 [23, NULL_REVISION],
580 [58, NULL_REVISION],
581 [59, NULL_REVISION],
582 [2, NULL_REVISION],
583 [61, 59],
584 [62, NULL_REVISION],
585 [63, NULL_REVISION],
586 [NULL_REVISION, NULL_REVISION],
587 [65, NULL_REVISION],
588 [66, NULL_REVISION],
589 [67, NULL_REVISION],
590 [68, NULL_REVISION],
591 [37, 28],
592 [69, 25],
593 [71, NULL_REVISION],
594 [72, NULL_REVISION],
595 [50, 2],
596 [74, NULL_REVISION],
597 [12, NULL_REVISION],
598 [18, NULL_REVISION],
599 [77, NULL_REVISION],
600 [78, NULL_REVISION],
601 [79, NULL_REVISION],
602 [43, 33],
603 [81, NULL_REVISION],
604 [82, NULL_REVISION],
605 [83, NULL_REVISION],
606 [84, 45],
607 [85, NULL_REVISION],
608 [86, NULL_REVISION],
609 [NULL_REVISION, NULL_REVISION],
610 [88, NULL_REVISION],
611 [NULL_REVISION, NULL_REVISION],
612 [76, 83],
613 [44, NULL_REVISION],
614 [92, NULL_REVISION],
615 [93, NULL_REVISION],
616 [9, NULL_REVISION],
617 [95, 67],
618 [96, NULL_REVISION],
619 [97, NULL_REVISION],
620 [NULL_REVISION, NULL_REVISION],
621 ];
622 let problem_rev = 28 as Revision;
623 let problem_base = 70 as Revision;
624 // making the problem obvious: problem_rev is a parent of problem_base
625 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
626
627 let mut missing_ancestors: MissingAncestors<VecGraph> =
628 MissingAncestors::new(
629 graph,
630 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
631 .iter()
632 .cloned(),
633 );
634 assert!(missing_ancestors.bases.contains(&problem_base));
635
636 let mut revs: HashSet<Revision> =
637 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
638 .iter()
639 .cloned()
640 .collect();
641 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
642 assert!(!revs.contains(&problem_rev));
643 }
644
255 }
645 }
@@ -3,7 +3,7 b''
3 // This software may be used and distributed according to the terms of the
3 // This software may be used and distributed according to the terms of the
4 // GNU General Public License version 2 or any later version.
4 // GNU General Public License version 2 or any later version.
5 mod ancestors;
5 mod ancestors;
6 pub use ancestors::AncestorsIterator;
6 pub use ancestors::{AncestorsIterator, MissingAncestors};
7
7
8 /// Mercurial revision numbers
8 /// Mercurial revision numbers
9 ///
9 ///
@@ -15,6 +15,9 b' pub const NULL_REVISION: Revision = -1;'
15
15
16 /// The simplest expression of what we need of Mercurial DAGs.
16 /// The simplest expression of what we need of Mercurial DAGs.
17 pub trait Graph {
17 pub trait Graph {
18 /// Return the two parents of the given `Revision`.
19 ///
20 /// Each of the parents can be independently `NULL_REVISION`
18 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
21 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
19 }
22 }
20
23
@@ -182,6 +182,64 b' graph = {0: [-1, -1], 1: [0, -1], 2: [1,'
182 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7],
182 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7],
183 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]}
183 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]}
184
184
185 def test_missingancestors_explicit():
186 """A few explicit cases, easier to check for catching errors in refactors.
187
188 The bigger graph at the end has been produced by the random generator
189 above, and we have some evidence that the other tests don't cover it.
190 """
191 for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))),
192 ({10}, set({11, 12, 13, 14})),
193 ({7}, set({1, 2, 3, 4, 5})),
194 )):
195 print("%% removeancestorsfrom(), example %d" % (i + 1))
196 missanc = ancestor.incrementalmissingancestors(graph.get, bases)
197 missanc.removeancestorsfrom(revs)
198 print("remaining (sorted): %s" % sorted(list(revs)))
199
200 for i, (bases, revs) in enumerate((({10}, {11}),
201 ({11}, {10}),
202 ({7}, {9, 11}),
203 )):
204 print("%% missingancestors(), example %d" % (i + 1))
205 missanc = ancestor.incrementalmissingancestors(graph.get, bases)
206 print("return %s" % missanc.missingancestors(revs))
207
208 print("% removeancestorsfrom(), bigger graph")
209 vecgraph = [
210 [-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1],
211 [2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1],
212 [13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1],
213 [19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1],
214 [3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9],
215 [32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1],
216 [38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1],
217 [44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1],
218 [-1, -1], [51, -1], [52, -1], [53, -1], [14, -1],
219 [55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1],
220 [61, 59], [62, -1], [63, -1], [-1, -1], [65, -1],
221 [66, -1], [67, -1], [68, -1], [37, 28], [69, 25],
222 [71, -1], [72, -1], [50, 2], [74, -1], [12, -1],
223 [18, -1], [77, -1], [78, -1], [79, -1], [43, 33],
224 [81, -1], [82, -1], [83, -1], [84, 45], [85, -1],
225 [86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1],
226 [92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1],
227 [-1, -1]]
228 problem_rev = 28
229 problem_base = 70
230 # problem_rev is a parent of problem_base, but a faulty implementation
231 # could forget to remove it.
232 bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6}
233 if problem_rev not in vecgraph[problem_base] or problem_base not in bases:
234 print("Conditions have changed")
235 missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases)
236 revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44}
237 missanc.removeancestorsfrom(revs)
238 if 28 in revs:
239 print("Failed!")
240 else:
241 print("Ok")
242
185 def genlazyancestors(revs, stoprev=0, inclusive=False):
243 def genlazyancestors(revs, stoprev=0, inclusive=False):
186 print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
244 print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
187 (revs, stoprev, inclusive)))
245 (revs, stoprev, inclusive)))
@@ -276,6 +334,7 b' def main():'
276 seed = long(time.time() * 1000)
334 seed = long(time.time() * 1000)
277
335
278 rng = random.Random(seed)
336 rng = random.Random(seed)
337 test_missingancestors_explicit()
279 test_missingancestors(seed, rng)
338 test_missingancestors(seed, rng)
280 test_lazyancestors()
339 test_lazyancestors()
281 test_gca()
340 test_gca()
@@ -1,3 +1,17 b''
1 % removeancestorsfrom(), example 1
2 remaining (sorted): [5, 6, 8, 9]
3 % removeancestorsfrom(), example 2
4 remaining (sorted): [11, 12, 13, 14]
5 % removeancestorsfrom(), example 3
6 remaining (sorted): [3, 5]
7 % missingancestors(), example 1
8 return [3, 7, 11]
9 % missingancestors(), example 2
10 return [5, 10]
11 % missingancestors(), example 3
12 return [3, 6, 9, 11]
13 % removeancestorsfrom(), bigger graph
14 Ok
1 % lazy ancestor set for [], stoprev = 0, inclusive = False
15 % lazy ancestor set for [], stoprev = 0, inclusive = False
2 membership: []
16 membership: []
3 iteration: []
17 iteration: []
General Comments 0
You need to be logged in to leave comments. Login now