##// END OF EJS Templates
rust: itering less on MissingAncestors.bases for max()...
Georges Racinet -
r41866:9060af28 default
parent child Browse files
Show More
@@ -1,766 +1,788 b''
1 1 // ancestors.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9 9
10 10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 11 use std::cmp::max;
12 12 use std::collections::{BinaryHeap, HashSet};
13 13 use crate::dagops;
14 14
15 15 /// Iterator over the ancestors of a given list of revisions
16 16 /// This is a generic type, defined and implemented for any Graph, so that
17 17 /// it's easy to
18 18 ///
19 19 /// - unit test in pure Rust
20 20 /// - bind to main Mercurial code, potentially in several ways and have these
21 21 /// bindings evolve over time
22 22 pub struct AncestorsIterator<G: Graph> {
23 23 graph: G,
24 24 visit: BinaryHeap<Revision>,
25 25 seen: HashSet<Revision>,
26 26 stoprev: Revision,
27 27 }
28 28
29 29 /// Lazy ancestors set, backed by AncestorsIterator
30 30 pub struct LazyAncestors<G: Graph + Clone> {
31 31 graph: G,
32 32 containsiter: AncestorsIterator<G>,
33 33 initrevs: Vec<Revision>,
34 34 stoprev: Revision,
35 35 inclusive: bool,
36 36 }
37 37
38 38 pub struct MissingAncestors<G: Graph> {
39 39 graph: G,
40 40 bases: HashSet<Revision>,
41 max_base: Revision,
41 42 }
42 43
43 44 impl<G: Graph> AncestorsIterator<G> {
44 45 /// Constructor.
45 46 ///
46 47 /// if `inclusive` is true, then the init revisions are emitted in
47 48 /// particular, otherwise iteration starts from their parents.
48 49 pub fn new(
49 50 graph: G,
50 51 initrevs: impl IntoIterator<Item = Revision>,
51 52 stoprev: Revision,
52 53 inclusive: bool,
53 54 ) -> Result<Self, GraphError> {
54 55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
55 56 if inclusive {
56 57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
57 58 let seen = visit.iter().map(|&x| x).collect();
58 59 return Ok(AncestorsIterator {
59 60 visit: visit,
60 61 seen: seen,
61 62 stoprev: stoprev,
62 63 graph: graph,
63 64 });
64 65 }
65 66 let mut this = AncestorsIterator {
66 67 visit: BinaryHeap::new(),
67 68 seen: HashSet::new(),
68 69 stoprev: stoprev,
69 70 graph: graph,
70 71 };
71 72 this.seen.insert(NULL_REVISION);
72 73 for rev in filtered_initrevs {
73 74 for parent in this.graph.parents(rev)?.iter().cloned() {
74 75 this.conditionally_push_rev(parent);
75 76 }
76 77 }
77 78 Ok(this)
78 79 }
79 80
80 81 #[inline]
81 82 fn conditionally_push_rev(&mut self, rev: Revision) {
82 83 if self.stoprev <= rev && self.seen.insert(rev) {
83 84 self.visit.push(rev);
84 85 }
85 86 }
86 87
87 88 /// Consumes partially the iterator to tell if the given target
88 89 /// revision
89 90 /// is in the ancestors it emits.
90 91 /// This is meant for iterators actually dedicated to that kind of
91 92 /// purpose
92 93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
93 94 if self.seen.contains(&target) && target != NULL_REVISION {
94 95 return Ok(true);
95 96 }
96 97 for item in self {
97 98 let rev = item?;
98 99 if rev == target {
99 100 return Ok(true);
100 101 }
101 102 if rev < target {
102 103 return Ok(false);
103 104 }
104 105 }
105 106 Ok(false)
106 107 }
107 108
108 109 pub fn peek(&self) -> Option<Revision> {
109 110 self.visit.peek().map(|&r| r)
110 111 }
111 112
112 113 /// Tell if the iterator is about an empty set
113 114 ///
114 115 /// The result does not depend whether the iterator has been consumed
115 116 /// or not.
116 117 /// This is mostly meant for iterators backing a lazy ancestors set
117 118 pub fn is_empty(&self) -> bool {
118 119 if self.visit.len() > 0 {
119 120 return false;
120 121 }
121 122 if self.seen.len() > 1 {
122 123 return false;
123 124 }
124 125 // at this point, the seen set is at most a singleton.
125 126 // If not `self.inclusive`, it's still possible that it has only
126 127 // the null revision
127 128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
128 129 }
129 130 }
130 131
131 132 /// Main implementation for the iterator
132 133 ///
133 134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
134 135 /// with a few non crucial differences:
135 136 ///
136 137 /// - there's no filtering of invalid parent revisions. Actually, it should be
137 138 /// consistent and more efficient to filter them from the end caller.
138 139 /// - we don't have the optimization for adjacent revisions (i.e., the case
139 140 /// where `p1 == rev - 1`), because it amounts to update the first element of
140 141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
141 142 /// - we save a few pushes by comparing with `stoprev` before pushing
142 143 impl<G: Graph> Iterator for AncestorsIterator<G> {
143 144 type Item = Result<Revision, GraphError>;
144 145
145 146 fn next(&mut self) -> Option<Self::Item> {
146 147 let current = match self.visit.peek() {
147 148 None => {
148 149 return None;
149 150 }
150 151 Some(c) => *c,
151 152 };
152 153 let [p1, p2] = match self.graph.parents(current) {
153 154 Ok(ps) => ps,
154 155 Err(e) => return Some(Err(e)),
155 156 };
156 157 if p1 < self.stoprev || !self.seen.insert(p1) {
157 158 self.visit.pop();
158 159 } else {
159 160 *(self.visit.peek_mut().unwrap()) = p1;
160 161 };
161 162
162 163 self.conditionally_push_rev(p2);
163 164 Some(Ok(current))
164 165 }
165 166 }
166 167
167 168 impl<G: Graph + Clone> LazyAncestors<G> {
168 169 pub fn new(
169 170 graph: G,
170 171 initrevs: impl IntoIterator<Item = Revision>,
171 172 stoprev: Revision,
172 173 inclusive: bool,
173 174 ) -> Result<Self, GraphError> {
174 175 let v: Vec<Revision> = initrevs.into_iter().collect();
175 176 Ok(LazyAncestors {
176 177 graph: graph.clone(),
177 178 containsiter: AncestorsIterator::new(
178 179 graph,
179 180 v.iter().cloned(),
180 181 stoprev,
181 182 inclusive,
182 183 )?,
183 184 initrevs: v,
184 185 stoprev: stoprev,
185 186 inclusive: inclusive,
186 187 })
187 188 }
188 189
189 190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
190 191 self.containsiter.contains(rev)
191 192 }
192 193
193 194 pub fn is_empty(&self) -> bool {
194 195 self.containsiter.is_empty()
195 196 }
196 197
197 198 pub fn iter(&self) -> AncestorsIterator<G> {
198 199 // the arguments being the same as for self.containsiter, we know
199 200 // for sure that AncestorsIterator constructor can't fail
200 201 AncestorsIterator::new(
201 202 self.graph.clone(),
202 203 self.initrevs.iter().cloned(),
203 204 self.stoprev,
204 205 self.inclusive,
205 206 )
206 207 .unwrap()
207 208 }
208 209 }
209 210
210 211 impl<G: Graph> MissingAncestors<G> {
211 212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
212 MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
213 let mut created = MissingAncestors {
214 graph: graph,
215 bases: HashSet::new(),
216 max_base: NULL_REVISION,
217 };
218 created.add_bases(bases);
219 created
213 220 }
214 221
215 222 pub fn has_bases(&self) -> bool {
216 223 !self.bases.is_empty()
217 224 }
218 225
219 226 /// Return a reference to current bases.
220 227 ///
221 228 /// This is useful in unit tests, but also setdiscovery.py does
222 229 /// read the bases attribute of a ancestor.missingancestors instance.
223 230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
224 231 &self.bases
225 232 }
226 233
227 234 /// Computes the relative heads of current bases.
228 235 ///
229 236 /// The object is still usable after this.
230 237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
231 238 dagops::heads(&self.graph, self.bases.iter())
232 239 }
233 240
234 241 /// Consumes the object and returns the relative heads of its bases.
235 pub fn into_bases_heads(mut self) -> Result<HashSet<Revision>, GraphError> {
242 pub fn into_bases_heads(
243 mut self,
244 ) -> Result<HashSet<Revision>, GraphError> {
236 245 dagops::retain_heads(&self.graph, &mut self.bases)?;
237 246 Ok(self.bases)
238 247 }
239 248
249 /// Add some revisions to `self.bases`
250 ///
251 /// Takes care of keeping `self.max_base` up to date.
240 252 pub fn add_bases(
241 253 &mut self,
242 254 new_bases: impl IntoIterator<Item = Revision>,
243 255 ) {
244 self.bases
245 .extend(new_bases.into_iter().filter(|&rev| rev != NULL_REVISION));
256 let mut max_base = self.max_base;
257 self.bases.extend(
258 new_bases
259 .into_iter()
260 .filter(|&rev| rev != NULL_REVISION)
261 .map(|r| {
262 if r > max_base {
263 max_base = r;
264 }
265 r
266 }),
267 );
268 self.max_base = max_base;
246 269 }
247 270
248 271 /// Remove all ancestors of self.bases from the revs set (in place)
249 272 pub fn remove_ancestors_from(
250 273 &mut self,
251 274 revs: &mut HashSet<Revision>,
252 275 ) -> Result<(), GraphError> {
253 276 revs.retain(|r| !self.bases.contains(r));
254 277 // the null revision is always an ancestor. Logically speaking
255 278 // it's debatable in case bases is empty, but the Python
256 279 // implementation always adds NULL_REVISION to bases, making it
257 280 // unconditionnally true.
258 281 revs.remove(&NULL_REVISION);
259 282 if revs.is_empty() {
260 283 return Ok(());
261 284 }
262 285 // anything in revs > start is definitely not an ancestor of bases
263 286 // revs <= start need to be investigated
264 // TODO optim: if a missingancestors is to be used several times,
265 // we shouldn't need to iterate each time on bases
266 let start = match self.bases.iter().cloned().max() {
267 Some(m) => m,
268 None => { // self.bases is empty
269 return Ok(());
270 }
271 };
287 if self.max_base == NULL_REVISION {
288 return Ok(());
289 }
290
272 291 // whatever happens, we'll keep at least keepcount of them
273 292 // knowing this gives us a earlier stop condition than
274 293 // going all the way to the root
275 let keepcount = revs.iter().filter(|r| **r > start).count();
294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
276 295
277 let mut curr = start;
296 let mut curr = self.max_base;
278 297 while curr != NULL_REVISION && revs.len() > keepcount {
279 298 if self.bases.contains(&curr) {
280 299 revs.remove(&curr);
281 300 self.add_parents(curr)?;
282 301 }
283 302 curr -= 1;
284 303 }
285 304 Ok(())
286 305 }
287 306
288 /// Add rev's parents to self.bases
307 /// Add the parents of `rev` to `self.bases`
308 ///
309 /// This has no effect on `self.max_base`
289 310 #[inline]
290 311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
291 // No need to bother the set with inserting NULL_REVISION over and
292 // over
312 if rev == NULL_REVISION {
313 return Ok(());
314 }
293 315 for p in self.graph.parents(rev)?.iter().cloned() {
316 // No need to bother the set with inserting NULL_REVISION over and
317 // over
294 318 if p != NULL_REVISION {
295 319 self.bases.insert(p);
296 320 }
297 321 }
298 322 Ok(())
299 323 }
300 324
301 325 /// Return all the ancestors of revs that are not ancestors of self.bases
302 326 ///
303 327 /// This may include elements from revs.
304 328 ///
305 329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
306 330 /// revision number order, which is a topological order.
307 331 pub fn missing_ancestors(
308 332 &mut self,
309 333 revs: impl IntoIterator<Item = Revision>,
310 334 ) -> Result<Vec<Revision>, GraphError> {
311 335 // just for convenience and comparison with Python version
312 336 let bases_visit = &mut self.bases;
313 337 let mut revs: HashSet<Revision> = revs
314 338 .into_iter()
315 339 .filter(|r| !bases_visit.contains(r))
316 340 .collect();
317 341 let revs_visit = &mut revs;
318 342 let mut both_visit: HashSet<Revision> =
319 343 revs_visit.intersection(&bases_visit).cloned().collect();
320 344 if revs_visit.is_empty() {
321 345 return Ok(Vec::new());
322 346 }
323
324 let max_bases =
325 bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
326 let max_revs =
327 revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
328 let start = max(max_bases, max_revs);
347 let max_revs = revs_visit.iter().cloned().max().unwrap();
348 let start = max(self.max_base, max_revs);
329 349
330 350 // TODO heuristics for with_capacity()?
331 351 let mut missing: Vec<Revision> = Vec::new();
332 352 for curr in (0..=start).rev() {
333 353 if revs_visit.is_empty() {
334 354 break;
335 355 }
336 356 if both_visit.remove(&curr) {
337 357 // curr's parents might have made it into revs_visit through
338 358 // another path
339 359 for p in self.graph.parents(curr)?.iter().cloned() {
340 360 if p == NULL_REVISION {
341 361 continue;
342 362 }
343 363 revs_visit.remove(&p);
344 364 bases_visit.insert(p);
345 365 both_visit.insert(p);
346 366 }
347 367 } else if revs_visit.remove(&curr) {
348 368 missing.push(curr);
349 369 for p in self.graph.parents(curr)?.iter().cloned() {
350 370 if p == NULL_REVISION {
351 371 continue;
352 372 }
353 373 if bases_visit.contains(&p) {
354 374 // p is already known to be an ancestor of revs_visit
355 375 revs_visit.remove(&p);
356 376 both_visit.insert(p);
357 377 } else if both_visit.contains(&p) {
358 378 // p should have been in bases_visit
359 379 revs_visit.remove(&p);
360 380 bases_visit.insert(p);
361 381 } else {
362 382 // visit later
363 383 revs_visit.insert(p);
364 384 }
365 385 }
366 386 } else if bases_visit.contains(&curr) {
367 387 for p in self.graph.parents(curr)?.iter().cloned() {
368 388 if p == NULL_REVISION {
369 389 continue;
370 390 }
371 391 if revs_visit.remove(&p) || both_visit.contains(&p) {
372 392 // p is an ancestor of bases_visit, and is implicitly
373 393 // in revs_visit, which means p is ::revs & ::bases.
374 394 bases_visit.insert(p);
375 395 both_visit.insert(p);
376 396 } else {
377 397 bases_visit.insert(p);
378 398 }
379 399 }
380 400 }
381 401 }
382 402 missing.reverse();
383 403 Ok(missing)
384 404 }
385 405 }
386 406
387 407 #[cfg(test)]
388 408 mod tests {
389 409
390 410 use super::*;
391 411 use crate::testing::{SampleGraph, VecGraph};
392 412 use std::iter::FromIterator;
393 413
394 414 fn list_ancestors<G: Graph>(
395 415 graph: G,
396 416 initrevs: Vec<Revision>,
397 417 stoprev: Revision,
398 418 inclusive: bool,
399 419 ) -> Vec<Revision> {
400 420 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
401 421 .unwrap()
402 422 .map(|res| res.unwrap())
403 423 .collect()
404 424 }
405 425
406 426 #[test]
407 427 /// Same tests as test-ancestor.py, without membership
408 428 /// (see also test-ancestor.py.out)
409 429 fn test_list_ancestor() {
410 430 assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
411 431 assert_eq!(
412 432 list_ancestors(SampleGraph, vec![11, 13], 0, false),
413 433 vec![8, 7, 4, 3, 2, 1, 0]
414 434 );
415 435 assert_eq!(
416 436 list_ancestors(SampleGraph, vec![1, 3], 0, false),
417 437 vec![1, 0]
418 438 );
419 439 assert_eq!(
420 440 list_ancestors(SampleGraph, vec![11, 13], 0, true),
421 441 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
422 442 );
423 443 assert_eq!(
424 444 list_ancestors(SampleGraph, vec![11, 13], 6, false),
425 445 vec![8, 7]
426 446 );
427 447 assert_eq!(
428 448 list_ancestors(SampleGraph, vec![11, 13], 6, true),
429 449 vec![13, 11, 8, 7]
430 450 );
431 451 assert_eq!(
432 452 list_ancestors(SampleGraph, vec![11, 13], 11, true),
433 453 vec![13, 11]
434 454 );
435 455 assert_eq!(
436 456 list_ancestors(SampleGraph, vec![11, 13], 12, true),
437 457 vec![13]
438 458 );
439 459 assert_eq!(
440 460 list_ancestors(SampleGraph, vec![10, 1], 0, true),
441 461 vec![10, 5, 4, 2, 1, 0]
442 462 );
443 463 }
444 464
445 465 #[test]
446 466 /// Corner case that's not directly in test-ancestors.py, but
447 467 /// that happens quite often, as demonstrated by running the whole
448 468 /// suite.
449 469 /// For instance, run tests/test-obsolete-checkheads.t
450 470 fn test_nullrev_input() {
451 471 let mut iter =
452 472 AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
453 473 assert_eq!(iter.next(), None)
454 474 }
455 475
456 476 #[test]
457 477 fn test_contains() {
458 478 let mut lazy =
459 479 AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
460 480 assert!(lazy.contains(1).unwrap());
461 481 assert!(!lazy.contains(3).unwrap());
462 482
463 483 let mut lazy =
464 484 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
465 485 assert!(!lazy.contains(NULL_REVISION).unwrap());
466 486 }
467 487
468 488 #[test]
469 489 fn test_peek() {
470 490 let mut iter =
471 491 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
472 492 // peek() gives us the next value
473 493 assert_eq!(iter.peek(), Some(10));
474 494 // but it's not been consumed
475 495 assert_eq!(iter.next(), Some(Ok(10)));
476 496 // and iteration resumes normally
477 497 assert_eq!(iter.next(), Some(Ok(5)));
478 498
479 499 // let's drain the iterator to test peek() at the end
480 500 while iter.next().is_some() {}
481 501 assert_eq!(iter.peek(), None);
482 502 }
483 503
484 504 #[test]
485 505 fn test_empty() {
486 506 let mut iter =
487 507 AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
488 508 assert!(!iter.is_empty());
489 509 while iter.next().is_some() {}
490 510 assert!(!iter.is_empty());
491 511
492 512 let iter =
493 513 AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
494 514 assert!(iter.is_empty());
495 515
496 516 // case where iter.seen == {NULL_REVISION}
497 517 let iter =
498 518 AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
499 519 assert!(iter.is_empty());
500 520 }
501 521
502 522 /// A corrupted Graph, supporting error handling tests
503 523 #[derive(Clone, Debug)]
504 524 struct Corrupted;
505 525
506 526 impl Graph for Corrupted {
507 527 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
508 528 match rev {
509 529 1 => Ok([0, -1]),
510 530 r => Err(GraphError::ParentOutOfRange(r)),
511 531 }
512 532 }
513 533 }
514 534
515 535 #[test]
516 536 fn test_initrev_out_of_range() {
517 537 // inclusive=false looks up initrev's parents right away
518 538 match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
519 539 Ok(_) => panic!("Should have been ParentOutOfRange"),
520 540 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
521 541 }
522 542 }
523 543
524 544 #[test]
525 545 fn test_next_out_of_range() {
526 546 // inclusive=false looks up initrev's parents right away
527 547 let mut iter =
528 548 AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
529 549 assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
530 550 }
531 551
532 552 #[test]
533 553 fn test_lazy_iter_contains() {
534 554 let mut lazy =
535 555 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();
536 556
537 557 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
538 558 // compare with iterator tests on the same initial revisions
539 559 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
540 560
541 561 // contains() results are correct, unaffected by the fact that
542 562 // we consumed entirely an iterator out of lazy
543 563 assert_eq!(lazy.contains(2), Ok(true));
544 564 assert_eq!(lazy.contains(9), Ok(false));
545 565 }
546 566
547 567 #[test]
548 568 fn test_lazy_contains_iter() {
549 569 let mut lazy =
550 570 LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]
551 571
552 572 assert_eq!(lazy.contains(2), Ok(true));
553 573 assert_eq!(lazy.contains(6), Ok(false));
554 574
555 575 // after consumption of 2 by the inner iterator, results stay
556 576 // consistent
557 577 assert_eq!(lazy.contains(2), Ok(true));
558 578 assert_eq!(lazy.contains(5), Ok(false));
559 579
560 580 // iter() still gives us a fresh iterator
561 581 let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
562 582 assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
563 583 }
564 584
565 585 #[test]
566 586 /// Test constructor, add/get bases and heads
567 587 fn test_missing_bases() -> Result<(), GraphError> {
568 588 let mut missing_ancestors =
569 589 MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
570 590 let mut as_vec: Vec<Revision> =
571 591 missing_ancestors.get_bases().iter().cloned().collect();
572 592 as_vec.sort();
573 593 assert_eq!(as_vec, [1, 3, 5]);
594 assert_eq!(missing_ancestors.max_base, 5);
574 595
575 596 missing_ancestors.add_bases([3, 7, 8].iter().cloned());
576 597 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
577 598 as_vec.sort();
578 599 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
600 assert_eq!(missing_ancestors.max_base, 8);
579 601
580 602 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
581 603 as_vec.sort();
582 604 assert_eq!(as_vec, [3, 5, 7, 8]);
583 605 Ok(())
584 606 }
585 607
586 608 fn assert_missing_remove(
587 609 bases: &[Revision],
588 610 revs: &[Revision],
589 611 expected: &[Revision],
590 612 ) {
591 613 let mut missing_ancestors =
592 614 MissingAncestors::new(SampleGraph, bases.iter().cloned());
593 615 let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
594 616 missing_ancestors
595 617 .remove_ancestors_from(&mut revset)
596 618 .unwrap();
597 619 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
598 620 as_vec.sort();
599 621 assert_eq!(as_vec.as_slice(), expected);
600 622 }
601 623
602 624 #[test]
603 625 fn test_missing_remove() {
604 626 assert_missing_remove(
605 627 &[1, 2, 3, 4, 7],
606 628 Vec::from_iter(1..10).as_slice(),
607 629 &[5, 6, 8, 9],
608 630 );
609 631 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
610 632 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
611 633 }
612 634
613 635 fn assert_missing_ancestors(
614 636 bases: &[Revision],
615 637 revs: &[Revision],
616 638 expected: &[Revision],
617 639 ) {
618 640 let mut missing_ancestors =
619 641 MissingAncestors::new(SampleGraph, bases.iter().cloned());
620 642 let missing = missing_ancestors
621 643 .missing_ancestors(revs.iter().cloned())
622 644 .unwrap();
623 645 assert_eq!(missing.as_slice(), expected);
624 646 }
625 647
626 648 #[test]
627 649 fn test_missing_ancestors() {
628 650 // examples taken from test-ancestors.py by having it run
629 651 // on the same graph (both naive and fast Python algs)
630 652 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
631 653 assert_missing_ancestors(&[11], &[10], &[5, 10]);
632 654 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
633 655 }
634 656
635 657 /// An interesting case found by a random generator similar to
636 658 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
637 659 /// failed this, yet none of the integration tests of the whole suite
638 660 /// catched it.
639 661 #[test]
640 662 fn test_remove_ancestors_from_case1() {
641 663 let graph: VecGraph = vec![
642 664 [NULL_REVISION, NULL_REVISION],
643 665 [0, NULL_REVISION],
644 666 [1, 0],
645 667 [2, 1],
646 668 [3, NULL_REVISION],
647 669 [4, NULL_REVISION],
648 670 [5, 1],
649 671 [2, NULL_REVISION],
650 672 [7, NULL_REVISION],
651 673 [8, NULL_REVISION],
652 674 [9, NULL_REVISION],
653 675 [10, 1],
654 676 [3, NULL_REVISION],
655 677 [12, NULL_REVISION],
656 678 [13, NULL_REVISION],
657 679 [14, NULL_REVISION],
658 680 [4, NULL_REVISION],
659 681 [16, NULL_REVISION],
660 682 [17, NULL_REVISION],
661 683 [18, NULL_REVISION],
662 684 [19, 11],
663 685 [20, NULL_REVISION],
664 686 [21, NULL_REVISION],
665 687 [22, NULL_REVISION],
666 688 [23, NULL_REVISION],
667 689 [2, NULL_REVISION],
668 690 [3, NULL_REVISION],
669 691 [26, 24],
670 692 [27, NULL_REVISION],
671 693 [28, NULL_REVISION],
672 694 [12, NULL_REVISION],
673 695 [1, NULL_REVISION],
674 696 [1, 9],
675 697 [32, NULL_REVISION],
676 698 [33, NULL_REVISION],
677 699 [34, 31],
678 700 [35, NULL_REVISION],
679 701 [36, 26],
680 702 [37, NULL_REVISION],
681 703 [38, NULL_REVISION],
682 704 [39, NULL_REVISION],
683 705 [40, NULL_REVISION],
684 706 [41, NULL_REVISION],
685 707 [42, 26],
686 708 [0, NULL_REVISION],
687 709 [44, NULL_REVISION],
688 710 [45, 4],
689 711 [40, NULL_REVISION],
690 712 [47, NULL_REVISION],
691 713 [36, 0],
692 714 [49, NULL_REVISION],
693 715 [NULL_REVISION, NULL_REVISION],
694 716 [51, NULL_REVISION],
695 717 [52, NULL_REVISION],
696 718 [53, NULL_REVISION],
697 719 [14, NULL_REVISION],
698 720 [55, NULL_REVISION],
699 721 [15, NULL_REVISION],
700 722 [23, NULL_REVISION],
701 723 [58, NULL_REVISION],
702 724 [59, NULL_REVISION],
703 725 [2, NULL_REVISION],
704 726 [61, 59],
705 727 [62, NULL_REVISION],
706 728 [63, NULL_REVISION],
707 729 [NULL_REVISION, NULL_REVISION],
708 730 [65, NULL_REVISION],
709 731 [66, NULL_REVISION],
710 732 [67, NULL_REVISION],
711 733 [68, NULL_REVISION],
712 734 [37, 28],
713 735 [69, 25],
714 736 [71, NULL_REVISION],
715 737 [72, NULL_REVISION],
716 738 [50, 2],
717 739 [74, NULL_REVISION],
718 740 [12, NULL_REVISION],
719 741 [18, NULL_REVISION],
720 742 [77, NULL_REVISION],
721 743 [78, NULL_REVISION],
722 744 [79, NULL_REVISION],
723 745 [43, 33],
724 746 [81, NULL_REVISION],
725 747 [82, NULL_REVISION],
726 748 [83, NULL_REVISION],
727 749 [84, 45],
728 750 [85, NULL_REVISION],
729 751 [86, NULL_REVISION],
730 752 [NULL_REVISION, NULL_REVISION],
731 753 [88, NULL_REVISION],
732 754 [NULL_REVISION, NULL_REVISION],
733 755 [76, 83],
734 756 [44, NULL_REVISION],
735 757 [92, NULL_REVISION],
736 758 [93, NULL_REVISION],
737 759 [9, NULL_REVISION],
738 760 [95, 67],
739 761 [96, NULL_REVISION],
740 762 [97, NULL_REVISION],
741 763 [NULL_REVISION, NULL_REVISION],
742 764 ];
743 765 let problem_rev = 28 as Revision;
744 766 let problem_base = 70 as Revision;
745 767 // making the problem obvious: problem_rev is a parent of problem_base
746 768 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
747 769
748 770 let mut missing_ancestors: MissingAncestors<VecGraph> =
749 771 MissingAncestors::new(
750 772 graph,
751 773 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
752 774 .iter()
753 775 .cloned(),
754 776 );
755 777 assert!(missing_ancestors.bases.contains(&problem_base));
756 778
757 779 let mut revs: HashSet<Revision> =
758 780 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
759 781 .iter()
760 782 .cloned()
761 783 .collect();
762 784 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
763 785 assert!(!revs.contains(&problem_rev));
764 786 }
765 787
766 788 }
@@ -1,136 +1,140 b''
1 1 // dagops.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Miscellaneous DAG operations
9 9 //!
10 10 //! # Terminology
11 11 //! - By *relative heads* of a collection of revision numbers (`Revision`),
12 12 //! we mean those revisions that have no children among the collection.
13 13 //! - Similarly *relative roots* of a collection of `Revision`, we mean
14 14 //! those whose parents, if any, don't belong to the collection.
15 15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 16 use std::collections::HashSet;
17 17
18 18 fn remove_parents(
19 19 graph: &impl Graph,
20 20 rev: Revision,
21 21 set: &mut HashSet<Revision>,
22 22 ) -> Result<(), GraphError> {
23 23 for parent in graph.parents(rev)?.iter() {
24 24 if *parent != NULL_REVISION {
25 25 set.remove(parent);
26 26 }
27 27 }
28 28 Ok(())
29 29 }
30 30
31 31 /// Relative heads out of some revisions, passed as an iterator.
32 32 ///
33 33 /// These heads are defined as those revisions that have no children
34 34 /// among those emitted by the iterator.
35 35 ///
36 36 /// # Performance notes
37 37 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
38 38 ///
39 39 /// This function takes an `Iterator` instead of `impl IntoIterator` to
40 40 /// guarantee that cloning the iterator doesn't result in cloning the full
41 41 /// construct it comes from.
42 42 pub fn heads<'a>(
43 43 graph: &impl Graph,
44 44 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
45 45 ) -> Result<HashSet<Revision>, GraphError> {
46 46 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
47 47 heads.remove(&NULL_REVISION);
48 48 for rev in iter_revs {
49 remove_parents(graph, *rev, &mut heads)?;
49 if *rev != NULL_REVISION {
50 remove_parents(graph, *rev, &mut heads)?;
51 }
50 52 }
51 53 Ok(heads)
52 54 }
53 55
54 56 /// Retain in `revs` only its relative heads.
55 57 ///
56 58 /// This is an in-place operation, so that control of the incoming
57 59 /// set is left to the caller.
58 60 /// - a direct Python binding would probably need to build its own `HashSet`
59 61 /// from an incoming iterable, even if its sole purpose is to extract the
60 62 /// heads.
61 63 /// - a Rust caller can decide whether cloning beforehand is appropriate
62 64 ///
63 65 /// # Performance notes
64 66 /// Internally, this function will store a full copy of `revs` in a `Vec`.
65 67 pub fn retain_heads(
66 68 graph: &impl Graph,
67 69 revs: &mut HashSet<Revision>,
68 70 ) -> Result<(), GraphError> {
69 71 revs.remove(&NULL_REVISION);
70 72 // we need to construct an iterable copy of revs to avoid itering while
71 73 // mutating
72 74 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
73 75 for rev in as_vec {
74 remove_parents(graph, rev, revs)?;
76 if rev != NULL_REVISION {
77 remove_parents(graph, rev, revs)?;
78 }
75 79 }
76 80 Ok(())
77 81 }
78 82
79 83 #[cfg(test)]
80 84 mod tests {
81 85
82 86 use super::*;
83 87 use crate::testing::SampleGraph;
84 88
85 89 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
86 90 fn retain_heads_sorted(
87 91 graph: &impl Graph,
88 92 revs: &[Revision],
89 93 ) -> Result<Vec<Revision>, GraphError> {
90 94 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
91 95 retain_heads(graph, &mut revs)?;
92 96 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
93 97 as_vec.sort();
94 98 Ok(as_vec)
95 99 }
96 100
97 101 #[test]
98 102 fn test_retain_heads() -> Result<(), GraphError> {
99 103 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
100 104 assert_eq!(
101 105 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
102 106 vec![1, 6, 12]
103 107 );
104 108 assert_eq!(
105 109 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
106 110 vec![3, 5, 8, 9]
107 111 );
108 112 Ok(())
109 113 }
110 114
111 115 /// Apply `heads()` to the given slice and return as a sorted `Vec`
112 116 fn heads_sorted(
113 117 graph: &impl Graph,
114 118 revs: &[Revision],
115 119 ) -> Result<Vec<Revision>, GraphError> {
116 120 let heads = heads(graph, revs.iter())?;
117 121 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
118 122 as_vec.sort();
119 123 Ok(as_vec)
120 124 }
121 125
122 126 #[test]
123 127 fn test_heads() -> Result<(), GraphError> {
124 128 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
125 129 assert_eq!(
126 130 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
127 131 vec![1, 6, 12]
128 132 );
129 133 assert_eq!(
130 134 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
131 135 vec![3, 5, 8, 9]
132 136 );
133 137 Ok(())
134 138 }
135 139
136 140 }
@@ -1,36 +1,41 b''
1 1 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5 mod ancestors;
6 6 pub mod dagops;
7 7 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 8 pub mod testing; // unconditionally built, for use from integration tests
9 9
10 10 /// Mercurial revision numbers
11 11 ///
12 12 /// As noted in revlog.c, revision numbers are actually encoded in
13 13 /// 4 bytes, and are liberally converted to ints, whence the i32
14 14 pub type Revision = i32;
15 15
16
17 /// Marker expressing the absence of a parent
18 ///
19 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
20 /// to be smaller that all existing revisions.
16 21 pub const NULL_REVISION: Revision = -1;
17 22
18 23 /// Same as `mercurial.node.wdirrev`
19 24 ///
20 25 /// This is also equal to `i32::max_value()`, but it's better to spell
21 26 /// it out explicitely, same as in `mercurial.node`
22 27 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
23 28
24 29 /// The simplest expression of what we need of Mercurial DAGs.
25 30 pub trait Graph {
26 31 /// Return the two parents of the given `Revision`.
27 32 ///
28 33 /// Each of the parents can be independently `NULL_REVISION`
29 34 fn parents(&self, Revision) -> Result<[Revision; 2], GraphError>;
30 35 }
31 36
32 37 #[derive(Clone, Debug, PartialEq)]
33 38 pub enum GraphError {
34 39 ParentOutOfRange(Revision),
35 40 WorkingDirectoryUnsupported,
36 41 }
General Comments 0
You need to be logged in to leave comments. Login now