upstream/mercurial-mirror Commit - r52512:b08c5fbe

rust: blanket implementation of Graph for Graph references...

Georges Racinet -

r52512:b08c5fbe stable

parent child

rust/hg-core/src/ancestors.rs

0 +12 0

             // ancestors.rs
             //
             // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Rust versions of generic DAG ancestors algorithms for Mercurial
             use super::{Graph, GraphError, Revision, NULL_REVISION};
             use crate::dagops;
             use std::cmp::max;
             use std::collections::{BinaryHeap, HashSet};
             /// Iterator over the ancestors of a given list of revisions
             /// This is a generic type, defined and implemented for any Graph, so that
             /// it's easy to
             ///
             /// - unit test in pure Rust
             /// - bind to main Mercurial code, potentially in several ways and have these
             ///   bindings evolve over time
             pub struct AncestorsIterator<G: Graph> {
                 /// Source of parent information, queried through `Graph::parents`
                 graph: G,
                 /// Revisions waiting to be emitted; the heap hands out the greatest
                 /// one first
                 visit: BinaryHeap<Revision>,
                 /// Revisions that were already scheduled in `visit` at some point,
                 /// used to avoid scheduling the same revision twice
                 seen: HashSet<Revision>,
                 /// Lower bound: revisions smaller than this are never scheduled
                 stoprev: Revision,
             }
             /// Incremental computation of "missing ancestors" relative to a set of
             /// base revisions, over any `Graph`.
             pub struct MissingAncestors<G: Graph> {
                 /// Source of parent information, queried through `Graph::parents`
                 graph: G,
                 /// Current base revisions. This set grows as parents of bases get
                 /// discovered by the algorithms of this type.
                 bases: HashSet<Revision>,
                 /// Greatest revision ever passed to `add_bases()`, or `NULL_REVISION`
                 /// if there was none
                 max_base: Revision,
             }
             impl<G: Graph> AncestorsIterator<G> {
                 /// Constructor.
                 ///
                 /// Initial revisions lower than `stoprev` are ignored.
                 ///
                 /// If `inclusive` is true, the remaining initial revisions are emitted
                 /// by the iterator; otherwise iteration starts from their parents.
                 ///
                 /// # Errors
                 ///
                 /// With `inclusive` false, the parents of the initial revisions are
                 /// looked up right away and any `GraphError` from the graph is
                 /// returned.
                 pub fn new(
                     graph: G,
                     initrevs: impl IntoIterator<Item = Revision>,
                     stoprev: Revision,
                     inclusive: bool,
                 ) -> Result<Self, GraphError> {
                     let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
                     if inclusive {
                         let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
                         let seen = visit.iter().cloned().collect();
                         return Ok(AncestorsIterator {
                             visit,
                             seen,
                             stoprev,
                             graph,
                         });
                     }
                     let mut this = AncestorsIterator {
                         visit: BinaryHeap::new(),
                         seen: HashSet::new(),
                         stoprev,
                         graph,
                     };
                     // Marking the null revision as seen keeps it from ever being
                     // scheduled as a parent.
                     this.seen.insert(NULL_REVISION);
                     for rev in filtered_initrevs {
                         for parent in this.graph.parents(rev)?.iter().cloned() {
                             this.conditionally_push_rev(parent);
                         }
                     }
                     Ok(this)
                 }
                 /// Schedule `rev` for a later visit, unless it is below `stoprev` or
                 /// has already been seen.
                 #[inline]
                 fn conditionally_push_rev(&mut self, rev: Revision) {
                     if self.stoprev <= rev && self.seen.insert(rev) {
                         self.visit.push(rev);
                     }
                 }
                 /// Consumes partially the iterator to tell if the given target
                 /// revision is in the ancestors it emits.
                 ///
                 /// This is meant for iterators actually dedicated to that kind of
                 /// purpose.
                 ///
                 /// The answer is immediate if `target` has already been seen (the null
                 /// revision excepted). Otherwise the iterator is advanced until it
                 /// emits `target` or a smaller revision, or is exhausted.
                 ///
                 /// # Errors
                 ///
                 /// Any `GraphError` met while advancing the iterator is returned.
                 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
                     if self.seen.contains(&target) && target != NULL_REVISION {
                         return Ok(true);
                     }
                     for item in self {
                         let rev = item?;
                         if rev == target {
                             return Ok(true);
                         }
                         if rev < target {
                             return Ok(false);
                         }
                     }
                     Ok(false)
                 }
                 /// Return the revision that is next in line in the visit heap, if
                 /// any, without consuming it.
                 pub fn peek(&self) -> Option<Revision> {
                     self.visit.peek().cloned()
                 }
                 /// Tell if the iterator is about an empty set
                 ///
                 /// The result does not depend whether the iterator has been consumed
                 /// or not.
                 /// This is mostly meant for iterators backing a lazy ancestors set
                 pub fn is_empty(&self) -> bool {
                     if !self.visit.is_empty() {
                         return false;
                     }
                     if self.seen.len() > 1 {
                         return false;
                     }
                     // at this point, the seen set is at most a singleton.
                     // If the iterator was built with `inclusive` false, it's still
                     // possible that it has only the null revision
                     self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
                 }
             }
             /// Iteration proper.
             ///
             /// This follows `_lazyancestorsiter()` from `ancestors.py`, except for a
             /// few points that do not matter for the result:
             ///
             /// - invalid parent revisions are not filtered out here. It should be
             ///   consistent and more efficient to have the end caller do it.
             /// - the optimization for adjacent revisions (`p1 == rev - 1`) is absent:
             ///   it boils down to updating the first element of the heap without
             ///   sifting, which Rust's BinaryHeap doesn't let us do.
             /// - parents are compared with `stoprev` before being pushed, which saves
             ///   a few pushes.
             impl<G: Graph> Iterator for AncestorsIterator<G> {
                 type Item = Result<Revision, GraphError>;
                 fn next(&mut self) -> Option<Self::Item> {
                     // An empty heap means that the iteration is over.
                     let current = *self.visit.peek()?;
                     let parents = match self.graph.parents(current) {
                         Ok(found) => found,
                         Err(err) => return Some(Err(err)),
                     };
                     let [p1, p2] = parents;
                     if p1 >= self.stoprev && self.seen.insert(p1) {
                         // The first parent takes the place of `current` at the top
                         // of the heap, sparing a pop followed by a push.
                         if let Some(mut top) = self.visit.peek_mut() {
                             *top = p1;
                         }
                     } else {
                         self.visit.pop();
                     }
                     self.conditionally_push_rev(p2);
                     Some(Ok(current))
                 }
             }
             impl<G: Graph> MissingAncestors<G> {
                 /// Create an instance working on `graph`, with `bases` as initial
                 /// base revisions.
                 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
                     let mut created = MissingAncestors {
                         graph,
                         bases: HashSet::new(),
                         max_base: NULL_REVISION,
                     };
                     created.add_bases(bases);
                     created
                 }
                 /// Tell whether at least one base revision is currently known.
                 pub fn has_bases(&self) -> bool {
                     !self.bases.is_empty()
                 }
                 /// Return a reference to current bases.
                 ///
                 /// This is useful in unit tests, but also setdiscovery.py does
                 /// read the bases attribute of an ancestor.missingancestors instance.
                 pub fn get_bases(&self) -> &HashSet<Revision> {
                     &self.bases
                 }
                 /// Computes the relative heads of current bases.
                 ///
                 /// The object is still usable after this.
                 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                     dagops::heads(&self.graph, self.bases.iter())
                 }
                 /// Consumes the object and returns the relative heads of its bases.
                 pub fn into_bases_heads(
                     mut self,
                 ) -> Result<HashSet<Revision>, GraphError> {
                     dagops::retain_heads(&self.graph, &mut self.bases)?;
                     Ok(self.bases)
                 }
                 /// Add some revisions to `self.bases`
                 ///
                 /// Takes care of keeping `self.max_base` up to date.
                 /// The null revision is silently skipped.
                 pub fn add_bases(
                     &mut self,
                     new_bases: impl IntoIterator<Item = Revision>,
                 ) {
                     let mut max_base = self.max_base;
                     self.bases.extend(
                         new_bases
                             .into_iter()
                             .filter(|&rev| rev != NULL_REVISION)
                             .map(|r| {
                                 if r > max_base {
                                     max_base = r;
                                 }
                                 r
                             }),
                     );
                     self.max_base = max_base;
                 }
                 /// Remove all ancestors of self.bases from the revs set (in place)
                 ///
                 /// As a side effect, `self.bases` grows with the parents of the
                 /// bases that had to be walked.
                 ///
                 /// # Errors
                 ///
                 /// Any `GraphError` raised by a parents lookup is returned.
                 pub fn remove_ancestors_from(
                     &mut self,
                     revs: &mut HashSet<Revision>,
                 ) -> Result<(), GraphError> {
                     revs.retain(|r| !self.bases.contains(r));
                     // the null revision is always an ancestor. Logically speaking
                     // it's debatable in case bases is empty, but the Python
                     // implementation always adds NULL_REVISION to bases, making it
                     // unconditionally true.
                     revs.remove(&NULL_REVISION);
                     if revs.is_empty() {
                         return Ok(());
                     }
                     // anything in revs > max_base is definitely not an ancestor of
                     // bases, revs <= max_base need to be investigated
                     if self.max_base == NULL_REVISION {
                         return Ok(());
                     }
                     // whatever happens, we'll keep at least keepcount of them
                     // knowing this gives us an earlier stop condition than
                     // going all the way to the root
                     let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
                     let mut curr = self.max_base;
                     while curr != NULL_REVISION && revs.len() > keepcount {
                         if self.bases.contains(&curr) {
                             revs.remove(&curr);
                             self.add_parents(curr)?;
                         }
                         // We know this revision is safe because we've checked the bounds
                         // before.
                         curr = Revision(curr.0 - 1);
                     }
                     Ok(())
                 }
                 /// Add the parents of `rev` to `self.bases`
                 ///
                 /// This has no effect on `self.max_base`
                 #[inline]
                 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
                     if rev == NULL_REVISION {
                         return Ok(());
                     }
                     for p in self.graph.parents(rev)?.iter().cloned() {
                         // No need to bother the set with inserting NULL_REVISION over and
                         // over
                         if p != NULL_REVISION {
                             self.bases.insert(p);
                         }
                     }
                     Ok(())
                 }
                 /// Return all the ancestors of revs that are not ancestors of self.bases
                 ///
                 /// This may include elements from revs.
                 ///
                 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
                 /// revision number order, which is a topological order.
                 ///
                 /// As a side effect, `self.bases` grows with the parents of the
                 /// revisions that had to be walked.
                 ///
                 /// # Errors
                 ///
                 /// Any `GraphError` raised by a parents lookup is returned.
                 pub fn missing_ancestors(
                     &mut self,
                     revs: impl IntoIterator<Item = Revision>,
                 ) -> Result<Vec<Revision>, GraphError> {
                     // just for convenience and comparison with Python version
                     let bases_visit = &mut self.bases;
                     let mut revs: HashSet<Revision> = revs
                         .into_iter()
                         .filter(|r| !bases_visit.contains(r))
                         .collect();
                     let revs_visit = &mut revs;
                     // `revs_visit` has just been stripped of all elements of
                     // `bases_visit`: the two sets are disjoint, hence nothing is
                     // known to be in both yet.
                     let mut both_visit: HashSet<Revision> = HashSet::new();
                     let max_revs = match revs_visit.iter().cloned().max() {
                         Some(rev) => rev,
                         // nothing to visit at all
                         None => return Ok(Vec::new()),
                     };
                     let start = max(self.max_base, max_revs);
                     // TODO heuristics for with_capacity()?
                     let mut missing: Vec<Revision> = Vec::new();
                     for curr in (0..=start.0).rev() {
                         if revs_visit.is_empty() {
                             break;
                         }
                         let curr = Revision(curr);
                         if both_visit.remove(&curr) {
                             // curr's parents might have made it into revs_visit through
                             // another path
                             for p in self.graph.parents(curr)?.iter().cloned() {
                                 if p == NULL_REVISION {
                                     continue;
                                 }
                                 revs_visit.remove(&p);
                                 bases_visit.insert(p);
                                 both_visit.insert(p);
                             }
                         } else if revs_visit.remove(&curr) {
                             missing.push(curr);
                             for p in self.graph.parents(curr)?.iter().cloned() {
                                 if p == NULL_REVISION {
                                     continue;
                                 }
                                 if bases_visit.contains(&p) {
                                     // p is already known to be an ancestor of revs_visit
                                     revs_visit.remove(&p);
                                     both_visit.insert(p);
                                 } else if both_visit.contains(&p) {
                                     // p should have been in bases_visit
                                     revs_visit.remove(&p);
                                     bases_visit.insert(p);
                                 } else {
                                     // visit later
                                     revs_visit.insert(p);
                                 }
                             }
                         } else if bases_visit.contains(&curr) {
                             for p in self.graph.parents(curr)?.iter().cloned() {
                                 if p == NULL_REVISION {
                                     continue;
                                 }
                                 if revs_visit.remove(&p) || both_visit.contains(&p) {
                                     // p is an ancestor of bases_visit, and is implicitly
                                     // in revs_visit, which means p is ::revs & ::bases.
                                     both_visit.insert(p);
                                 }
                                 // in all cases, p is an ancestor of bases_visit
                                 bases_visit.insert(p);
                             }
                         }
                     }
                     missing.reverse();
                     Ok(missing)
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::{
                     testing::{SampleGraph, VecGraph},
                     BaseRevision,
                 };
                 impl From<BaseRevision> for Revision {
                     fn from(value: BaseRevision) -> Self {
                         if !cfg!(test) {
                             panic!("should only be used in tests")
                         }
                         Revision(value)
                     }
                 }
                 /// Test-only comparison of a revision with a plain `BaseRevision`.
                 impl PartialEq<BaseRevision> for Revision {
                     fn eq(&self, other: &BaseRevision) -> bool {
                         assert!(cfg!(test), "should only be used in tests");
                         self.0 == *other
                     }
                 }
                 /// Test-only comparison of a revision with a `u32`.
                 ///
                 /// A revision whose number does not fit in a `u32` is equal to none.
                 impl PartialEq<u32> for Revision {
                     fn eq(&self, other: &u32) -> bool {
                         assert!(cfg!(test), "should only be used in tests");
                         matches!(u32::try_from(self.0), Ok(value) if value == *other)
                     }
                 }
                 /// Run an `AncestorsIterator` to completion and return what it emitted,
                 /// in emission order.
                 ///
                 /// Panics if the construction or any iteration step fails.
                 fn list_ancestors<G: Graph>(
                     graph: G,
                     initrevs: Vec<Revision>,
                     stoprev: Revision,
                     inclusive: bool,
                 ) -> Vec<Revision> {
                     let ancestors =
                         AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
                             .unwrap();
                     let mut listed = Vec::new();
                     for item in ancestors {
                         listed.push(item.unwrap());
                     }
                     listed
                 }
                 #[test]
                 /// Same tests as test-ancestor.py, without membership
                 /// (see also test-ancestor.py.out)
                 // NOTE(review): the `stoprev` arguments (0, 6, 11 and 12) were restored
                 // to match the expected outputs; confirm against test-ancestor.py.
                 fn test_list_ancestor() {
                     assert_eq!(
                         list_ancestors(SampleGraph, vec![], 0.into(), false),
                         Vec::<Revision>::new()
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![11.into(), 13.into()],
                             0.into(),
                             false
                         ),
                         vec![8, 7, 4, 3, 2, 1, 0]
                     );
                     // it works as well on references, because &Graph implements Graph
                     // this is needed as of this writing by RHGitaly
                     assert_eq!(
                         list_ancestors(
                             &SampleGraph,
                             vec![11.into(), 13.into()],
                             0.into(),
                             false
                         ),
                         vec![8, 7, 4, 3, 2, 1, 0]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![1.into(), 3.into()],
                             0.into(),
                             false
                         ),
                         vec![1, 0]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![11.into(), 13.into()],
                             0.into(),
                             true
                         ),
                         vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![11.into(), 13.into()],
                             6.into(),
                             false
                         ),
                         vec![8, 7]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![11.into(), 13.into()],
                             6.into(),
                             true
                         ),
                         vec![13, 11, 8, 7]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![11.into(), 13.into()],
                             11.into(),
                             true
                         ),
                         vec![13, 11]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![11.into(), 13.into()],
                             12.into(),
                             true
                         ),
                         vec![13]
                     );
                     assert_eq!(
                         list_ancestors(
                             SampleGraph,
                             vec![10.into(), 1.into()],
                             0.into(),
                             true
                         ),
                         vec![10, 5, 4, 2, 1, 0]
                     );
                 }
                 #[test]
                 /// Corner case that's not directly in test-ancestors.py, but
                 /// that happens quite often, as demonstrated by running the whole
                 /// suite.
                 /// For instance, run tests/test-obsolete-checkheads.t
                 fn test_nullrev_input() {
                     let mut iter = AncestorsIterator::new(
                         SampleGraph,
                         vec![Revision(-1)],
                         0.into(),
                         false,
                     )
                     .unwrap();
                     // the null revision is below stoprev, hence filtered out
                     assert_eq!(iter.next(), None)
                 }
                 #[test]
                 fn test_contains() {
                     let mut lazy = AncestorsIterator::new(
                         SampleGraph,
                         vec![10.into(), 1.into()],
                         0.into(),
                         true,
                     )
                     .unwrap();
                     assert!(lazy.contains(1.into()).unwrap());
                     assert!(!lazy.contains(3.into()).unwrap());
                     // the null revision is never reported as contained
                     let mut lazy = AncestorsIterator::new(
                         SampleGraph,
                         vec![0.into()],
                         0.into(),
                         false,
                     )
                     .unwrap();
                     assert!(!lazy.contains(NULL_REVISION).unwrap());
                 }
                 #[test]
                 fn test_peek() {
                     let mut iter = AncestorsIterator::new(
                         SampleGraph,
                         vec![10.into()],
                         0.into(),
                         true,
                     )
                     .unwrap();
                     // peek() gives us the next value
                     assert_eq!(iter.peek(), Some(10.into()));
                     // but it's not been consumed
                     assert_eq!(iter.next(), Some(Ok(10.into())));
                     // and iteration resumes normally
                     assert_eq!(iter.next(), Some(Ok(5.into())));
                     // let's drain the iterator to test peek() at the end
                     while iter.next().is_some() {}
                     assert_eq!(iter.peek(), None);
                 }
                 #[test]
                 fn test_empty() {
                     let mut iter = AncestorsIterator::new(
                         SampleGraph,
                         vec![10.into()],
                         0.into(),
                         true,
                     )
                     .unwrap();
                     assert!(!iter.is_empty());
                     // consuming the iterator must not change the answer
                     while iter.next().is_some() {}
                     assert!(!iter.is_empty());
                     let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true)
                         .unwrap();
                     assert!(iter.is_empty());
                     // case where iter.seen == {NULL_REVISION}
                     let iter = AncestorsIterator::new(
                         SampleGraph,
                         vec![0.into()],
                         0.into(),
                         false,
                     )
                     .unwrap();
                     assert!(iter.is_empty());
                 }
                 /// A corrupted Graph, supporting error handling tests
                 #[derive(Clone, Debug)]
                 struct Corrupted;
                 impl Graph for Corrupted {
                     // FIXME what to do about this? Are we just not supposed to get them
                     // anymore?
                     fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                         match rev {
                             Revision(1) => Ok([0.into(), (-1).into()]),
                             r => Err(GraphError::ParentOutOfRange(r)),
                         }
                     }
                 }
                 #[test]
                 fn test_initrev_out_of_range() {
                     // inclusive=false looks up initrev's parents right away
                     match AncestorsIterator::new(
                         SampleGraph,
                         vec![25.into()],
                         0.into(),
                         false,
                     ) {
                         Ok(_) => panic!("Should have been ParentOutOfRange"),
                         Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())),
                     }
                 }
                 #[test]
                 fn test_next_out_of_range() {
                     // `Corrupted` knows the parents of revision 1 only: construction
                     // succeeds, and the failure shows up in the first call to `next()`,
                     // which needs the parents of revision 0.
                     let built = AncestorsIterator::new(
                         Corrupted,
                         vec![Revision(1)],
                         Revision(0),
                         false,
                     );
                     let mut iter = built.unwrap();
                     let expected = GraphError::ParentOutOfRange(Revision(0));
                     assert_eq!(iter.next(), Some(Err(expected)));
                 }
                 #[test]
                 /// Test constructor, add/get bases and heads
                 fn test_missing_bases() -> Result<(), GraphError> {
                     /// Sets have no defined order: sort them for comparison
                     fn sorted(revs: &HashSet<Revision>) -> Vec<Revision> {
                         let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
                         as_vec.sort_unstable();
                         as_vec
                     }
                     // duplicates in the input are harmless
                     let mut missing_ancestors = MissingAncestors::new(
                         SampleGraph,
                         [Revision(5), Revision(3), Revision(1), Revision(3)],
                     );
                     assert_eq!(sorted(missing_ancestors.get_bases()), [1, 3, 5]);
                     assert_eq!(missing_ancestors.max_base, 5);
                     // adding more bases, one of them being already known
                     missing_ancestors.add_bases([Revision(3), Revision(7), Revision(8)]);
                     assert_eq!(sorted(missing_ancestors.get_bases()), [1, 3, 5, 7, 8]);
                     assert_eq!(missing_ancestors.max_base, 8);
                     let heads = missing_ancestors.bases_heads()?;
                     assert_eq!(sorted(&heads), [3, 5, 7, 8]);
                     Ok(())
                 }
                 fn assert_missing_remove(
                     bases: &[BaseRevision],
                     revs: &[BaseRevision],
                     expected: &[BaseRevision],
                 ) {
                     let mut missing_ancestors = MissingAncestors::new(
                         SampleGraph,
                         bases.iter().map(|r| Revision(*r)),
                     );
                     let mut revset: HashSet<Revision> =
                         revs.iter().map(|r| Revision(*r)).collect();
                     missing_ancestors
                         .remove_ancestors_from(&mut revset)
                         .unwrap();
                     let mut as_vec: Vec<Revision> = revset.into_iter().collect();
                     as_vec.sort_unstable();
                     assert_eq!(as_vec.as_slice(), expected);
                 }
                 #[test]
                 fn test_missing_remove() {
                     let one_to_nine: Vec<BaseRevision> = (1..10).collect();
                     assert_missing_remove(
                         &[1, 2, 3, 4, 7],
                         &one_to_nine,
                         &[5, 6, 8, 9],
                     );
                     // all revs are above the only base: nothing gets removed
                     assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
                     assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
                 }
                 fn assert_missing_ancestors(
                     bases: &[BaseRevision],
                     revs: &[BaseRevision],
                     expected: &[BaseRevision],
                 ) {
                     let mut missing_ancestors = MissingAncestors::new(
                         SampleGraph,
                         bases.iter().map(|r| Revision(*r)),
                     );
                     let missing = missing_ancestors
                         .missing_ancestors(revs.iter().map(|r| Revision(*r)))
                         .unwrap();
                     assert_eq!(missing.as_slice(), expected);
                 }
                 #[test]
                 fn test_missing_ancestors() {
                     // examples taken from test-ancestors.py by having it run
                     // on the same graph (both naive and fast Python algs)
                     // Each case reads (bases, revs, expected).
                     let cases: [(&[BaseRevision], &[BaseRevision], &[BaseRevision]); 3] = [
                         (&[10], &[11], &[3, 7, 11]),
                         (&[11], &[10], &[5, 10]),
                         (&[7], &[9, 11], &[3, 6, 9, 11]),
                     ];
                     for (bases, revs, expected) in cases {
                         assert_missing_ancestors(bases, revs, expected);
                     }
                 }
                 /// An interesting case found by a random generator similar to
                 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
                 /// failed this, yet none of the integration tests of the whole suite
                 /// caught it.
                 #[allow(clippy::unnecessary_cast)]
                 #[test]
                 fn test_remove_ancestors_from_case1() {
                     const FAKE_NULL_REVISION: BaseRevision = -1;
                     assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0);
                     let graph: VecGraph = vec![
                         [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                         [0, FAKE_NULL_REVISION],
                         [1, 0],
                         [2, 1],
                         [3, FAKE_NULL_REVISION],
                         [4, FAKE_NULL_REVISION],
                         [5, 1],
                         [2, FAKE_NULL_REVISION],
                         [7, FAKE_NULL_REVISION],
                         [8, FAKE_NULL_REVISION],
                         [9, FAKE_NULL_REVISION],
                         [10, 1],
                         [3, FAKE_NULL_REVISION],
                         [12, FAKE_NULL_REVISION],
                         [13, FAKE_NULL_REVISION],
                         [14, FAKE_NULL_REVISION],
                         [4, FAKE_NULL_REVISION],
                         [16, FAKE_NULL_REVISION],
                         [17, FAKE_NULL_REVISION],
                         [18, FAKE_NULL_REVISION],
                         [19, 11],
                         [20, FAKE_NULL_REVISION],
                         [21, FAKE_NULL_REVISION],
                         [22, FAKE_NULL_REVISION],
                         [23, FAKE_NULL_REVISION],
                         [2, FAKE_NULL_REVISION],
                         [3, FAKE_NULL_REVISION],
                         [26, 24],
                         [27, FAKE_NULL_REVISION],
                         [28, FAKE_NULL_REVISION],
                         [12, FAKE_NULL_REVISION],
                         [1, FAKE_NULL_REVISION],
                         [1, 9],
                         [32, FAKE_NULL_REVISION],
                         [33, FAKE_NULL_REVISION],
                         [34, 31],
                         [35, FAKE_NULL_REVISION],
                         [36, 26],
                         [37, FAKE_NULL_REVISION],
                         [38, FAKE_NULL_REVISION],
                         [39, FAKE_NULL_REVISION],
                         [40, FAKE_NULL_REVISION],
                         [41, FAKE_NULL_REVISION],
                         [42, 26],
                         [0, FAKE_NULL_REVISION],
                         [44, FAKE_NULL_REVISION],
                         [45, 4],
                         [40, FAKE_NULL_REVISION],
                         [47, FAKE_NULL_REVISION],
                         [36, 0],
                         [49, FAKE_NULL_REVISION],
                         [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                         [51, FAKE_NULL_REVISION],
                         [52, FAKE_NULL_REVISION],
                         [53, FAKE_NULL_REVISION],
                         [14, FAKE_NULL_REVISION],
                         [55, FAKE_NULL_REVISION],
                         [15, FAKE_NULL_REVISION],
                         [23, FAKE_NULL_REVISION],
                         [58, FAKE_NULL_REVISION],
                         [59, FAKE_NULL_REVISION],
                         [2, FAKE_NULL_REVISION],
                         [61, 59],
                         [62, FAKE_NULL_REVISION],
                         [63, FAKE_NULL_REVISION],
                         [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                         [65, FAKE_NULL_REVISION],
                         [66, FAKE_NULL_REVISION],
                         [67, FAKE_NULL_REVISION],
                         [68, FAKE_NULL_REVISION],
                         [37, 28],
                         [69, 25],
                         [71, FAKE_NULL_REVISION],
                         [72, FAKE_NULL_REVISION],
                         [50, 2],
                         [74, FAKE_NULL_REVISION],
                         [12, FAKE_NULL_REVISION],
                         [18, FAKE_NULL_REVISION],
                         [77, FAKE_NULL_REVISION],
                         [78, FAKE_NULL_REVISION],
                         [79, FAKE_NULL_REVISION],
                         [43, 33],
                         [81, FAKE_NULL_REVISION],
                         [82, FAKE_NULL_REVISION],
                         [83, FAKE_NULL_REVISION],
                         [84, 45],
                         [85, FAKE_NULL_REVISION],
                         [86, FAKE_NULL_REVISION],
                         [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                         [88, FAKE_NULL_REVISION],
                         [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                         [76, 83],
                         [44, FAKE_NULL_REVISION],
                         [92, FAKE_NULL_REVISION],
                         [93, FAKE_NULL_REVISION],
                         [9, FAKE_NULL_REVISION],
                         [95, 67],
                         [96, FAKE_NULL_REVISION],
                         [97, FAKE_NULL_REVISION],
                         [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                     ]
                     .into_iter()
                     .map(|[a, b]| [Revision(a), Revision(b)])
                     .collect();
                     let problem_rev = 28.into();
                     let problem_base = 70.into();
                     // making the problem obvious: problem_rev is a parent of problem_base
                     assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
                     let mut missing_ancestors: MissingAncestors<VecGraph> =
                         MissingAncestors::new(
                             graph,
                             [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
                                 .iter()
                                 .map(|r| Revision(*r)),
                         );
                     assert!(missing_ancestors.bases.contains(&problem_base));
                     let mut revs: HashSet<Revision> =
                         [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
                             .iter()
                             .map(|r| Revision(*r))
                             .collect();
                     missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
                     assert!(!revs.contains(&problem_rev));
                 }
             }

rust/hg-core/src/revlog/mod.rs

0 +6 0

             // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
             //           and Mercurial contributors
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             //! Mercurial concepts for handling revision history
             pub mod node;
             pub mod nodemap;
             mod nodemap_docket;
             pub mod path_encode;
             pub use node::{FromHexError, Node, NodePrefix};
             pub mod changelog;
             pub mod filelog;
             pub mod index;
             pub mod manifest;
             pub mod patch;
             use std::borrow::Cow;
             use std::io::Read;
             use std::ops::Deref;
             use std::path::Path;
             use flate2::read::ZlibDecoder;
             use sha1::{Digest, Sha1};
             use std::cell::RefCell;
             use zstd;
             use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
             use self::nodemap_docket::NodeMapDocket;
             use super::index::Index;
             use super::nodemap::{NodeMap, NodeMapError};
             use crate::errors::HgError;
             use crate::vfs::Vfs;
             /// Raw integer type underlying revision numbers.
             ///
             /// As noted in revlog.c, revision numbers are actually encoded in
             /// 4 bytes, and are liberally converted to ints, hence the `i32`.
             pub type BaseRevision = i32;
             /// Mercurial revision numbers
             ///
             /// In contrast to the more general [`UncheckedRevision`], these are "checked"
             /// in the sense that they should only be used for revisions that are
             /// valid for a given index (i.e. in bounds).
             ///
             /// The wrapped [`BaseRevision`] is public, so this is a convention to be
             /// upheld by callers rather than something the type enforces.
             #[derive(
                 Debug,
                 derive_more::Display,
                 Clone,
                 Copy,
                 Hash,
                 PartialEq,
                 Eq,
                 PartialOrd,
                 Ord,
             )]
             pub struct Revision(pub BaseRevision);
             /// Byte output of a [`Revision`]: the wrapped integer is written as is.
             impl format_bytes::DisplayBytes for Revision {
                 fn display_bytes(
                     &self,
                     output: &mut dyn std::io::Write,
                 ) -> std::io::Result<()> {
                     let Self(raw) = self;
                     format_bytes::DisplayBytes::display_bytes(raw, output)
                 }
             }
             /// Unchecked Mercurial revision numbers.
             ///
             /// Values of this type have no guarantee of being a valid revision number
             /// in any context. Use [`RevlogIndex::check_revision`] to get a valid
             /// [`Revision`] within the appropriate index object.
             #[derive(
                 Debug,
                 derive_more::Display,
                 Clone,
                 Copy,
                 Hash,
                 PartialEq,
                 Eq,
                 PartialOrd,
                 Ord,
             )]
             pub struct UncheckedRevision(pub BaseRevision);
             /// Byte output of an [`UncheckedRevision`]: the wrapped integer is
             /// written as is.
             impl format_bytes::DisplayBytes for UncheckedRevision {
                 fn display_bytes(
                     &self,
                     output: &mut dyn std::io::Write,
                 ) -> std::io::Result<()> {
                     let Self(raw) = self;
                     format_bytes::DisplayBytes::display_bytes(raw, output)
                 }
             }
             impl From<Revision> for UncheckedRevision {
                 fn from(value: Revision) -> Self {
                     Self(value.0)
                 }
             }
             impl From<BaseRevision> for UncheckedRevision {
                 fn from(value: BaseRevision) -> Self {
                     Self(value)
                 }
             }
             /// Marker expressing the absence of a parent
             ///
             /// Independently of the actual representation, `NULL_REVISION` is guaranteed
             /// to be smaller than all existing revisions.
             ///
             /// The current representation is the raw value `-1`.
             pub const NULL_REVISION: Revision = Revision(-1);
             /// Same as `mercurial.node.wdirrev`
             ///
             /// This is also equal to `i32::MAX`, but it's better to spell
             /// it out explicitly, same as in `mercurial.node`
             #[allow(clippy::unreadable_literal)]
             pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
                 UncheckedRevision(0x7fffffff);
             /// All-`f` hexadecimal string.
             ///
             /// NOTE(review): presumably the hexadecimal node ID standing for the
             /// working directory, by analogy with [`WORKING_DIRECTORY_REVISION`] --
             /// confirm against `mercurial.node`.
             pub const WORKING_DIRECTORY_HEX: &str =
                 "ffffffffffffffffffffffffffffffffffffffff";
             /// The simplest expression of what we need of Mercurial DAGs.
             ///
             /// A graph only has to tell the parents of a revision; see [`Revlog`]
             /// for an implementation in this module.
             pub trait Graph {
                 /// Return the two parents of the given `Revision`.
                 ///
                 /// Each of the parents can be independently `NULL_REVISION`
                 ///
                 /// # Errors
                 ///
                 /// Implementations report failures as a [`GraphError`].
                 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
             }
             /// Error type returned by [`Graph::parents`].
             #[derive(Clone, Debug, PartialEq)]
             pub enum GraphError {
                 /// Carries a [`Revision`]; presumably the parent that was found to be
                 /// out of range -- TODO confirm against the `Graph` implementations.
                 ParentOutOfRange(Revision),
             }
+            impl<T: Graph> Graph for &T {
                 // Blanket implementation: a shared reference to any `Graph` is itself
                 // a `Graph`, so code generic over `G: Graph` can be handed `&graph`.
                 // `*self` is a `&T`, hence the call below forwards to `T::parents`.
+                fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
+                    (*self).parents(rev)
+                }
+            }
             /// The Mercurial Revlog Index
             ///
             /// Only the minimal interface needed by the
             /// [`nodemap`](nodemap/index.html) module is exposed for now.
             pub trait RevlogIndex {
                 /// Total number of Revisions referenced in this index
                 fn len(&self) -> usize;
                 /// Whether this index references no Revision at all
                 fn is_empty(&self) -> bool {
                     matches!(self.len(), 0)
                 }
                 /// Return a reference to the Node or `None` for `NULL_REVISION`
                 fn node(&self, rev: Revision) -> Option<&Node>;
                 /// Turn `rev` into a [`Revision`] if it is a valid revision number
                 /// for this index, and return `None` otherwise.
                 ///
                 /// [`NULL_REVISION`] is considered to be valid; any other value has
                 /// to be non-negative and strictly smaller than [`Self::len`].
                 #[inline(always)]
                 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
                     match rev.0 {
                         raw if raw == NULL_REVISION.0 => Some(Revision(raw)),
                         raw if raw >= 0 && (raw as usize) < self.len() => {
                             Some(Revision(raw))
                         }
                         _ => None,
                     }
                 }
             }
             // Per-revision flag bits, tested against the `flags` of a `RevlogEntry`.
             // Tested by `RevlogEntry::is_censored`.
             const REVISION_FLAG_CENSORED: u16 = 1 << 15;
             // Excluded from the mask used by
             // `RevlogEntry::has_length_affecting_flag_processor`.
             const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
             const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
             const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
             // Union of the four flag bits declared above.
             // Keep this in sync with REVIDX_KNOWN_FLAGS in
             // mercurial/revlogutils/flagutil.py
             const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
                 | REVISION_FLAG_ELLIPSIS
                 | REVISION_FLAG_EXTSTORED
                 | REVISION_FLAG_HASCOPIESINFO;
             // Flags of the entry built by `Revlog::make_null_entry`: none set.
             const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
             /// Errors returned by the read operations of [`Revlog`].
             #[derive(Debug, derive_more::From, derive_more::Display)]
             pub enum RevlogError {
                 /// No revision matches the request, e.g. a node lookup found nothing
                 /// or the index has no entry for the revision
                 InvalidRevision,
                 /// Working directory is not supported
                 WDirUnsupported,
                 /// Found more than one entry whose ID match the requested prefix
                 AmbiguousPrefix,
                 /// Any other error, wrapped as is (also the target of the derived
                 /// `From<HgError>` conversion)
                 #[from]
                 Other(HgError),
             }
             /// Nodemap failures seen from the revlog: several results mean an
             /// ambiguous prefix, while a revision missing from the index is reported
             /// as corruption.
             impl From<NodeMapError> for RevlogError {
                 fn from(error: NodeMapError) -> Self {
                     match error {
                         NodeMapError::RevisionNotInIndex(rev) => {
                             let context = format!(
                                 "nodemap point to revision {} not in index",
                                 rev
                             );
                             Self::corrupted(context)
                         }
                         NodeMapError::MultipleResults => Self::AmbiguousPrefix,
                     }
                 }
             }
             /// Build a corruption [`HgError`] whose message is `context` prefixed
             /// with `"corrupted revlog, "`.
             fn corrupted<S: AsRef<str>>(context: S) -> HgError {
                 let message = format!("corrupted revlog, {}", context.as_ref());
                 HgError::corrupted(message)
             }
             impl RevlogError {
                 fn corrupted<S: AsRef<str>>(context: S) -> Self {
                     RevlogError::Other(corrupted(context))
                 }
             }
             /// Read only implementation of revlog.
             ///
             /// Built with [`Revlog::open`].
             pub struct Revlog {
                 /// When index and data are not interleaved: bytes of the revlog index.
                 /// When index and data are interleaved: bytes of the revlog index and
                 /// data.
                 index: Index,
                 /// When index and data are not interleaved: bytes of the revlog data
                 /// (`None` when the index is inline)
                 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                 /// When present on disk: the persistent nodemap for this revlog
                 nodemap: Option<nodemap::NodeTree>,
             }
             /// A revlog is a graph: parents are answered by its index.
             impl Graph for Revlog {
                 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                     let index = &self.index;
                     index.parents(rev)
                 }
             }
             /// Revlog version to assume, with the options specific to each version.
             ///
             /// This selects the default index header used when opening a revlog
             /// (see `RevlogOpenOptions::default_index_header`).
             #[derive(Debug, Copy, Clone)]
             pub enum RevlogVersionOptions {
                 V0,
                 /// `generaldelta` changes one byte of the default index header
                 V1 { generaldelta: bool },
                 V2,
                 // NOTE(review): `compute_rank` is not read by the code of this
                 // module that is visible here -- confirm where it is consumed.
                 ChangelogV2 { compute_rank: bool },
             }
             /// Options to govern how a revlog should be opened, usually from the
             /// repository configuration or requirements.
             ///
             /// See [`RevlogOpenOptions::new`] for the default values.
             #[derive(Debug, Copy, Clone)]
             pub struct RevlogOpenOptions {
                 /// The revlog version, along with any option specific to this version
                 pub version: RevlogVersionOptions,
                 /// Whether the revlog uses a persistent nodemap.
                 pub use_nodemap: bool,
                 // TODO other non-header/version options,
             }
             impl RevlogOpenOptions {
                 /// Default options: revlog version 1 with `generaldelta` enabled and
                 /// no persistent nodemap.
                 pub fn new() -> Self {
                     Self {
                         version: RevlogVersionOptions::V1 { generaldelta: true },
                         use_nodemap: false,
                     }
                 }
                 /// Build the index header matching the configured revlog version.
                 ///
                 /// The result is what `Revlog::open` hands to `Index::new` as the
                 /// default header. The four header bytes are:
                 ///
                 /// - `V0`: all zeroes
                 /// - `V1`: `[0, 3, 0, 1]` with `generaldelta`, `[0, 1, 0, 1]` without
                 /// - `V2`: big-endian bytes of `0xDEAD`
                 /// - `ChangelogV2`: big-endian bytes of `0xD34D`
                 fn default_index_header(&self) -> index::IndexHeader {
                     index::IndexHeader {
                         header_bytes: match self.version {
                             RevlogVersionOptions::V0 => [0, 0, 0, 0],
                             RevlogVersionOptions::V1 { generaldelta } => {
                                 [0, if generaldelta { 3 } else { 1 }, 0, 1]
                             }
                             RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
                             // `compute_rank` has no influence on the header
                             RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
                                 0xD34Du32.to_be_bytes()
                             }
                         },
                     }
                 }
             }
             impl Default for RevlogOpenOptions {
                 fn default() -> Self {
                     Self::new()
                 }
             }
             impl Revlog {
                 /// Open a revlog index file.
                 ///
                 /// It will also open the associated data file if index and data are not
                 /// interleaved.
                 ///
                 /// `data_path` overrides the location of the data file, which otherwise
                 /// is `index_path` with its extension replaced by `d`.
                 ///
                 /// # Errors
                 ///
                 /// Any [`HgError`] met while opening is returned as is.
                 pub fn open(
                     store_vfs: &Vfs,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                     options: RevlogOpenOptions,
                 ) -> Result<Self, HgError> {
                     // `None`: no nodemap is injected, only the on-disk one can be used
                     Self::open_gen(store_vfs, index_path, data_path, options, None)
                 }
                 /// Implementation of [`Self::open`], with an optional injected nodemap.
                 ///
                 /// Steps, in order:
                 ///
                 /// 1. build the index from the mapping of `index_path`, or from empty
                 ///    bytes when `mmap_open_opt` yields `None`;
                 /// 2. unless the index is inline, map the data file (`data_path`, or
                 ///    `index_path` with the `d` extension);
                 /// 3. unless the index is inline or `options.use_nodemap` is unset,
                 ///    read the persistent nodemap from disk.
                 ///
                 /// `nodemap_for_test`, when given, takes precedence over the nodemap
                 /// read at step 3 (which is still read).
                 fn open_gen(
                     store_vfs: &Vfs,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                     options: RevlogOpenOptions,
                     nodemap_for_test: Option<nodemap::NodeTree>,
                 ) -> Result<Self, HgError> {
                     let index_path = index_path.as_ref();
                     let index = match store_vfs.mmap_open_opt(index_path)? {
                         Some(index_mmap) => Index::new(
                             Box::new(index_mmap),
                             options.default_index_header(),
                         ),
                         None => Index::new(
                             Box::<Vec<_>>::default(),
                             options.default_index_header(),
                         ),
                     }?;
                     let default_data_path = index_path.with_extension("d");
                     // type annotation required
                     // won't recognize Mmap as Deref<Target = [u8]>
                     let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                         if !index.is_inline() {
                             let path = data_path.unwrap_or(&default_data_path);
                             let data_mmap = store_vfs.mmap_open(path)?;
                             Some(Box::new(data_mmap))
                         } else {
                             None
                         };
                     let on_disk_nodemap = if !index.is_inline() && options.use_nodemap {
                         let docket_and_data =
                             NodeMapDocket::read_from_file(store_vfs, index_path)?;
                         docket_and_data.map(|(docket, bytes)| {
                             nodemap::NodeTree::load_bytes(
                                 Box::new(bytes),
                                 docket.data_length,
                             )
                         })
                     } else {
                         None
                     };
                     Ok(Revlog {
                         index,
                         data_bytes,
                         nodemap: nodemap_for_test.or(on_disk_nodemap),
                     })
                 }
                 /// Return number of entries of the `Revlog`, as reported by its index.
                 pub fn len(&self) -> usize {
                     self.index.len()
                 }
                 /// Returns `true` if the `Revlog` has zero `entries`, as reported by
                 /// its index.
                 pub fn is_empty(&self) -> bool {
                     self.index.is_empty()
                 }
                 /// Returns the node ID for the given revision number, if it exists in this
                 /// revlog
                 ///
                 /// The null revision maps to the null node. `None` is returned when
                 /// `rev` is not valid for the index or has no index entry.
                 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
                     if rev == NULL_REVISION.into() {
                         Some(&NULL_NODE)
                     } else {
                         let checked = self.index.check_revision(rev)?;
                         let index_entry = self.index.get_entry(checked)?;
                         Some(index_entry.hash())
                     }
                 }
                 /// Return the revision number for the given node ID, if it exists in this
                 /// revlog
                 ///
                 /// The persistent nodemap is used when this revlog has one, otherwise
                 /// the lookup falls back to a scan of the index.
                 ///
                 /// # Errors
                 ///
                 /// [`RevlogError::InvalidRevision`] if the nodemap finds nothing; other
                 /// errors come from the nodemap or from the fallback scan.
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     match &self.nodemap {
                         Some(nodemap) => nodemap
                             .find_bin(&self.index, node)?
                             .ok_or(RevlogError::InvalidRevision),
                         None => self.rev_from_node_no_persistent_nodemap(node),
                     }
                 }
                 /// Same as `rev_from_node`, without using a persistent nodemap
                 ///
                 /// This is used as fallback when a persistent nodemap is not present.
                 /// This happens when the persistent-nodemap experimental feature is not
                 /// enabled, or for small revlogs.
                 fn rev_from_node_no_persistent_nodemap(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     // Linear scan of the revlog
                     // TODO: consider building a non-persistent nodemap in memory to
                     // optimize these cases.
                     let mut found_by_prefix = None;
                     for rev in (-1..self.len() as BaseRevision).rev() {
                         let rev = Revision(rev as BaseRevision);
                         let candidate_node = if rev == Revision(-1) {
                             NULL_NODE
                         } else {
                             let index_entry =
                                 self.index.get_entry(rev).ok_or_else(|| {
                                     HgError::corrupted(
                                         "revlog references a revision not in the index",
                                     )
                                 })?;
                             *index_entry.hash()
                         };
                         if node == candidate_node {
                             return Ok(rev);
                         }
                         if node.is_prefix_of(&candidate_node) {
                             if found_by_prefix.is_some() {
                                 return Err(RevlogError::AmbiguousPrefix);
                             }
                             found_by_prefix = Some(rev)
                         }
                     }
                     found_by_prefix.ok_or(RevlogError::InvalidRevision)
                 }
                 /// Returns whether the given revision exists in this revlog, i.e.
                 /// whether the index accepts it as a valid revision.
                 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
                     matches!(self.index.check_revision(rev), Some(_))
                 }
                 /// Return the full data associated to a revision.
                 ///
                 /// The null revision yields empty data. For any other revision, all
                 /// entries required to build the final data out of deltas will be
                 /// retrieved as needed, and the deltas will be applied to the initial
                 /// snapshot to rebuild the final data.
                 ///
                 /// # Errors
                 ///
                 /// Errors of [`Self::get_entry`] and of the data reconstruction are
                 /// returned as is.
                 pub fn get_rev_data(
                     &self,
                     rev: UncheckedRevision,
                 ) -> Result<Cow<[u8]>, RevlogError> {
                     if rev != NULL_REVISION.into() {
                         let entry = self.get_entry(rev)?;
                         return entry.data();
                     }
                     Ok(Cow::Borrowed(&[]))
                 }
                 /// [`Self::get_rev_data`] for checked revisions.
                 ///
                 /// The null revision yields empty data.
                 pub fn get_rev_data_for_checked_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<Cow<[u8]>, RevlogError> {
                     if rev != NULL_REVISION {
                         let entry = self.get_entry_for_checked_rev(rev)?;
                         return entry.data();
                     }
                     Ok(Cow::Borrowed(&[]))
                 }
                 /// Check the hash of some given data against the recorded hash.
                 pub fn check_hash(
                     &self,
                     p1: Revision,
                     p2: Revision,
                     expected: &[u8],
                     data: &[u8],
                 ) -> bool {
                     let e1 = self.index.get_entry(p1);
                     let h1 = match e1 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     let e2 = self.index.get_entry(p2);
                     let h2 = match e2 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                 }
                 /// Build the full data of a revision out of its snapshot
                 /// and its deltas.
                 ///
                 /// The chunks of `deltas` are read in reverse slice order, turned into
                 /// patch lists and folded into a single patch, which is then applied
                 /// to the chunk of `snapshot`.
                 ///
                 /// # Errors
                 ///
                 /// The first failure to read a chunk is returned.
                 fn build_data_from_deltas(
                     snapshot: RevlogEntry,
                     deltas: &[RevlogEntry],
                 ) -> Result<Vec<u8>, HgError> {
                     let base_text = snapshot.data_chunk()?;
                     let mut chunks = Vec::with_capacity(deltas.len());
                     for delta in deltas.iter().rev() {
                         chunks.push(delta.data_chunk()?);
                     }
                     let mut patches = Vec::with_capacity(chunks.len());
                     for chunk in &chunks {
                         patches.push(patch::PatchList::new(chunk));
                     }
                     let folded = patch::fold_patch_lists(&patches);
                     Ok(folded.apply(&base_text))
                 }
                 /// Return the revlog data.
                 ///
                 /// # Panics
                 ///
                 /// Panics if no separate data bytes were loaded, which is the case for
                 /// inline revlogs.
                 fn data(&self) -> &[u8] {
                     if let Some(data_bytes) = &self.data_bytes {
                         return data_bytes;
                     }
                     panic!("forgot to load the data or trying to access inline data")
                 }
                 /// Build the entry standing for the null revision: null node, null
                 /// parents, no bytes, zero lengths, no delta base and no flag set.
                 pub fn make_null_entry(&self) -> RevlogEntry {
                     RevlogEntry {
                         revlog: self,
                         rev: NULL_REVISION,
                         hash: NULL_NODE,
                         p1: NULL_REVISION,
                         p2: NULL_REVISION,
                         base_rev_or_base_of_delta_chain: None,
                         flags: NULL_REVLOG_ENTRY_FLAGS,
                         bytes: b"",
                         compressed_len: 0,
                         uncompressed_len: 0,
                     }
                 }
                 /// Get an entry of the revlog for an already checked revision.
                 ///
                 /// The null revision yields the null entry. Otherwise the entry is
                 /// assembled from the index entry of `rev`: its bytes are taken from
                 /// the index itself for inline revlogs and from the data file
                 /// otherwise, and its base revision and parents are validated
                 /// against the index.
                 ///
                 /// # Errors
                 ///
                 /// - [`RevlogError::InvalidRevision`] if the index has no entry for
                 ///   `rev`
                 /// - a corruption error if the base revision or one of the parents is
                 ///   not valid for the index
                 fn get_entry_for_checked_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, RevlogError> {
                     if rev == NULL_REVISION {
                         return Ok(self.make_null_entry());
                     }
                     let index_entry = match self.index.get_entry(rev) {
                         Some(found) => found,
                         None => return Err(RevlogError::InvalidRevision),
                     };
                     let start = index_entry.offset();
                     let end = start + index_entry.compressed_len() as usize;
                     let bytes = if self.index.is_inline() {
                         self.index.data(start, end)
                     } else {
                         &self.data()[start..end]
                     };
                     let raw_base = index_entry.base_revision_or_base_of_delta_chain();
                     let base_rev = match self.index.check_revision(raw_base) {
                         Some(base) => base,
                         None => {
                             return Err(RevlogError::corrupted(format!(
                                 "base revision for rev {} is invalid",
                                 rev
                             )))
                         }
                     };
                     let p1 = match self.index.check_revision(index_entry.p1()) {
                         Some(parent) => parent,
                         None => {
                             return Err(RevlogError::corrupted(format!(
                                 "p1 for rev {} is invalid",
                                 rev
                             )))
                         }
                     };
                     let p2 = match self.index.check_revision(index_entry.p2()) {
                         Some(parent) => parent,
                         None => {
                             return Err(RevlogError::corrupted(format!(
                                 "p2 for rev {} is invalid",
                                 rev
                             )))
                         }
                     };
                     Ok(RevlogEntry {
                         revlog: self,
                         rev,
                         bytes,
                         compressed_len: index_entry.compressed_len(),
                         uncompressed_len: index_entry.uncompressed_len(),
                         // a revision recorded as its own base has no delta base
                         base_rev_or_base_of_delta_chain: Some(base_rev)
                             .filter(|base| *base != rev),
                         p1,
                         p2,
                         flags: index_entry.flags(),
                         hash: *index_entry.hash(),
                     })
                 }
                 /// Get an entry of the revlog.
                 ///
                 /// The null revision yields the null entry.
                 ///
                 /// # Errors
                 ///
                 /// A corruption error is returned if `rev` is not valid for the index;
                 /// other errors are those of the lookup of a checked revision.
                 pub fn get_entry(
                     &self,
                     rev: UncheckedRevision,
                 ) -> Result<RevlogEntry, RevlogError> {
                     if rev == NULL_REVISION.into() {
                         return Ok(self.make_null_entry());
                     }
                     match self.index.check_revision(rev) {
                         Some(checked) => self.get_entry_for_checked_rev(checked),
                         None => Err(RevlogError::corrupted(format!(
                             "rev {} is invalid",
                             rev
                         ))),
                     }
                 }
             }
             /// The revlog entry's bytes and the necessary information to extract
             /// the entry's data.
             #[derive(Clone)]
             pub struct RevlogEntry<'revlog> {
                 /// The revlog this entry was read from
                 revlog: &'revlog Revlog,
                 /// Revision number of this entry
                 rev: Revision,
                 /// Bytes of this entry, sliced from the index (inline revlogs) or
                 /// from the data file
                 bytes: &'revlog [u8],
                 compressed_len: u32,
                 /// Signed, as in the index; see `uncompressed_len()` for negative
                 /// values
                 uncompressed_len: i32,
                 /// `None` when the index records this revision as its own base
                 base_rev_or_base_of_delta_chain: Option<Revision>,
                 /// First parent, `NULL_REVISION` if absent
                 p1: Revision,
                 /// Second parent, `NULL_REVISION` if absent
                 p2: Revision,
                 /// Flag bits, see the `REVISION_FLAG_*` constants
                 flags: u16,
                 /// Node ID of this entry
                 hash: Node,
             }
             // One zstd decompressor per thread, borrowed mutably for each call to
             // `zstd_decompress_to_buffer`.
             thread_local! {
               // seems fine to [unwrap] here: this can only fail due to memory allocation
               // failing, and it's normal for that to cause panic.
               static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
                   RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
             }
             /// Decompress `bytes` into `buf` with the zstd decompressor of the
             /// current thread, forwarding the result of its `decompress_to_buffer`.
             fn zstd_decompress_to_buffer(
                 bytes: &[u8],
                 buf: &mut Vec<u8>,
             ) -> Result<usize, std::io::Error> {
                 ZSTD_DECODER.with(|cell| {
                     let mut decoder = cell.borrow_mut();
                     decoder.decompress_to_buffer(bytes, buf)
                 })
             }
             impl<'revlog> RevlogEntry<'revlog> {
                 /// Revision number of this entry
                 pub fn revision(&self) -> Revision {
                     self.rev
                 }
                 /// Node ID of this entry
                 pub fn node(&self) -> &Node {
                     &self.hash
                 }
                 /// The recorded length of the uncompressed data, or `None` if the
                 /// stored (signed) value does not fit in a `u32`, i.e. is negative.
                 pub fn uncompressed_len(&self) -> Option<u32> {
                     u32::try_from(self.uncompressed_len).ok()
                 }
                 /// Tell whether this entry has a first parent, i.e. whether `p1` is
                 /// not `NULL_REVISION`.
                 pub fn has_p1(&self) -> bool {
                     self.p1 != NULL_REVISION
                 }
                 /// The entry of the first parent, `None` if there is no first parent.
                 ///
                 /// Fails if the parent entry cannot be read from the revlog.
                 pub fn p1_entry(
                     &self,
                 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
                     if self.p1 == NULL_REVISION {
                         return Ok(None);
                     }
                     self.revlog.get_entry_for_checked_rev(self.p1).map(Some)
                 }
                 /// The entry of the second parent, `None` if there is no second
                 /// parent.
                 ///
                 /// Fails if the parent entry cannot be read from the revlog.
                 pub fn p2_entry(
                     &self,
                 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
                     if self.p2 == NULL_REVISION {
                         return Ok(None);
                     }
                     self.revlog.get_entry_for_checked_rev(self.p2).map(Some)
                 }
                 /// The first parent revision, `None` if it is `NULL_REVISION`.
                 pub fn p1(&self) -> Option<Revision> {
                     Some(self.p1).filter(|parent| *parent != NULL_REVISION)
                 }
                 /// The second parent revision, `None` if it is `NULL_REVISION`.
                 pub fn p2(&self) -> Option<Revision> {
                     Some(self.p2).filter(|parent| *parent != NULL_REVISION)
                 }
                 /// Tell whether the `REVISION_FLAG_CENSORED` bit is set in the flags
                 /// of this entry.
                 pub fn is_censored(&self) -> bool {
                     (self.flags & REVISION_FLAG_CENSORED) != 0
                 }
                 /// Tell whether this entry carries any known flag other than
                 /// ELLIPSIS.
                 pub fn has_length_affecting_flag_processor(&self) -> bool {
                     // Relevant Python code: revlog.size()
                     // note: ELLIPSIS is known to not change the content, hence it
                     // is taken out of the mask.
                     let length_affecting_flags =
                         REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS;
                     (self.flags & length_affecting_flags) != 0
                 }
                 /// The data for this entry, after resolving deltas if any.
                 ///
                 /// Starting from this entry, the delta chain is followed until an
                 /// entry that is not a delta is reached. If no delta was met, the
                 /// chunk of that entry is the data; otherwise that entry and the
                 /// collected deltas are handed to `Revlog::build_data_from_deltas`.
                 ///
                 /// Neither the hash nor the censored flag are checked here, see
                 /// `data` for that.
                 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
                     // The meaning of `base_rev_or_base_of_delta_chain` depends on
                     // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
                     // `mercurial/revlogutils/constants.py` and the code in
                     // [_chaininfo] and in [index_deltachain].
                     let generaldelta = self.revlog.index.uses_generaldelta();
                     let mut deltas = Vec::new();
                     let mut current = self.clone();
                     while let Some(delta_base) =
                         current.base_rev_or_base_of_delta_chain
                     {
                         let previous = if generaldelta {
                             self.revlog.get_entry_for_checked_rev(delta_base)?
                         } else {
                             // Without generaldelta, the next entry of the chain
                             // is the revision right before the current one.
                             let prev_rev = UncheckedRevision(current.rev.0 - 1);
                             self.revlog.get_entry(prev_rev)?
                         };
                         deltas.push(current);
                         current = previous;
                     }
                     if deltas.is_empty() {
                         Ok(current.data_chunk()?)
                     } else {
                         Ok(Revlog::build_data_from_deltas(current, &deltas)?.into())
                     }
                 }
                 /// Give `data` back if it matches the hash of this entry.
                 ///
                 /// On mismatch, the error is "unsupported" if the entry has the
                 /// ELLIPSIS flag, and "corrupted" otherwise.
                 fn check_data(
                     &self,
                     data: Cow<'revlog, [u8]>,
                 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
                     let hash_matches = self.revlog.check_hash(
                         self.p1,
                         self.p2,
                         self.hash.as_bytes(),
                         &data,
                     );
                     if hash_matches {
                         return Ok(data);
                     }
                     let is_ellipsis = (self.flags & REVISION_FLAG_ELLIPSIS) != 0;
                     let error = if is_ellipsis {
                         HgError::unsupported(
                             "ellipsis revisions are not supported by rhg",
                         )
                     } else {
                         corrupted(format!(
                             "hash check failed for revision {}",
                             self.rev
                         ))
                     };
                     Err(error.into())
                 }
                 /// The data for this entry, after resolving deltas and running the
                 /// sanity checks.
                 ///
                 /// The data of the null revision is returned without any check.
                 /// For any other revision, a censored entry is an error, and the
                 /// data must pass `check_data`.
                 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
                     let raw = self.rawdata()?;
                     if self.rev == NULL_REVISION {
                         Ok(raw)
                     } else if self.is_censored() {
                         Err(HgError::CensoredNodeError.into())
                     } else {
                         self.check_data(raw)
                     }
                 }
                 /// Extract the data contained in the entry.
                 /// This may be a delta. (See `is_delta`.)
                 ///
                 /// The first stored byte tells how the rest is encoded; an entry
                 /// without any byte gives empty data.
                 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
                     match self.bytes.first().copied() {
                         // Nothing stored at all.
                         None => Ok(Cow::Borrowed(&[])),
                         // Revision data is the entirety of the entry, including
                         // this header.
                         Some(b'\0') => Ok(Cow::Borrowed(self.bytes)),
                         // Raw revision data follows.
                         Some(b'u') => Ok(Cow::Borrowed(&self.bytes[1..])),
                         // zlib (RFC 1950) data.
                         Some(b'x') => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
                         // zstd data.
                         Some(b'\x28') => {
                             Ok(Cow::Owned(self.uncompressed_zstd_data()?))
                         }
                         // A proper new format should have had a repo/store
                         // requirement.
                         Some(format_type) => Err(corrupted(format!(
                             "unknown compression header '{}'",
                             format_type
                         ))),
                     }
                 }
                 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
                     let mut decoder = ZlibDecoder::new(self.bytes);
                     if self.is_delta() {
                         let mut buf = Vec::with_capacity(self.compressed_len as usize);
                         decoder
                             .read_to_end(&mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         Ok(buf)
                     } else {
                         let cap = self.uncompressed_len.max(0) as usize;
                         let mut buf = vec![0; cap];
                         decoder
                             .read_exact(&mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         Ok(buf)
                     }
                 }
                 /// Inflate the zstd-compressed bytes of this entry.
                 ///
                 /// For a snapshot, the decompressed length has to match the recorded
                 /// uncompressed length, otherwise the entry is reported as
                 /// corrupted. No such check is done for a delta.
                 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
                     let cap = self.uncompressed_len.max(0) as usize;
                     let mut buf = Vec::with_capacity(cap);
                     if !self.is_delta() {
                         let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                         return if len == self.uncompressed_len as usize {
                             Ok(buf)
                         } else {
                             Err(corrupted("uncompressed length does not match"))
                         };
                     }
                     // [cap] is usually an over-estimate of the space needed because
                     // it's the length of delta-decoded data, but we're interested
                     // in the size of the delta.
                     // This means we have to [shrink_to_fit] to avoid holding on
                     // to a large chunk of memory, but it also means we must have a
                     // fallback branch, for the case when the delta is longer than
                     // the original data (surprisingly, this does happen in practice)
                     if zstd_decompress_to_buffer(self.bytes, &mut buf).is_ok() {
                         buf.shrink_to_fit();
                     } else {
                         buf.clear();
                         zstd::stream::copy_decode(self.bytes, &mut buf)
                             .map_err(|e| corrupted(e.to_string()))?;
                     }
                     Ok(buf)
                 }
                 /// Tell if the entry is a snapshot or a delta
                 /// (influences on decompression).
                 ///
                 /// Returns `true` for a delta, that is when
                 /// `base_rev_or_base_of_delta_chain` is set, `false` for a snapshot.
                 fn is_delta(&self) -> bool {
                     self.base_rev_or_base_of_delta_chain.is_some()
                 }
             }
             /// Calculate the hash of a revision given its data and its parents.
             ///
             /// The SHA-1 is fed with the two parent hashes, the smaller one (in
             /// byte-wise order) first, and then with `data`.
             fn hash(
                 data: &[u8],
                 p1_hash: &[u8],
                 p2_hash: &[u8],
             ) -> [u8; NODE_BYTES_LENGTH] {
                 let (first, second) = if p1_hash > p2_hash {
                     (p2_hash, p1_hash)
                 } else {
                     (p1_hash, p2_hash)
                 };
                 let mut hasher = Sha1::new();
                 hasher.update(first);
                 hasher.update(second);
                 hasher.update(data);
                 *hasher.finalize().as_ref()
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
                 use itertools::Itertools;
                 #[test]
                 fn test_empty() {
                     // A revlog opened from an empty index file has no revision,
                     // but still knows about the null revision.
                     let tmp_dir = tempfile::tempdir().unwrap();
                     std::fs::write(tmp_dir.path().join("foo.i"), b"").unwrap();
                     let vfs = Vfs {
                         base: tmp_dir.path(),
                     };
                     let options = RevlogOpenOptions::new();
                     let revlog = Revlog::open(&vfs, "foo.i", None, options).unwrap();
                     assert!(revlog.is_empty());
                     assert_eq!(revlog.len(), 0);
                     assert!(revlog.get_entry(0.into()).is_err());
                     assert!(!revlog.has_rev(0.into()));
                     let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
                     assert_eq!(null_rev, NULL_REVISION);
                     let null_entry =
                         revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
                     assert_eq!(null_entry.revision(), NULL_REVISION);
                     let null_data = null_entry.data().unwrap();
                     assert!(null_data.is_empty());
                 }
                 #[test]
                 fn test_inline() {
                     // Inline revlog with two roots (revisions 0 and 1) and one
                     // merge of both (revision 2).
                     let tmp_dir = tempfile::tempdir().unwrap();
                     let vfs = Vfs {
                         base: tmp_dir.path(),
                     };
                     let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
                         .unwrap();
                     let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                         .unwrap();
                     let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
                         .unwrap();
                     let index_entries = vec![
                         IndexEntryBuilder::new()
                             .is_first(true)
                             .with_version(1)
                             .with_inline(true)
                             .with_offset(INDEX_ENTRY_SIZE)
                             .with_node(node0)
                             .build(),
                         IndexEntryBuilder::new()
                             .with_offset(INDEX_ENTRY_SIZE)
                             .with_node(node1)
                             .build(),
                         IndexEntryBuilder::new()
                             .with_offset(INDEX_ENTRY_SIZE)
                             .with_p1(Revision(0))
                             .with_p2(Revision(1))
                             .with_node(node2)
                             .build(),
                     ];
                     let contents = index_entries.into_iter().flatten().collect_vec();
                     std::fs::write(tmp_dir.path().join("foo.i"), contents).unwrap();
                     let revlog =
                         Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
                             .unwrap();
                     // The two roots: no parent whatsoever
                     for &(rev, node) in &[(0, node0), (1, node1)] {
                         let entry = revlog.get_entry(rev.into()).ok().unwrap();
                         assert_eq!(entry.revision(), Revision(rev));
                         assert_eq!(*entry.node(), node);
                         assert!(!entry.has_p1());
                         assert_eq!(entry.p1(), None);
                         assert_eq!(entry.p2(), None);
                         let p1_entry = entry.p1_entry().unwrap();
                         assert!(p1_entry.is_none());
                         let p2_entry = entry.p2_entry().unwrap();
                         assert!(p2_entry.is_none());
                     }
                     // The merge: both parents are set and can be fetched
                     let merge = revlog.get_entry(2.into()).ok().unwrap();
                     assert_eq!(merge.revision(), Revision(2));
                     assert_eq!(*merge.node(), node2);
                     assert!(merge.has_p1());
                     assert_eq!(merge.p1(), Some(Revision(0)));
                     assert_eq!(merge.p2(), Some(Revision(1)));
                     let p1_entry = merge.p1_entry().unwrap();
                     assert!(p1_entry.is_some());
                     assert_eq!(p1_entry.unwrap().revision(), Revision(0));
                     let p2_entry = merge.p2_entry().unwrap();
                     assert!(p2_entry.is_some());
                     assert_eq!(p2_entry.unwrap().revision(), Revision(1));
                 }
                 #[test]
                 fn test_nodemap() {
                     let tmp_dir = tempfile::tempdir().unwrap();
                     let vfs = Vfs {
                         base: tmp_dir.path(),
                     };
                     // building a revlog with a forced Node starting with zeros
                     // This is a corruption, but it does not preclude using the nodemap
                     // if we don't try and access the data
                     let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
                         .unwrap();
                     let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
                         .unwrap();
                     let index_entries = vec![
                         IndexEntryBuilder::new()
                             .is_first(true)
                             .with_version(1)
                             .with_inline(true)
                             .with_offset(INDEX_ENTRY_SIZE)
                             .with_node(node0)
                             .build(),
                         IndexEntryBuilder::new()
                             .with_offset(INDEX_ENTRY_SIZE)
                             .with_node(node1)
                             .build(),
                     ];
                     let contents = index_entries.into_iter().flatten().collect_vec();
                     std::fs::write(tmp_dir.path().join("foo.i"), contents).unwrap();
                     let mut idx = nodemap::tests::TestNtIndex::new();
                     idx.insert_node(Revision(0), node0).unwrap();
                     idx.insert_node(Revision(1), node1).unwrap();
                     let revlog = Revlog::open_gen(
                         &vfs,
                         "foo.i",
                         None,
                         RevlogOpenOptions::new(),
                         Some(idx.nt),
                     )
                     .unwrap();
                     // accessing the data shows the corruption
                     revlog.get_entry(0.into()).unwrap().data().unwrap_err();
                     // lookups by full node
                     let null_rev = revlog.rev_from_node(NULL_NODE.into()).unwrap();
                     assert_eq!(null_rev, Revision(-1));
                     assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
                     assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
                     // lookups by unambiguous prefix
                     let zeros_prefix = NodePrefix::from_hex("000").unwrap();
                     assert_eq!(
                         revlog.rev_from_node(zeros_prefix).unwrap(),
                         Revision(-1)
                     );
                     let b00_prefix = NodePrefix::from_hex("b00").unwrap();
                     assert_eq!(revlog.rev_from_node(b00_prefix).unwrap(), Revision(1));
                     // RevlogError does not implement PartialEq
                     // (ultimately because io::Error does not)
                     let error = revlog
                         .rev_from_node(NodePrefix::from_hex("00").unwrap())
                         .expect_err("Expected to give AmbiguousPrefix error");
                     if !matches!(error, RevlogError::AmbiguousPrefix) {
                         panic!("Got another error than AmbiguousPrefix: {:?}", error);
                     }
                 }
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages