upstream/mercurial-mirror Commit - r53188:f90796d3

rust: fix clippy lints...

Raphaël Gomès -

r53188:f90796d3 default

parent child

rust/hg-core/src/ancestors.rs

0 +1 -2

              // ancestors.rs
              //
              // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Rust versions of generic DAG ancestors algorithms for Mercurial
              use super::{Graph, GraphError, Revision, NULL_REVISION};
              use crate::dagops;
              use std::cmp::max;
              use std::collections::{BinaryHeap, HashSet};
              /// Iterator over the ancestors of a given list of revisions
              /// This is a generic type, defined and implemented for any Graph, so that
              /// it's easy to
              ///
              /// - unit test in pure Rust
              /// - bind to main Mercurial code, potentially in several ways and have these
              ///   bindings evolve over time
              pub struct AncestorsIterator<G: Graph> {
                  /// Source of parent information for each revision
                  graph: G,
                  /// Max-heap of the revisions scheduled for emission
                  visit: BinaryHeap<Revision>,
                  /// Revisions already scheduled or emitted, used to avoid duplicates
                  seen: HashSet<Revision>,
                  /// Revisions lower than this one are never scheduled
                  stoprev: Revision,
              }
              /// State for computing which revisions are not ancestors of a set of
              /// bases, the set of bases being extended as ancestors get discovered.
              pub struct MissingAncestors<G: Graph> {
                  /// Source of parent information for each revision
                  graph: G,
                  /// The known bases; never contains `NULL_REVISION`
                  bases: HashSet<Revision>,
                  /// Highest revision ever passed to `add_bases()`, or `NULL_REVISION`
                  /// if there was none
                  max_base: Revision,
              }
              impl<G: Graph> AncestorsIterator<G> {
                  /// Build an iterator over the ancestors of `initrevs`.
                  ///
                  /// Initial revisions lower than `stoprev` are discarded, and no
                  /// revision lower than `stoprev` is ever scheduled for emission.
                  ///
                  /// If `inclusive` is true, the initial revisions are themselves
                  /// emitted; otherwise iteration starts from their parents.
                  ///
                  /// # Errors
                  ///
                  /// In the non-inclusive case, a failure to look up the parents of an
                  /// initial revision is returned right away.
                  pub fn new(
                      graph: G,
                      initrevs: impl IntoIterator<Item = Revision>,
                      stoprev: Revision,
                      inclusive: bool,
                  ) -> Result<Self, GraphError> {
                      let starting_points =
                          initrevs.into_iter().filter(|&rev| rev >= stoprev);
                      if !inclusive {
                          let mut iter = AncestorsIterator {
                              visit: BinaryHeap::new(),
                              seen: HashSet::new(),
                              stoprev,
                              graph,
                          };
                          // With the null revision marked as seen up front, it can
                          // never get scheduled.
                          iter.seen.insert(NULL_REVISION);
                          for rev in starting_points {
                              let parents = iter.graph.parents(rev)?;
                              for &parent in parents.iter() {
                                  iter.conditionally_push_rev(parent);
                              }
                          }
                          return Ok(iter);
                      }
                      let visit: BinaryHeap<Revision> = starting_points.collect();
                      let seen: HashSet<Revision> = visit.iter().copied().collect();
                      Ok(AncestorsIterator {
                          visit,
                          seen,
                          stoprev,
                          graph,
                      })
                  }
                  /// Schedule `rev` for emission, unless it is lower than `stoprev` or
                  /// has already been seen.
                  #[inline]
                  fn conditionally_push_rev(&mut self, rev: Revision) {
                      if rev < self.stoprev {
                          return;
                      }
                      // `insert` returns true only for a revision not seen before
                      if self.seen.insert(rev) {
                          self.visit.push(rev);
                      }
                  }
                  /// Tell whether `target` is among the ancestors emitted by this
                  /// iterator, partially consuming it in the process.
                  ///
                  /// This is meant for iterators actually dedicated to that kind of
                  /// purpose.
                  ///
                  /// # Errors
                  ///
                  /// Any error yielded by the iteration is returned as is.
                  pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
                      let already_seen =
                          target != NULL_REVISION && self.seen.contains(&target);
                      if already_seen {
                          return Ok(true);
                      }
                      for item in self {
                          match item? {
                              rev if rev == target => return Ok(true),
                              // Revisions are emitted in decreasing order: once
                              // below `target`, it cannot show up anymore.
                              rev if rev < target => return Ok(false),
                              _ => {}
                          }
                      }
                      Ok(false)
                  }
                  /// Return the revision that the next call to `next()` would examine,
                  /// without consuming it, or `None` if nothing is left to visit.
                  pub fn peek(&self) -> Option<Revision> {
                      self.visit.peek().copied()
                  }
                  /// Tell if the iterator is about an empty set
                  ///
                  /// The result does not depend whether the iterator has been consumed
                  /// or not.
                  /// This is mostly meant for iterators backing a lazy ancestors set
                  pub fn is_empty(&self) -> bool {
                      if !self.visit.is_empty() {
                          return false;
                      }
                      match self.seen.len() {
                          0 => true,
                          // A non-inclusive iterator marks the null revision as seen
                          // at construction time, so a singleton made of it still
                          // stands for an empty set.
                          1 => self.seen.contains(&NULL_REVISION),
                          _ => false,
                      }
                  }
              }
              /// Main implementation for the iterator
              ///
              /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
              /// with a few non crucial differences:
              ///
              /// - there's no filtering of invalid parent revisions. Actually, it should be
              ///   consistent and more efficient to filter them from the end caller.
              /// - we don't have the optimization for adjacent revisions (i.e., the case
              ///   where `p1 == rev - 1`), because it amounts to update the first element of
              ///   the heap without sifting, which Rust's BinaryHeap doesn't let us do.
              /// - we save a few pushes by comparing with `stoprev` before pushing
              impl<G: Graph> Iterator for AncestorsIterator<G> {
                  type Item = Result<Revision, GraphError>;
                  fn next(&mut self) -> Option<Self::Item> {
                      // The greatest scheduled revision is the one to emit. It stays
                      // in the heap until its parents are known, so that a lookup
                      // error leaves the heap untouched.
                      let current = *self.visit.peek()?;
                      let [p1, p2] = match self.graph.parents(current) {
                          Err(e) => return Some(Err(e)),
                          Ok(parents) => parents,
                      };
                      if p1 >= self.stoprev && self.seen.insert(p1) {
                          // Replace the top of the heap with the first parent instead
                          // of popping then pushing.
                          let mut top = self.visit.peek_mut().unwrap();
                          *top = p1;
                      } else {
                          self.visit.pop();
                      }
                      self.conditionally_push_rev(p2);
                      Some(Ok(current))
                  }
              }
              impl<G: Graph> MissingAncestors<G> {
                  pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
                      let mut created = MissingAncestors {
                          graph,
                          bases: HashSet::new(),
                          max_base: NULL_REVISION,
                      };
                      created.add_bases(bases);
                      created
                  }
                  /// Tell whether there is at least one base.
                  pub fn has_bases(&self) -> bool {
                      !self.bases.is_empty()
                  }
                  /// Return a reference to the current bases.
                  ///
                  /// This is useful in unit tests, but also setdiscovery.py does
                  /// read the bases attribute of an ancestor.missingancestors instance.
                  pub fn get_bases(&self) -> &HashSet<Revision> {
                      &self.bases
                  }
                  /// Computes the relative heads of current bases.
                  ///
                  /// The object is still usable after this: the bases are only read,
                  /// and the result is a new set produced by `dagops::heads`.
                  pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
                      dagops::heads(&self.graph, self.bases.iter())
                  }
                  /// Consumes the object and returns the relative heads of its bases.
                  pub fn into_bases_heads(
                      mut self,
                  ) -> Result<HashSet<Revision>, GraphError> {
                      dagops::retain_heads(&self.graph, &mut self.bases)?;
                      Ok(self.bases)
                  }
                  /// Add some revisions to `self.bases`
                  ///
                  /// Takes care of keeping `self.max_base` up to date.
                  pub fn add_bases(
                      &mut self,
                      new_bases: impl IntoIterator<Item = Revision>,
                  ) {
                      let mut max_base = self.max_base;
                      self.bases.extend(
                          new_bases
                              .into_iter()
                              .filter(|&rev| rev != NULL_REVISION)
-                             .map(|r| {
+                             .inspect(|&r| {
                                  if r > max_base {
                                      max_base = r;
                                  }
+                                 r
                              }),
                      );
                      self.max_base = max_base;
                  }
                  /// Remove all ancestors of self.bases from the revs set (in place)
                  ///
                  /// Parents of the bases visited along the way are added to
                  /// `self.bases`.
                  ///
                  /// # Errors
                  ///
                  /// Any error raised while looking up parents is returned; `revs` and
                  /// `self.bases` may have been partially updated by then.
                  pub fn remove_ancestors_from(
                      &mut self,
                      revs: &mut HashSet<Revision>,
                  ) -> Result<(), GraphError> {
                      revs.retain(|rev| !self.bases.contains(rev));
                      // the null revision is always an ancestor. Logically speaking
                      // it's debatable in case bases is empty, but the Python
                      // implementation always adds NULL_REVISION to bases, making it
                      // unconditionally true.
                      revs.remove(&NULL_REVISION);
                      // Stop here if nothing is left to examine, or if there is no
                      // base to walk down from.
                      if revs.is_empty() || self.max_base == NULL_REVISION {
                          return Ok(());
                      }
                      // Anything in revs greater than the highest base is definitely
                      // not an ancestor of the bases, hence will be kept whatever
                      // happens. Knowing how many of those there are gives an earlier
                      // stop condition than going all the way to the root.
                      let highest = self.max_base;
                      let keepcount = revs.iter().filter(|&&rev| rev > highest).count();
                      let mut cursor = highest;
                      while cursor != NULL_REVISION && revs.len() > keepcount {
                          if self.bases.contains(&cursor) {
                              revs.remove(&cursor);
                              self.add_parents(cursor)?;
                          }
                          // The loop condition stops the walk at NULL_REVISION, right
                          // after revision 0.
                          cursor = Revision(cursor.0 - 1);
                      }
                      Ok(())
                  }
                  /// Add the parents of `rev` to `self.bases`
                  ///
                  /// Nothing happens if `rev` is `NULL_REVISION`.
                  ///
                  /// This has no effect on `self.max_base`
                  #[inline]
                  fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
                      if rev != NULL_REVISION {
                          let parents = self.graph.parents(rev)?;
                          // No need to bother the set with inserting NULL_REVISION
                          // over and over
                          self.bases.extend(
                              parents.iter().copied().filter(|&p| p != NULL_REVISION),
                          );
                      }
                      Ok(())
                  }
                  /// Return all the ancestors of revs that are not ancestors of self.bases
                  ///
                  /// This may include elements from revs.
                  ///
                  /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
                  /// revision number order, which is a topological order.
                  ///
                  /// As a side effect, `self.bases` is extended with the ancestors of
                  /// bases encountered during the walk; `self.max_base` is left as is.
                  ///
                  /// # Errors
                  ///
                  /// Any error raised while looking up parents is returned;
                  /// `self.bases` may have been partially extended by then.
                  pub fn missing_ancestors(
                      &mut self,
                      revs: impl IntoIterator<Item = Revision>,
                  ) -> Result<Vec<Revision>, GraphError> {
                      // just for convenience and comparison with Python version
                      let bases_visit = &mut self.bases;
                      // revisions that are already bases cannot be missing
                      let mut revs: HashSet<Revision> = revs
                          .into_iter()
                          .filter(|r| !bases_visit.contains(r))
                          .collect();
                      let revs_visit = &mut revs;
                      // Revisions known to be ancestors of both revs and bases.
                      // Since revs has just been filtered against the bases, this
                      // intersection starts out empty.
                      let mut both_visit: HashSet<Revision> =
                          revs_visit.intersection(bases_visit).cloned().collect();
                      if revs_visit.is_empty() {
                          return Ok(Vec::new());
                      }
                      let max_revs = revs_visit.iter().cloned().max().unwrap();
                      let start = max(self.max_base, max_revs);
                      // TODO heuristics for with_capacity()?
                      let mut missing: Vec<Revision> = Vec::new();
                      // walk all revision numbers downwards, from `start` to 0
                      for curr in (0..=start.0).rev() {
                          // nothing left to classify: we are done
                          if revs_visit.is_empty() {
                              break;
                          }
                          if both_visit.remove(&Revision(curr)) {
                              // curr is an ancestor of both sides, and so are its
                              // parents.
                              // curr's parents might have made it into revs_visit through
                              // another path
                              for p in self.graph.parents(Revision(curr))?.iter().cloned() {
                                  if p == NULL_REVISION {
                                      continue;
                                  }
                                  revs_visit.remove(&p);
                                  bases_visit.insert(p);
                                  both_visit.insert(p);
                              }
                          } else if revs_visit.remove(&Revision(curr)) {
                              // curr is an ancestor of revs only: it is missing
                              missing.push(Revision(curr));
                              for p in self.graph.parents(Revision(curr))?.iter().cloned() {
                                  if p == NULL_REVISION {
                                      continue;
                                  }
                                  if bases_visit.contains(&p) {
                                      // p is already known to be an ancestor of revs_visit
                                      revs_visit.remove(&p);
                                      both_visit.insert(p);
                                  } else if both_visit.contains(&p) {
                                      // p should have been in bases_visit
                                      revs_visit.remove(&p);
                                      bases_visit.insert(p);
                                  } else {
                                      // visit later
                                      revs_visit.insert(p);
                                  }
                              }
                          } else if bases_visit.contains(&Revision(curr)) {
                              // curr is an ancestor of bases only: its parents become
                              // bases in all cases
                              for p in self.graph.parents(Revision(curr))?.iter().cloned() {
                                  if p == NULL_REVISION {
                                      continue;
                                  }
                                  if revs_visit.remove(&p) || both_visit.contains(&p) {
                                      // p is an ancestor of bases_visit, and is implicitly
                                      // in revs_visit, which means p is ::revs & ::bases.
                                      bases_visit.insert(p);
                                      both_visit.insert(p);
                                  } else {
                                      bases_visit.insert(p);
                                  }
                              }
                          }
                      }
                      // collected in decreasing order, returned in increasing order
                      missing.reverse();
                      Ok(missing)
                  }
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use crate::{
                      testing::{SampleGraph, VecGraph},
                      BaseRevision,
                  };
                  /// Test-only shortcut to write revisions as plain integer literals.
                  impl From<BaseRevision> for Revision {
                      fn from(value: BaseRevision) -> Self {
                          // Guard against this conversion leaking out of test builds
                          if !cfg!(test) {
                              panic!("should only be used in tests")
                          }
                          Revision(value)
                      }
                  }
                  /// Test-only comparison of a revision with a plain integer.
                  impl PartialEq<BaseRevision> for Revision {
                      fn eq(&self, other: &BaseRevision) -> bool {
                          if !cfg!(test) {
                              panic!("should only be used in tests")
                          }
                          self.0 == *other
                      }
                  }
                  /// Test-only comparison of a revision with an unsigned integer.
                  impl PartialEq<u32> for Revision {
                      fn eq(&self, other: &u32) -> bool {
                          if !cfg!(test) {
                              panic!("should only be used in tests")
                          }
                          // A revision number that does not fit in a `u32` is equal
                          // to none of them.
                          matches!(u32::try_from(self.0), Ok(value) if value == *other)
                      }
                  }
                  /// Collect everything an `AncestorsIterator` built from the given
                  /// arguments emits, panicking on any error.
                  fn list_ancestors<G: Graph>(
                      graph: G,
                      initrevs: Vec<Revision>,
                      stoprev: Revision,
                      inclusive: bool,
                  ) -> Vec<Revision> {
                      let ancestors =
                          AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
                              .unwrap();
                      ancestors.map(Result::unwrap).collect()
                  }
                  #[test]
                  /// Same tests as test-ancestor.py, without membership
                  /// (see also test-ancestor.py.out)
                  ///
                  /// NOTE(review): the `stoprev` literals of the multi-line calls had
                  /// been lost; the non-zero ones (6, 6, 11, 12) are consistent with
                  /// the expected outputs but should be confirmed against upstream.
                  fn test_list_ancestor() {
                      assert_eq!(
                          list_ancestors(SampleGraph, vec![], 0.into(), false),
                          Vec::<Revision>::new()
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![11.into(), 13.into()],
                              0.into(),
                              false
                          ),
                          vec![8, 7, 4, 3, 2, 1, 0]
                      );
                      // it works as well on references, because &Graph implements Graph
                      // this is needed as of this writing by RHGitaly
                      assert_eq!(
                          list_ancestors(
                              &SampleGraph,
                              vec![11.into(), 13.into()],
                              0.into(),
                              false
                          ),
                          vec![8, 7, 4, 3, 2, 1, 0]
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![1.into(), 3.into()],
                              0.into(),
                              false
                          ),
                          vec![1, 0]
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![11.into(), 13.into()],
                              0.into(),
                              true
                          ),
                          vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
                      );
                      // from here on, `stoprev` cuts the lower part of the result
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![11.into(), 13.into()],
                              6.into(),
                              false
                          ),
                          vec![8, 7]
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![11.into(), 13.into()],
                              6.into(),
                              true
                          ),
                          vec![13, 11, 8, 7]
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![11.into(), 13.into()],
                              11.into(),
                              true
                          ),
                          vec![13, 11]
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![11.into(), 13.into()],
                              12.into(),
                              true
                          ),
                          vec![13]
                      );
                      assert_eq!(
                          list_ancestors(
                              SampleGraph,
                              vec![10.into(), 1.into()],
                              0.into(),
                              true
                          ),
                          vec![10, 5, 4, 2, 1, 0]
                      );
                  }
                  #[test]
                  /// Corner case that's not directly in test-ancestors.py, but
                  /// that happens quite often, as demonstrated by running the whole
                  /// suite.
                  /// For instance, run tests/test-obsolete-checkheads.t
                  fn test_nullrev_input() {
                      // NOTE(review): the `stoprev` literal had been lost; restored as
                      // 0, as in the other calls of this module. With 0, the null
                      // revision is filtered out before any parent lookup.
                      let mut iter = AncestorsIterator::new(
                          SampleGraph,
                          vec![Revision(-1)],
                          0.into(),
                          false,
                      )
                      .unwrap();
                      assert_eq!(iter.next(), None);
                  }
                  #[test]
                  /// Membership checks, including the special case of the null
                  /// revision, which is never reported as contained.
                  fn test_contains() {
                      // NOTE(review): both `stoprev` literals had been lost; restored
                      // as 0, as in the other calls of this module.
                      let mut lazy = AncestorsIterator::new(
                          SampleGraph,
                          vec![10.into(), 1.into()],
                          0.into(),
                          true,
                      )
                      .unwrap();
                      assert!(lazy.contains(1.into()).unwrap());
                      assert!(!lazy.contains(3.into()).unwrap());
                      let mut lazy = AncestorsIterator::new(
                          SampleGraph,
                          vec![0.into()],
                          0.into(),
                          false,
                      )
                      .unwrap();
                      assert!(!lazy.contains(NULL_REVISION).unwrap());
                  }
                  #[test]
                  /// `peek()` shows the upcoming revision without consuming it.
                  fn test_peek() {
                      // NOTE(review): the `stoprev` literal had been lost; restored as
                      // 0, as in the other calls of this module.
                      let mut iter = AncestorsIterator::new(
                          SampleGraph,
                          vec![10.into()],
                          0.into(),
                          true,
                      )
                      .unwrap();
                      // peek() gives us the next value
                      assert_eq!(iter.peek(), Some(10.into()));
                      // but it's not been consumed
                      assert_eq!(iter.next(), Some(Ok(10.into())));
                      // and iteration resumes normally
                      assert_eq!(iter.next(), Some(Ok(5.into())));
                      // let's drain the iterator to test peek() at the end
                      while iter.next().is_some() {}
                      assert_eq!(iter.peek(), None);
                  }
                  #[test]
                  /// `is_empty()` is about the underlying set, not about what is left
                  /// to iterate on.
                  fn test_empty() {
                      // NOTE(review): the `stoprev` literals of the multi-line calls
                      // had been lost; restored as 0, as in the intact call below.
                      let mut iter = AncestorsIterator::new(
                          SampleGraph,
                          vec![10.into()],
                          0.into(),
                          true,
                      )
                      .unwrap();
                      assert!(!iter.is_empty());
                      // still not empty once fully consumed
                      while iter.next().is_some() {}
                      assert!(!iter.is_empty());
                      let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true)
                          .unwrap();
                      assert!(iter.is_empty());
                      // case where iter.seen == {NULL_REVISION}
                      let iter = AncestorsIterator::new(
                          SampleGraph,
                          vec![0.into()],
                          0.into(),
                          false,
                      )
                      .unwrap();
                      assert!(iter.is_empty());
                  }
                  /// A corrupted Graph, supporting error handling tests
                  #[derive(Clone, Debug)]
                  struct Corrupted;
                  impl Graph for Corrupted {
                      // FIXME what to do about this? Are we just not supposed to get them
                      // anymore?
                      fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                          match rev {
                              Revision(1) => Ok([0.into(), (-1).into()]),
                              r => Err(GraphError::ParentOutOfRange(r)),
                          }
                      }
                  }
                  #[test]
                  /// An out of range initial revision makes the constructor fail.
                  fn test_initrev_out_of_range() {
                      // inclusive=false looks up initrev's parents right away
                      // NOTE(review): the `stoprev` literal had been lost; restored as
                      // 0, as in the other calls of this module.
                      match AncestorsIterator::new(
                          SampleGraph,
                          vec![25.into()],
                          0.into(),
                          false,
                      ) {
                          Ok(_) => panic!("Should have been ParentOutOfRange"),
                          Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())),
                      }
                  }
                  #[test]
                  /// A parent lookup failing during iteration is yielded as an error.
                  fn test_next_out_of_range() {
                      // The constructor succeeds: `Corrupted` knows the parents of
                      // revision 1. The failure comes from `next()`, when it looks up
                      // the parents of revision 0.
                      let mut iter =
                          AncestorsIterator::new(Corrupted, vec![1.into()], 0.into(), false)
                              .unwrap();
                      let expected = GraphError::ParentOutOfRange(0.into());
                      assert_eq!(iter.next(), Some(Err(expected)));
                  }
                  #[test]
                  /// Test constructor, add/get bases and heads
                  fn test_missing_bases() -> Result<(), GraphError> {
                      /// Sets have no defined order: sort before comparing
                      fn sorted(revs: &HashSet<Revision>) -> Vec<Revision> {
                          let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
                          as_vec.sort_unstable();
                          as_vec
                      }
                      // the duplicate of 3 is expected to be absorbed
                      let mut missing_ancestors = MissingAncestors::new(
                          SampleGraph,
                          [5.into(), 3.into(), 1.into(), 3.into()].iter().cloned(),
                      );
                      assert_eq!(sorted(missing_ancestors.get_bases()), [1, 3, 5]);
                      assert_eq!(missing_ancestors.max_base, 5);
                      missing_ancestors
                          .add_bases([3.into(), 7.into(), 8.into()].iter().cloned());
                      assert_eq!(sorted(missing_ancestors.get_bases()), [1, 3, 5, 7, 8]);
                      assert_eq!(missing_ancestors.max_base, 8);
                      let heads = missing_ancestors.bases_heads()?;
                      assert_eq!(sorted(&heads), [3, 5, 7, 8]);
                      Ok(())
                  }
                  fn assert_missing_remove(
                      bases: &[BaseRevision],
                      revs: &[BaseRevision],
                      expected: &[BaseRevision],
                  ) {
                      let mut missing_ancestors = MissingAncestors::new(
                          SampleGraph,
                          bases.iter().map(|r| Revision(*r)),
                      );
                      let mut revset: HashSet<Revision> =
                          revs.iter().map(|r| Revision(*r)).collect();
                      missing_ancestors
                          .remove_ancestors_from(&mut revset)
                          .unwrap();
                      let mut as_vec: Vec<Revision> = revset.into_iter().collect();
                      as_vec.sort_unstable();
                      assert_eq!(as_vec.as_slice(), expected);
                  }
                  #[test]
                  /// `remove_ancestors_from()` on a few cases over `SampleGraph`
                  fn test_missing_remove() {
                      let one_to_nine: Vec<BaseRevision> = (1..10).collect();
                      assert_missing_remove(&[1, 2, 3, 4, 7], &one_to_nine, &[5, 6, 8, 9]);
                      // all revs are above the only base: nothing gets removed
                      assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
                      assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
                  }
                  fn assert_missing_ancestors(
                      bases: &[BaseRevision],
                      revs: &[BaseRevision],
                      expected: &[BaseRevision],
                  ) {
                      let mut missing_ancestors = MissingAncestors::new(
                          SampleGraph,
                          bases.iter().map(|r| Revision(*r)),
                      );
                      let missing = missing_ancestors
                          .missing_ancestors(revs.iter().map(|r| Revision(*r)))
                          .unwrap();
                      assert_eq!(missing.as_slice(), expected);
                  }
                  #[test]
                  /// `missing_ancestors()` on a few cases over `SampleGraph`
                  fn test_missing_ancestors() {
                      // examples taken from test-ancestors.py by having it run
                      // on the same graph (both naive and fast Python algs)
                      // arguments are: bases, revs, expected missing ancestors
                      assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
                      assert_missing_ancestors(&[11], &[10], &[5, 10]);
                      assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
                  }
                  /// An interesting case found by a random generator similar to
                  /// the one in test-ancestor.py. An early version of Rust MissingAncestors
                  /// failed this, yet none of the integration tests of the whole suite
                  /// caught it.
                  #[allow(clippy::unnecessary_cast)]
                  #[test]
                  fn test_remove_ancestors_from_case1() {
                      const FAKE_NULL_REVISION: BaseRevision = -1;
                      assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0);
                      let graph: VecGraph = vec![
                          [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                          [0, FAKE_NULL_REVISION],
                          [1, 0],
                          [2, 1],
                          [3, FAKE_NULL_REVISION],
                          [4, FAKE_NULL_REVISION],
                          [5, 1],
                          [2, FAKE_NULL_REVISION],
                          [7, FAKE_NULL_REVISION],
                          [8, FAKE_NULL_REVISION],
                          [9, FAKE_NULL_REVISION],
                          [10, 1],
                          [3, FAKE_NULL_REVISION],
                          [12, FAKE_NULL_REVISION],
                          [13, FAKE_NULL_REVISION],
                          [14, FAKE_NULL_REVISION],
                          [4, FAKE_NULL_REVISION],
                          [16, FAKE_NULL_REVISION],
                          [17, FAKE_NULL_REVISION],
                          [18, FAKE_NULL_REVISION],
                          [19, 11],
                          [20, FAKE_NULL_REVISION],
                          [21, FAKE_NULL_REVISION],
                          [22, FAKE_NULL_REVISION],
                          [23, FAKE_NULL_REVISION],
                          [2, FAKE_NULL_REVISION],
                          [3, FAKE_NULL_REVISION],
                          [26, 24],
                          [27, FAKE_NULL_REVISION],
                          [28, FAKE_NULL_REVISION],
                          [12, FAKE_NULL_REVISION],
                          [1, FAKE_NULL_REVISION],
                          [1, 9],
                          [32, FAKE_NULL_REVISION],
                          [33, FAKE_NULL_REVISION],
                          [34, 31],
                          [35, FAKE_NULL_REVISION],
                          [36, 26],
                          [37, FAKE_NULL_REVISION],
                          [38, FAKE_NULL_REVISION],
                          [39, FAKE_NULL_REVISION],
                          [40, FAKE_NULL_REVISION],
                          [41, FAKE_NULL_REVISION],
                          [42, 26],
                          [0, FAKE_NULL_REVISION],
                          [44, FAKE_NULL_REVISION],
                          [45, 4],
                          [40, FAKE_NULL_REVISION],
                          [47, FAKE_NULL_REVISION],
                          [36, 0],
                          [49, FAKE_NULL_REVISION],
                          [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                          [51, FAKE_NULL_REVISION],
                          [52, FAKE_NULL_REVISION],
                          [53, FAKE_NULL_REVISION],
                          [14, FAKE_NULL_REVISION],
                          [55, FAKE_NULL_REVISION],
                          [15, FAKE_NULL_REVISION],
                          [23, FAKE_NULL_REVISION],
                          [58, FAKE_NULL_REVISION],
                          [59, FAKE_NULL_REVISION],
                          [2, FAKE_NULL_REVISION],
                          [61, 59],
                          [62, FAKE_NULL_REVISION],
                          [63, FAKE_NULL_REVISION],
                          [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                          [65, FAKE_NULL_REVISION],
                          [66, FAKE_NULL_REVISION],
                          [67, FAKE_NULL_REVISION],
                          [68, FAKE_NULL_REVISION],
                          [37, 28],
                          [69, 25],
                          [71, FAKE_NULL_REVISION],
                          [72, FAKE_NULL_REVISION],
                          [50, 2],
                          [74, FAKE_NULL_REVISION],
                          [12, FAKE_NULL_REVISION],
                          [18, FAKE_NULL_REVISION],
                          [77, FAKE_NULL_REVISION],
                          [78, FAKE_NULL_REVISION],
                          [79, FAKE_NULL_REVISION],
                          [43, 33],
                          [81, FAKE_NULL_REVISION],
                          [82, FAKE_NULL_REVISION],
                          [83, FAKE_NULL_REVISION],
                          [84, 45],
                          [85, FAKE_NULL_REVISION],
                          [86, FAKE_NULL_REVISION],
                          [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                          [88, FAKE_NULL_REVISION],
                          [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                          [76, 83],
                          [44, FAKE_NULL_REVISION],
                          [92, FAKE_NULL_REVISION],
                          [93, FAKE_NULL_REVISION],
                          [9, FAKE_NULL_REVISION],
                          [95, 67],
                          [96, FAKE_NULL_REVISION],
                          [97, FAKE_NULL_REVISION],
                          [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
                      ]
                      .into_iter()
                      .map(|[a, b]| [Revision(a), Revision(b)])
                      .collect();
                      let problem_rev = 28.into();
                      let problem_base = 70.into();
                      // making the problem obvious: problem_rev is a parent of problem_base
                      assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
                      let mut missing_ancestors: MissingAncestors<VecGraph> =
                          MissingAncestors::new(
                              graph,
                              [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
                                  .iter()
                                  .map(|r| Revision(*r)),
                          );
                      assert!(missing_ancestors.bases.contains(&problem_base));
                      let mut revs: HashSet<Revision> =
                          [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
                              .iter()
                              .map(|r| Revision(*r))
                              .collect();
                      missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
                      assert!(!revs.contains(&problem_rev));
                  }
              }

rust/hg-core/src/revlog/filelog.rs

0 +1 -1

              use crate::errors::HgError;
              use crate::exit_codes;
              use crate::repo::Repo;
              use crate::revlog::path_encode::path_encode;
              use crate::revlog::NodePrefix;
              use crate::revlog::Revision;
              use crate::revlog::RevlogEntry;
              use crate::revlog::{Revlog, RevlogError};
              use crate::utils::files::get_path_from_bytes;
              use crate::utils::hg_path::HgPath;
              use crate::utils::SliceExt;
              use crate::Graph;
              use crate::GraphError;
              use crate::UncheckedRevision;
              use std::path::PathBuf;
              use super::options::RevlogOpenOptions;
              /// A specialized `Revlog` to work with file data logs.
              ///
              /// This is a thin wrapper: every operation is delegated to the inner
              /// [`Revlog`], and results are wrapped in filelog-specific types
              /// ([`FilelogEntry`], [`FilelogRevisionData`]).
              pub struct Filelog {
                  /// The generic `revlog` format.
                  revlog: Revlog,
              }
              /// Graph traversal over a filelog is answered by the underlying revlog.
              impl Graph for Filelog {
                  /// Return the two parents of `rev`, as reported by the inner revlog.
                  fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
                      self.revlog.parents(rev)
                  }
              }
              impl Filelog {
                  pub fn open_vfs(
                      store_vfs: &crate::vfs::VfsImpl,
                      file_path: &HgPath,
                      options: RevlogOpenOptions,
                  ) -> Result<Self, HgError> {
                      let index_path = store_path(file_path, b".i");
                      let data_path = store_path(file_path, b".d");
                      let revlog =
                          Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
                      Ok(Self { revlog })
                  }
                  pub fn open(
                      repo: &Repo,
                      file_path: &HgPath,
                      options: RevlogOpenOptions,
                  ) -> Result<Self, HgError> {
                      Self::open_vfs(&repo.store_vfs(), file_path, options)
                  }
                  /// The given node ID is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn data_for_node(
                      &self,
                      file_node: impl Into<NodePrefix>,
                  ) -> Result<FilelogRevisionData, RevlogError> {
                      let file_rev = self.revlog.rev_from_node(file_node.into())?;
                      self.data_for_unchecked_rev(file_rev.into())
                  }
                  /// The given revision is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn data_for_unchecked_rev(
                      &self,
                      file_rev: UncheckedRevision,
                  ) -> Result<FilelogRevisionData, RevlogError> {
                      let data: Vec<u8> = self
                          .revlog
                          .get_data_for_unchecked_rev(file_rev)?
                          .into_owned();
                      Ok(FilelogRevisionData(data))
                  }
                  /// The given node ID is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn entry_for_node(
                      &self,
                      file_node: impl Into<NodePrefix>,
                  ) -> Result<FilelogEntry, RevlogError> {
                      let file_rev = self.revlog.rev_from_node(file_node.into())?;
                      self.entry(file_rev)
                  }
                  /// The given revision is that of the file as found in a filelog, not of a
                  /// changeset.
                  pub fn entry_for_unchecked_rev(
                      &self,
                      file_rev: UncheckedRevision,
                  ) -> Result<FilelogEntry, RevlogError> {
                      Ok(FilelogEntry(
                          self.revlog.get_entry_for_unchecked_rev(file_rev)?,
                      ))
                  }
                  /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision.
                  pub fn entry(
                      &self,
                      file_rev: Revision,
                  ) -> Result<FilelogEntry, RevlogError> {
                      Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
                  }
              }
              /// Build the store-relative path of a filelog file.
              ///
              /// The bytes `data/` + `hg_path` + `suffix` are run through
              /// `path_encode`, and the encoded bytes are turned into a `PathBuf`.
              fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
                  const PREFIX: &[u8] = b"data/";
                  let tracked = hg_path.as_bytes();
                  let mut raw: Vec<u8> =
                      Vec::with_capacity(PREFIX.len() + tracked.len() + suffix.len());
                  raw.extend_from_slice(PREFIX);
                  raw.extend_from_slice(tracked);
                  raw.extend_from_slice(suffix);
                  let encoded_bytes = path_encode(&raw);
                  get_path_from_bytes(&encoded_bytes).into()
              }
              /// A [`RevlogEntry`] that belongs to a filelog, as returned by
              /// [`Filelog::entry`] and its variants.
              pub struct FilelogEntry<'a>(RevlogEntry<'a>);
              impl FilelogEntry<'_> {
                  /// Cheap length comparison that avoids `self.data()`, which can be
                  /// expensive (decompression and delta resolution).
                  ///
                  /// Using only revlog index information, including
                  /// `RevlogEntry::uncompressed_len`:
                  ///
                  /// * Returns `true` if the length that `self.data().file_data().len()`
                  ///   would return is definitely **not equal** to `other_len`.
                  /// * Returns `false` if available information is inconclusive.
                  pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
                      // Python code implementing the same behavior:
                      // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
                      // revlog.size, revlog.rawsize
                      //
                      // Below, `file_data_len` stands for what
                      // `self.data().file_data().len()` would return.
                      let entry = &self.0;

                      if entry.is_censored() {
                          // For a censored revision, `file_data_len` is taken to be 0.
                          return other_len != 0;
                      }
                      if entry.has_length_affecting_flag_processor() {
                          // Nothing can be concluded about `file_data_len`.
                          return false;
                      }

                      // Revlog revisions (usually) record the size of their data after
                      // decompression and delta resolution, i.e. the size of what
                      // `Revlog::get_rev_data` would return.
                      //
                      // In a filelog, that data is the file's contents preceded by an
                      // optional metadata block.
                      let uncompressed_len = match entry.uncompressed_len() {
                          Some(len) => len as u64,
                          // The field was set to -1: the real uncompressed length is
                          // unknown, and only decompressing would tell us more.
                          None => return false,
                      };

                      // `uncompressed_len = file_data_len + optional_metadata_len`,
                      // hence `file_data_len <= uncompressed_len`.
                      if uncompressed_len < other_len {
                          // By transitivity `file_data_len < other_len`, so the two
                          // lengths definitely differ.
                          return true;
                      }
                      if uncompressed_len == other_len + 4 {
                          // `file_data_len == other_len` is still possible with an
                          // empty metadata block (2 start marker bytes + 2 end marker
                          // bytes). Such a block is written when there is no metadata
                          // to store but the first 2 bytes of the file data look like
                          // a start marker and would otherwise be ambiguous.
                          return false;
                      }
                      if !entry.has_p1() {
                          // Copy metadata may or may not be present; nothing more can
                          // be said about `file_data_len` without computing the data.
                          return false;
                      }

                      // Filelog ancestry is not meaningful in the way changelog
                      // ancestry is: it only provides hints to delta generation.
                      // p1 and p2 are set to null when making a copy or rename, since
                      // contents are likely unrelated to what might have previously
                      // existed at the destination path.
                      //
                      // Conversely, p1 is non-null here, so there is no copy metadata.
                      // Note that this reasoning may be invalidated in the presence of
                      // merges made by some previous versions of Mercurial that
                      // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
                      // and `tests/test-issue6528.t`.
                      //
                      // Copy metadata is currently the only kind of metadata kept in
                      // the revlog data of filelogs, so this entry has no metadata at
                      // all and `file_data_len == uncompressed_len`.
                      uncompressed_len != other_len
                  }
                  /// Return the full data of this revision as an owned
                  /// [`FilelogRevisionData`].
                  ///
                  /// A `RevlogError::Other` is unwrapped into its inner `HgError`; any
                  /// other revlog error is converted into an abort carrying its message.
                  pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
                      self.0
                          .data()
                          .map(|bytes| FilelogRevisionData(bytes.into_owned()))
                          // Errors other than `HgError` should not happen at this point
                          .map_err(|err| match err {
                              RevlogError::Other(hg_error) => hg_error,
                              revlog_error => HgError::abort(
                                  revlog_error.to_string(),
                                  exit_codes::ABORT,
                                  None,
                              ),
                          })
                  }
              }
              /// The data for one revision in a filelog, uncompressed and delta-resolved.
              ///
              /// The bytes may begin with a delimited metadata block; use `split`,
              /// `file_data` or `into_file_data` to get at the file contents.
              pub struct FilelogRevisionData(Vec<u8>);
              impl FilelogRevisionData {
                  /// Split into metadata and data
                  ///
                  /// * If the revision data does not start with the 2-byte delimiter
                  ///   `\x01\n`, returns `(None, all_bytes)`.
                  /// * If it does, and the delimiter is found again in the remainder,
                  ///   returns `(Some(metadata), data)`.
                  /// * If it does but no closing delimiter is found, returns a
                  ///   "corrupted" `HgError`.
                  pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
-                     const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
+                     const DELIMITER: &[u8; 2] = b"\x01\n";
                      if let Some(rest) = self.0.drop_prefix(DELIMITER) {
                          // NOTE(review): `split_2_by_slice` presumably splits at the
                          // first occurrence of the delimiter; confirm in `SliceExt`.
                          if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
                              Ok((Some(metadata), data))
                          } else {
                              Err(HgError::corrupted(
                                  "Missing metadata end delimiter in filelog entry",
                              ))
                          }
                      } else {
                          // No leading delimiter: the whole buffer is file data.
                          Ok((None, &self.0))
                      }
                  }
                  /// Borrow the file contents of this revision, with any metadata
                  /// block stripped.
                  ///
                  /// Fails with the same error as `split`.
                  pub fn file_data(&self) -> Result<&[u8], HgError> {
                      self.split().map(|(_metadata, contents)| contents)
                  }
                  /// Consume `self` and return the file contents, dropping the
                  /// metadata block if there is one.
                  ///
                  /// Fails with the same error as `split`.
                  pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
                      match self.split()? {
                          // Metadata present: only the trailing part is file data, so
                          // it has to be copied out of the buffer.
                          (Some(_metadata), contents) => Ok(contents.to_owned()),
                          // No metadata: the whole buffer is file data, reuse it.
                          (None, _) => Ok(self.0),
                      }
                  }
              }

rust/hg-core/src/revlog/inner_revlog.rs

0 +1 -2

              //! A layer of lower-level revlog functionality to encapsulate most of the
              //! IO work and expensive operations.
              use std::{
                  borrow::Cow,
                  cell::RefCell,
                  io::{ErrorKind, Seek, SeekFrom, Write},
                  ops::Deref,
                  path::PathBuf,
                  sync::{Arc, Mutex},
              };
              use schnellru::{ByMemoryUsage, LruMap};
              use sha1::{Digest, Sha1};
              use crate::{
                  errors::{HgError, IoResultExt},
                  exit_codes,
                  transaction::Transaction,
                  vfs::Vfs,
              };
              use super::{
                  compression::{
                      uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
                      ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
                  },
                  file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
                  index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
                  node::{NODE_BYTES_LENGTH, NULL_NODE},
                  options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
                  BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
                  UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
              };
              /// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
              /// boundary to incrementally rewrite higher-level revlog functionality in
              /// Rust.
              pub struct InnerRevlog {
                  /// When index and data are not interleaved: bytes of the revlog index.
                  /// When index and data are interleaved (inline revlog): bytes of the
                  /// revlog index and data.
                  pub index: Index,
                  /// The store vfs that is used to interact with the filesystem
                  vfs: Box<dyn Vfs>,
                  /// The index file path, relative to the vfs root
                  pub index_file: PathBuf,
                  /// The data file path, relative to the vfs root (same as `index_file`
                  /// if inline)
                  data_file: PathBuf,
                  /// Data config that applies to this revlog
                  data_config: RevlogDataConfig,
                  /// Delta config that applies to this revlog
                  delta_config: RevlogDeltaConfig,
                  /// Feature config that applies to this revlog
                  feature_config: RevlogFeatureConfig,
                  /// A view into this revlog's data file (the index file when the
                  /// revlog is inline, see [`Self::new`])
                  segment_file: RandomAccessFile,
                  /// A cache of uncompressed chunks that have previously been restored.
                  /// Its eviction policy is defined in [`Self::new`].
                  /// `None` when `data_config.uncompressed_cache_factor` is `None`.
                  uncompressed_chunk_cache: Option<UncompressedChunkCache>,
                  /// Used to keep track of the actual target during diverted writes
                  /// for the changelog
                  original_index_file: Option<PathBuf>,
                  /// Write handles to the index and data files
                  /// XXX why duplicate from `index` and `segment_file`?
                  writing_handles: Option<WriteHandles>,
                  /// See [`DelayedBuffer`].
                  delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
                  /// Whether this revlog is inline. XXX why duplicate from `index`?
                  pub inline: bool,
                  /// A cache of the last revision, which is usually accessed multiple
                  /// times.
                  pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
              }
              impl InnerRevlog {
                  /// Build an `InnerRevlog` around an existing `index` and the given
                  /// configuration.
                  ///
                  /// No write handles, delayed buffer or diverted index file are set
                  /// up, and the last-revision cache starts empty.
                  ///
                  /// # Panics
                  ///
                  /// Panics if `index_file` or `data_file` is not a relative path.
                  pub fn new(
                      vfs: Box<dyn Vfs>,
                      index: Index,
                      index_file: PathBuf,
                      data_file: PathBuf,
                      data_config: RevlogDataConfig,
                      delta_config: RevlogDeltaConfig,
                      feature_config: RevlogFeatureConfig,
                  ) -> Self {
                      assert!(index_file.is_relative());
                      assert!(data_file.is_relative());

                      // An inline revlog keeps its data inside the index file, so that
                      // is the file the segment reader has to look at.
                      let segment_path = if index.is_inline() {
                          index_file.clone()
                      } else {
                          data_file.clone()
                      };
                      let segment_file =
                          RandomAccessFile::new(dyn_clone::clone_box(&*vfs), segment_path);

                      // The cache only exists when a cache factor is configured.
                      // Arbitrary initial value
                      // TODO check if using a hasher specific to integers is useful
                      let uncompressed_chunk_cache = data_config
                          .uncompressed_cache_factor
                          .map(|_factor| RefCell::new(LruMap::with_memory_budget(65536)));

                      let inline = index.is_inline();
                      Self {
                          index,
                          vfs,
                          index_file,
                          data_file,
                          data_config,
                          delta_config,
                          feature_config,
                          segment_file,
                          uncompressed_chunk_cache,
                          inline,
                          original_index_file: None,
                          writing_handles: None,
                          delayed_buffer: None,
                          last_revision_cache: Mutex::new(None),
                      }
                  }
                  /// Return the number of entries of the revlog index, as reported by
                  /// the underlying [`Index`].
                  pub fn len(&self) -> usize {
                      self.index.len()
                  }
                  /// Return `true` if this revlog has no entries, i.e. if
                  /// [`Self::len`] is 0.
                  pub fn is_empty(&self) -> bool {
                      self.len() == 0
                  }
                  /// Return whether this revlog is inline (mixed index and data).
                  ///
                  /// This reads the `inline` field of this struct, not the index.
                  pub fn is_inline(&self) -> bool {
                      self.inline
                  }
                  /// Clear the uncompressed chunk cache of this revlog, if it has one.
                  ///
                  /// NOTE(review): `last_revision_cache` is not touched here; confirm
                  /// whether it is meant to be reset by the caller.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the revlog is currently delaying writes (see
                  /// [`Self::is_delaying`]).
                  pub fn clear_cache(&mut self) {
                      assert!(!self.is_delaying());
                      if let Some(chunk_cache) = &self.uncompressed_chunk_cache {
                          // The allocation is deliberately kept: it is probably faster
                          // that way. This could be revisited if memory consumption
                          // turns out to be a problem.
                          chunk_cache.borrow_mut().clear();
                      }
                  }
                  /// Return an entry for the null revision.
                  ///
                  /// The entry points back to this revlog, has `NULL_REVISION` for its
                  /// own revision and both parents, an uncompressed length of 0,
                  /// `NULL_REVLOG_ENTRY_FLAGS` as flags and `NULL_NODE` as hash.
                  pub fn make_null_entry(&self) -> RevlogEntry {
                      RevlogEntry {
                          revlog: self,
                          rev: NULL_REVISION,
                          uncompressed_len: 0,
                          p1: NULL_REVISION,
                          p2: NULL_REVISION,
                          flags: NULL_REVLOG_ENTRY_FLAGS,
                          hash: NULL_NODE,
                      }
                  }
                  /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist.
                  ///
                  /// `NULL_REVISION` yields the null entry.
                  ///
                  /// # Errors
                  ///
                  /// * `RevlogError::InvalidRevision` if the index has no entry for
                  ///   `rev`.
                  /// * A "corrupted" error if either parent stored in the index entry
                  ///   fails `Index::check_revision`.
                  pub fn get_entry(
                      &self,
                      rev: Revision,
                  ) -> Result<RevlogEntry, RevlogError> {
                      if rev == NULL_REVISION {
                          return Ok(self.make_null_entry());
                      }
                      let raw = self
                          .index
                          .get_entry(rev)
                          .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
                      // Parents come unchecked out of the index: validate both before
                      // handing them out as `Revision`s.
                      let checked_p1 = self.index.check_revision(raw.p1());
                      let p1 = checked_p1.ok_or_else(|| {
                          RevlogError::corrupted(format!(
                              "p1 for rev {} is invalid",
                              rev
                          ))
                      })?;
                      let checked_p2 = self.index.check_revision(raw.p2());
                      let p2 = checked_p2.ok_or_else(|| {
                          RevlogError::corrupted(format!(
                              "p2 for rev {} is invalid",
                              rev
                          ))
                      })?;
                      Ok(RevlogEntry {
                          revlog: self,
                          rev,
                          uncompressed_len: raw.uncompressed_len(),
                          p1,
                          p2,
                          flags: raw.flags(),
                          hash: *raw.hash(),
                      })
                  }
                  /// Return the [`RevlogEntry`] for `rev`, checking it first.
                  ///
                  /// The null revision yields the null entry. If `rev` fails
                  /// `Index::check_revision`, a "corrupted" [`RevlogError`] is returned;
                  /// otherwise this behaves like [`Self::get_entry`].
                  pub fn get_entry_for_unchecked_rev(
                      &self,
                      rev: UncheckedRevision,
                  ) -> Result<RevlogEntry, RevlogError> {
                      if rev != NULL_REVISION.into() {
                          let checked = self.index.check_revision(rev).ok_or_else(|| {
                              RevlogError::corrupted(format!("rev {} is invalid", rev))
                          })?;
                          self.get_entry(checked)
                      } else {
                          Ok(self.make_null_entry())
                      }
                  }
                  /// Tell whether written data is currently being kept from view.
                  ///
                  /// Delaying can happen either in memory or on disk in a side-file;
                  /// this is `true` as soon as a delayed buffer or an original index
                  /// file is recorded.
                  pub fn is_delaying(&self) -> bool {
                      let has_delayed_buffer = self.delayed_buffer.is_some();
                      let has_original_index = self.original_index_file.is_some();
                      has_delayed_buffer || has_original_index
                  }
                  /// The offset of the data chunk for this revision.
                  ///
                  /// When the index has no entry for `rev`, the index's null entry is
                  /// used in its place.
                  #[inline(always)]
                  pub fn start(&self, rev: Revision) -> usize {
                      let entry = self
                          .index
                          .get_entry(rev)
                          .unwrap_or_else(|| self.index.make_null_entry());
                      self.index.start(rev, &entry)
                  }
                  /// The length of the data chunk for this revision, i.e. the
                  /// compressed length recorded in its index entry (the index's null
                  /// entry is used when `rev` has none).
                  /// TODO rename this method and others to more explicit names than the
                  /// existing ones that were copied over from Python
                  #[inline(always)]
                  pub fn length(&self, rev: Revision) -> usize {
                      let entry = self
                          .index
                          .get_entry(rev)
                          .unwrap_or_else(|| self.index.make_null_entry());
                      entry.compressed_len() as usize
                  }
                  /// The end of the data chunk for this revision: its start offset
                  /// plus its length.
                  #[inline(always)]
                  pub fn end(&self, rev: Revision) -> usize {
                      let chunk_start = self.start(rev);
                      chunk_start + self.length(rev)
                  }
                  /// Return the delta parent of the given revision.
                  ///
                  /// * `NULL_REVISION` when the base recorded in the index entry is the
                  ///   revision itself.
                  /// * The recorded base when general delta is enabled.
                  /// * The previous revision (`rev - 1`) otherwise.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the index has no entry for `rev`.
                  pub fn delta_parent(&self, rev: Revision) -> Revision {
                      let entry = self.index.get_entry(rev).unwrap();
                      let base = entry.base_revision_or_base_of_delta_chain();
                      if base.0 == rev.0 {
                          return NULL_REVISION;
                      }
                      if self.delta_config.general_delta {
                          Revision(base.0)
                      } else {
                          Revision(rev.0 - 1)
                      }
                  }
                  /// Return whether `rev` points to a snapshot revision (i.e. does not have
                  /// a delta base).
                  ///
                  /// With sparse revlogs the question is forwarded to the index;
                  /// otherwise a revision is a snapshot exactly when its delta parent
                  /// is `NULL_REVISION`.
                  pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
                      if self.delta_config.sparse_revlog {
                          self.index.is_snapshot_unchecked(rev)
                      } else {
                          Ok(self.delta_parent(rev) == NULL_REVISION)
                      }
                  }
                  /// Return the delta chain for `rev` according to this revlog's config.
                  /// See [`Index::delta_chain`] for more information.
                  ///
                  /// `rev` and `stop_rev` are passed through unchanged; the only value
                  /// taken from this revlog is `delta_config.general_delta`.
                  pub fn delta_chain(
                      &self,
                      rev: Revision,
                      stop_rev: Option<Revision>,
                  ) -> Result<(Vec<Revision>, bool), HgError> {
                      self.index.delta_chain(
                          rev,
                          stop_rev,
                          // Converted into whatever type `Index::delta_chain` expects.
                          self.delta_config.general_delta.into(),
                      )
                  }
                  /// Build the compressor selected by
                  /// `feature_config.compression_engine`.
                  ///
                  /// A fresh compressor is created on every call.
                  fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
                      // TODO cache the compressor?
                      let engine: Box<dyn Compressor> =
                          match self.feature_config.compression_engine {
                              CompressionConfig::Zlib { level } => {
                                  Box::new(ZlibCompressor::new(level))
                              }
                              CompressionConfig::Zstd { level, threads } => {
                                  Box::new(ZstdCompressor::new(level, threads))
                              }
                              CompressionConfig::None => Box::new(NoneCompressor),
                          };
                      Ok(engine)
                  }
                  /// Produce the representation of `data` that should be stored, running
                  /// it through the configured compressor.
                  ///
                  /// Returns:
                  /// - `Some(data)`, unchanged, when `data` is empty, or when the
                  ///   compressor declined and `data` starts with a NUL byte;
                  /// - `Some(compressed)` when the compressor produced an output (that
                  ///   output already carries its own header);
                  /// - `None` when the compressor declined and `data` does not start
                  ///   with a NUL byte.
                  pub fn compress<'data>(
                      &self,
                      data: &'data [u8],
                  ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
                      let leading = match data.first() {
                          Some(byte) => *byte,
                          // Empty input is passed through without involving the
                          // compressor at all.
                          None => return Ok(Some(data.into())),
                      };
                      let outcome = self.compressor()?.compress(data)?;
                      match outcome {
                          // The revlog compressor added the header in the returned data.
                          Some(compressed) => Ok(Some(compressed.into())),
                          None if leading == b'\0' => Ok(Some(data.into())),
                          None => Ok(None),
                      }
                  }
                  /// Turn a stored revlog chunk back into its uncompressed form.
                  ///
                  /// The first byte of the chunk is a header that selects the decoder:
                  /// zlib, zstd, NUL (the chunk is returned untouched, header included)
                  /// or `u` (the chunk is returned without its header byte). An empty
                  /// chunk is returned as-is, and any other header is reported as an
                  /// unsupported feature.
                  pub fn decompress<'a>(
                      &'a self,
                      data: &'a [u8],
                  ) -> Result<Cow<[u8]>, RevlogError> {
                      // Revlogs are read far more often than they are written, and many
                      // chunks decompress in microseconds: keep this dispatch cheap.
                      match data.first().copied() {
                          None => Ok(data.into()),
                          // Compressor settings only matter when compressing, so any
                          // value will do here.
                          Some(ZLIB_BYTE) => {
                              let inflated = ZlibCompressor::new(0).decompress(data)?;
                              Ok(inflated.into())
                          }
                          // Same remark: settings are irrelevant for decompression.
                          Some(ZSTD_BYTE) => {
                              let inflated = ZstdCompressor::new(0, 0).decompress(data)?;
                              Ok(inflated.into())
                          }
                          Some(b'\0') => Ok(data.into()),
                          Some(b'u') => Ok((&data[1..]).into()),
                          Some(other) => Err(HgError::UnsupportedFeature(format!(
                              "unknown compression header '{}'",
                              other
                          ))
                          .into()),
                      }
                  }
                  /// Obtain a segment of raw data corresponding to a range of revisions.
                  ///
                  /// Requests for data may be satisfied by a cache.
                  ///
                  /// Returns a 2-tuple of (offset, data) for the requested range of
                  /// revisions. Offset is the integer offset from the beginning of the
                  /// revlog and data is the raw byte data read from that offset up to the
                  /// end of `end_rev`.
                  ///
                  /// Callers will need to call `self.start(rev)` and `self.length(rev)`
                  /// to determine where each revision's data begins and ends.
                  ///
                  /// A `start_rev` equal to `NULL_REVISION` means "from offset `0`".
                  ///
                  /// # Panics
                  ///
                  /// Panics if the index has no entry for `end_rev`, or for a non-null
                  /// `start_rev`.
                  pub fn get_segment_for_revs(
                      &self,
                      start_rev: Revision,
                      end_rev: Revision,
                  ) -> Result<(usize, Vec<u8>), HgError> {
                      let start = if start_rev == NULL_REVISION {
                          // No index entry is looked up for the null revision: the
                          // segment simply starts at the very beginning of the data.
                          0
                      } else {
                          let start_entry = self
                              .index
                              .get_entry(start_rev)
                              .expect("null revision segment");
                          self.index.start(start_rev, &start_entry)
                      };
                      let end_entry = self
                          .index
                          .get_entry(end_rev)
                          .expect("null revision segment");
                      let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
                      let length = end - start;
                      // XXX should we use mmap instead of doing this for platforms that
                      // support madvise/populate?
                      Ok((start, self.segment_file.read_chunk(start, length)?))
                  }
                  /// Return the uncompressed raw data for `rev`.
                  ///
                  /// When an uncompressed chunk cache is configured it is consulted
                  /// first, and it is fed with the result on a miss. Decompression
                  /// failures are turned into an abort error.
                  pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
                      let cache = self.uncompressed_chunk_cache.as_ref();
                      if let Some(lru) = cache {
                          if let Some(hit) = lru.borrow_mut().get(&rev) {
                              return Ok(hit.clone());
                          }
                      }
                      // TODO revlogv2 should check the compression mode
                      let (_offset, stored) = self.get_segment_for_revs(rev, rev)?;
                      let decoded = match self.decompress(&stored) {
                          Ok(bytes) => bytes,
                          Err(e) => {
                              return Err(HgError::abort(
                                  format!("revlog decompression error: {}", e),
                                  exit_codes::ABORT,
                                  None,
                              ))
                          }
                      };
                      let shared: Arc<[u8]> = Arc::from(decoded.into_owned());
                      if let Some(lru) = cache {
                          lru.borrow_mut().insert(rev, Arc::clone(&shared));
                      }
                      Ok(shared)
                  }
                  /// Run `func` inside a read context for the data file.
                  ///
                  /// The reading context is entered before `func` is called and exited
                  /// right after it returns, whether `func` succeeded or not. If the
                  /// context cannot be entered, `func` is never called and the error is
                  /// returned directly.
                  pub fn with_read<R>(
                      &self,
                      func: impl FnOnce() -> Result<R, RevlogError>,
                  ) -> Result<R, RevlogError> {
                      self.enter_reading_context()?;
                      let outcome = func();
                      self.exit_reading_context();
                      outcome
                  }
                  /// `pub` only for use in hg-cpython
                  ///
                  /// Acquire the read handle of the segment file. This is a no-op for an
                  /// empty revlog, and it aborts for an inline revlog that has a delayed
                  /// write buffer.
                  #[doc(hidden)]
                  pub fn enter_reading_context(&self) -> Result<(), HgError> {
                      // Nothing to be read from an empty revlog, so no handle is needed.
                      if self.is_empty() {
                          return Ok(());
                      }
                      let inline_with_delayed_write =
                          self.delayed_buffer.is_some() && self.is_inline();
                      if inline_with_delayed_write {
                          return Err(HgError::abort(
                              "revlog with delayed write should not be inline",
                              exit_codes::ABORT,
                              None,
                          ));
                      }
                      self.segment_file.get_read_handle()?;
                      Ok(())
                  }
                  /// `pub` only for use in hg-cpython
                  ///
                  /// Leave the reading context by delegating to the segment file's own
                  /// `exit_reading_context`.
                  #[doc(hidden)]
                  pub fn exit_reading_context(&self) {
                      self.segment_file.exit_reading_context()
                  }
                  /// Fill the buffer returned by `get_buffer` with the possibly un-validated
                  /// raw text for a revision. It can be already validated if it comes
                  /// from the cache.
                  ///
                  /// `get_buffer` is forwarded untouched to the revision entry's
                  /// `rawdata`, together with the content of the last-revision cache (if
                  /// there is one).
                  ///
                  /// On success the last-revision cache is emptied; if an error is
                  /// returned the cache is left as it was.
                  pub fn raw_text<G, T>(
                      &self,
                      rev: Revision,
                      get_buffer: G,
                  ) -> Result<(), RevlogError>
                  where
                      G: FnOnce(
                          usize,
                          &mut dyn FnMut(
                              &mut dyn RevisionBuffer<Target = T>,
                          ) -> Result<(), RevlogError>,
                      ) -> Result<(), RevlogError>,
                  {
                      let entry = &self.get_entry(rev)?;
                      let raw_size = entry.uncompressed_len();
                      // The guard is kept until the end of the function: `cached_rev`
                      // below borrows from it, and the cache is cleared through it once
                      // the text has been produced.
                      // NOTE(review): `Mutex::lock` blocks while the lock is held by
                      // someone else; this `expect` can only fire on a poisoned mutex.
                      let mut mutex_guard = self
                          .last_revision_cache
                          .lock()
                          .expect("lock should not be held");
                      // Borrowed view (revision, bytes) of the cached revision, if any.
                      let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
                          Some((*rev, data.deref().as_ref()))
                      } else {
                          None
                      };
                      if let Some(cache) = &self.uncompressed_chunk_cache {
                          let cache = &mut cache.borrow_mut();
                          if let Some(size) = raw_size {
                              // Dynamically update the uncompressed_chunk_cache size to the
                              // largest revision we've seen in this revlog.
                              // Do it *before* restoration in case the current revision
                              // is the largest.
                              let factor = self
                                  .data_config
                                  .uncompressed_cache_factor
                                  .expect("cache should not exist without factor");
                              let candidate_size = (size as f64 * factor) as usize;
                              let limiter_mut = cache.limiter_mut();
                              // The limit only ever grows: a smaller candidate leaves
                              // the current limiter in place.
                              if candidate_size > limiter_mut.max_memory_usage() {
                                  // Replace the limiter in place with a bigger one.
                                  std::mem::swap(
                                      limiter_mut,
                                      &mut ByMemoryUsage::new(candidate_size),
                                  );
                              }
                          }
                      }
                      entry.rawdata(cached_rev, get_buffer)?;
                      // drop cache to save memory, the caller is expected to update
                      // the revision cache after validating the text
                      mutex_guard.take();
                      Ok(())
                  }
                  /// Only `pub` for `hg-cpython`.
                  /// Obtain decompressed raw data for the specified revisions that are
                  /// assumed to be in ascending order.
                  ///
                  /// Returns a list with decompressed data for each requested revision.
                  ///
                  /// Revisions found in the uncompressed chunk cache are served from it;
                  /// the others are read from disk in one or more segments (see
                  /// [`Self::slice_chunk`]) and then added to the cache. `target_size` is
                  /// only used when slicing the reads.
                  ///
                  /// NOTE(review): trailing zero-length revisions of each read group are
                  /// trimmed before decoding and get no entry in the returned list;
                  /// confirm that callers expect this.
                  #[doc(hidden)]
                  pub fn chunks(
                      &self,
                      revs: Vec<Revision>,
                      target_size: Option<u64>,
                  ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
                      if revs.is_empty() {
                          return Ok(vec![]);
                      }
                      // Revisions that were not found in the cache and must be read.
                      let mut fetched_revs = vec![];
                      // (revision, data) pairs: cache hits first, then fetched chunks.
                      let mut chunks = Vec::with_capacity(revs.len());
                      match self.uncompressed_chunk_cache.as_ref() {
                          Some(cache) => {
                              let mut cache = cache.borrow_mut();
                              for rev in revs.iter() {
                                  match cache.get(rev) {
                                      Some(hit) => chunks.push((*rev, hit.to_owned())),
                                      None => fetched_revs.push(*rev),
                                  }
                              }
                          }
                          // Without a cache, everything has to be fetched.
                          None => fetched_revs = revs,
                      }
                      // Everything before this index came from the cache and must not
                      // be inserted into it again below.
                      let already_cached = chunks.len();
                      let sliced_chunks = if fetched_revs.is_empty() {
                          vec![]
                      } else if !self.data_config.with_sparse_read || self.is_inline() {
                          // No sparse read (or inline revlog): read a single segment
                          // spanning all the revisions to fetch.
                          vec![fetched_revs]
                      } else {
                          self.slice_chunk(&fetched_revs, target_size)?
                      };
                      self.with_read(|| {
                          for revs_chunk in sliced_chunks {
                              let first_rev = revs_chunk[0];
                              // Skip trailing revisions with empty diff
                              // (if every revision is empty, the whole group is kept).
                              let last_rev_idx = revs_chunk
                                  .iter()
                                  .rposition(|r| self.length(*r) != 0)
                                  .unwrap_or(revs_chunk.len() - 1);
                              let last_rev = revs_chunk[last_rev_idx];
                              // One read covering the whole group; `offset` is where
                              // `data` starts within the revlog data.
                              let (offset, data) =
                                  self.get_segment_for_revs(first_rev, last_rev)?;
                              let revs_chunk = &revs_chunk[..=last_rev_idx];
                              for rev in revs_chunk {
                                  let chunk_start = self.start(*rev);
                                  let chunk_length = self.length(*rev);
                                  // TODO revlogv2 should check the compression mode
                                  // Locate this revision's bytes inside the segment.
                                  let bytes = &data[chunk_start - offset..][..chunk_length];
                                  let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
                                      // If we're using `zstd`, we want to try a more
                                      // specialized decompression
                                      let entry = self.index.get_entry(*rev).unwrap();
                                      // A revision whose recorded base is not itself
                                      // is treated as a delta.
                                      let is_delta = entry
                                          .base_revision_or_base_of_delta_chain()
                                          != (*rev).into();
                                      let uncompressed = uncompressed_zstd_data(
                                          bytes,
                                          is_delta,
                                          entry.uncompressed_len(),
                                      )?;
                                      Cow::Owned(uncompressed)
                                  } else {
                                      // Otherwise just fallback to generic decompression.
                                      self.decompress(bytes)?
                                  };
                                  chunks.push((*rev, chunk.into()));
                              }
                          }
                          Ok(())
                      })?;
                      if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
                          let mut cache = cache.borrow_mut();
                          // Only the freshly read chunks need to be cached.
                          for (rev, chunk) in chunks.iter().skip(already_cached) {
                              cache.insert(*rev, chunk.clone());
                          }
                      }
                      // Cache hits were pushed before fetched chunks, so put everything
                      // back in revision order.
                      // Use stable sort here since it's *mostly* sorted
                      chunks.sort_by(|a, b| a.0.cmp(&b.0));
                      Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
                  }
                  /// Split `revs` into groups that are each worth reading from disk in a
                  /// single pass, to limit the amount of unrelated data being read.
                  ///
                  /// `revs` is assumed to be sorted.
                  ///
                  /// The slicing happens in two stages:
                  /// - the index first slices by density, until the payload/span ratio
                  ///   of each group is above `data_config.sr_density_threshold`,
                  ///   never skipping a gap smaller than `data_config.sr_min_gap_size`;
                  /// - each resulting group is then sliced again so that it does not
                  ///   exceed `target_size`, when one is given.
                  ///
                  /// For consistency with the density stage, the effective size limit is
                  /// never lower than `data_config.sr_min_gap_size`. Individual
                  /// revisions larger than the limit are still returned, on their own.
                  pub fn slice_chunk(
                      &self,
                      revs: &[Revision],
                      target_size: Option<u64>,
                  ) -> Result<Vec<Vec<Revision>>, RevlogError> {
                      let min_gap = self.data_config.sr_min_gap_size;
                      let size_limit = target_size.map(|size| size.max(min_gap));
                      let dense_groups = self.index.slice_chunk_to_density(
                          revs,
                          self.data_config.sr_density_threshold,
                          min_gap as usize,
                      );
                      let mut sliced = Vec::new();
                      for group in dense_groups {
                          let pieces = self.slice_chunk_to_size(&group, size_limit)?;
                          sliced.extend(pieces.into_iter().map(ToOwned::to_owned));
                      }
                      Ok(sliced)
                  }
                  /// Slice revs to match the target size
                  ///
                  /// This is intended to be used on chunks that density slicing selected,
                  /// but that are still too large compared to the read guarantee of revlogs.
                  /// This might happen when the "minimal gap size" interrupted the slicing
                  /// or when chains are built in a way that create large blocks next to
                  /// each other.
                  ///
                  /// Returns sub-slices of `revs`. When `target_size` is `None`, or when
                  /// the whole span already fits, `revs` is returned as a single group.
                  ///
                  /// # Panics
                  ///
                  /// Panics if `revs` is empty, since its first and last elements are
                  /// indexed unconditionally.
                  fn slice_chunk_to_size<'a>(
                      &self,
                      revs: &'a [Revision],
                      target_size: Option<u64>,
                  ) -> Result<Vec<&'a [Revision]>, RevlogError> {
                      // Byte offset where the group currently being built starts.
                      let mut start_data = self.start(revs[0]);
                      let end_data = self.end(revs[revs.len() - 1]);
                      let full_span = end_data - start_data;
                      let nothing_to_do = target_size
                          .map(|size| full_span <= size as usize)
                          .unwrap_or(true);
                      if nothing_to_do {
                          return Ok(vec![revs]);
                      }
                      let target_size = target_size.expect("target_size is set") as usize;
                      // Half-open index range `start_rev_idx..end_rev_idx` into `revs`
                      // describing the group currently being built.
                      let mut start_rev_idx = 0;
                      let mut end_rev_idx = 1;
                      let mut chunks = vec![];
                      // First pass: walk the leading revisions one by one, growing the
                      // current group while it fits and the revision is a snapshot.
                      // The walk stops at the first revision that is not a snapshot.
                      for (idx, rev) in revs.iter().enumerate().skip(1) {
                          let span = self.end(*rev) - start_data;
                          let is_snapshot = self.is_snapshot(*rev)?;
                          if span <= target_size && is_snapshot {
                              end_rev_idx = idx + 1;
                          } else {
                              // Close the current group and start a new one at `rev`.
                              let chunk =
                                  self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
                              if !chunk.is_empty() {
                                  chunks.push(chunk);
                              }
                              start_rev_idx = idx;
                              start_data = self.start(*rev);
                              end_rev_idx = idx + 1;
                          }
                          if !is_snapshot {
                              break;
                          }
                      }
                      // For the others, we use binary slicing to quickly converge towards
                      // valid chunks (otherwise, we might end up looking for the start/end
                      // of many revisions). This logic is not looking for the perfect
                      // slicing point, it quickly converges towards valid chunks.
                      let number_of_items = revs.len();
                      while (end_data - start_data) > target_size {
                          end_rev_idx = number_of_items;
                          if number_of_items - start_rev_idx <= 1 {
                              // Protect against individual chunks larger than the limit
                              break;
                          }
                          let mut local_end_data = self.end(revs[end_rev_idx - 1]);
                          let mut span = local_end_data - start_data;
                          // Halve the candidate group until it fits in `target_size`.
                          while span > target_size {
                              if end_rev_idx - start_rev_idx <= 1 {
                                  // Protect against individual chunks larger than the limit
                                  break;
                              }
                              end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
                              local_end_data = self.end(revs[end_rev_idx - 1]);
                              span = local_end_data - start_data;
                          }
                          let chunk =
                              self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
                          if !chunk.is_empty() {
                              chunks.push(chunk);
                          }
                          // The next group starts right after the one just emitted.
                          start_rev_idx = end_rev_idx;
                          start_data = self.start(revs[start_rev_idx]);
                      }
                      // Whatever remains forms the last group.
                      let chunk = self.trim_chunk(revs, start_rev_idx, None);
                      if !chunk.is_empty() {
                          chunks.push(chunk);
                      }
                      Ok(chunks)
                  }
                  /// Return `revs[start_rev_idx..end_rev_idx]` with the zero-length
                  /// revisions at its end removed. `end_rev_idx` defaults to
                  /// `revs.len()` when it is `None`.
                  fn trim_chunk<'a>(
                      &self,
                      revs: &'a [Revision],
                      start_rev_idx: usize,
                      end_rev_idx: Option<usize>,
                  ) -> &'a [Revision] {
                      let mut end = end_rev_idx.unwrap_or(revs.len());
                      // Trimming is only attempted when the last revision of the range
                      // is below this revlog's length; otherwise the range is returned
                      // untouched.
                      let last_is_in_revlog =
                          revs[end - 1].0 < self.len() as BaseRevision;
                      if last_is_in_revlog {
                          // Never trim below index 1, nor below the start of the range,
                          // so that the very first rev of a chain is always kept.
                          let floor = start_rev_idx.max(1);
                          while end > floor && self.length(revs[end - 1]) == 0 {
                              end -= 1;
                          }
                      }
                      &revs[start_rev_idx..end]
                  }
                  /// Check the hash of some given data against the recorded hash.
                  pub fn check_hash(
                      &self,
                      p1: Revision,
                      p2: Revision,
                      expected: &[u8],
                      data: &[u8],
                  ) -> bool {
                      let e1 = self.index.get_entry(p1);
                      let h1 = match e1 {
                          Some(ref entry) => entry.hash(),
                          None => &NULL_NODE,
                      };
                      let e2 = self.index.get_entry(p2);
                      let h2 = match e2 {
                          Some(ref entry) => entry.hash(),
                          None => &NULL_NODE,
                      };
                      hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                  }
                  /// Returns whether we are currently in a [`Self::with_write`] context,
                  /// i.e. whether writing handles are currently set.
                  pub fn is_writing(&self) -> bool {
                      self.writing_handles.is_some()
                  }
                  /// Open the revlog files for writing
                  ///
                  /// Adding content to a revlog should be done within this context.
                  /// TODO try using `BufRead` and `BufWrite` and see if performance improves
                  pub fn with_write<R>(
                      &mut self,
                      transaction: &mut impl Transaction,
                      data_end: Option<usize>,
                      func: impl FnOnce() -> R,
                  ) -> Result<R, HgError> {
                      if self.is_writing() {
                          return Ok(func());
                      }
                      self.enter_writing_context(data_end, transaction)
-                         .map_err(|e| {
+                         .inspect_err(|_| {
                              self.exit_writing_context();
+                             e
                          })?;
                      let res = func();
                      self.exit_writing_context();
                      Ok(res)
                  }
                  /// `pub` only for use in hg-cpython
                  ///
                  /// Leave the writing context: the writing handles of this revlog are
                  /// taken out, as well as both the writing and the reading handle of the
                  /// segment file.
                  #[doc(hidden)]
                  pub fn exit_writing_context(&mut self) {
                      self.writing_handles.take();
                      self.segment_file.writing_handle.take();
                      self.segment_file.reading_handle.take();
                  }
                  /// `pub` only for use in hg-cpython
                  ///
                  /// Borrow the current writing handles, or `None` when no writing
                  /// handles are set.
                  #[doc(hidden)]
                  pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
                      self.writing_handles.as_ref()
                  }
                  /// `pub` only for use in hg-cpython
                  ///
                  /// Open the handles needed to append to this revlog and record them.
                  ///
                  /// - For a non-inline revlog, the data file is opened (or created if it
                  ///   does not exist yet) and positioned at `data_end` when given, at the
                  ///   end of the file otherwise. It is added to `transaction` with the
                  ///   current end of the revlog data as its offset.
                  /// - The index file is opened for writing and added to `transaction`,
                  ///   with the data end as offset for an inline revlog, or the index
                  ///   size (`len * INDEX_ENTRY_SIZE`) otherwise.
                  /// - The handles are stored as the current writing handles, and the
                  ///   segment file's reading handle is set to the handle of the file that
                  ///   holds the data (index if inline, data file otherwise).
                  #[doc(hidden)]
                  pub fn enter_writing_context(
                      &mut self,
                      data_end: Option<usize>,
                      transaction: &mut impl Transaction,
                  ) -> Result<(), HgError> {
                      // Offset of the end of the last revision's data; an empty revlog
                      // has no data at all.
                      let data_size = if self.is_empty() {
                          0
                      } else {
                          self.end(Revision((self.len() - 1) as BaseRevision))
                      };
                      let data_handle = if !self.is_inline() {
                          let data_handle = match self.vfs.open(&self.data_file) {
                              Ok(mut f) => {
                                  if let Some(end) = data_end {
                                      f.seek(SeekFrom::Start(end as u64))
                                          .when_reading_file(&self.data_file)?;
                                  } else {
                                      f.seek(SeekFrom::End(0))
                                          .when_reading_file(&self.data_file)?;
                                  }
                                  f
                              }
                              Err(e) => match e {
                                  HgError::IoError { error, context } => {
                                      // Only a missing file is recoverable: create it.
                                      if error.kind() != ErrorKind::NotFound {
                                          return Err(HgError::IoError { error, context });
                                      }
                                      self.vfs.create(&self.data_file, true)?
                                  }
                                  e => return Err(e),
                              },
                          };
                          transaction.add(&self.data_file, data_size);
                          Some(FileHandle::from_file(
                              data_handle,
                              dyn_clone::clone_box(&*self.vfs),
                              &self.data_file,
                          ))
                      } else {
                          // Inline revlogs keep their data in the index file.
                          None
                      };
                      let index_size = self.len() * INDEX_ENTRY_SIZE;
                      let index_handle = self.index_write_handle()?;
                      if self.is_inline() {
                          transaction.add(&self.index_file, data_size);
                      } else {
                          transaction.add(&self.index_file, index_size);
                      }
                      self.writing_handles = Some(WriteHandles {
                          index_handle: index_handle.clone(),
                          data_handle: data_handle.clone(),
                      });
                      // Reads issued while writing go through the handle of the file
                      // that actually contains the revision data.
                      *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
                          Some(index_handle)
                      } else {
                          data_handle
                      };
                      Ok(())
                  }
                  /// Get a write handle to the index, sought to the end of its data.
                  ///
                  /// If the index file does not exist yet, a handle on a new file is
                  /// returned instead. When a delayed buffer is set, the returned handle
                  /// is built with that buffer attached. Any I/O error other than "not
                  /// found" is returned as-is.
                  fn index_write_handle(&self) -> Result<FileHandle, HgError> {
                      // The ambiguity-checking open is only used when there is no
                      // delayed buffer and `check_ambig` is enabled.
                      let res = if self.delayed_buffer.is_none() {
                          if self.data_config.check_ambig {
                              self.vfs.open_check_ambig(&self.index_file)
                          } else {
                              self.vfs.open(&self.index_file)
                          }
                      } else {
                          self.vfs.open(&self.index_file)
                      };
                      match res {
                          Ok(mut handle) => {
                              // Existing file: position the handle at the end of it.
                              handle
                                  .seek(SeekFrom::End(0))
                                  .when_reading_file(&self.index_file)?;
                              Ok(
                                  if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
                                  {
                                      FileHandle::from_file_delayed(
                                          handle,
                                          dyn_clone::clone_box(&*self.vfs),
                                          &self.index_file,
                                          delayed_buffer.clone(),
                                      )?
                                  } else {
                                      FileHandle::from_file(
                                          handle,
                                          dyn_clone::clone_box(&*self.vfs),
                                          &self.index_file,
                                      )
                                  },
                              )
                          }
                          Err(e) => match e {
                              HgError::IoError { error, context } => {
                                  // Only a missing index file is recoverable.
                                  if error.kind() != ErrorKind::NotFound {
                                      return Err(HgError::IoError { error, context });
                                  };
                                  // Missing file: build a handle on a new file instead.
                                  if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
                                  {
                                      FileHandle::new_delayed(
                                          dyn_clone::clone_box(&*self.vfs),
                                          &self.index_file,
                                          true,
                                          delayed_buffer.clone(),
                                      )
                                  } else {
                                      FileHandle::new(
                                          dyn_clone::clone_box(&*self.vfs),
                                          &self.index_file,
                                          true,
                                          true,
                                      )
                                  }
                              }
                              e => Err(e),
                          },
                      }
                  }
                  /// Split the data of an inline revlog into an index and a data file
                  ///
                  /// The data segment of every revision is copied to `self.data_file`,
                  /// the index file is rewritten (header bytes followed by the binary
                  /// index entries) and this revlog is switched to non-inline mode.
                  ///
                  /// - `header` is emitted in front of the first index entry and is used
                  ///   to build the replacement in-memory [`Index`]
                  /// - `new_index_file_path`, if any, replaces `self.index_file` before
                  ///   the new index is created
                  ///
                  /// Returns the path of the index file that was written.
                  ///
                  /// # Panics
                  ///
                  /// Panics if a delayed buffer is set, or if the index has no binary
                  /// entry for one of the revisions it reports.
                  pub fn split_inline(
                      &mut self,
                      header: IndexHeader,
                      new_index_file_path: Option<PathBuf>,
                  ) -> Result<PathBuf, RevlogError> {
                      assert!(self.delayed_buffer.is_none());
                      // Remember whether we were writing, so that handles can be
                      // reopened on the new files at the end.
                      let existing_handles = self.writing_handles.is_some();
                      if let Some(handles) = &mut self.writing_handles {
                          // Flush what was written through the index handle, then drop
                          // every handle that still points to the inline file.
                          handles.index_handle.flush()?;
                          self.writing_handles.take();
                          self.segment_file.writing_handle.take();
                      }
                      let mut new_data_file_handle =
                          self.vfs.create(&self.data_file, true)?;
                      // Drop any potential data, possibly redundant with the VFS impl.
                      new_data_file_handle
                          .set_len(0)
                          .when_writing_file(&self.data_file)?;
                      // Copy the segment of each revision, in revision order, to the
                      // new data file.
                      self.with_read(|| -> Result<(), RevlogError> {
                          for r in 0..self.index.len() {
                              let rev = Revision(r as BaseRevision);
                              let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
                              new_data_file_handle
                                  .write_all(&rev_segment)
                                  .when_writing_file(&self.data_file)?;
                          }
                          new_data_file_handle
                              .flush()
                              .when_writing_file(&self.data_file)?;
                          Ok(())
                      })?;
                      if let Some(index_path) = new_index_file_path {
                          self.index_file = index_path
                      }
                      let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
                      // Build the whole new index in memory: the header bytes, then
                      // the binary form of every entry.
                      let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
                      for r in 0..self.len() {
                          let rev = Revision(r as BaseRevision);
                          let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
                              panic!(
                                  "entry {} should exist in {}",
                                  r,
                                  self.index_file.display()
                              )
                          });
                          if r == 0 {
                              new_data.extend(header.header_bytes);
                          }
                          new_data.extend(entry);
                      }
                      new_index_handle
                          .write_all(&new_data)
                          .when_writing_file(&self.index_file)?;
                      // Replace the index with a new one because the buffer contains inline
                      // data
                      self.index = Index::new(Box::new(new_data), header)?;
                      self.inline = false;
                      // Revision data is now read from the data file.
                      self.segment_file = RandomAccessFile::new(
                          dyn_clone::clone_box(&*self.vfs),
                          self.data_file.to_owned(),
                      );
                      if existing_handles {
                          // Switched from inline to conventional, reopen the index
                          let new_data_handle = Some(FileHandle::from_file(
                              new_data_file_handle,
                              dyn_clone::clone_box(&*self.vfs),
                              &self.data_file,
                          ));
                          self.writing_handles = Some(WriteHandles {
                              index_handle: self.index_write_handle()?,
                              data_handle: new_data_handle.clone(),
                          });
                          // The segment file gets the same data handle as the writing
                          // handles.
                          *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
                      }
                      Ok(self.index_file.to_owned())
                  }
                  /// Write a new entry to this revlog.
                  /// - `entry` is the index bytes
                  /// - `header_and_data` is the compression header and the revision data
                  /// - `offset` is the position in the data file to write to
                  /// - `index_end` is the overwritten position in the index in revlog-v2,
                  ///   since the format may allow a rewrite of garbage data at the end.
                  /// - `data_end` is the overwritten position in the data-file in revlog-v2,
                  ///   since the format may allow a rewrite of garbage data at the end.
                  ///
                  /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
                  ///
                  /// Returns the position of the index handle after the write, and the
                  /// position of the data handle if there is one.
                  ///
                  /// # Errors
                  ///
                  /// Returns an abort error if no writing handles are open (i.e. outside
                  /// of the `with_write` context) or if a delayed buffer is set on an
                  /// inline revlog. I/O errors from seeking and writing are propagated.
                  pub fn write_entry(
                      &mut self,
                      mut transaction: impl Transaction,
                      entry: &[u8],
                      header_and_data: (&[u8], &[u8]),
                      mut offset: usize,
                      index_end: Option<u64>,
                      data_end: Option<u64>,
                  ) -> Result<(u64, Option<u64>), HgError> {
                      // NOTE(review): presumably the entry being written is already
                      // part of the in-memory index, making this its revision number
                      // -- confirm with the callers.
                      let current_revision = self.len() - 1;
                      // Computed up front, before `self.writing_handles` is mutably
                      // borrowed below.
                      let canonical_index_file = self.canonical_index_file();
                      let is_inline = self.is_inline();
                      let handles = match &mut self.writing_handles {
                          None => {
                              return Err(HgError::abort(
                                  "adding revision outside of the `with_write` context",
                                  exit_codes::ABORT,
                                  None,
                              ));
                          }
                          Some(handles) => handles,
                      };
                      let index_handle = &mut handles.index_handle;
                      let data_handle = &mut handles.data_handle;
                      // Position the index handle: at the explicit end if given, at
                      // the end of the file otherwise.
                      if let Some(end) = index_end {
                          index_handle
                              .seek(SeekFrom::Start(end))
                              .when_reading_file(&self.index_file)?;
                      } else {
                          index_handle
                              .seek(SeekFrom::End(0))
                              .when_reading_file(&self.index_file)?;
                      }
                      // Same for the data handle, when there is one.
                      if let Some(data_handle) = data_handle {
                          if let Some(end) = data_end {
                              data_handle
                                  .seek(SeekFrom::Start(end))
                                  .when_reading_file(&self.data_file)?;
                          } else {
                              data_handle
                                  .seek(SeekFrom::End(0))
                                  .when_reading_file(&self.data_file)?;
                          }
                      }
                      let (header, data) = header_and_data;
                      if !is_inline {
                          // Register both files with the transaction before writing.
                          // NOTE(review): presumably these are the sizes to truncate
                          // back to on rollback -- confirm in `Transaction`.
                          transaction.add(&self.data_file, offset);
                          transaction
                              .add(&canonical_index_file, current_revision * entry.len());
                          let data_handle = data_handle
                              .as_mut()
                              .expect("data handle should exist when not inline");
                          if !header.is_empty() {
                              data_handle.write_all(header)?;
                          }
                          data_handle.write_all(data)?;
                          // The index entry goes to the delayed buffer when there is
                          // one, to the index file otherwise.
                          match &mut self.delayed_buffer {
                              Some(buf) => {
                                  buf.lock()
                                      .expect("propagate the panic")
                                      .buffer
                                      .write_all(entry)
                                      .expect("write to delay buffer should succeed");
                              }
                              None => index_handle.write_all(entry)?,
                          }
                      } else if self.delayed_buffer.is_some() {
                          return Err(HgError::abort(
                              "invalid delayed write on inline revlog",
                              exit_codes::ABORT,
                              None,
                          ));
                      } else {
                          // Inline: everything lives in the index file, so the offset
                          // registered with the transaction also accounts for the
                          // index entries of the previous revisions.
                          offset += current_revision * entry.len();
                          transaction.add(&canonical_index_file, offset);
                          index_handle.write_all(entry)?;
                          index_handle.write_all(header)?;
                          index_handle.write_all(data)?;
                      }
                      let data_position = match data_handle {
                          Some(h) => Some(h.position()?),
                          None => None,
                      };
                      Ok((index_handle.position()?, data_position))
                  }
                  /// Return the path of the real index file: the original one while the
                  /// index is diverted to a temporary file, the current one otherwise.
                  pub fn canonical_index_file(&self) -> PathBuf {
                      match &self.original_index_file {
                          Some(original) => original.to_owned(),
                          None => self.index_file.to_owned(),
                      }
                  }
                  /// Return the path used for the diverted index: the current index path
                  /// with its extension replaced by `i.a`.
                  fn diverted_index(&self) -> PathBuf {
                      let mut diverted = self.index_file.to_path_buf();
                      diverted.set_extension("i.a");
                      diverted
                  }
                  /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
                  ///
                  /// The answer is delegated to the segment file of this revlog.
                  pub fn is_open(&self) -> bool {
                      self.segment_file.is_open()
                  }
                  /// Make this revlog hold back its index writes.
                  ///
                  /// An empty revlog is diverted: its index path is switched to the
                  /// diverted path (any file already there is removed) and that path is
                  /// returned. A non-empty revlog gets an in-memory delayed buffer instead
                  /// and `None` is returned. Nothing happens (and `None` is returned) if a
                  /// delay or a divert is already in place.
                  ///
                  /// # Errors
                  ///
                  /// Returns an abort error if the revlog is inline.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the revlog is open.
                  pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
                      assert!(!self.is_open());
                      if self.is_inline() {
                          return Err(HgError::abort(
                              "revlog with delayed write should not be inline",
                              exit_codes::ABORT,
                              None,
                          ));
                      }
                      let already_pending = self.delayed_buffer.is_some()
                          || self.original_index_file.is_some();
                      if already_pending {
                          // Delay or divert already happening
                          return Ok(None);
                      }
                      if !self.is_empty() {
                          // Existing revisions: buffer the new index entries in memory
                          let buffer = Arc::new(Mutex::new(DelayedBuffer::default()));
                          self.delayed_buffer = Some(buffer);
                          return Ok(None);
                      }
                      // Empty revlog: write straight to the diverted index file
                      let diverted = self.diverted_index();
                      self.original_index_file = Some(self.index_file.to_owned());
                      self.index_file = diverted;
                      if self.vfs.exists(&self.index_file) {
                          self.vfs.unlink(&self.index_file)?;
                      }
                      Ok(Some(self.index_file.to_owned()))
                  }
                  /// Write the pending data (in memory), if any, to the diverted index
                  /// file (an on-disk temporary file).
                  ///
                  /// The current index file is copied to the diverted path, the content of
                  /// the delayed buffer (if one is set) is appended to the copy, and this
                  /// revlog then uses the diverted file as its index.
                  ///
                  /// Returns the diverted path and whether any delayed data was written.
                  /// If the revlog is already diverted, `(None, true)` is returned and
                  /// nothing is done.
                  ///
                  /// # Errors
                  ///
                  /// Returns an abort error if the revlog is inline.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the revlog is open.
                  pub fn write_pending(
                      &mut self,
                  ) -> Result<(Option<PathBuf>, bool), HgError> {
                      assert!(!self.is_open());
                      if self.is_inline() {
                          return Err(HgError::abort(
                              "revlog with delayed write should not be inline",
                              exit_codes::ABORT,
                              None,
                          ));
                      }
                      if self.original_index_file.is_some() {
                          return Ok((None, true));
                      }
                      // Start from a fresh copy of the current index
                      let pending_index_file = self.diverted_index();
                      if self.vfs.exists(&pending_index_file) {
                          self.vfs.unlink(&pending_index_file)?;
                      }
                      self.vfs.copy(&self.index_file, &pending_index_file)?;
                      let any_pending = match self.delayed_buffer.take() {
                          None => false,
                          Some(delayed_buffer) => {
                              // Append the buffered index data to the copy
                              let mut pending_handle =
                                  self.vfs.open(&pending_index_file)?;
                              pending_handle
                                  .seek(SeekFrom::End(0))
                                  .when_writing_file(&pending_index_file)?;
                              let delayed =
                                  delayed_buffer.lock().expect("propagate the panic");
                              pending_handle
                                  .write_all(&delayed.buffer)
                                  .when_writing_file(&pending_index_file)?;
                              true
                          }
                      };
                      self.original_index_file = Some(self.index_file.to_owned());
                      self.index_file = pending_index_file;
                      Ok((Some(self.index_file.to_owned()), any_pending))
                  }
                  /// Overwrite the canonical file with the diverted file, or write out the
                  /// delayed buffer.
                  /// Returns an error if the revlog is neither diverted nor delayed.
                  ///
                  /// On success, returns the path of the canonical index file.
                  ///
                  /// # Errors
                  ///
                  /// Also returns an abort error if the revlog is inline.
                  ///
                  /// # Panics
                  ///
                  /// Panics if the revlog is open, or if it is both delayed and diverted.
                  pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
                      assert!(!self.is_open());
                      if self.is_inline() {
                          return Err(HgError::abort(
                              "revlog with delayed write should not be inline",
                              exit_codes::ABORT,
                              None,
                          ));
                      }
                      match (
                          self.delayed_buffer.as_ref(),
                          self.original_index_file.as_ref(),
                      ) {
                          (None, None) => {
                              return Err(HgError::abort(
                                  "neither delay nor divert found on this revlog",
                                  exit_codes::ABORT,
                                  None,
                              ));
                          }
                          (Some(delay), None) => {
                              // Delayed: append the buffered index data at the end of
                              // the index file, then forget the buffer.
                              let mut index_file_handle = self.vfs.open(&self.index_file)?;
                              index_file_handle
                                  .seek(SeekFrom::End(0))
                                  .when_writing_file(&self.index_file)?;
                              index_file_handle
                                  .write_all(
                                      &delay.lock().expect("propagate the panic").buffer,
                                  )
                                  .when_writing_file(&self.index_file)?;
                              self.delayed_buffer = None;
                          }
                          (None, Some(divert)) => {
                              // Diverted: `divert` is the original (canonical) path and
                              // `self.index_file` is the diverted file. The diverted
                              // file may not exist, in which case only the paths are
                              // restored.
                              // NOTE(review): the meaning of the last `rename`
                              // argument is not visible here -- confirm in the VFS.
                              if self.vfs.exists(&self.index_file) {
                                  self.vfs.rename(&self.index_file, divert, true)?;
                              }
                              divert.clone_into(&mut self.index_file);
                              self.original_index_file = None;
                          }
                          (Some(_), Some(_)) => unreachable!(
                              "{} is in an inconsistent state of both delay and divert",
                              self.canonical_index_file().display(),
                          ),
                      }
                      Ok(self.canonical_index_file())
                  }
                  /// Returns a shared reference to the index of this revlog.
                  ///
                  /// `pub` only for `hg-cpython`. This is made a different method than
                  /// [`Revlog::index`] in case there is a different invariant that pops up
                  /// later.
                  #[doc(hidden)]
                  pub fn shared_index(&self) -> &Index {
                      &self.index
                  }
              }
              /// Cache of chunk bytes keyed by revision, stored in an [`LruMap`] limited
              /// [`ByMemoryUsage`].
              ///
              /// The use of a [`RefCell`] assumes that a given revlog will only
              /// be accessed (read or write) by a single thread.
              type UncompressedChunkCache =
                  RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
              /// The node, revision and data for the last revision we've seen. Speeds up
              /// a lot of sequential operations of the revlog.
              ///
              /// The data is not just bytes since it can come from Python and we want to
              /// avoid copies if possible: it is kept behind a boxed, [`Send`] value that
              /// dereferences to a byte slice.
              type SingleRevisionCache =
                  (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
              /// A way of progressively filling a buffer with revision data, then
              /// returning that buffer. Used to abstract away Python-allocated code to
              /// reduce copying for performance reasons.
              ///
              /// Implementors receive the data through successive calls to
              /// [`RevisionBuffer::extend_from_slice`] and hand over the result with
              /// [`RevisionBuffer::finish`], which consumes the buffer.
              pub trait RevisionBuffer {
                  /// The owned buffer type to return
                  type Target;
                  /// Copies the slice into the buffer
                  fn extend_from_slice(&mut self, slice: &[u8]);
                  /// Returns the now finished owned buffer
                  fn finish(self) -> Self::Target;
              }
              /// A simple vec-based buffer. This is uselessly complicated for the pure Rust
              /// case, but it's the price to pay for Python compatibility.
              #[derive(Debug)]
              pub(super) struct CoreRevisionBuffer {
                  /// The bytes accumulated so far
                  buf: Vec<u8>,
              }
              impl CoreRevisionBuffer {
                  /// Creates an empty buffer, without allocating.
                  pub fn new() -> Self {
                      Self { buf: vec![] }
                  }
                  /// Makes sure the buffer can hold at least `size` bytes in total
                  /// without reallocating.
                  ///
                  /// This only affects the capacity, the content is left untouched.
                  /// Asking for less than what is already available is a no-op.
                  #[inline]
                  pub fn resize(&mut self, size: usize) {
                      // `Vec::reserve_exact` takes a number of *additional* elements
                      // relative to the current length (not the capacity), and the
                      // requested size may already be covered: saturate instead of
                      // underflowing.
                      let additional = size.saturating_sub(self.buf.len());
                      self.buf.reserve_exact(additional);
                  }
              }
              impl RevisionBuffer for CoreRevisionBuffer {
                  type Target = Vec<u8>;
                  /// Appends a copy of `slice` at the end of the inner vector.
                  #[inline]
                  fn extend_from_slice(&mut self, slice: &[u8]) {
                      self.buf.extend(slice);
                  }
                  /// Gives away the inner vector.
                  #[inline]
                  fn finish(self) -> Self::Target {
                      let Self { buf } = self;
                      buf
                  }
              }
              /// Calculate the hash of a revision given its data and its parents.
              ///
              /// The SHA-1 digest is fed the two parent hashes, smallest first, then
              /// `data`: swapping `p1_hash` and `p2_hash` gives the same result.
              pub fn hash(
                  data: &[u8],
                  p1_hash: &[u8],
                  p2_hash: &[u8],
              ) -> [u8; NODE_BYTES_LENGTH] {
                  let (low, high) = if p1_hash > p2_hash {
                      (p2_hash, p1_hash)
                  } else {
                      (p1_hash, p2_hash)
                  };
                  let mut digest = Sha1::new();
                  digest.update(low);
                  digest.update(high);
                  digest.update(data);
                  *digest.finalize().as_ref()
              }

rust/hg-cpython/src/ref_sharing.rs

0 +2 -2

              // ref_sharing.rs
              //
              // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
              //
              // Permission is hereby granted, free of charge, to any person obtaining a copy
              // of this software and associated documentation files (the "Software"), to
              // deal in the Software without restriction, including without limitation the
              // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
              // sell copies of the Software, and to permit persons to whom the Software is
              // furnished to do so, subject to the following conditions:
              //
              // The above copyright notice and this permission notice shall be included in
              // all copies or substantial portions of the Software.
              //
              // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
              // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
              // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
              // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
              // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
              // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
              // IN THE SOFTWARE.
              //! Macros for use in the `hg-cpython` bridge library.
              /// Defines a `py_class!` that acts as a Python iterator over a Rust iterator.
              ///
              /// TODO: this is a bit awkward to use, and a better (more complicated)
              ///     procedural macro would simplify the interface a lot.
              ///
              /// # Parameters
              ///
              /// * `$name` is the identifier to give to the resulting Rust struct.
              /// * `$leaked` corresponds to `UnsafePyLeaked` in the matching `@shared data`
-             /// declaration.
+             ///   declaration.
              /// * `$iterator_type` is the type of the Rust iterator.
              /// * `$success_func` is a function for processing the Rust `(key, value)`
-             /// tuple on iteration success, turning it into something Python understands.
+             ///   tuple on iteration success, turning it into something Python understands.
              /// * `$success_type` is the return type of `$success_func`
              ///
              /// # Safety
              ///
              /// `$success_func` may take a reference, but its lifetime may be cheated.
              /// Do not copy it out of the function call.
              ///
              /// # Example
              ///
              /// ```
              /// struct MyStruct {
              ///     inner: HashMap<Vec<u8>, Vec<u8>>,
              /// }
              ///
              /// py_class!(pub class MyType |py| {
              ///     @shared data inner: MyStruct;
              ///
              ///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
              ///         let leaked_ref = self.inner_shared(py).leak_immutable();
              ///         MyTypeItemsIterator::from_inner(
              ///             py,
              ///             unsafe { leaked_ref.map(py, |o| o.iter()) },
              ///         )
              ///     }
              /// });
              ///
              /// impl MyType {
              ///     fn translate_key_value(
              ///         py: Python,
              ///         res: (&Vec<u8>, &Vec<u8>),
              ///     ) -> PyResult<Option<(PyBytes, PyBytes)>> {
              ///         let (f, entry) = res;
              ///         Ok(Some((
              ///             PyBytes::new(py, f),
              ///             PyBytes::new(py, entry),
              ///         )))
              ///     }
              /// }
              ///
              /// py_shared_iterator!(
              ///     MyTypeItemsIterator,
              ///     UnsafePyLeaked<HashMap<'static, Vec<u8>, Vec<u8>>>,
              ///     MyType::translate_key_value,
              ///     Option<(PyBytes, PyBytes)>
              /// );
              /// ```
              macro_rules! py_shared_iterator {
                  // Single rule: the four comma-separated parameters, in this order.
                  (
                      $name: ident,
                      $leaked: ty,
                      $success_func: expr,
                      $success_type: ty
                  ) => {
                      py_class!(pub class $name |py| {
                          // The leaked iterator, behind a `RefCell` because advancing
                          // it from `__next__(&self)` needs mutable access.
                          data inner: RefCell<$leaked>;
                          def __next__(&self) -> PyResult<$success_type> {
                              let mut leaked = self.inner(py).borrow_mut();
                              // Fallible: the `?` propagates the error instead of
                              // touching the iterator.
                              let mut iter = unsafe { leaked.try_borrow_mut(py)? };
                              match iter.next() {
                                  // Exhausted. `$success_type` has to accept `None`
                                  // for this arm to type-check.
                                  // NOTE(review): presumably turned into
                                  // `StopIteration` by `py_class!` -- confirm.
                                  None => Ok(None),
                                  // res may be a reference of cheated 'static lifetime
                                  Some(res) => $success_func(py, res),
                              }
                          }
                          // Iterating over the iterator gives the iterator itself.
                          def __iter__(&self) -> PyResult<Self> {
                              Ok(self.clone_ref(py))
                          }
                      });
                      impl $name {
                          // Wraps an already leaked iterator into a new Python object
                          // of the generated class.
                          pub fn from_inner(
                              py: Python,
                              leaked: $leaked,
                          ) -> PyResult<Self> {
                              Self::create_instance(
                                  py,
                                  RefCell::new(leaked),
                              )
                          }
                      }
                  };
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages