##// END OF EJS Templates
rust-pyo3: MissingAncestors
Georges Racinet -
r53432:507fec66 default
parent child Browse files
Show More
@@ -1,164 +1,292
1 1 // ancestors.rs
2 2 //
3 3 // Copyright 2024 Georges Racinet <georges.racinet@cloudcrane.io>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::ancestors` module provided by the
9 9 //! `hg-core` crate. From Python, this will be seen as `pyo3_rustext.ancestor`
10 10 //! and can be used as replacement for the pure `ancestor` Python module.
11 11 use cpython::UnsafePyLeaked;
12 12 use pyo3::prelude::*;
13 use pyo3::types::PyTuple;
13 14
15 use std::collections::HashSet;
14 16 use std::sync::RwLock;
15 17
18 use hg::MissingAncestors as CoreMissing;
16 19 use vcsgraph::lazy_ancestors::{
17 20 AncestorsIterator as VCGAncestorsIterator,
18 21 LazyAncestors as VCGLazyAncestors,
19 22 };
20 23
21 24 use crate::convert_cpython::{
22 25 proxy_index_py_leak, py_leaked_borrow, py_leaked_borrow_mut,
23 26 py_leaked_or_map_err,
24 27 };
25 28 use crate::exceptions::{map_lock_error, GraphError};
26 29 use crate::revision::{rev_pyiter_collect_with_py_index, PyRevision};
27 30 use crate::util::new_submodule;
28 31 use rusthg::revlog::PySharedIndex;
29 32
30 33 #[pyclass]
31 34 struct AncestorsIterator {
32 35 inner: RwLock<UnsafePyLeaked<VCGAncestorsIterator<PySharedIndex>>>,
33 36 }
34 37
35 38 #[pymethods]
36 39 impl AncestorsIterator {
37 40 #[new]
38 41 fn new(
39 42 index_proxy: &Bound<'_, PyAny>,
40 43 initrevs: &Bound<'_, PyAny>,
41 44 stoprev: PyRevision,
42 45 inclusive: bool,
43 46 ) -> PyResult<Self> {
44 47 let initvec: Vec<_> =
45 48 rev_pyiter_collect_with_py_index(initrevs, index_proxy)?;
46 49 let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
47 50 let res_ait = unsafe {
48 51 leaked_idx.map(py, |idx| {
49 52 VCGAncestorsIterator::new(
50 53 idx,
51 54 initvec.into_iter().map(|r| r.0),
52 55 stoprev.0,
53 56 inclusive,
54 57 )
55 58 })
56 59 };
57 60 let ait =
58 61 py_leaked_or_map_err(py, res_ait, GraphError::from_vcsgraph)?;
59 62 let inner = ait.into();
60 63 Ok(Self { inner })
61 64 }
62 65
63 66 fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
64 67 slf
65 68 }
66 69
67 70 fn __next__(slf: PyRefMut<'_, Self>) -> PyResult<Option<PyRevision>> {
68 71 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
69 72 // Safety: we don't leak the inner 'static ref out of UnsafePyLeaked
70 73 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked)? };
71 74 match inner.next() {
72 75 Some(Err(e)) => Err(GraphError::from_vcsgraph(e)),
73 76 None => Ok(None),
74 77 Some(Ok(r)) => Ok(Some(PyRevision(r))),
75 78 }
76 79 }
77 80 }
78 81
79 82 #[pyclass(sequence)]
80 83 struct LazyAncestors {
81 84 inner: RwLock<UnsafePyLeaked<VCGLazyAncestors<PySharedIndex>>>,
82 85 proxy_index: PyObject,
83 86 initrevs: PyObject,
84 87 stoprev: PyRevision,
85 88 inclusive: bool,
86 89 }
87 90
88 91 #[pymethods]
89 92 impl LazyAncestors {
90 93 #[new]
91 94 fn new(
92 95 index_proxy: &Bound<'_, PyAny>,
93 96 initrevs: &Bound<'_, PyAny>,
94 97 stoprev: PyRevision,
95 98 inclusive: bool,
96 99 ) -> PyResult<Self> {
97 100 let cloned_proxy = index_proxy.clone().unbind();
98 101 let initvec: Vec<_> =
99 102 rev_pyiter_collect_with_py_index(initrevs, index_proxy)?;
100 103 let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
101 104 // Safety: we don't leak the "faked" reference out of
102 105 // `UnsafePyLeaked`
103 106 let res_lazy = unsafe {
104 107 leaked_idx.map(py, |idx| {
105 108 VCGLazyAncestors::new(
106 109 idx,
107 110 initvec.into_iter().map(|r| r.0),
108 111 stoprev.0,
109 112 inclusive,
110 113 )
111 114 })
112 115 };
113 116 let lazy =
114 117 py_leaked_or_map_err(py, res_lazy, GraphError::from_vcsgraph)?;
115 118 Ok(Self {
116 119 inner: lazy.into(),
117 120 proxy_index: cloned_proxy,
118 121 initrevs: initrevs.clone().unbind(),
119 122 stoprev,
120 123 inclusive,
121 124 })
122 125 }
123 126
124 127 fn __bool__(slf: PyRef<'_, Self>) -> PyResult<bool> {
125 128 let leaked = slf.inner.read().map_err(map_lock_error)?;
126 129 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
127 130 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
128 131 Ok(!inner.is_empty())
129 132 }
130 133
131 134 fn __contains__(
132 135 slf: PyRefMut<'_, Self>,
133 136 obj: &Bound<'_, PyAny>,
134 137 ) -> PyResult<bool> {
135 138 PyRevision::extract_bound(obj).map_or(Ok(false), |rev| {
136 139 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
137 140 // Safety: we don't leak the "faked" reference out of
138 141 // `UnsafePyLeaked`
139 142 let mut inner =
140 143 unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
141 144 inner.contains(rev.0).map_err(GraphError::from_vcsgraph)
142 145 })
143 146 }
144 147
145 148 fn __iter__(slf: PyRef<'_, Self>) -> PyResult<AncestorsIterator> {
146 149 let py = slf.py();
147 150 AncestorsIterator::new(
148 151 slf.proxy_index.clone_ref(py).bind(py),
149 152 slf.initrevs.clone_ref(py).bind(py),
150 153 slf.stoprev,
151 154 slf.inclusive,
152 155 )
153 156 }
154 157 }
155 158
159 #[pyclass]
160 struct MissingAncestors {
161 inner: RwLock<UnsafePyLeaked<CoreMissing<PySharedIndex>>>,
162 proxy_index: PyObject,
163 }
164
165 #[pymethods]
166 impl MissingAncestors {
167 #[new]
168 fn new(
169 index_proxy: &Bound<'_, PyAny>,
170 bases: &Bound<'_, PyAny>,
171 ) -> PyResult<Self> {
172 let cloned_proxy = index_proxy.clone().unbind();
173 let bases_vec: Vec<_> =
174 rev_pyiter_collect_with_py_index(bases, index_proxy)?;
175 let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
176
177 // Safety: we don't leak the "faked" reference out of
178 // `UnsafePyLeaked`
179 let inner = unsafe {
180 leaked_idx.map(py, |idx| CoreMissing::new(idx, bases_vec))
181 };
182 Ok(Self {
183 inner: inner.into(),
184 proxy_index: cloned_proxy,
185 })
186 }
187
188 fn hasbases(slf: PyRef<'_, Self>) -> PyResult<bool> {
189 let leaked = slf.inner.read().map_err(map_lock_error)?;
190 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
191 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
192 Ok(inner.has_bases())
193 }
194
195 fn addbases(
196 slf: PyRefMut<'_, Self>,
197 bases: &Bound<'_, PyAny>,
198 ) -> PyResult<()> {
199 let index_proxy = slf.proxy_index.bind(slf.py());
200 let bases_vec: Vec<_> =
201 rev_pyiter_collect_with_py_index(bases, index_proxy)?;
202
203 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
204 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
205 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
206 inner.add_bases(bases_vec);
207 Ok(())
208 }
209
210 fn bases(slf: PyRef<'_, Self>) -> PyResult<HashSet<PyRevision>> {
211 let leaked = slf.inner.read().map_err(map_lock_error)?;
212 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
213 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
214 Ok(inner.get_bases().iter().map(|r| PyRevision(r.0)).collect())
215 }
216
217 fn basesheads(slf: PyRef<'_, Self>) -> PyResult<HashSet<PyRevision>> {
218 let leaked = slf.inner.read().map_err(map_lock_error)?;
219 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
220 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
221 Ok(inner
222 .bases_heads()
223 .map_err(GraphError::from_hg)?
224 .iter()
225 .map(|r| PyRevision(r.0))
226 .collect())
227 }
228
229 fn removeancestorsfrom(
230 slf: PyRef<'_, Self>,
231 revs: &Bound<'_, PyAny>,
232 ) -> PyResult<()> {
233 // Original comment from hg-cpython:
234 // this is very lame: we convert to a Rust set, update it in place
235 // and then convert back to Python, only to have Python remove the
236 // excess (thankfully, Python is happy with a list or even an
237 // iterator)
238 // Leads to improve this:
239 // - have the CoreMissing instead do something emit revisions to
240 // discard
241 // - define a trait for sets of revisions in the core and implement
242 // it for a Python set rewrapped with the GIL marker
243 // PyO3 additional comment: the trait approach would probably be
244 // simpler because we can implement it without a Py wrappper, just
245 // on &Bound<'py, PySet>
246 let index_proxy = slf.proxy_index.bind(slf.py());
247 let mut revs_set: HashSet<_> =
248 rev_pyiter_collect_with_py_index(revs, index_proxy)?;
249
250 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
251 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
252 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
253
254 inner
255 .remove_ancestors_from(&mut revs_set)
256 .map_err(GraphError::from_hg)?;
257 // convert as Python tuple and discard from original `revs`
258 let remaining_tuple =
259 PyTuple::new(slf.py(), revs_set.iter().map(|r| PyRevision(r.0)))?;
260 revs.call_method("intersection_update", (remaining_tuple,), None)?;
261 Ok(())
262 }
263
264 fn missingancestors(
265 slf: PyRefMut<'_, Self>,
266 bases: &Bound<'_, PyAny>,
267 ) -> PyResult<Vec<PyRevision>> {
268 let index_proxy = slf.proxy_index.bind(slf.py());
269 let revs_vec: Vec<_> =
270 rev_pyiter_collect_with_py_index(bases, index_proxy)?;
271
272 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
273 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
274 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
275
276 let missing_vec = inner
277 .missing_ancestors(revs_vec)
278 .map_err(GraphError::from_hg)?;
279 Ok(missing_vec.iter().map(|r| PyRevision(r.0)).collect())
280 }
281 }
282
156 283 pub fn init_module<'py>(
157 284 py: Python<'py>,
158 285 package: &str,
159 286 ) -> PyResult<Bound<'py, PyModule>> {
160 287 let m = new_submodule(py, package, "ancestor")?;
161 288 m.add_class::<AncestorsIterator>()?;
162 289 m.add_class::<LazyAncestors>()?;
290 m.add_class::<MissingAncestors>()?;
163 291 Ok(m)
164 292 }
@@ -1,221 +1,221
1 1 import sys
2 2
3 3 from mercurial.node import wdirrev
4 4
5 5 from mercurial.testing import revlog as revlogtesting
6 6
7 7 try:
8 8 from mercurial import pyo3_rustext, rustext
9 9
10 10 rustext.__name__ # trigger immediate actual import
11 11 pyo3_rustext.__name__
12 12 except ImportError:
13 13 rustext = pyo3_rustext = None
14 14
15 15 try:
16 16 from mercurial.cext import parsers as cparsers
17 17 except ImportError:
18 18 cparsers = None
19 19
20 20
class RustAncestorsTestMixin:
    """Test the correctness of binding to Rust code.

    This test is merely for the binding to Rust itself: extraction of
    Python variable, giving back the results etc.

    It is not meant to test the algorithmic correctness of the operations
    on ancestors it provides. Hence the very simple embedded index data is
    good enough.

    Algorithmic correctness is asserted by the Rust unit tests.

    At this point, we have two sets of bindings, in `hg-cpython` and
    `hg-pyo3`. This class used to be for the first and now contains
    the tests that are identical in both bindings. As of this writing,
    there are more implementations in `hg-cpython` than `hg-pyo3`, hence
    some more tests in the subclass for `hg-cpython`. When the work on PyO3
    is complete, the subclasses for `hg-cpython` should have no specific
    test left. Later on, when we remove the dead code in `hg-cpython`, the
    tests should migrate from the mixin to the class for `hg-pyo3`, until
    we can simply remove the mixin.
    """

    @classmethod
    def ancestors_mod(cls):
        # `ancestor` submodule of whichever bindings package the
        # subclass selects via `rustext_pkg`.
        return cls.rustext_pkg.ancestor

    @classmethod
    def dagop_mod(cls):
        return cls.rustext_pkg.dagop

    @classmethod
    def graph_error(cls):
        return cls.rustext_pkg.GraphError

    def testiteratorrevlist(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator

        idx = self.parserustindex()
        # checking test assumption about the index binary data:
        self.assertEqual(
            {i: (r[5], r[6]) for i, r in enumerate(idx)},
            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
        )
        ait = AncestorsIterator(idx, [3], 0, True)
        self.assertEqual(list(ait), [3, 2, 1, 0])

        ait = AncestorsIterator(idx, [3], 0, False)
        self.assertEqual(list(ait), [2, 1, 0])

        ait = AncestorsIterator(idx, [3], 0, False)
        # tainting the index with a mutation, let's see what happens
        # (should be more critical with AncestorsIterator)
        del idx[0:2]
        try:
            next(ait)
        except RuntimeError as exc:
            assert "leaked reference after mutation" in exc.args[0]
        else:
            raise AssertionError("Expected an exception")

    def testlazyancestors(self):
        LazyAncestors = self.ancestors_mod().LazyAncestors

        idx = self.parserustindex()
        start_count = sys.getrefcount(idx.inner)  # should be 2 (see Python doc)
        self.assertEqual(
            {i: (r[5], r[6]) for i, r in enumerate(idx)},
            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
        )
        lazy = LazyAncestors(idx, [3], 0, True)
        # the LazyAncestors instance holds just one reference to the
        # inner revlog. TODO check that this is normal
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)

        self.assertTrue(2 in lazy)
        self.assertTrue(bool(lazy))
        self.assertFalse(None in lazy)
        self.assertEqual(list(lazy), [3, 2, 1, 0])
        # a second time to validate that we spawn new iterators
        self.assertEqual(list(lazy), [3, 2, 1, 0])

        # now let's watch the refcounts closer
        ait = iter(lazy)
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
        del ait
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
        del lazy
        self.assertEqual(sys.getrefcount(idx.inner), start_count)

        # let's check bool for an empty one
        self.assertFalse(LazyAncestors(idx, [0], 0, False))

    def testrefcount(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator

        idx = self.parserustindex()
        start_count = sys.getrefcount(idx.inner)

        # refcount increases upon iterator init...
        ait = AncestorsIterator(idx, [3], 0, True)
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
        self.assertEqual(next(ait), 3)

        # and decreases once the iterator is removed
        del ait
        self.assertEqual(sys.getrefcount(idx.inner), start_count)

        # and removing ref to the index after iterator init is no issue
        ait = AncestorsIterator(idx, [3], 0, True)
        del idx
        self.assertEqual(list(ait), [3, 2, 1, 0])

        # the index is not tracked by the GC, hence there is nothing more
        # we can assert to check that it is properly deleted once its
        # refcount drops to 0

    def testgrapherror(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator
        GraphError = self.graph_error()

        # corrupt one parent entry in the binary index data
        data = (
            revlogtesting.data_non_inlined[: 64 + 27]
            + b'\xf2'
            + revlogtesting.data_non_inlined[64 + 28 :]
        )
        idx = self.parserustindex(data=data)
        with self.assertRaises(GraphError) as arc:
            AncestorsIterator(idx, [1], -1, False)
        exc = arc.exception
        self.assertIsInstance(exc, ValueError)
        # rust-cpython issues appropriate str instances for Python 2 and 3
        self.assertEqual(exc.args, ('ParentOutOfRange', 1))

    def testwdirunsupported(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator
        GraphError = self.graph_error()

        # trying to access ancestors of the working directory raises
        idx = self.parserustindex()
        with self.assertRaises(GraphError) as arc:
            list(AncestorsIterator(idx, [wdirrev], -1, False))

        exc = arc.exception
        self.assertIsInstance(exc, ValueError)
        # rust-cpython issues appropriate str instances for Python 2 and 3
        self.assertEqual(exc.args, ('InvalidRevision', wdirrev))

    def testheadrevs(self):
        dagop = self.dagop_mod()

        idx = self.parserustindex()
        self.assertEqual(dagop.headrevs(idx, [1, 2, 3]), {3})

    def testmissingancestors(self):
        MissingAncestors = self.ancestors_mod().MissingAncestors

        idx = self.parserustindex()
        missanc = MissingAncestors(idx, [1])
        self.assertTrue(missanc.hasbases())
        self.assertEqual(missanc.missingancestors([3]), [2, 3])
        missanc.addbases({2})
        self.assertEqual(missanc.bases(), {1, 2})
        self.assertEqual(missanc.missingancestors([3]), [3])
        self.assertEqual(missanc.basesheads(), {2})

    def testmissingancestorsremove(self):
        MissingAncestors = self.ancestors_mod().MissingAncestors

        idx = self.parserustindex()
        missanc = MissingAncestors(idx, [1])
        revs = {0, 1, 2, 3}
        missanc.removeancestorsfrom(revs)
        self.assertEqual(revs, {2, 3})
201 195
202 196
class RustCPythonAncestorsTest(
    revlogtesting.RustRevlogBasedTestBase, RustAncestorsTestMixin
):
    """Run the shared mixin tests against the `hg-cpython` bindings."""

    rustext_pkg = rustext
202
class PyO3AncestorsTest(
    revlogtesting.RustRevlogBasedTestBase, RustAncestorsTestMixin
):
    """Run the shared mixin tests against the `hg-pyo3` bindings."""

    rustext_pkg = pyo3_rustext

    def test_rank(self):
        dagop = self.dagop_mod()

        idx = self.parserustindex()
        try:
            dagop.rank(idx, 1, 2)
        except pyo3_rustext.GraphError as exc:
            self.assertEqual(exc.args, ("InconsistentGraphData",))
        else:
            # Previously the test passed silently when no exception was
            # raised; mirror the try/except/else pattern used in
            # testiteratorrevlist so a missing error is a failure.
            raise AssertionError("Expected a GraphError")
216 216
217 217
if __name__ == '__main__':
    # Mercurial's quiet test runner, resolved lazily so importing this
    # module does not require it.
    import silenttestrunner

    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now