##// END OF EJS Templates
rust-pyo3: MissingAncestors
Georges Racinet -
r53432:507fec66 default
parent child Browse files
Show More
@@ -1,164 +1,292
1 1 // ancestors.rs
2 2 //
3 3 // Copyright 2024 Georges Racinet <georges.racinet@cloudcrane.io>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::ancestors` module provided by the
9 9 //! `hg-core` crate. From Python, this will be seen as `pyo3_rustext.ancestor`
10 10 //! and can be used as replacement for the pure `ancestor` Python module.
11 11 use cpython::UnsafePyLeaked;
12 12 use pyo3::prelude::*;
13 use pyo3::types::PyTuple;
13 14
15 use std::collections::HashSet;
14 16 use std::sync::RwLock;
15 17
18 use hg::MissingAncestors as CoreMissing;
16 19 use vcsgraph::lazy_ancestors::{
17 20 AncestorsIterator as VCGAncestorsIterator,
18 21 LazyAncestors as VCGLazyAncestors,
19 22 };
20 23
21 24 use crate::convert_cpython::{
22 25 proxy_index_py_leak, py_leaked_borrow, py_leaked_borrow_mut,
23 26 py_leaked_or_map_err,
24 27 };
25 28 use crate::exceptions::{map_lock_error, GraphError};
26 29 use crate::revision::{rev_pyiter_collect_with_py_index, PyRevision};
27 30 use crate::util::new_submodule;
28 31 use rusthg::revlog::PySharedIndex;
29 32
30 33 #[pyclass]
31 34 struct AncestorsIterator {
32 35 inner: RwLock<UnsafePyLeaked<VCGAncestorsIterator<PySharedIndex>>>,
33 36 }
34 37
35 38 #[pymethods]
36 39 impl AncestorsIterator {
37 40 #[new]
38 41 fn new(
39 42 index_proxy: &Bound<'_, PyAny>,
40 43 initrevs: &Bound<'_, PyAny>,
41 44 stoprev: PyRevision,
42 45 inclusive: bool,
43 46 ) -> PyResult<Self> {
44 47 let initvec: Vec<_> =
45 48 rev_pyiter_collect_with_py_index(initrevs, index_proxy)?;
46 49 let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
47 50 let res_ait = unsafe {
48 51 leaked_idx.map(py, |idx| {
49 52 VCGAncestorsIterator::new(
50 53 idx,
51 54 initvec.into_iter().map(|r| r.0),
52 55 stoprev.0,
53 56 inclusive,
54 57 )
55 58 })
56 59 };
57 60 let ait =
58 61 py_leaked_or_map_err(py, res_ait, GraphError::from_vcsgraph)?;
59 62 let inner = ait.into();
60 63 Ok(Self { inner })
61 64 }
62 65
63 66 fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
64 67 slf
65 68 }
66 69
67 70 fn __next__(slf: PyRefMut<'_, Self>) -> PyResult<Option<PyRevision>> {
68 71 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
69 72 // Safety: we don't leak the inner 'static ref out of UnsafePyLeaked
70 73 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked)? };
71 74 match inner.next() {
72 75 Some(Err(e)) => Err(GraphError::from_vcsgraph(e)),
73 76 None => Ok(None),
74 77 Some(Ok(r)) => Ok(Some(PyRevision(r))),
75 78 }
76 79 }
77 80 }
78 81
79 82 #[pyclass(sequence)]
80 83 struct LazyAncestors {
81 84 inner: RwLock<UnsafePyLeaked<VCGLazyAncestors<PySharedIndex>>>,
82 85 proxy_index: PyObject,
83 86 initrevs: PyObject,
84 87 stoprev: PyRevision,
85 88 inclusive: bool,
86 89 }
87 90
88 91 #[pymethods]
89 92 impl LazyAncestors {
90 93 #[new]
91 94 fn new(
92 95 index_proxy: &Bound<'_, PyAny>,
93 96 initrevs: &Bound<'_, PyAny>,
94 97 stoprev: PyRevision,
95 98 inclusive: bool,
96 99 ) -> PyResult<Self> {
97 100 let cloned_proxy = index_proxy.clone().unbind();
98 101 let initvec: Vec<_> =
99 102 rev_pyiter_collect_with_py_index(initrevs, index_proxy)?;
100 103 let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
101 104 // Safety: we don't leak the "faked" reference out of
102 105 // `UnsafePyLeaked`
103 106 let res_lazy = unsafe {
104 107 leaked_idx.map(py, |idx| {
105 108 VCGLazyAncestors::new(
106 109 idx,
107 110 initvec.into_iter().map(|r| r.0),
108 111 stoprev.0,
109 112 inclusive,
110 113 )
111 114 })
112 115 };
113 116 let lazy =
114 117 py_leaked_or_map_err(py, res_lazy, GraphError::from_vcsgraph)?;
115 118 Ok(Self {
116 119 inner: lazy.into(),
117 120 proxy_index: cloned_proxy,
118 121 initrevs: initrevs.clone().unbind(),
119 122 stoprev,
120 123 inclusive,
121 124 })
122 125 }
123 126
124 127 fn __bool__(slf: PyRef<'_, Self>) -> PyResult<bool> {
125 128 let leaked = slf.inner.read().map_err(map_lock_error)?;
126 129 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
127 130 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
128 131 Ok(!inner.is_empty())
129 132 }
130 133
131 134 fn __contains__(
132 135 slf: PyRefMut<'_, Self>,
133 136 obj: &Bound<'_, PyAny>,
134 137 ) -> PyResult<bool> {
135 138 PyRevision::extract_bound(obj).map_or(Ok(false), |rev| {
136 139 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
137 140 // Safety: we don't leak the "faked" reference out of
138 141 // `UnsafePyLeaked`
139 142 let mut inner =
140 143 unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
141 144 inner.contains(rev.0).map_err(GraphError::from_vcsgraph)
142 145 })
143 146 }
144 147
145 148 fn __iter__(slf: PyRef<'_, Self>) -> PyResult<AncestorsIterator> {
146 149 let py = slf.py();
147 150 AncestorsIterator::new(
148 151 slf.proxy_index.clone_ref(py).bind(py),
149 152 slf.initrevs.clone_ref(py).bind(py),
150 153 slf.stoprev,
151 154 slf.inclusive,
152 155 )
153 156 }
154 157 }
155 158
159 #[pyclass]
160 struct MissingAncestors {
161 inner: RwLock<UnsafePyLeaked<CoreMissing<PySharedIndex>>>,
162 proxy_index: PyObject,
163 }
164
165 #[pymethods]
166 impl MissingAncestors {
167 #[new]
168 fn new(
169 index_proxy: &Bound<'_, PyAny>,
170 bases: &Bound<'_, PyAny>,
171 ) -> PyResult<Self> {
172 let cloned_proxy = index_proxy.clone().unbind();
173 let bases_vec: Vec<_> =
174 rev_pyiter_collect_with_py_index(bases, index_proxy)?;
175 let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
176
177 // Safety: we don't leak the "faked" reference out of
178 // `UnsafePyLeaked`
179 let inner = unsafe {
180 leaked_idx.map(py, |idx| CoreMissing::new(idx, bases_vec))
181 };
182 Ok(Self {
183 inner: inner.into(),
184 proxy_index: cloned_proxy,
185 })
186 }
187
188 fn hasbases(slf: PyRef<'_, Self>) -> PyResult<bool> {
189 let leaked = slf.inner.read().map_err(map_lock_error)?;
190 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
191 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
192 Ok(inner.has_bases())
193 }
194
195 fn addbases(
196 slf: PyRefMut<'_, Self>,
197 bases: &Bound<'_, PyAny>,
198 ) -> PyResult<()> {
199 let index_proxy = slf.proxy_index.bind(slf.py());
200 let bases_vec: Vec<_> =
201 rev_pyiter_collect_with_py_index(bases, index_proxy)?;
202
203 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
204 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
205 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
206 inner.add_bases(bases_vec);
207 Ok(())
208 }
209
210 fn bases(slf: PyRef<'_, Self>) -> PyResult<HashSet<PyRevision>> {
211 let leaked = slf.inner.read().map_err(map_lock_error)?;
212 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
213 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
214 Ok(inner.get_bases().iter().map(|r| PyRevision(r.0)).collect())
215 }
216
217 fn basesheads(slf: PyRef<'_, Self>) -> PyResult<HashSet<PyRevision>> {
218 let leaked = slf.inner.read().map_err(map_lock_error)?;
219 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
220 let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
221 Ok(inner
222 .bases_heads()
223 .map_err(GraphError::from_hg)?
224 .iter()
225 .map(|r| PyRevision(r.0))
226 .collect())
227 }
228
229 fn removeancestorsfrom(
230 slf: PyRef<'_, Self>,
231 revs: &Bound<'_, PyAny>,
232 ) -> PyResult<()> {
233 // Original comment from hg-cpython:
234 // this is very lame: we convert to a Rust set, update it in place
235 // and then convert back to Python, only to have Python remove the
236 // excess (thankfully, Python is happy with a list or even an
237 // iterator)
238 // Leads to improve this:
239 // - have the CoreMissing instead do something emit revisions to
240 // discard
241 // - define a trait for sets of revisions in the core and implement
242 // it for a Python set rewrapped with the GIL marker
243 // PyO3 additional comment: the trait approach would probably be
244 // simpler because we can implement it without a Py wrappper, just
245 // on &Bound<'py, PySet>
246 let index_proxy = slf.proxy_index.bind(slf.py());
247 let mut revs_set: HashSet<_> =
248 rev_pyiter_collect_with_py_index(revs, index_proxy)?;
249
250 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
251 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
252 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
253
254 inner
255 .remove_ancestors_from(&mut revs_set)
256 .map_err(GraphError::from_hg)?;
257 // convert as Python tuple and discard from original `revs`
258 let remaining_tuple =
259 PyTuple::new(slf.py(), revs_set.iter().map(|r| PyRevision(r.0)))?;
260 revs.call_method("intersection_update", (remaining_tuple,), None)?;
261 Ok(())
262 }
263
264 fn missingancestors(
265 slf: PyRefMut<'_, Self>,
266 bases: &Bound<'_, PyAny>,
267 ) -> PyResult<Vec<PyRevision>> {
268 let index_proxy = slf.proxy_index.bind(slf.py());
269 let revs_vec: Vec<_> =
270 rev_pyiter_collect_with_py_index(bases, index_proxy)?;
271
272 let mut leaked = slf.inner.write().map_err(map_lock_error)?;
273 // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
274 let mut inner = unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
275
276 let missing_vec = inner
277 .missing_ancestors(revs_vec)
278 .map_err(GraphError::from_hg)?;
279 Ok(missing_vec.iter().map(|r| PyRevision(r.0)).collect())
280 }
281 }
282
156 283 pub fn init_module<'py>(
157 284 py: Python<'py>,
158 285 package: &str,
159 286 ) -> PyResult<Bound<'py, PyModule>> {
160 287 let m = new_submodule(py, package, "ancestor")?;
161 288 m.add_class::<AncestorsIterator>()?;
162 289 m.add_class::<LazyAncestors>()?;
290 m.add_class::<MissingAncestors>()?;
163 291 Ok(m)
164 292 }
@@ -1,221 +1,221
1 1 import sys
2 2
3 3 from mercurial.node import wdirrev
4 4
5 5 from mercurial.testing import revlog as revlogtesting
6 6
7 7 try:
8 8 from mercurial import pyo3_rustext, rustext
9 9
10 10 rustext.__name__ # trigger immediate actual import
11 11 pyo3_rustext.__name__
12 12 except ImportError:
13 13 rustext = pyo3_rustext = None
14 14
15 15 try:
16 16 from mercurial.cext import parsers as cparsers
17 17 except ImportError:
18 18 cparsers = None
19 19
20 20
class RustAncestorsTestMixin:
    """Test the correctness of binding to Rust code.

    This test is merely for the binding to Rust itself: extraction of
    Python variable, giving back the results etc.

    It is not meant to test the algorithmic correctness of the operations
    on ancestors it provides. Hence the very simple embedded index data is
    good enough.

    Algorithmic correctness is asserted by the Rust unit tests.

    At this point, we have two sets of bindings, in `hg-cpython` and
    `hg-pyo3`. This class used to be for the first and now contains
    the tests that are identical in both bindings. As of this writing,
    there are more implementations in `hg-cpython` than `hg-pyo3`, hence
    some more tests in the subclass for `hg-cpython`. When the work on PyO3
    is complete, the subclasses for `hg-cpython` should have no specific
    test left. Later on, when we remove the dead code in `hg-cpython`, the
    tests should migrate from the mixin to the class for `hg-pyo3`, until
    we can simply remove the mixin.
    """

    @classmethod
    def ancestors_mod(cls):
        # `ancestor` submodule of whichever bindings package the
        # subclass selects via `rustext_pkg`.
        return cls.rustext_pkg.ancestor

    @classmethod
    def dagop_mod(cls):
        return cls.rustext_pkg.dagop

    @classmethod
    def graph_error(cls):
        return cls.rustext_pkg.GraphError

    def testiteratorrevlist(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator

        idx = self.parserustindex()
        # checking test assumption about the index binary data:
        self.assertEqual(
            {i: (r[5], r[6]) for i, r in enumerate(idx)},
            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
        )
        ait = AncestorsIterator(idx, [3], 0, True)
        self.assertEqual(list(ait), [3, 2, 1, 0])

        ait = AncestorsIterator(idx, [3], 0, False)
        self.assertEqual(list(ait), [2, 1, 0])

        ait = AncestorsIterator(idx, [3], 0, False)
        # tainting the index with a mutation, let's see what happens
        # (should be more critical with AncestorsIterator)
        del idx[0:2]
        try:
            next(ait)
        except RuntimeError as exc:
            assert "leaked reference after mutation" in exc.args[0]
        else:
            raise AssertionError("Expected an exception")

    def testlazyancestors(self):
        LazyAncestors = self.ancestors_mod().LazyAncestors

        idx = self.parserustindex()
        start_count = sys.getrefcount(idx.inner)  # should be 2 (see Python doc)
        self.assertEqual(
            {i: (r[5], r[6]) for i, r in enumerate(idx)},
            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
        )
        lazy = LazyAncestors(idx, [3], 0, True)
        # the LazyAncestors instance holds just one reference to the
        # inner revlog. TODO check that this is normal
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)

        self.assertTrue(2 in lazy)
        self.assertTrue(bool(lazy))
        self.assertFalse(None in lazy)
        self.assertEqual(list(lazy), [3, 2, 1, 0])
        # a second time to validate that we spawn new iterators
        self.assertEqual(list(lazy), [3, 2, 1, 0])

        # now let's watch the refcounts closer
        ait = iter(lazy)
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
        del ait
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
        del lazy
        self.assertEqual(sys.getrefcount(idx.inner), start_count)

        # let's check bool for an empty one
        self.assertFalse(LazyAncestors(idx, [0], 0, False))

    def testrefcount(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator

        idx = self.parserustindex()
        start_count = sys.getrefcount(idx.inner)

        # refcount increases upon iterator init...
        ait = AncestorsIterator(idx, [3], 0, True)
        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
        self.assertEqual(next(ait), 3)

        # and decreases once the iterator is removed
        del ait
        self.assertEqual(sys.getrefcount(idx.inner), start_count)

        # and removing ref to the index after iterator init is no issue
        ait = AncestorsIterator(idx, [3], 0, True)
        del idx
        self.assertEqual(list(ait), [3, 2, 1, 0])

        # the index is not tracked by the GC, hence there is nothing more
        # we can assert to check that it is properly deleted once its
        # refcount drops to 0

    def testgrapherror(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator
        GraphError = self.graph_error()

        # corrupt one parent entry in the binary index data
        data = (
            revlogtesting.data_non_inlined[: 64 + 27]
            + b'\xf2'
            + revlogtesting.data_non_inlined[64 + 28 :]
        )
        idx = self.parserustindex(data=data)
        with self.assertRaises(GraphError) as arc:
            AncestorsIterator(idx, [1], -1, False)
        exc = arc.exception
        self.assertIsInstance(exc, ValueError)
        # rust-cpython issues appropriate str instances for Python 2 and 3
        self.assertEqual(exc.args, ('ParentOutOfRange', 1))

    def testwdirunsupported(self):
        AncestorsIterator = self.ancestors_mod().AncestorsIterator
        GraphError = self.graph_error()

        # trying to access ancestors of the working directory raises
        idx = self.parserustindex()
        with self.assertRaises(GraphError) as arc:
            list(AncestorsIterator(idx, [wdirrev], -1, False))

        exc = arc.exception
        self.assertIsInstance(exc, ValueError)
        # rust-cpython issues appropriate str instances for Python 2 and 3
        self.assertEqual(exc.args, ('InvalidRevision', wdirrev))

    def testheadrevs(self):
        dagop = self.dagop_mod()

        idx = self.parserustindex()
        self.assertEqual(dagop.headrevs(idx, [1, 2, 3]), {3})

    def testmissingancestors(self):
        MissingAncestors = self.ancestors_mod().MissingAncestors

        idx = self.parserustindex()
        missanc = MissingAncestors(idx, [1])
        self.assertTrue(missanc.hasbases())
        self.assertEqual(missanc.missingancestors([3]), [2, 3])
        missanc.addbases({2})
        self.assertEqual(missanc.bases(), {1, 2})
        self.assertEqual(missanc.missingancestors([3]), [3])
        self.assertEqual(missanc.basesheads(), {2})

    def testmissingancestorsremove(self):
        MissingAncestors = self.ancestors_mod().MissingAncestors

        idx = self.parserustindex()
        missanc = MissingAncestors(idx, [1])
        revs = {0, 1, 2, 3}
        missanc.removeancestorsfrom(revs)
        self.assertEqual(revs, {2, 3})
201 195
202 196
class RustCPythonAncestorsTest(
    revlogtesting.RustRevlogBasedTestBase, RustAncestorsTestMixin
):
    """Run the shared mixin tests against the `hg-cpython` bindings."""

    rustext_pkg = rustext
202
class PyO3AncestorsTest(
    revlogtesting.RustRevlogBasedTestBase, RustAncestorsTestMixin
):
    """Run the shared mixin tests against the `hg-pyo3` bindings."""

    rustext_pkg = pyo3_rustext

    def test_rank(self):
        dagop = self.dagop_mod()

        idx = self.parserustindex()
        try:
            dagop.rank(idx, 1, 2)
        except pyo3_rustext.GraphError as exc:
            self.assertEqual(exc.args, ("InconsistentGraphData",))
        else:
            # Previously the test passed silently when no exception was
            # raised; mirror the try/except/else pattern used in
            # testiteratorrevlist so a missing error is a failure.
            raise AssertionError("Expected a GraphError")
216 216
217 217
if __name__ == '__main__':
    # Mercurial's quiet test runner, resolved lazily so importing this
    # module does not require it.
    import silenttestrunner

    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now