# HG changeset patch # User Simon Sapin # Date 2021-09-06 11:39:54 # Node ID 8f031a274cd60c6833b142f8ea274bcf8274b43f # Parent 9cd35c8c60449273adcbdf2373b8604f7775a0f1 rust: Move PyBytesWithData out of copy-tracing code So we can use it in other places to. Replace its `.data()` method with the `Deref` trait, allowing this type to be used in generic contexts. Rename the type accordingly. Differential Revision: https://phab.mercurial-scm.org/D11395 diff --git a/rust/hg-cpython/src/copy_tracing.rs b/rust/hg-cpython/src/copy_tracing.rs --- a/rust/hg-cpython/src/copy_tracing.rs +++ b/rust/hg-cpython/src/copy_tracing.rs @@ -13,58 +13,7 @@ use hg::copy_tracing::ChangedFiles; use hg::copy_tracing::CombineChangesetCopies; use hg::Revision; -use self::pybytes_with_data::PyBytesWithData; - -// Module to encapsulate private fields -mod pybytes_with_data { - use cpython::{PyBytes, Python}; - - /// Safe abstraction over a `PyBytes` together with the `&[u8]` slice - /// that borrows it. - /// - /// Calling `PyBytes::data` requires a GIL marker but we want to access the - /// data in a thread that (ideally) does not need to acquire the GIL. - /// This type allows separating the call an the use. - pub(super) struct PyBytesWithData { - #[allow(unused)] - keep_alive: PyBytes, - - /// Borrows the buffer inside `self.keep_alive`, - /// but the borrow-checker cannot express self-referential structs. - data: *const [u8], - } - - fn require_send() {} - - #[allow(unused)] - fn static_assert_pybytes_is_send() { - require_send::; - } - - // Safety: PyBytes is Send. Raw pointers are not by default, - // but here sending one to another thread is fine since we ensure it stays - // valid. - unsafe impl Send for PyBytesWithData {} - - impl PyBytesWithData { - pub fn new(py: Python, bytes: PyBytes) -> Self { - Self { - data: bytes.data(py), - keep_alive: bytes, - } - } - - pub fn data(&self) -> &[u8] { - // Safety: the raw pointer is valid as long as the PyBytes is still - // alive, and the returned slice borrows `self`. - unsafe { &*self.data } - } - - pub fn unwrap(self) -> PyBytes { - self.keep_alive - } - } -} +use crate::pybytes_deref::PyBytesDeref; /// Combines copies information contained into revision `revs` to build a copy /// map. @@ -123,7 +72,7 @@ pub fn combine_changeset_copies_wrapper( // // TODO: tweak the bound? let (rev_info_sender, rev_info_receiver) = - crossbeam_channel::bounded::>(1000); + crossbeam_channel::bounded::>(1000); // This channel (going the other way around) however is unbounded. // If they were both bounded, there might potentially be deadlocks @@ -143,7 +92,7 @@ pub fn combine_changeset_copies_wrapper( CombineChangesetCopies::new(children_count); for (rev, p1, p2, opt_bytes) in rev_info_receiver { let files = match &opt_bytes { - Some(raw) => ChangedFiles::new(raw.data()), + Some(raw) => ChangedFiles::new(raw.as_ref()), // Python None was extracted to Option::None, // meaning there was no copy data. None => ChangedFiles::new_empty(), @@ -169,7 +118,7 @@ pub fn combine_changeset_copies_wrapper( for rev_info in revs_info { let (rev, p1, p2, opt_bytes) = rev_info?; - let opt_bytes = opt_bytes.map(|b| PyBytesWithData::new(py, b)); + let opt_bytes = opt_bytes.map(|b| PyBytesDeref::new(py, b)); // We’d prefer to avoid the child thread calling into Python code, // but this avoids a potential deadlock on the GIL if it does: diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -36,6 +36,7 @@ pub mod dirstate; pub mod discovery; pub mod exceptions; pub mod parsers; +mod pybytes_deref; pub mod revlog; pub mod utils; diff --git a/rust/hg-cpython/src/pybytes_deref.rs b/rust/hg-cpython/src/pybytes_deref.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/pybytes_deref.rs @@ -0,0 +1,53 @@ +use cpython::{PyBytes, Python}; + +/// Safe abstraction over a `PyBytes` together with the `&[u8]` slice +/// that borrows it. Implements `Deref`. +/// +/// Calling `PyBytes::data` requires a GIL marker but we want to access the +/// data in a thread that (ideally) does not need to acquire the GIL. +/// This type allows separating the call an the use. +/// +/// It also enables using a (wrapped) `PyBytes` in GIL-unaware generic code. +pub struct PyBytesDeref { + #[allow(unused)] + keep_alive: PyBytes, + + /// Borrows the buffer inside `self.keep_alive`, + /// but the borrow-checker cannot express self-referential structs. + data: *const [u8], +} + +impl PyBytesDeref { + pub fn new(py: Python, bytes: PyBytes) -> Self { + Self { + data: bytes.data(py), + keep_alive: bytes, + } + } + + pub fn unwrap(self) -> PyBytes { + self.keep_alive + } +} + +impl std::ops::Deref for PyBytesDeref { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + // Safety: the raw pointer is valid as long as the PyBytes is still + // alive, and the returned slice borrows `self`. + unsafe { &*self.data } + } +} + +fn require_send() {} + +#[allow(unused)] +fn static_assert_pybytes_is_send() { + require_send::; +} + +// Safety: PyBytes is Send. Raw pointers are not by default, +// but here sending one to another thread is fine since we ensure it stays +// valid. +unsafe impl Send for PyBytesDeref {}