diff --git a/rust/hg-core/src/revlog/index.rs b/rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs +++ b/rust/hg-core/src/revlog/index.rs @@ -123,6 +123,10 @@ impl IndexData { } Ok(()) } + + fn is_new(&self) -> bool { + self.bytes.is_empty() + } } impl std::ops::Index> for IndexData { @@ -146,6 +150,7 @@ impl std::ops::Index Self { + Self { + flags: 0, + data_offset: 0, + data_compressed_length: 0, + data_uncompressed_length: 0, + data_delta_base: -1, + link_rev: -1, + parent_rev_1: -1, + parent_rev_2: -1, + node_id: [0; NODE_BYTES_LENGTH], + _sidedata_offset: 0, + _sidedata_compressed_length: 0, + data_compression_mode: COMPRESSION_MODE_INLINE, + _sidedata_compression_mode: COMPRESSION_MODE_INLINE, + _rank: -1, + } + } +} + #[derive(BytesCast)] #[repr(C)] pub struct RevisionDataV1 { @@ -397,6 +423,29 @@ impl Index { }) } + pub fn entry_as_params( + &self, + rev: UncheckedRevision, + ) -> Option { + let rev = self.check_revision(rev)?; + self.get_entry(rev).map(|e| RevisionDataParams { + flags: e.flags(), + data_offset: if rev.0 == 0 && !self.bytes.is_new() { + e.flags() as u64 + } else { + e.raw_offset() + }, + data_compressed_length: e.compressed_len().try_into().unwrap(), + data_uncompressed_length: e.uncompressed_len(), + data_delta_base: e.base_revision_or_base_of_delta_chain().0, + link_rev: e.link_revision().0, + parent_rev_1: e.p1().0, + parent_rev_2: e.p2().0, + node_id: e.hash().as_bytes().try_into().unwrap(), + ..Default::default() + }) + } + fn get_entry_inline( &self, rev: Revision, @@ -519,6 +568,9 @@ impl<'a> IndexEntry<'a> { BigEndian::read_u64(&bytes[..]) as usize } } + pub fn raw_offset(&self) -> u64 { + BigEndian::read_u64(&self.bytes[0..8]) + } pub fn flags(&self) -> u16 { BigEndian::read_u16(&self.bytes[6..=7]) diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs +++ b/rust/hg-cpython/src/revlog.rs @@ -17,11 +17,10 @@ use cpython::{ PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, }; use hg::{ - index::IndexHeader, - index::{RevisionDataParams, COMPRESSION_MODE_INLINE}, + index::{IndexHeader, RevisionDataParams}, nodemap::{Block, NodeMapError, NodeTree}, revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, - BaseRevision, Revision, UncheckedRevision, + BaseRevision, Revision, UncheckedRevision, NULL_REVISION, }; use std::cell::RefCell; @@ -237,11 +236,6 @@ py_class!(pub class MixedIndex |py| { Ok(rust_res) } - /// get an index entry - def get(&self, *args, **kw) -> PyResult { - self.call_cindex(py, "get", args, kw) - } - /// compute phases def computephasesmapsets(&self, *args, **kw) -> PyResult { self.call_cindex(py, "computephasesmapsets", args, kw) @@ -292,23 +286,32 @@ py_class!(pub class MixedIndex |py| { // Since we call back through the high level Python API, // there's no point making a distinction between index_get // and index_getitem. + // gracinet 2023: this above is no longer true for the pure Rust impl def __len__(&self) -> PyResult { self.len(py) } def __getitem__(&self, key: PyObject) -> PyResult { + let rust_res = self.inner_getitem(py, key.clone_ref(py))?; + // this conversion seems needless, but that's actually because // `index_getitem` does not handle conversion from PyLong, // which expressions such as [e for e in index] internally use. // Note that we don't seem to have a direct way to call // PySequence_GetItem (does the job), which would possibly be better // for performance - let key = match key.extract::(py) { + // gracinet 2023: the above comment can be removed when we use + // the pure Rust impl only. Note also that `key` can be a binary + // node id. + let c_key = match key.extract::(py) { Ok(rev) => rev.to_py_object(py).into_object(), Err(_) => key, }; - self.cindex(py).borrow().inner().get_item(py, key) + let c_res = self.cindex(py).borrow().inner().get_item(py, c_key)?; + + assert_py_eq(py, "__getitem__", &rust_res, &c_res)?; + Ok(rust_res) } def __contains__(&self, item: PyObject) -> PyResult { @@ -426,13 +429,49 @@ fn py_tuple_to_revision_data_params( parent_rev_1: tuple.get_item(py, 5).extract(py)?, parent_rev_2: tuple.get_item(py, 6).extract(py)?, node_id, - _sidedata_offset: 0, - _sidedata_compressed_length: 0, - data_compression_mode: COMPRESSION_MODE_INLINE, - _sidedata_compression_mode: COMPRESSION_MODE_INLINE, - _rank: -1, + ..Default::default() }) } +fn revision_data_params_to_py_tuple( + py: Python, + params: RevisionDataParams, +) -> PyTuple { + PyTuple::new( + py, + &[ + params.data_offset.into_py_object(py).into_object(), + params + .data_compressed_length + .into_py_object(py) + .into_object(), + params + .data_uncompressed_length + .into_py_object(py) + .into_object(), + params.data_delta_base.into_py_object(py).into_object(), + params.link_rev.into_py_object(py).into_object(), + params.parent_rev_1.into_py_object(py).into_object(), + params.parent_rev_2.into_py_object(py).into_object(), + PyBytes::new(py, ¶ms.node_id) + .into_py_object(py) + .into_object(), + params._sidedata_offset.into_py_object(py).into_object(), + params + ._sidedata_compressed_length + .into_py_object(py) + .into_object(), + params + .data_compression_mode + .into_py_object(py) + .into_object(), + params + ._sidedata_compression_mode + .into_py_object(py) + .into_object(), + params._rank.into_py_object(py).into_object(), + ], + ) +} impl MixedIndex { fn new( @@ -601,6 +640,34 @@ impl MixedIndex { Ok(py.None()) } + + fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult { + let idx = self.index(py).borrow(); + Ok(match key.extract::(py) { + Ok(key_as_int) => { + let entry_params = if key_as_int == NULL_REVISION.0 { + RevisionDataParams::default() + } else { + let rev = UncheckedRevision(key_as_int); + match idx.entry_as_params(rev) { + Some(e) => e, + None => { + return Err(PyErr::new::( + py, + "revlog index out of range", + )); + } + } + }; + revision_data_params_to_py_tuple(py, entry_params) + .into_object() + } + _ => self.get_rev(py, key.extract::(py)?)?.map_or_else( + || py.None(), + |py_rev| py_rev.into_py_object(py).into_object(), + ), + }) + } } fn revlog_error(py: Python) -> PyErr {