##// END OF EJS Templates
hg-cpython: implement vcsgraph::Graph for our Index...
hg-cpython: implement vcsgraph::Graph for our Index Differential Revision: https://phab.mercurial-scm.org/D11946

File last commit:

r49073:a098543b stable
r49349:8e8737a1 default
Show More
revlog.rs
512 lines | 16.9 KiB | application/rls-services+xml | RustLexer
rust-index: add a function to convert PyObject index for hg-core...
r44398 // revlog.rs
//
Georges Racinet
rust-nodemap: add utils for propagating errors...
r44993 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
rust-index: add a function to convert PyObject index for hg-core...
r44398 //
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 use crate::{
cindex,
utils::{node_from_py_bytes, node_from_py_object},
};
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 use cpython::{
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 buffer::{Element, PyBuffer},
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 exc::{IndexError, ValueError},
revlog: replace revlog._io.size with a new revlog.index.entry_size...
r47736 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 };
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 use hg::{
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 nodemap::{Block, NodeMapError, NodeTree},
Simon Sapin
rust: Remove hex parsing from the nodemap...
r47161 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
Simon Sapin
rust: Simplify error type for reading hex node IDs...
r47156 Revision,
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 };
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 use std::cell::RefCell;
rust-index: add a function to convert PyObject index for hg-core...
r44398
/// Return a Struct implementing the Graph trait
Augie Fackler
revlog: run rustfmt nightly...
r44527 pub(crate) fn pyindex_to_graph(
py: Python,
index: PyObject,
) -> PyResult<cindex::Index> {
rust-index: handle `MixedIndex` in `pyindex_to_graph`...
r44463 match index.extract::<MixedIndex>(py) {
Ok(midx) => Ok(midx.clone_cindex(py)),
Err(_) => cindex::Index::new(py, index),
}
rust-index: add a function to convert PyObject index for hg-core...
r44398 }
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
py_class!(pub class MixedIndex |py| {
data cindex: RefCell<cindex::Index>;
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 data nt: RefCell<Option<NodeTree>>;
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 data docket: RefCell<Option<PyObject>>;
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 // Holds a reference to the mmap'ed persistent nodemap data
data mmap: RefCell<Option<PyBuffer>>;
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
Georges Racinet
rust-index: moved constructor in separate impl block...
r44990 Self::new(py, cindex)
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 }
Georges Racinet
rust-index: expose a method to retrieve the C index...
r44464 /// Compatibility layer used for Python consumers needing access to the C index
///
/// Only use case so far is `scmutil.shortesthexnodeidprefix`,
/// that may need to build a custom `nodetree`, based on a specified revset.
/// With a Rust implementation of the nodemap, we will be able to get rid of
/// this, by exposing our own standalone nodemap class,
/// ready to accept `MixedIndex`.
def get_cindex(&self) -> PyResult<PyObject> {
Ok(self.cindex(py).borrow().inner().clone_ref(py))
}
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 /// Return Revision if found, raises a bare `error.RevlogError`
/// in case of ambiguity, same as C version does
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 let node = node_from_py_bytes(py, &node)?;
nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 }
/// same as `get_rev()` but raises a bare `error.RevlogError` if node
/// is not found.
///
/// No need to repeat `node` in the exception, `mercurial/revlog.py`
/// will catch and rewrap with it
def rev(&self, node: PyBytes) -> PyResult<Revision> {
self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
}
/// return True if the node exist in the index
def has_node(&self, node: PyBytes) -> PyResult<bool> {
self.get_rev(py, node).map(|opt| opt.is_some())
}
/// find length of shortest hex nodeid of a binary ID
def shortest(&self, node: PyBytes) -> PyResult<usize> {
let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
{
Ok(Some(l)) => Ok(l),
Ok(None) => Err(revlog_error(py)),
Err(e) => Err(nodemap_error(py, e)),
}
}
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 let opt = self.get_nodetree(py)?.borrow();
let nt = opt.as_ref().unwrap();
let idx = &*self.cindex(py).borrow();
let node_as_string = if cfg!(feature = "python3-sys") {
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 }
else {
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 let node = node.extract::<PyBytes>(py)?;
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 String::from_utf8_lossy(node.data(py)).to_string()
};
Simon Sapin
rust: Remove hex parsing from the nodemap...
r47161 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
Georges Racinet
rust-nodemap: backed out mitigation for issue 6554...
r49090 nt.find_bin(idx, prefix)
// TODO make an inner API returning the node directly
.map(|opt| opt.map(
|rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
.map_err(|e| nodemap_error(py, e))
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 }
/// append an index entry
def append(&self, tup: PyTuple) -> PyResult<PyObject> {
if tup.len(py) < 8 {
// this is better than the panic promised by tup.get_item()
return Err(
PyErr::new::<IndexError, _>(py, "tuple index out of range"))
}
let node_bytes = tup.get_item(py, 7).extract(py)?;
let node = node_from_py_object(py, &node_bytes)?;
let mut idx = self.cindex(py).borrow_mut();
let rev = idx.len() as Revision;
idx.append(py, tup)?;
self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
.insert(&*idx, &node, rev)
.map_err(|e| nodemap_error(py, e))?;
Ok(py.None())
}
def __delitem__(&self, key: PyObject) -> PyResult<()> {
// __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
self.cindex(py).borrow().inner().del_item(py, key)?;
let mut opt = self.get_nodetree(py)?.borrow_mut();
let mut nt = opt.as_mut().unwrap();
nt.invalidate_all();
self.fill_nodemap(py, &mut nt)?;
Ok(())
}
//
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 // Reforwarded C index API
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 //
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
// index_methods (tp_methods). Same ordering as in revlog.c
/// return the gca set of the given revs
def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "ancestors", args, kw)
}
/// return the heads of the common ancestors of the given revs
def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "commonancestorsheads", args, kw)
}
Georges Racinet
rust-nodemap: also clear Rust data in `clearcaches`...
r44998 /// Clear the index caches and inner py_class data.
/// It is Python's responsibility to call `update_nodemap_data` again.
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
Georges Racinet
rust-nodemap: also clear Rust data in `clearcaches`...
r44998 self.nt(py).borrow_mut().take();
self.docket(py).borrow_mut().take();
self.mmap(py).borrow_mut().take();
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 self.call_cindex(py, "clearcaches", args, kw)
}
revlog: add a `entry_binary` method on index...
r47808 /// return the raw binary string representing a revision
def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "entry_binary", args, kw)
}
revlog: have an explicit "pack_header" method...
r47811 /// return a binary packed version of the header
def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "pack_header", args, kw)
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 /// get an index entry
def get(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "get", args, kw)
}
/// compute phases
def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "computephasesmapsets", args, kw)
}
/// reachableroots
def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "reachableroots2", args, kw)
}
/// get head revisions
def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "headrevs", args, kw)
}
/// get filtered head revisions
def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "headrevsfiltered", args, kw)
}
/// True if the object is a snapshot
def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "issnapshot", args, kw)
}
/// Gather snapshot data in a cache dict
def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "findsnapshots", args, kw)
}
/// determine revisions with deltas to reconstruct fulltext
def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "deltachain", args, kw)
}
/// slice planned chunk read to reach a density threshold
def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "slicechunktodensity", args, kw)
}
/// stats for the index
def stats(&self, *args, **kw) -> PyResult<PyObject> {
self.call_cindex(py, "stats", args, kw)
}
// index_sequence_methods and index_mapping_methods.
//
// Since we call back through the high level Python API,
// there's no point making a distinction between index_get
// and index_getitem.
def __len__(&self) -> PyResult<usize> {
self.cindex(py).borrow().inner().len(py)
}
def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
// this conversion seems needless, but that's actually because
// `index_getitem` does not handle conversion from PyLong,
// which expressions such as [e for e in index] internally use.
// Note that we don't seem to have a direct way to call
Georges Racinet
rust-nodemap: also clear Rust data in `clearcaches`...
r44998 // PySequence_GetItem (does the job), which would possibly be better
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 // for performance
let key = match key.extract::<Revision>(py) {
Ok(rev) => rev.to_py_object(py).into_object(),
Err(_) => key,
};
self.cindex(py).borrow().inner().get_item(py, key)
}
def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
self.cindex(py).borrow().inner().set_item(py, key, value)
}
def __contains__(&self, item: PyObject) -> PyResult<bool> {
// ObjectProtocol does not seem to provide contains(), so
// this is an equivalent implementation of the index_contains()
// defined in revlog.c
let cindex = self.cindex(py).borrow();
match item.extract::<Revision>(py) {
Ok(rev) => {
Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
}
Err(_) => {
cindex.inner().call_method(
py,
"has_node",
PyTuple::new(py, &[item]),
None)?
.extract(py)
}
}
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_all`...
r44995 def nodemap_data_all(&self) -> PyResult<PyBytes> {
self.inner_nodemap_data_all(py)
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
self.inner_nodemap_data_incremental(py)
}
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 def update_nodemap_data(
&self,
docket: PyObject,
nm_data: PyObject
) -> PyResult<PyObject> {
self.inner_update_nodemap_data(py, docket, nm_data)
}
revlog: replace revlog._io.size with a new revlog.index.entry_size...
r47736 @property
def entry_size(&self) -> PyResult<PyInt> {
self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413
revlog: signal which revlog index are compatible with Rust...
r48042 @property
def rust_ext_compat(&self) -> PyResult<PyInt> {
self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 });
impl MixedIndex {
Georges Racinet
rust-index: moved constructor in separate impl block...
r44990 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
Self::create_instance(
py,
RefCell::new(cindex::Index::new(py, cindex)?),
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 RefCell::new(None),
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996 RefCell::new(None),
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997 RefCell::new(None),
Georges Racinet
rust-index: moved constructor in separate impl block...
r44990 )
}
Raphaël Gomès
rust-nodemap: use proper Index API instead of using the C API...
r44994 /// This is scaffolding at this point, but it could also become
/// a way to start a persistent nodemap or perform a
/// vacuum / repack operation
fn fill_nodemap(
&self,
py: Python,
nt: &mut NodeTree,
) -> PyResult<PyObject> {
let index = self.cindex(py).borrow();
for r in 0..index.len() {
let rev = r as Revision;
// in this case node() won't ever return None
nt.insert(&*index, index.node(rev).unwrap(), rev)
.map_err(|e| nodemap_error(py, e))?
}
Ok(py.None())
}
fn get_nodetree<'a>(
&'a self,
py: Python<'a>,
) -> PyResult<&'a RefCell<Option<NodeTree>>> {
if self.nt(py).borrow().is_none() {
let readonly = Box::new(Vec::new());
let mut nt = NodeTree::load_bytes(readonly, 0);
self.fill_nodemap(py, &mut nt)?;
self.nt(py).borrow_mut().replace(nt);
}
Ok(self.nt(py))
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 /// forward a method call to the underlying C index
fn call_cindex(
&self,
py: Python,
name: &str,
args: &PyTuple,
kwargs: Option<&PyDict>,
) -> PyResult<PyObject> {
self.cindex(py)
.borrow()
.inner()
.call_method(py, name, args, kwargs)
}
Georges Racinet
rust-index: make it possible to clone the struct referencing the C index...
r44462
pub fn clone_cindex(&self, py: Python) -> cindex::Index {
self.cindex(py).borrow().clone_ref(py)
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_all`...
r44995
/// Returns the full nodemap bytes to be written as-is to disk
fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
// If there's anything readonly, we need to build the data again from
// scratch
let bytes = if readonly.len() > 0 {
let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
self.fill_nodemap(py, &mut nt)?;
let (readonly, bytes) = nt.into_readonly_and_added_bytes();
assert_eq!(readonly.len(), 0);
bytes
} else {
bytes
};
let bytes = PyBytes::new(py, &bytes);
Ok(bytes)
}
Georges Racinet
rust-nodemap: add binding for `nodemap_data_incremental`...
r44996
/// Returns the last saved docket along with the size of any changed data
/// (in number of blocks), and said data as bytes.
fn inner_nodemap_data_incremental(
&self,
py: Python,
) -> PyResult<PyObject> {
let docket = self.docket(py).borrow();
let docket = match docket.as_ref() {
Some(d) => d,
None => return Ok(py.None()),
};
let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
let masked_blocks = node_tree.masked_readonly_blocks();
let (_, data) = node_tree.into_readonly_and_added_bytes();
let changed = masked_blocks * std::mem::size_of::<Block>();
Ok((docket, changed, PyBytes::new(py, &data))
.to_py_object(py)
.into_object())
}
Georges Racinet
rust-nodemap: add binding to `nodemap_update_data`...
r44997
/// Update the nodemap from the new (mmaped) data.
/// The docket is kept as a reference for later incremental calls.
fn inner_update_nodemap_data(
&self,
py: Python,
docket: PyObject,
nm_data: PyObject,
) -> PyResult<PyObject> {
let buf = PyBuffer::get(py, &nm_data)?;
let len = buf.item_count();
// Build a slice from the mmap'ed buffer data
let cbuf = buf.buf_ptr();
let bytes = if std::mem::size_of::<u8>() == buf.item_size()
&& buf.is_c_contiguous()
&& u8::is_compatible_format(buf.format())
{
unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
} else {
return Err(PyErr::new::<ValueError, _>(
py,
"Nodemap data buffer has an invalid memory representation"
.to_string(),
));
};
// Keep a reference to the mmap'ed buffer, otherwise we get a dangling
// pointer.
self.mmap(py).borrow_mut().replace(buf);
let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
let data_tip =
docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
self.docket(py).borrow_mut().replace(docket.clone_ref(py));
let idx = self.cindex(py).borrow();
let current_tip = idx.len();
for r in (data_tip + 1)..current_tip as Revision {
let rev = r as Revision;
// in this case node() won't ever return None
nt.insert(&*idx, idx.node(rev).unwrap(), rev)
.map_err(|e| nodemap_error(py, e))?
}
*self.nt(py).borrow_mut() = Some(nt);
Ok(py.None())
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 }
Georges Racinet
rust-nodemap: add utils for propagating errors...
r44993 fn revlog_error(py: Python) -> PyErr {
match py
.import("mercurial.error")
.and_then(|m| m.get(py, "RevlogError"))
{
Err(e) => e,
Raphaël Gomès
hg-cpython: fix new occuring TypeError...
r48086 Ok(cls) => PyErr::from_instance(
py,
cls.call(py, (py.None(),), None).ok().into_py_object(py),
),
Georges Racinet
rust-nodemap: add utils for propagating errors...
r44993 }
}
fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
PyErr::new::<ValueError, _>(
py,
format!(
"Inconsistency: Revision {} found in nodemap \
is not in revlog index",
rev
),
)
}
/// Standard treatment of NodeMapError
fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
match err {
NodeMapError::MultipleResults => revlog_error(py),
NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
}
}
Georges Racinet
rust-index: add a struct wrapping the C index...
r44413 /// Create the module, with __package__ given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let dotted_name = &format!("{}.revlog", package);
let m = PyModule::new(py, dotted_name)?;
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "RevLog - Rust implementations")?;
m.add_class::<MixedIndex>(py)?;
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
sys_modules.set_item(py, dotted_name, &m)?;
Ok(m)
}