revlog.rs
512 lines
| 16.9 KiB
| application/rls-services+xml
|
RustLexer
r44398 | // revlog.rs | |||
// | ||||
Georges Racinet
|
r44993 | // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> | ||
r44398 | // | |||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
Raphaël Gomès
|
r44994 | use crate::{ | ||
cindex, | ||||
utils::{node_from_py_bytes, node_from_py_object}, | ||||
}; | ||||
Georges Racinet
|
r44413 | use cpython::{ | ||
Georges Racinet
|
r44997 | buffer::{Element, PyBuffer}, | ||
Raphaël Gomès
|
r44994 | exc::{IndexError, ValueError}, | ||
r47736 | ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, | |||
PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, | ||||
Georges Racinet
|
r44413 | }; | ||
Raphaël Gomès
|
r44994 | use hg::{ | ||
Georges Racinet
|
r44996 | nodemap::{Block, NodeMapError, NodeTree}, | ||
Simon Sapin
|
r47161 | revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, | ||
Simon Sapin
|
r47156 | Revision, | ||
Raphaël Gomès
|
r44994 | }; | ||
Georges Racinet
|
r44413 | use std::cell::RefCell; | ||
r44398 | ||||
/// Return a Struct implementing the Graph trait | ||||
Augie Fackler
|
r44527 | pub(crate) fn pyindex_to_graph( | ||
py: Python, | ||||
index: PyObject, | ||||
) -> PyResult<cindex::Index> { | ||||
r44463 | match index.extract::<MixedIndex>(py) { | |||
Ok(midx) => Ok(midx.clone_cindex(py)), | ||||
Err(_) => cindex::Index::new(py, index), | ||||
} | ||||
r44398 | } | |||
Georges Racinet
|
r44413 | |||
py_class!(pub class MixedIndex |py| { | ||||
data cindex: RefCell<cindex::Index>; | ||||
Raphaël Gomès
|
r44994 | data nt: RefCell<Option<NodeTree>>; | ||
Georges Racinet
|
r44996 | data docket: RefCell<Option<PyObject>>; | ||
Georges Racinet
|
r44997 | // Holds a reference to the mmap'ed persistent nodemap data | ||
data mmap: RefCell<Option<PyBuffer>>; | ||||
Georges Racinet
|
r44413 | |||
def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> { | ||||
Georges Racinet
|
r44990 | Self::new(py, cindex) | ||
Georges Racinet
|
r44413 | } | ||
Georges Racinet
|
r44464 | /// Compatibility layer used for Python consumers needing access to the C index | ||
/// | ||||
/// Only use case so far is `scmutil.shortesthexnodeidprefix`, | ||||
/// that may need to build a custom `nodetree`, based on a specified revset. | ||||
/// With a Rust implementation of the nodemap, we will be able to get rid of | ||||
/// this, by exposing our own standalone nodemap class, | ||||
/// ready to accept `MixedIndex`. | ||||
def get_cindex(&self) -> PyResult<PyObject> { | ||||
Ok(self.cindex(py).borrow().inner().clone_ref(py)) | ||||
} | ||||
Raphaël Gomès
|
r44994 | // Index API involving nodemap, as defined in mercurial/pure/parsers.py | ||
Georges Racinet
|
r44413 | |||
Raphaël Gomès
|
r44994 | /// Return Revision if found, raises a bare `error.RevlogError` | ||
/// in case of ambiguity, same as C version does | ||||
Georges Racinet
|
r49090 | def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> { | ||
Raphaël Gomès
|
r44994 | let opt = self.get_nodetree(py)?.borrow(); | ||
let nt = opt.as_ref().unwrap(); | ||||
let idx = &*self.cindex(py).borrow(); | ||||
Georges Racinet
|
r49090 | let node = node_from_py_bytes(py, &node)?; | ||
nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e)) | ||||
Raphaël Gomès
|
r44994 | } | ||
/// same as `get_rev()` but raises a bare `error.RevlogError` if node | ||||
/// is not found. | ||||
/// | ||||
/// No need to repeat `node` in the exception, `mercurial/revlog.py` | ||||
/// will catch and rewrap with it | ||||
def rev(&self, node: PyBytes) -> PyResult<Revision> { | ||||
self.get_rev(py, node)?.ok_or_else(|| revlog_error(py)) | ||||
} | ||||
/// return True if the node exist in the index | ||||
def has_node(&self, node: PyBytes) -> PyResult<bool> { | ||||
self.get_rev(py, node).map(|opt| opt.is_some()) | ||||
} | ||||
/// find length of shortest hex nodeid of a binary ID | ||||
def shortest(&self, node: PyBytes) -> PyResult<usize> { | ||||
let opt = self.get_nodetree(py)?.borrow(); | ||||
let nt = opt.as_ref().unwrap(); | ||||
let idx = &*self.cindex(py).borrow(); | ||||
match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) | ||||
{ | ||||
Ok(Some(l)) => Ok(l), | ||||
Ok(None) => Err(revlog_error(py)), | ||||
Err(e) => Err(nodemap_error(py, e)), | ||||
} | ||||
} | ||||
Georges Racinet
|
r49090 | def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { | ||
Raphaël Gomès
|
r44994 | let opt = self.get_nodetree(py)?.borrow(); | ||
let nt = opt.as_ref().unwrap(); | ||||
let idx = &*self.cindex(py).borrow(); | ||||
let node_as_string = if cfg!(feature = "python3-sys") { | ||||
Georges Racinet
|
r49090 | node.cast_as::<PyString>(py)?.to_string(py)?.to_string() | ||
Raphaël Gomès
|
r44994 | } | ||
else { | ||||
Georges Racinet
|
r49090 | let node = node.extract::<PyBytes>(py)?; | ||
Raphaël Gomès
|
r44994 | String::from_utf8_lossy(node.data(py)).to_string() | ||
}; | ||||
Simon Sapin
|
r47161 | let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?; | ||
Georges Racinet
|
r49090 | nt.find_bin(idx, prefix) | ||
// TODO make an inner API returning the node directly | ||||
.map(|opt| opt.map( | ||||
|rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) | ||||
.map_err(|e| nodemap_error(py, e)) | ||||
Raphaël Gomès
|
r44994 | } | ||
/// append an index entry | ||||
def append(&self, tup: PyTuple) -> PyResult<PyObject> { | ||||
if tup.len(py) < 8 { | ||||
// this is better than the panic promised by tup.get_item() | ||||
return Err( | ||||
PyErr::new::<IndexError, _>(py, "tuple index out of range")) | ||||
} | ||||
let node_bytes = tup.get_item(py, 7).extract(py)?; | ||||
let node = node_from_py_object(py, &node_bytes)?; | ||||
let mut idx = self.cindex(py).borrow_mut(); | ||||
let rev = idx.len() as Revision; | ||||
idx.append(py, tup)?; | ||||
self.get_nodetree(py)?.borrow_mut().as_mut().unwrap() | ||||
.insert(&*idx, &node, rev) | ||||
.map_err(|e| nodemap_error(py, e))?; | ||||
Ok(py.None()) | ||||
} | ||||
def __delitem__(&self, key: PyObject) -> PyResult<()> { | ||||
// __delitem__ is both for `del idx[r]` and `del idx[r1:r2]` | ||||
self.cindex(py).borrow().inner().del_item(py, key)?; | ||||
let mut opt = self.get_nodetree(py)?.borrow_mut(); | ||||
let mut nt = opt.as_mut().unwrap(); | ||||
nt.invalidate_all(); | ||||
self.fill_nodemap(py, &mut nt)?; | ||||
Ok(()) | ||||
} | ||||
// | ||||
Georges Racinet
|
r44413 | // Reforwarded C index API | ||
Raphaël Gomès
|
r44994 | // | ||
Georges Racinet
|
r44413 | |||
// index_methods (tp_methods). Same ordering as in revlog.c | ||||
/// return the gca set of the given revs | ||||
def ancestors(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "ancestors", args, kw) | ||||
} | ||||
/// return the heads of the common ancestors of the given revs | ||||
def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "commonancestorsheads", args, kw) | ||||
} | ||||
Georges Racinet
|
r44998 | /// Clear the index caches and inner py_class data. | ||
/// It is Python's responsibility to call `update_nodemap_data` again. | ||||
Georges Racinet
|
r44413 | def clearcaches(&self, *args, **kw) -> PyResult<PyObject> { | ||
Georges Racinet
|
r44998 | self.nt(py).borrow_mut().take(); | ||
self.docket(py).borrow_mut().take(); | ||||
self.mmap(py).borrow_mut().take(); | ||||
Georges Racinet
|
r44413 | self.call_cindex(py, "clearcaches", args, kw) | ||
} | ||||
r47808 | /// return the raw binary string representing a revision | |||
def entry_binary(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "entry_binary", args, kw) | ||||
} | ||||
r47811 | /// return a binary packed version of the header | |||
def pack_header(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "pack_header", args, kw) | ||||
} | ||||
Georges Racinet
|
r44413 | /// get an index entry | ||
def get(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "get", args, kw) | ||||
} | ||||
/// compute phases | ||||
def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "computephasesmapsets", args, kw) | ||||
} | ||||
/// reachableroots | ||||
def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "reachableroots2", args, kw) | ||||
} | ||||
/// get head revisions | ||||
def headrevs(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "headrevs", args, kw) | ||||
} | ||||
/// get filtered head revisions | ||||
def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "headrevsfiltered", args, kw) | ||||
} | ||||
/// True if the object is a snapshot | ||||
def issnapshot(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "issnapshot", args, kw) | ||||
} | ||||
/// Gather snapshot data in a cache dict | ||||
def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "findsnapshots", args, kw) | ||||
} | ||||
/// determine revisions with deltas to reconstruct fulltext | ||||
def deltachain(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "deltachain", args, kw) | ||||
} | ||||
/// slice planned chunk read to reach a density threshold | ||||
def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "slicechunktodensity", args, kw) | ||||
} | ||||
/// stats for the index | ||||
def stats(&self, *args, **kw) -> PyResult<PyObject> { | ||||
self.call_cindex(py, "stats", args, kw) | ||||
} | ||||
// index_sequence_methods and index_mapping_methods. | ||||
// | ||||
// Since we call back through the high level Python API, | ||||
// there's no point making a distinction between index_get | ||||
// and index_getitem. | ||||
def __len__(&self) -> PyResult<usize> { | ||||
self.cindex(py).borrow().inner().len(py) | ||||
} | ||||
def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { | ||||
// this conversion seems needless, but that's actually because | ||||
// `index_getitem` does not handle conversion from PyLong, | ||||
// which expressions such as [e for e in index] internally use. | ||||
// Note that we don't seem to have a direct way to call | ||||
Georges Racinet
|
r44998 | // PySequence_GetItem (does the job), which would possibly be better | ||
Georges Racinet
|
r44413 | // for performance | ||
let key = match key.extract::<Revision>(py) { | ||||
Ok(rev) => rev.to_py_object(py).into_object(), | ||||
Err(_) => key, | ||||
}; | ||||
self.cindex(py).borrow().inner().get_item(py, key) | ||||
} | ||||
def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> { | ||||
self.cindex(py).borrow().inner().set_item(py, key, value) | ||||
} | ||||
def __contains__(&self, item: PyObject) -> PyResult<bool> { | ||||
// ObjectProtocol does not seem to provide contains(), so | ||||
// this is an equivalent implementation of the index_contains() | ||||
// defined in revlog.c | ||||
let cindex = self.cindex(py).borrow(); | ||||
match item.extract::<Revision>(py) { | ||||
Ok(rev) => { | ||||
Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision) | ||||
} | ||||
Err(_) => { | ||||
cindex.inner().call_method( | ||||
py, | ||||
"has_node", | ||||
PyTuple::new(py, &[item]), | ||||
None)? | ||||
.extract(py) | ||||
} | ||||
} | ||||
} | ||||
Georges Racinet
|
r44995 | def nodemap_data_all(&self) -> PyResult<PyBytes> { | ||
self.inner_nodemap_data_all(py) | ||||
} | ||||
Georges Racinet
|
r44996 | def nodemap_data_incremental(&self) -> PyResult<PyObject> { | ||
self.inner_nodemap_data_incremental(py) | ||||
} | ||||
Georges Racinet
|
r44997 | def update_nodemap_data( | ||
&self, | ||||
docket: PyObject, | ||||
nm_data: PyObject | ||||
) -> PyResult<PyObject> { | ||||
self.inner_update_nodemap_data(py, docket, nm_data) | ||||
} | ||||
r47736 | @property | |||
def entry_size(&self) -> PyResult<PyInt> { | ||||
self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py) | ||||
} | ||||
Georges Racinet
|
r44413 | |||
r48042 | @property | |||
def rust_ext_compat(&self) -> PyResult<PyInt> { | ||||
self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py) | ||||
} | ||||
Georges Racinet
|
r44413 | }); | ||
impl MixedIndex { | ||||
Georges Racinet
|
r44990 | fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> { | ||
Self::create_instance( | ||||
py, | ||||
RefCell::new(cindex::Index::new(py, cindex)?), | ||||
Raphaël Gomès
|
r44994 | RefCell::new(None), | ||
Georges Racinet
|
r44996 | RefCell::new(None), | ||
Georges Racinet
|
r44997 | RefCell::new(None), | ||
Georges Racinet
|
r44990 | ) | ||
} | ||||
Raphaël Gomès
|
r44994 | /// This is scaffolding at this point, but it could also become | ||
/// a way to start a persistent nodemap or perform a | ||||
/// vacuum / repack operation | ||||
fn fill_nodemap( | ||||
&self, | ||||
py: Python, | ||||
nt: &mut NodeTree, | ||||
) -> PyResult<PyObject> { | ||||
let index = self.cindex(py).borrow(); | ||||
for r in 0..index.len() { | ||||
let rev = r as Revision; | ||||
// in this case node() won't ever return None | ||||
nt.insert(&*index, index.node(rev).unwrap(), rev) | ||||
.map_err(|e| nodemap_error(py, e))? | ||||
} | ||||
Ok(py.None()) | ||||
} | ||||
fn get_nodetree<'a>( | ||||
&'a self, | ||||
py: Python<'a>, | ||||
) -> PyResult<&'a RefCell<Option<NodeTree>>> { | ||||
if self.nt(py).borrow().is_none() { | ||||
let readonly = Box::new(Vec::new()); | ||||
let mut nt = NodeTree::load_bytes(readonly, 0); | ||||
self.fill_nodemap(py, &mut nt)?; | ||||
self.nt(py).borrow_mut().replace(nt); | ||||
} | ||||
Ok(self.nt(py)) | ||||
} | ||||
Georges Racinet
|
r44413 | /// forward a method call to the underlying C index | ||
fn call_cindex( | ||||
&self, | ||||
py: Python, | ||||
name: &str, | ||||
args: &PyTuple, | ||||
kwargs: Option<&PyDict>, | ||||
) -> PyResult<PyObject> { | ||||
self.cindex(py) | ||||
.borrow() | ||||
.inner() | ||||
.call_method(py, name, args, kwargs) | ||||
} | ||||
Georges Racinet
|
r44462 | |||
pub fn clone_cindex(&self, py: Python) -> cindex::Index { | ||||
self.cindex(py).borrow().clone_ref(py) | ||||
} | ||||
Georges Racinet
|
r44995 | |||
/// Returns the full nodemap bytes to be written as-is to disk | ||||
fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> { | ||||
let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap(); | ||||
let (readonly, bytes) = nodemap.into_readonly_and_added_bytes(); | ||||
// If there's anything readonly, we need to build the data again from | ||||
// scratch | ||||
let bytes = if readonly.len() > 0 { | ||||
let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0); | ||||
self.fill_nodemap(py, &mut nt)?; | ||||
let (readonly, bytes) = nt.into_readonly_and_added_bytes(); | ||||
assert_eq!(readonly.len(), 0); | ||||
bytes | ||||
} else { | ||||
bytes | ||||
}; | ||||
let bytes = PyBytes::new(py, &bytes); | ||||
Ok(bytes) | ||||
} | ||||
Georges Racinet
|
r44996 | |||
/// Returns the last saved docket along with the size of any changed data | ||||
/// (in number of blocks), and said data as bytes. | ||||
fn inner_nodemap_data_incremental( | ||||
&self, | ||||
py: Python, | ||||
) -> PyResult<PyObject> { | ||||
let docket = self.docket(py).borrow(); | ||||
let docket = match docket.as_ref() { | ||||
Some(d) => d, | ||||
None => return Ok(py.None()), | ||||
}; | ||||
let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap(); | ||||
let masked_blocks = node_tree.masked_readonly_blocks(); | ||||
let (_, data) = node_tree.into_readonly_and_added_bytes(); | ||||
let changed = masked_blocks * std::mem::size_of::<Block>(); | ||||
Ok((docket, changed, PyBytes::new(py, &data)) | ||||
.to_py_object(py) | ||||
.into_object()) | ||||
} | ||||
Georges Racinet
|
r44997 | |||
/// Update the nodemap from the new (mmaped) data. | ||||
/// The docket is kept as a reference for later incremental calls. | ||||
fn inner_update_nodemap_data( | ||||
&self, | ||||
py: Python, | ||||
docket: PyObject, | ||||
nm_data: PyObject, | ||||
) -> PyResult<PyObject> { | ||||
let buf = PyBuffer::get(py, &nm_data)?; | ||||
let len = buf.item_count(); | ||||
// Build a slice from the mmap'ed buffer data | ||||
let cbuf = buf.buf_ptr(); | ||||
let bytes = if std::mem::size_of::<u8>() == buf.item_size() | ||||
&& buf.is_c_contiguous() | ||||
&& u8::is_compatible_format(buf.format()) | ||||
{ | ||||
unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) } | ||||
} else { | ||||
return Err(PyErr::new::<ValueError, _>( | ||||
py, | ||||
"Nodemap data buffer has an invalid memory representation" | ||||
.to_string(), | ||||
)); | ||||
}; | ||||
// Keep a reference to the mmap'ed buffer, otherwise we get a dangling | ||||
// pointer. | ||||
self.mmap(py).borrow_mut().replace(buf); | ||||
let mut nt = NodeTree::load_bytes(Box::new(bytes), len); | ||||
let data_tip = | ||||
docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?; | ||||
self.docket(py).borrow_mut().replace(docket.clone_ref(py)); | ||||
let idx = self.cindex(py).borrow(); | ||||
let current_tip = idx.len(); | ||||
for r in (data_tip + 1)..current_tip as Revision { | ||||
let rev = r as Revision; | ||||
// in this case node() won't ever return None | ||||
nt.insert(&*idx, idx.node(rev).unwrap(), rev) | ||||
.map_err(|e| nodemap_error(py, e))? | ||||
} | ||||
*self.nt(py).borrow_mut() = Some(nt); | ||||
Ok(py.None()) | ||||
} | ||||
Georges Racinet
|
r44413 | } | ||
Georges Racinet
|
r44993 | fn revlog_error(py: Python) -> PyErr { | ||
match py | ||||
.import("mercurial.error") | ||||
.and_then(|m| m.get(py, "RevlogError")) | ||||
{ | ||||
Err(e) => e, | ||||
Raphaël Gomès
|
r48086 | Ok(cls) => PyErr::from_instance( | ||
py, | ||||
cls.call(py, (py.None(),), None).ok().into_py_object(py), | ||||
), | ||||
Georges Racinet
|
r44993 | } | ||
} | ||||
fn rev_not_in_index(py: Python, rev: Revision) -> PyErr { | ||||
PyErr::new::<ValueError, _>( | ||||
py, | ||||
format!( | ||||
"Inconsistency: Revision {} found in nodemap \ | ||||
is not in revlog index", | ||||
rev | ||||
), | ||||
) | ||||
} | ||||
/// Standard treatment of NodeMapError | ||||
fn nodemap_error(py: Python, err: NodeMapError) -> PyErr { | ||||
match err { | ||||
NodeMapError::MultipleResults => revlog_error(py), | ||||
NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r), | ||||
} | ||||
} | ||||
Georges Racinet
|
r44413 | /// Create the module, with __package__ given from parent | ||
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> { | ||||
let dotted_name = &format!("{}.revlog", package); | ||||
let m = PyModule::new(py, dotted_name)?; | ||||
m.add(py, "__package__", package)?; | ||||
m.add(py, "__doc__", "RevLog - Rust implementations")?; | ||||
m.add_class::<MixedIndex>(py)?; | ||||
let sys = PyModule::import(py, "sys")?; | ||||
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; | ||||
sys_modules.set_item(py, dotted_name, &m)?; | ||||
Ok(m) | ||||
} | ||||