##// END OF EJS Templates
match: convert O(n) to O(log n) in exactmatcher.visitchildrenset...
match: convert O(n) to O(log n) in exactmatcher.visitchildrenset When using narrow, during rebase this is called (at least) once per directory in the set of files in the commit being rebased. Every time it's called, we did the set arithmetic (now extracted and cached), which was probably pretty cheap but not necessary to repeat each time, looped over every item in the matcher and kept things that started with the directory we were querying. With very large narrowspecs, and a commit that touched a file in a large number of directories, this was slow. In a pathological repo, the rebase of a single commit (that touched over 17k files, I believe in approximately as many directories) with a narrowspec that had >32k entries took 8,246s of profiled time, with 5,007s of that spent in visitchildrenset (transitively). With this change, the time spent in visitchildrenset is less than 34s (which is where my profile cut off). Most of the remaining time was network access due to our custom remotefilelog-based setup not properly prefetching. Differential Revision: https://phab.mercurial-scm.org/D10294

File last commit:

r47337:98a455a6 default
r47634:8bca353b default
Show More
dirstate_map.rs
608 lines | 18.8 KiB | application/rls-services+xml | RustLexer
// dirstate_map.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Bindings for the `hg::dirstate::dirstate_map` file provided by the
//! `hg-core` package.
use std::cell::{Ref, RefCell};
use std::convert::TryInto;
use std::time::Duration;
use cpython::{
exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList,
PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
UnsafePyLeaked,
};
use crate::{
dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
dirstate::non_normal_entries::{
NonNormalEntries, NonNormalEntriesIterator,
},
dirstate::{dirs_multiset::Dirs, make_dirstate_tuple},
parsers::dirstate_parents_to_pytuple,
};
use hg::{
errors::HgError,
revlog::Node,
utils::hg_path::{HgPath, HgPathBuf},
DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
DirstateMapError, DirstateParents, EntryState, StateMapIter,
};
// TODO
// This object needs to share references to multiple members of its Rust
// inner struct, namely `copy_map`, `dirs` and `all_dirs`.
// Right now `CopyMap` is done, but it needs to have an explicit reference
// to `RustDirstateMap` which itself needs to have an encapsulation for
// every method in `CopyMap` (copymapcopy, etc.).
// This is ugly and hard to maintain.
// The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
// `py_class!` is already implemented and does not mention
// `RustDirstateMap`, rightfully so.
// All attributes also have to have a separate refcount data attribute for
// leaks, with all methods that go along for reference sharing.
py_class!(pub class DirstateMap |py| {
@shared data inner: RustDirstateMap;
// Build a new Python-visible map around a default-constructed
// `RustDirstateMap`. The `_root` argument is accepted but not used.
def __new__(_cls, _root: PyObject) -> PyResult<Self> {
    Self::create_instance(py, RustDirstateMap::default())
}
// Call `clear()` on the inner map and return Python `None`.
def clear(&self) -> PyResult<PyObject> {
    let mut dmap = self.inner(py).borrow_mut();
    dmap.clear();
    Ok(py.None())
}
// Look up `key` (a bytes path) in the inner map.
//
// Returns the entry converted by `make_dirstate_tuple` when the key is
// present, otherwise `default` (which is `None` unless supplied).
def get(
    &self,
    key: PyObject,
    default: Option<PyObject> = None
) -> PyResult<Option<PyObject>> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let found = self
        .inner(py)
        .borrow()
        .get(path)
        .map(|entry| make_dirstate_tuple(py, entry))
        .transpose()?;
    Ok(found.or(default))
}
// Record `f` in the inner map through `add_file`, with an entry built from
// `state`, `mode`, `size` and `mtime`.
//
// `oldstate` and `state` are bytes objects whose first byte is converted
// to an entry state. A `ValueError` is raised when either of them is
// empty, when the byte is not accepted by the conversion, or when
// `add_file` itself reports an error. Returns Python `None` on success.
def addfile(
    &self,
    f: PyObject,
    oldstate: PyObject,
    state: PyObject,
    mode: PyObject,
    size: PyObject,
    mtime: PyObject
) -> PyResult<PyObject> {
    let f = f.extract::<PyBytes>(py)?;
    let filename = HgPath::new(f.data(py));

    // Indexing with `[0]` would panic on an empty bytes object; look the
    // first byte up explicitly and report a Python error instead.
    let oldstate = oldstate.extract::<PyBytes>(py)?;
    let old_state: EntryState = oldstate
        .data(py)
        .first()
        .copied()
        .ok_or_else(|| {
            PyErr::new::<exc::ValueError, _>(
                py,
                "oldstate must not be empty".to_string(),
            )
        })?
        .try_into()
        .map_err(|e: HgError| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;

    let state = state.extract::<PyBytes>(py)?;
    let new_state: EntryState = state
        .data(py)
        .first()
        .copied()
        .ok_or_else(|| {
            PyErr::new::<exc::ValueError, _>(
                py,
                "state must not be empty".to_string(),
            )
        })?
        .try_into()
        .map_err(|e: HgError| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;

    let entry = DirstateEntry {
        state: new_state,
        mode: mode.extract(py)?,
        size: size.extract(py)?,
        mtime: mtime.extract(py)?,
    };

    // All arguments are converted before the mutable borrow is taken, so
    // the borrow only spans the `add_file` call itself.
    self.inner(py)
        .borrow_mut()
        .add_file(filename, old_state, entry)
        .map_err(|e: DirstateMapError| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;
    Ok(py.None())
}
// Mark `f` as removed in the inner map through `remove_file`.
//
// `oldstate` is a bytes object whose first byte is converted to an entry
// state; a `ValueError` is raised when it is empty or when the byte is
// not accepted by the conversion. Any error from `remove_file` is
// reported as `OSError("Dirstate error")`. Returns Python `None`.
def removefile(
    &self,
    f: PyObject,
    oldstate: PyObject,
    size: PyObject
) -> PyResult<PyObject> {
    let f = f.extract::<PyBytes>(py)?;
    let filename = HgPath::new(f.data(py));

    // Indexing with `[0]` would panic on an empty bytes object; look the
    // first byte up explicitly and report a Python error instead.
    let oldstate = oldstate.extract::<PyBytes>(py)?;
    let old_state: EntryState = oldstate
        .data(py)
        .first()
        .copied()
        .ok_or_else(|| {
            PyErr::new::<exc::ValueError, _>(
                py,
                "oldstate must not be empty".to_string(),
            )
        })?
        .try_into()
        .map_err(|e: HgError| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;

    let size = size.extract(py)?;

    self.inner(py)
        .borrow_mut()
        .remove_file(filename, old_state, size)
        .map_err(|_| {
            PyErr::new::<exc::OSError, _>(
                py,
                "Dirstate error".to_string(),
            )
        })?;
    Ok(py.None())
}
// Drop `f` from the inner map through `drop_file` and return the boolean
// that call produces.
//
// `oldstate` is a bytes object whose first byte is converted to an entry
// state; a `ValueError` is raised when it is empty or when the byte is
// not accepted by the conversion. An error from `drop_file` is reported
// as `OSError("Dirstate error: <details>")`.
def dropfile(
    &self,
    f: PyObject,
    oldstate: PyObject
) -> PyResult<PyBool> {
    let f = f.extract::<PyBytes>(py)?;
    let filename = HgPath::new(f.data(py));

    // Indexing with `[0]` would panic on an empty bytes object; look the
    // first byte up explicitly and report a Python error instead.
    let oldstate = oldstate.extract::<PyBytes>(py)?;
    let old_state: EntryState = oldstate
        .data(py)
        .first()
        .copied()
        .ok_or_else(|| {
            PyErr::new::<exc::ValueError, _>(
                py,
                "oldstate must not be empty".to_string(),
            )
        })?
        .try_into()
        .map_err(|e: HgError| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;

    self.inner(py)
        .borrow_mut()
        .drop_file(filename, old_state)
        .map(|dropped| dropped.to_py_object(py))
        .map_err(|e| {
            PyErr::new::<exc::OSError, _>(
                py,
                format!("Dirstate error: {}", e),
            )
        })
}
// Convert the Python iterable `files` (bytes paths) into owned paths and
// pass them, together with `now`, to `clear_ambiguous_times` on the
// inner map. Returns Python `None`.
def clearambiguoustimes(
    &self,
    files: PyObject,
    now: PyObject
) -> PyResult<PyObject> {
    let paths: PyResult<Vec<HgPathBuf>> = files
        .iter(py)?
        .map(|item| {
            let name = item?.extract::<PyBytes>(py)?;
            Ok(HgPathBuf::from_bytes(name.data(py)))
        })
        .collect();
    let mut dmap = self.inner(py).borrow_mut();
    dmap.clear_ambiguous_times(paths?, now.extract(py)?);
    Ok(py.None())
}
// Return the second set from `get_non_normal_other_parent_entries` as a
// Python `set` of bytes.
def other_parent_entries(&self) -> PyResult<PyObject> {
    let mut dmap = self.inner(py).borrow_mut();
    let other_parent = dmap.get_non_normal_other_parent_entries().1;
    let as_bytes: Vec<PyBytes> = other_parent
        .iter()
        .map(|path| PyBytes::new(py, path.as_bytes()))
        .collect();

    // The list is handed to the interpreter through a locals dict and
    // turned into a set by evaluating a Python expression.
    let locals = PyDict::new(py);
    locals.set_item(py, "other_parent", as_bytes.to_py_object(py))?;
    py.eval("set(other_parent)", None, Some(&locals))
}
// Return a `NonNormalEntries` object built from a new reference to this
// map.
def non_normal_entries(&self) -> PyResult<NonNormalEntries> {
    let owner = self.clone_ref(py);
    NonNormalEntries::from_inner(py, owner)
}
// Tell whether `key` (a bytes path) is in the first set returned by
// `get_non_normal_other_parent_entries`.
def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let mut dmap = self.inner(py).borrow_mut();
    let (non_normal, _) = dmap.get_non_normal_other_parent_entries();
    Ok(non_normal.contains(path))
}
// Return a debug rendering of the first set returned by
// `get_non_normal_other_parent_entries`, prefixed with
// "NonNormalEntries: ".
def non_normal_entries_display(&self) -> PyResult<PyString> {
    let mut dmap = self.inner(py).borrow_mut();
    let (non_normal, _) = dmap.get_non_normal_other_parent_entries();
    // The `{:?}` argument is the mapped iterator itself, not a collected
    // container.
    let text = format!(
        "NonNormalEntries: {:?}",
        non_normal.iter().map(|o| o)
    );
    Ok(PyString::new(py, &text))
}
// Forward `key` (a bytes path) to `non_normal_entries_remove` on the
// inner map. Returns Python `None`.
def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let mut dmap = self.inner(py).borrow_mut();
    dmap.non_normal_entries_remove(path);
    Ok(py.None())
}
// Convert the Python iterable `other` (bytes paths) into owned paths,
// pass them to `non_normal_entries_union` on the inner map and return the
// resulting paths as a Python list of bytes.
def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> {
    let paths: PyResult<_> = other
        .iter(py)?
        .map(|item| {
            let name = item?.extract::<PyBytes>(py)?;
            Ok(HgPathBuf::from_bytes(name.data(py)))
        })
        .collect();
    let union = self
        .inner(py)
        .borrow_mut()
        .non_normal_entries_union(paths?);

    let items: Vec<PyObject> = union
        .iter()
        .map(|path| PyBytes::new(py, path.as_bytes()).into_object())
        .collect();
    Ok(PyList::new(py, &items))
}
// Return an iterator over the first set exposed by
// `get_non_normal_other_parent_entries_panic`.
def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> {
    // The sets have to be defined while a mutable reference to the dmap
    // can still be taken, i.e. before the immutable leak below.
    {
        let mut dmap = self.inner(py).borrow_mut();
        dmap.set_non_normal_other_parent_entries(false);
    }
    let leaked = self.inner(py).leak_immutable();
    // NOTE(review): soundness of this block rests on the contract of
    // `leak_immutable` / `UnsafePyLeaked::map` - confirm against the
    // rust-cpython documentation.
    let entries = unsafe {
        leaked.map(py, |dmap| {
            dmap.get_non_normal_other_parent_entries_panic().0.iter()
        })
    };
    NonNormalEntriesIterator::from_inner(py, entries)
}
// Return the result of `has_tracked_dir` for the bytes path `d`.
// An error from that call is raised as `ValueError`.
def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
    let d = d.extract::<PyBytes>(py)?;
    let dir = HgPath::new(d.data(py));
    let tracked = self
        .inner(py)
        .borrow_mut()
        .has_tracked_dir(dir)
        .map_err(|e| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;
    Ok(tracked.to_py_object(py))
}
// Return the result of `has_dir` for the bytes path `d`.
// An error from that call is raised as `ValueError`.
def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
    let d = d.extract::<PyBytes>(py)?;
    let dir = HgPath::new(d.data(py));
    let present = self
        .inner(py)
        .borrow_mut()
        .has_dir(dir)
        .map_err(|e| {
            PyErr::new::<exc::ValueError, _>(py, e.to_string())
        })?;
    Ok(present.to_py_object(py))
}
// Pass the bytes in `st` to `parents` on the inner map and return the
// result as a Python tuple. Any error is reported as
// `OSError("Dirstate error")`.
def parents(&self, st: PyObject) -> PyResult<PyTuple> {
    let st = st.extract::<PyBytes>(py)?;
    match self.inner(py).borrow_mut().parents(st.data(py)) {
        Ok(parents) => Ok(dirstate_parents_to_pytuple(py, parents)),
        Err(_) => Err(PyErr::new::<exc::OSError, _>(
            py,
            "Dirstate error".to_string(),
        )),
    }
}
// Convert `p1` and `p2` with `extract_node_id` and store them through
// `set_parents`. Returns Python `None`.
def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
    let parents = DirstateParents {
        p1: extract_node_id(py, &p1)?,
        p2: extract_node_id(py, &p2)?,
    };
    self.inner(py).borrow_mut().set_parents(&parents);
    Ok(py.None())
}
// Pass the bytes in `st` to `read` on the inner map.
//
// Returns the parents as a Python tuple when `read` yields some, Python
// `None` when it yields none, and raises `OSError("Dirstate error")`
// when it fails.
def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
    let st = st.extract::<PyBytes>(py)?;
    // Keep the borrow in a local: the value returned by `read` may refer
    // to data owned by the map.
    let mut dmap = self.inner(py).borrow_mut();
    let parsed = dmap.read(st.data(py)).map_err(|_| {
        PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string())
    })?;
    let result = match parsed {
        Some(parents) => {
            dirstate_parents_to_pytuple(py, parents).into_object()
        }
        None => py.None(),
    };
    Ok(Some(result))
}
// Serialize the inner map with `pack`, using the parents `p1`/`p2` and
// `now` as a whole number of seconds, and return the packed bytes.
// Any error from `pack` is reported as `OSError("Dirstate error")`.
def write(
    &self,
    p1: PyObject,
    p2: PyObject,
    now: PyObject
) -> PyResult<PyBytes> {
    let now = Duration::new(now.extract(py)?, 0);
    let p1 = extract_node_id(py, &p1)?;
    let p2 = extract_node_id(py, &p2)?;
    let parents = DirstateParents { p1, p2 };

    self.inner(py)
        .borrow_mut()
        .pack(parents, now)
        .map(|packed| PyBytes::new(py, &packed))
        .map_err(|_| {
            PyErr::new::<exc::OSError, _>(
                py,
                "Dirstate error".to_string(),
            )
        })
}
// Copy the map produced by `build_file_fold_map` into a new Python dict
// with bytes keys and bytes values.
def filefoldmapasdict(&self) -> PyResult<PyDict> {
    let result = PyDict::new(py);
    let mut dmap = self.inner(py).borrow_mut();
    for (folded, path) in dmap.build_file_fold_map().iter() {
        let folded = PyBytes::new(py, folded.as_bytes()).into_object();
        let path = PyBytes::new(py, path.as_bytes()).into_object();
        result.set_item(py, folded, path)?;
    }
    Ok(result)
}
// Return `len()` of the inner map.
def __len__(&self) -> PyResult<usize> {
    let dmap = self.inner(py).borrow();
    Ok(dmap.len())
}
// Tell whether `key` (a bytes path) is a key of the inner map.
def __contains__(&self, key: PyObject) -> PyResult<bool> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let present = self.inner(py).borrow().contains_key(path);
    Ok(present)
}
// Return the entry stored for `key` (a bytes path), converted by
// `make_dirstate_tuple`. Raises `KeyError` carrying the lossily decoded
// key when it is absent.
def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let dmap = self.inner(py).borrow();
    let entry = dmap.get(path).ok_or_else(|| {
        PyErr::new::<exc::KeyError, _>(
            py,
            String::from_utf8_lossy(path.as_bytes()),
        )
    })?;
    make_dirstate_tuple(py, entry)
}
// Return a `DirstateMapKeysIterator` over the inner map's `iter()`.
def keys(&self) -> PyResult<DirstateMapKeysIterator> {
    let leaked = self.inner(py).leak_immutable();
    // NOTE(review): soundness rests on the contract of `leak_immutable` /
    // `UnsafePyLeaked::map` - confirm against the rust-cpython docs.
    let entries = unsafe { leaked.map(py, |dmap| dmap.iter()) };
    DirstateMapKeysIterator::from_inner(py, entries)
}
// Return a `DirstateMapItemsIterator` over the inner map's `iter()`.
def items(&self) -> PyResult<DirstateMapItemsIterator> {
    let leaked = self.inner(py).leak_immutable();
    // NOTE(review): soundness rests on the contract of `leak_immutable` /
    // `UnsafePyLeaked::map` - confirm against the rust-cpython docs.
    let entries = unsafe { leaked.map(py, |dmap| dmap.iter()) };
    DirstateMapItemsIterator::from_inner(py, entries)
}
// Iterating the map yields the same `DirstateMapKeysIterator` over the
// inner map's `iter()` that `keys()` builds.
def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
    let leaked = self.inner(py).leak_immutable();
    // NOTE(review): soundness rests on the contract of `leak_immutable` /
    // `UnsafePyLeaked::map` - confirm against the rust-cpython docs.
    let entries = unsafe { leaked.map(py, |dmap| dmap.iter()) };
    DirstateMapKeysIterator::from_inner(py, entries)
}
// Call `set_dirs` on the inner map, then return a `Dirs` object wrapping a
// `DirsMultiset` built from the dirstate with `Some(EntryState::Removed)`
// as second argument. Errors from either step are raised as `ValueError`.
def getdirs(&self) -> PyResult<Dirs> {
    // TODO don't copy, share the reference
    self.inner(py).borrow_mut().set_dirs().map_err(|e| {
        PyErr::new::<exc::ValueError, _>(py, e.to_string())
    })?;
    let multiset = DirsMultiset::from_dirstate(
        &self.inner(py).borrow(),
        Some(EntryState::Removed),
    )
    .map_err(|e| PyErr::new::<exc::ValueError, _>(py, e.to_string()))?;
    Dirs::from_inner(py, multiset)
}
// Call `set_all_dirs` on the inner map, then return a `Dirs` object
// wrapping a `DirsMultiset` built from the dirstate with `None` as second
// argument. Errors from either step are raised as `ValueError`.
def getalldirs(&self) -> PyResult<Dirs> {
    // TODO don't copy, share the reference
    self.inner(py).borrow_mut().set_all_dirs().map_err(|e| {
        PyErr::new::<exc::ValueError, _>(py, e.to_string())
    })?;
    let multiset =
        DirsMultiset::from_dirstate(&self.inner(py).borrow(), None)
            .map_err(|e| {
                PyErr::new::<exc::ValueError, _>(py, e.to_string())
            })?;
    Dirs::from_inner(py, multiset)
}
// TODO: all copymap* methods; see the TODO comment above the `py_class!` declaration
// Return a new Python dict holding every (key, value) pair of `copy_map`
// as bytes.
def copymapcopy(&self) -> PyResult<PyDict> {
    let result = PyDict::new(py);
    let dmap = self.inner(py).borrow();
    for (dest, source) in dmap.copy_map.iter() {
        let dest = PyBytes::new(py, dest.as_bytes());
        let source = PyBytes::new(py, source.as_bytes());
        result.set_item(py, dest, source)?;
    }
    Ok(result)
}
// Return the `copy_map` value stored for `key` (a bytes path) as bytes.
// Raises `KeyError` carrying the lossily decoded key when it is absent.
def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
    let key = key.extract::<PyBytes>(py)?;
    let dmap = self.inner(py).borrow();
    let copy = dmap
        .copy_map
        .get(HgPath::new(key.data(py)))
        .ok_or_else(|| {
            PyErr::new::<exc::KeyError, _>(
                py,
                String::from_utf8_lossy(key.data(py)),
            )
        })?;
    Ok(PyBytes::new(py, copy.as_bytes()))
}
// Return a `CopyMap` object built from a new reference to this map.
def copymap(&self) -> PyResult<CopyMap> {
    let owner = self.clone_ref(py);
    CopyMap::from_inner(py, owner)
}
// Return `len()` of `copy_map`.
def copymaplen(&self) -> PyResult<usize> {
    let dmap = self.inner(py).borrow();
    Ok(dmap.copy_map.len())
}
// Tell whether `key` (a bytes path) is a key of `copy_map`.
def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let dmap = self.inner(py).borrow();
    Ok(dmap.copy_map.contains_key(path))
}
// Return the `copy_map` value stored for `key` (a bytes path) as bytes,
// or `default` when the key is absent.
def copymapget(
    &self,
    key: PyObject,
    default: Option<PyObject>
) -> PyResult<Option<PyObject>> {
    let key = key.extract::<PyBytes>(py)?;
    let path = HgPath::new(key.data(py));
    let dmap = self.inner(py).borrow();
    let found = dmap
        .copy_map
        .get(path)
        .map(|copy| PyBytes::new(py, copy.as_bytes()).into_object());
    Ok(found.or(default))
}
// Insert the pair (`key`, `value`), both bytes paths, into `copy_map`.
// Returns Python `None`.
def copymapsetitem(
    &self,
    key: PyObject,
    value: PyObject
) -> PyResult<PyObject> {
    let key = key.extract::<PyBytes>(py)?;
    let value = value.extract::<PyBytes>(py)?;
    let dest = HgPathBuf::from_bytes(key.data(py));
    let source = HgPathBuf::from_bytes(value.data(py));
    self.inner(py).borrow_mut().copy_map.insert(dest, source);
    Ok(py.None())
}
// Remove `key` (a bytes path) from `copy_map`.
//
// Returns the removed value as bytes when the key was present, otherwise
// `default`. The removed value used to be discarded and `None` returned
// on a hit; returning it matches what callers of a dict-like `pop` expect.
def copymappop(
    &self,
    key: PyObject,
    default: Option<PyObject>
) -> PyResult<Option<PyObject>> {
    let key = key.extract::<PyBytes>(py)?;
    let removed = self
        .inner(py)
        .borrow_mut()
        .copy_map
        .remove(HgPath::new(key.data(py)));
    match removed {
        Some(copy) => Ok(Some(
            PyBytes::new(py, copy.as_bytes()).into_object(),
        )),
        None => Ok(default),
    }
}
// Return a `CopyMapKeysIterator` over `copy_map.iter()`.
def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
    let leaked = self.inner(py).leak_immutable();
    // NOTE(review): soundness rests on the contract of `leak_immutable` /
    // `UnsafePyLeaked::map` - confirm against the rust-cpython docs.
    let copies = unsafe { leaked.map(py, |dmap| dmap.copy_map.iter()) };
    CopyMapKeysIterator::from_inner(py, copies)
}
// Return a `CopyMapItemsIterator` over `copy_map.iter()`.
def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
    let leaked = self.inner(py).leak_immutable();
    // NOTE(review): soundness rests on the contract of `leak_immutable` /
    // `UnsafePyLeaked::map` - confirm against the rust-cpython docs.
    let copies = unsafe { leaked.map(py, |dmap| dmap.copy_map.iter()) };
    CopyMapItemsIterator::from_inner(py, copies)
}
});
impl DirstateMap {
    /// Immutably borrow the wrapped `RustDirstateMap`.
    pub fn get_inner<'a>(
        &'a self,
        py: Python<'a>,
    ) -> Ref<'a, RustDirstateMap> {
        self.inner(py).borrow()
    }

    /// Turn one iterator item into its key as Python bytes
    /// (variant for builds without the `dirstate-tree` feature, where
    /// items are pairs of references).
    #[cfg(not(feature = "dirstate-tree"))]
    fn translate_key(
        py: Python,
        res: (&HgPathBuf, &DirstateEntry),
    ) -> PyResult<Option<PyBytes>> {
        let (path, _) = res;
        Ok(Some(PyBytes::new(py, path.as_bytes())))
    }

    /// Turn one iterator item into a `(bytes key, dirstate tuple)` pair
    /// (variant for builds without the `dirstate-tree` feature).
    #[cfg(not(feature = "dirstate-tree"))]
    fn translate_key_value(
        py: Python,
        res: (&HgPathBuf, &DirstateEntry),
    ) -> PyResult<Option<(PyBytes, PyObject)>> {
        let (path, entry) = res;
        let key = PyBytes::new(py, path.as_bytes());
        let value = make_dirstate_tuple(py, &entry)?;
        Ok(Some((key, value)))
    }

    /// Turn one iterator item into its key as Python bytes
    /// (variant for builds with the `dirstate-tree` feature, where items
    /// are owned pairs).
    #[cfg(feature = "dirstate-tree")]
    fn translate_key(
        py: Python,
        res: (HgPathBuf, DirstateEntry),
    ) -> PyResult<Option<PyBytes>> {
        let (path, _) = res;
        Ok(Some(PyBytes::new(py, path.as_bytes())))
    }

    /// Turn one iterator item into a `(bytes key, dirstate tuple)` pair
    /// (variant for builds with the `dirstate-tree` feature).
    #[cfg(feature = "dirstate-tree")]
    fn translate_key_value(
        py: Python,
        res: (HgPathBuf, DirstateEntry),
    ) -> PyResult<Option<(PyBytes, PyObject)>> {
        let (path, entry) = res;
        let key = PyBytes::new(py, path.as_bytes());
        let value = make_dirstate_tuple(py, &entry)?;
        Ok(Some((key, value)))
    }
}
// Declares `DirstateMapKeysIterator`, the type returned by
// `DirstateMap.keys()` and `DirstateMap.__iter__()`.
// The arguments are: the iterator type name, the leaked Rust iterator it
// wraps, the function used on each item (`DirstateMap::translate_key`) and
// that function's success type.
// NOTE(review): the exact expansion lives in the `py_shared_iterator!`
// macro definition, which is not visible here - confirm there.
py_shared_iterator!(
DirstateMapKeysIterator,
UnsafePyLeaked<StateMapIter<'static>>,
DirstateMap::translate_key,
Option<PyBytes>
);
// Declares `DirstateMapItemsIterator`, the type returned by
// `DirstateMap.items()`.
// The arguments are: the iterator type name, the leaked Rust iterator it
// wraps, the function used on each item
// (`DirstateMap::translate_key_value`) and that function's success type.
// NOTE(review): the exact expansion lives in the `py_shared_iterator!`
// macro definition, which is not visible here - confirm there.
py_shared_iterator!(
DirstateMapItemsIterator,
UnsafePyLeaked<StateMapIter<'static>>,
DirstateMap::translate_key_value,
Option<(PyBytes, PyObject)>
);
/// Convert a Python bytes object into a `Node`.
///
/// # Errors
///
/// Returns the extraction error when `obj` is not a bytes object, and a
/// `ValueError` when the bytes are rejected by the conversion to `Node`.
fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<Node> {
    let bytes = obj.extract::<PyBytes>(py)?;
    // The annotation pins the conversion target so the error type of
    // `try_into` is known inside the closure below.
    let node: Result<Node, _> = bytes.data(py).try_into();
    node.map_err(|e| PyErr::new::<exc::ValueError, _>(py, e.to_string()))
}