##// END OF EJS Templates
copies: do full filtering at end of _changesetforwardcopies()...
copies: do full filtering at end of _changesetforwardcopies() As mentioned earlier, pathcopies() is very slow when copies are stored in the changeset. Most of the cost comes from calling _chain() for every changeset, which is slow because it needs to read manifests. It needs to read manifests to be able to filter out copies that are were created in one commit and then deleted. (It also filters out copies that were created from a file that didn't exist in the starting revision, but that's a fixed revision across calls to _chain(), so it's much cheaper.) This patch changes from _chainandfilter() to just _chain() in the main loop in _changesetforwardcopies(). It instead removes copies that have subsequently been removed by using ctx.filesremoved(). We thus rely on that to be fast. It timed this command in mozilla-unified: hg debugpathcopies FIREFOX_59_0b3_BUILD2 FIREFOX_BETA_59_END It took 18s before and 1.1s after. It's still faster when copy information is stored in filelogs: 0.70s. It also still gets slow when there are merge commits involved, because we read manifests there too. We'll deal with that later. Differential Revision: https://phab.mercurial-scm.org/D6419

File last commit:

r42600:4e4fa3a9 default
r42685:4c39c99d default
Show More
dirstate.rs
227 lines | 7.1 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-dirstate: add rust-cpython bindings to the new parse/pack functions...
r42489 // dirstate.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Bindings for the `hg::dirstate` module provided by the
//! `hg-core` package.
//!
//! From Python, this will be seen as `mercurial.rustext.dirstate`
use cpython::{
exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
Georges Racinet
rust-python3: compatibility fix for incoming PyLong...
r42548 PySequence, PythonObject, PyTuple, Python, ToPyObject,
Raphaël Gomès
rust-dirstate: add rust-cpython bindings to the new parse/pack functions...
r42489 };
use hg::{
pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
DirstatePackError, DirstateParents, DirstateParseError, DirstateVec,
};
use std::collections::HashMap;
use std::ffi::CStr;
#[cfg(feature = "python27")]
extern crate python27_sys as python_sys;
#[cfg(feature = "python3")]
extern crate python3_sys as python_sys;
use self::python_sys::PyCapsule_Import;
use libc::{c_char, c_int};
use std::mem::transmute;
/// C code uses a custom `dirstate_tuple` type, checks in multiple instances
/// for this type, and raises a Python `Exception` if the check does not pass.
/// Because this type differs only in name from the regular Python tuple, it
/// would be a good idea in the near future to remove it entirely to allow
/// for a pure Python tuple of the same effective structure to be used,
/// rendering this type and the capsule below useless.
type MakeDirstateTupleFn = extern "C" fn(
state: c_char,
mode: c_int,
size: c_int,
mtime: c_int,
) -> PyObject;
/// This is largely a copy/paste from cindex.rs, pending the merge of a
/// `py_capsule_fn!` macro in the rust-cpython project:
/// https://github.com/dgrunwald/rust-cpython/pull/169
fn decapsule_make_dirstate_tuple(py: Python) -> PyResult<MakeDirstateTupleFn> {
unsafe {
let caps_name = CStr::from_bytes_with_nul_unchecked(
b"mercurial.cext.parsers.make_dirstate_tuple_CAPI\0",
);
let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0);
if from_caps.is_null() {
return Err(PyErr::fetch(py));
}
Ok(transmute(from_caps))
}
}
fn parse_dirstate_wrapper(
py: Python,
dmap: PyDict,
copymap: PyDict,
st: PyBytes,
) -> PyResult<PyTuple> {
match parse_dirstate(st.data(py)) {
Ok((parents, dirstate_vec, copies)) => {
for (filename, entry) in dirstate_vec {
dmap.set_item(
py,
PyBytes::new(py, &filename[..]),
decapsule_make_dirstate_tuple(py)?(
Georges Racinet
rust-dirstate: architecture independence fix...
r42600 entry.state as c_char,
Raphaël Gomès
rust-dirstate: add rust-cpython bindings to the new parse/pack functions...
r42489 entry.mode,
entry.size,
entry.mtime,
),
)?;
}
for CopyVecEntry { path, copy_path } in copies {
copymap.set_item(
py,
PyBytes::new(py, path),
PyBytes::new(py, copy_path),
)?;
}
Ok((PyBytes::new(py, parents.p1), PyBytes::new(py, parents.p2))
.to_py_object(py))
}
Err(e) => Err(PyErr::new::<exc::ValueError, _>(
py,
match e {
DirstateParseError::TooLittleData => {
"too little data for parents".to_string()
}
DirstateParseError::Overflow => {
"overflow in dirstate".to_string()
}
DirstateParseError::CorruptedEntry(e) => e,
},
)),
}
}
fn pack_dirstate_wrapper(
py: Python,
dmap: PyDict,
copymap: PyDict,
pl: PyTuple,
now: PyInt,
) -> PyResult<PyBytes> {
let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
let p1: &[u8] = p1.data(py);
let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
let p2: &[u8] = p2.data(py);
let dirstate_vec: Result<DirstateVec, PyErr> = dmap
.items(py)
.iter()
.map(|(filename, stats)| {
let stats = stats.extract::<PySequence>(py)?;
let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
let state = state.data(py)[0] as i8;
let mode = stats.get_item(py, 1)?.extract(py)?;
let size = stats.get_item(py, 2)?.extract(py)?;
let mtime = stats.get_item(py, 3)?.extract(py)?;
let filename = filename.extract::<PyBytes>(py)?;
let filename = filename.data(py);
Ok((
filename.to_owned(),
DirstateEntry {
state,
mode,
size,
mtime,
},
))
})
.collect();
let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
.items(py)
.iter()
.map(|(key, value)| {
Ok((
key.extract::<PyBytes>(py)?.data(py).to_owned(),
value.extract::<PyBytes>(py)?.data(py).to_owned(),
))
})
.collect();
match pack_dirstate(
&dirstate_vec?,
&copies?,
DirstateParents { p1, p2 },
Georges Racinet
rust-python3: compatibility fix for incoming PyLong...
r42548 now.as_object().extract::<i32>(py)?,
Raphaël Gomès
rust-dirstate: add rust-cpython bindings to the new parse/pack functions...
r42489 ) {
Ok((packed, new_dirstate_vec)) => {
for (
filename,
DirstateEntry {
state,
mode,
size,
mtime,
},
) in new_dirstate_vec
{
dmap.set_item(
py,
PyBytes::new(py, &filename[..]),
decapsule_make_dirstate_tuple(py)?(
Georges Racinet
rust-dirstate: architecture independence fix...
r42600 state as c_char, mode, size, mtime,
Raphaël Gomès
rust-dirstate: add rust-cpython bindings to the new parse/pack functions...
r42489 ),
)?;
}
Ok(PyBytes::new(py, &packed))
}
Err(error) => Err(PyErr::new::<exc::ValueError, _>(
py,
match error {
DirstatePackError::CorruptedParent => {
"expected a 20-byte hash".to_string()
}
DirstatePackError::CorruptedEntry(e) => e,
DirstatePackError::BadSize(expected, actual) => {
format!("bad dirstate size: {} != {}", actual, expected)
}
},
)),
}
}
/// Create the module, with `__package__` given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let dotted_name = &format!("{}.dirstate", package);
let m = PyModule::new(py, dotted_name)?;
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "Dirstate - Rust implementation")?;
m.add(
py,
"parse_dirstate",
py_fn!(
py,
parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
),
)?;
m.add(
py,
"pack_dirstate",
py_fn!(
py,
pack_dirstate_wrapper(
dmap: PyDict,
copymap: PyDict,
pl: PyTuple,
now: PyInt
)
),
)?;
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
sys_modules.set_item(py, dotted_name, &m)?;
Ok(m)
}