##// END OF EJS Templates
copies: do full filtering at end of _changesetforwardcopies()...
copies: do full filtering at end of _changesetforwardcopies() As mentioned earlier, pathcopies() is very slow when copies are stored in the changeset. Most of the cost comes from calling _chain() for every changeset, which is slow because it needs to read manifests. It needs to read manifests to be able to filter out copies that were created in one commit and then deleted. (It also filters out copies that were created from a file that didn't exist in the starting revision, but that's a fixed revision across calls to _chain(), so it's much cheaper.) This patch changes from _chainandfilter() to just _chain() in the main loop in _changesetforwardcopies(). It instead removes copies that have subsequently been removed by using ctx.filesremoved(). We thus rely on that to be fast. I timed this command in mozilla-unified: hg debugpathcopies FIREFOX_59_0b3_BUILD2 FIREFOX_BETA_59_END It took 18s before and 1.1s after. It's still faster when copy information is stored in filelogs: 0.70s. It also still gets slow when there are merge commits involved, because we read manifests there too. We'll deal with that later. Differential Revision: https://phab.mercurial-scm.org/D6419

File last commit:

r35650:fa9747e7 default
r42685:4c39c99d default
Show More
main.rs
233 lines | 7.6 KiB | application/rls-services+xml | RustLexer
Gregory Szorc
rust: implementation of `hg`...
r35587 // main.rs -- Main routines for `hg` program
//
// Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
extern crate libc;
extern crate cpython;
extern crate python27_sys;
use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python};
use libc::{c_char, c_int};
use std::env;
use std::path::PathBuf;
Yuya Nishihara
rust: extract function to convert Path to platform CString...
r35649 use std::ffi::{CString, OsStr};
Gregory Szorc
rust: implementation of `hg`...
r35587 #[cfg(target_family = "unix")]
Yuya Nishihara
rust: convert Unix path to CString transparently...
r35650 use std::os::unix::ffi::{OsStrExt, OsStringExt};
Gregory Szorc
rust: implementation of `hg`...
r35587
/// Filesystem locations needed to start the embedded Python interpreter and
/// to find Mercurial's Python code.
#[derive(Debug)]
struct Environment {
    /// Path of the running `hg` executable. Currently stored but not read.
    _exe: PathBuf,
    /// Path of the Python interpreter; used as Python's program name.
    python_exe: PathBuf,
    /// Directory handed to `Py_SetPythonHome()`.
    python_home: PathBuf,
    /// Directory inserted at the front of `sys.path` so the Mercurial
    /// modules can be imported.
    mercurial_modules: PathBuf,
}
/// Run Mercurial locally from a source distribution or checkout.
///
/// Layout assumptions:
///
/// * `hg` lives at `<srcdir>/rust/target/<target>/hg`.
/// * The Python interpreter path is baked in at build time through the
///   `PYTHON_INTERPRETER` environment variable (set by the build script).
/// * Python home is derived from the interpreter path.
/// * The Mercurial modules live in the source root, found by walking up
///   from the `hg` binary.
///
/// # Panics
///
/// Panics if the path of the current executable cannot be determined.
#[cfg(feature = "localdev")]
fn get_environment() -> Environment {
    let exe = env::current_exe().unwrap();

    // Strip four trailing components:
    //   <srcdir>/rust/target/<target>/hg -> <srcdir>/rust/target/<target>
    //   -> <srcdir>/rust/target -> <srcdir>/rust -> <srcdir>
    let mut mercurial_modules = exe.clone();
    for _ in 0..4 {
        mercurial_modules.pop();
    }

    let python_exe = PathBuf::from(env!("PYTHON_INTERPRETER"));

    // On Windows, python2.7.exe exists at the root directory of the Python
    // install. Everywhere else, the Python install root is one level up.
    let exe_sits_in_install_root = python_exe.ends_with("python2.7.exe");
    let mut python_home = python_exe.clone();
    python_home.pop();
    if !exe_sits_in_install_root {
        python_home.pop();
    }

    Environment {
        _exe: exe,
        python_exe: python_exe,
        python_home: python_home,
        mercurial_modules: mercurial_modules,
    }
}
Yuya Nishihara
rust: convert Unix path to CString transparently...
r35650 // On UNIX, platform string is just bytes and should not contain NUL.
/// Build a NUL-terminated C string from the raw bytes of a platform string.
///
/// # Panics
///
/// Panics if the input contains a NUL byte.
#[cfg(target_family = "unix")]
fn cstring_from_os<T: AsRef<OsStr>>(s: T) -> CString {
    let raw_bytes = s.as_ref().as_bytes();
    CString::new(raw_bytes).unwrap()
}
// TODO convert to ANSI characters?
/// Build a NUL-terminated C string from a platform string on Windows.
///
/// The input is first interpreted as UTF-8 text and those bytes are copied
/// into the C string.
///
/// # Panics
///
/// Panics if the input is not valid Unicode or contains a NUL character.
#[cfg(target_family = "windows")]
fn cstring_from_os<T: AsRef<OsStr>>(s: T) -> CString {
    let text = s.as_ref().to_str().unwrap();
    CString::new(text).unwrap()
}
Gregory Szorc
rust: implementation of `hg`...
r35587 // On UNIX, argv starts as an array of char*. So it is easy to convert
// to C strings.
/// Collect the process arguments as C strings, preserving their raw bytes.
///
/// # Panics
///
/// Panics if an argument contains a NUL byte.
#[cfg(target_family = "unix")]
fn args_to_cstrings() -> Vec<CString> {
    let mut converted = Vec::new();
    for arg in env::args_os() {
        converted.push(CString::new(arg.into_vec()).unwrap());
    }
    converted
}
// TODO Windows support is incomplete. We should either use env::args_os()
// (or call into GetCommandLineW() + CommandLineToArgvW()), convert these to
// PyUnicode instances, and pass these into Python/Mercurial outside the
// standard PySys_SetArgvEx() mechanism. This will allow us to preserve the
// raw bytes (since PySys_SetArgvEx() is based on char* and can drop wchar
// data).
//
// For now, we use env::args(). This will choke on invalid UTF-8 arguments.
// But it is better than nothing.
/// Collect the process arguments as C strings on Windows.
///
/// # Panics
///
/// Panics if an argument is not valid Unicode (a limitation of
/// `env::args()`) or contains a NUL character.
#[cfg(target_family = "windows")]
fn args_to_cstrings() -> Vec<CString> {
    let mut converted = Vec::new();
    for arg in env::args() {
        converted.push(CString::new(arg).unwrap());
    }
    converted
}
fn set_python_home(env: &Environment) {
Yuya Nishihara
rust: extract function to convert Path to platform CString...
r35649 let raw = cstring_from_os(&env.python_home).into_raw();
Gregory Szorc
rust: implementation of `hg`...
r35587 unsafe {
python27_sys::Py_SetPythonHome(raw);
}
}
/// Placeholder for `HGUNICODEPEDANTRY` support.
///
/// The intended behavior is to call `sys.setdefaultencoding("undefined")`
/// when `HGUNICODEPEDANTRY` is set. That is not implemented yet, so both
/// arguments are currently unused.
///
/// # Panics
///
/// Panics when `env::var("HGUNICODEPEDANTRY")` succeeds.
fn update_encoding(_py: Python, _sys_mod: &PyModule) {
    if env::var("HGUNICODEPEDANTRY").is_err() {
        return;
    }

    // site.py removes the sys.setdefaultencoding attribute. So we need
    // to reload the module to get a handle on it. This is a lesser
    // used feature and we'll support this later.
    // TODO support this
    panic!("HGUNICODEPEDANTRY is not yet supported");
}
/// Put the directory holding Mercurial's Python modules at the front of
/// `sys.path`.
///
/// # Panics
///
/// Panics if `sys.path` cannot be read or if the `insert` call fails.
fn update_modules_path(env: &Environment, py: Python, sys_mod: &PyModule) {
    let sys_path = sys_mod.get(py, "path").unwrap();

    // NOTE(review): `to_str()` returns an `Option`, so a non-UTF-8 path is
    // passed along as `None` rather than rejected -- confirm this is intended.
    let insert_args = (0, env.mercurial_modules.to_str());

    let inserted = sys_path.call_method(py, "insert", insert_args, None);
    inserted.expect("failed to update sys.path to location of Mercurial modules");
}
fn run() -> Result<(), i32> {
let env = get_environment();
//println!("{:?}", env);
// Tell Python where it is installed.
set_python_home(&env);
// Set program name. The backing memory needs to live for the duration of the
// interpreter.
//
Gregory Szorc
rust: add TODO about lifetime of program_name variable...
r35622 // TODO consider storing this in a static or associating with lifetime of
// the Python interpreter.
//
Gregory Szorc
rust: implementation of `hg`...
r35587 // Yes, we use the path to the Python interpreter not argv[0] here. The
// reason is because Python uses the given path to find the location of
// Python files. Apparently we could define our own ``Py_GetPath()``
// implementation. But this may require statically linking Python, which is
// not desirable.
Yuya Nishihara
rust: extract function to convert Path to platform CString...
r35649 let program_name = cstring_from_os(&env.python_exe).as_ptr();
Gregory Szorc
rust: implementation of `hg`...
r35587 unsafe {
python27_sys::Py_SetProgramName(program_name as *mut i8);
}
unsafe {
python27_sys::Py_Initialize();
}
// https://docs.python.org/2/c-api/init.html#c.PySys_SetArgvEx has important
// usage information about PySys_SetArgvEx:
//
// * It says the first argument should be the script that is being executed.
// If not a script, it can be empty. We are definitely not a script.
// However, parts of Mercurial do look at sys.argv[0]. So we need to set
// something here.
//
// * When embedding Python, we should use ``PySys_SetArgvEx()`` and set
// ``updatepath=0`` for security reasons. Essentially, Python's default
// logic will treat an empty argv[0] in a manner that could result in
// sys.path picking up directories it shouldn't and this could lead to
// loading untrusted modules.
// env::args() will panic if it sees a non-UTF-8 byte sequence. And
// Mercurial supports arbitrary encodings of input data. So we need to
// use OS-specific mechanisms to get the raw bytes without UTF-8
// interference.
let args = args_to_cstrings();
let argv: Vec<*const c_char> = args.iter().map(|a| a.as_ptr()).collect();
unsafe {
python27_sys::PySys_SetArgvEx(args.len() as c_int, argv.as_ptr() as *mut *mut i8, 0);
}
let result;
{
// These need to be dropped before we call Py_Finalize(). Hence the
// block.
let gil = Python::acquire_gil();
let py = gil.python();
// Mercurial code could call sys.exit(), which will call exit()
// itself. So this may not return.
// TODO this may cause issues on Windows due to the CRT mismatch.
// Investigate if we can intercept sys.exit() or SystemExit() to
// ensure we handle process exit.
result = match run_py(&env, py) {
// Print unhandled exceptions and exit code 255, as this is what
// `python` does.
Err(err) => {
err.print(py);
Err(255)
}
Ok(()) => Ok(()),
};
}
unsafe {
python27_sys::Py_Finalize();
}
result
}
/// Prepare the `sys` module and hand control to Mercurial's Python entry
/// point.
///
/// Imports `sys`, applies `update_encoding()` and `update_modules_path()`,
/// enables `hgdemandimport` and finally calls `mercurial.dispatch.run()`.
///
/// # Errors
///
/// Returns the Python exception raised by any of the imports or calls,
/// including a failed import of `sys`.
///
/// # Panics
///
/// Panics if `update_encoding()` or `update_modules_path()` panics.
fn run_py(env: &Environment, py: Python) -> PyResult<()> {
    // Propagate a failed import like every other import below instead of
    // panicking, so the caller can report it as a Python exception.
    let sys_mod = py.import("sys")?;

    update_encoding(py, &sys_mod);
    update_modules_path(env, py, &sys_mod);

    // TODO consider a better error message on failure to import.
    let demand_mod = py.import("hgdemandimport")?;
    demand_mod.call(py, "enable", NoArgs, None)?;

    let dispatch_mod = py.import("mercurial.dispatch")?;
    dispatch_mod.call(py, "run", NoArgs, None)?;

    Ok(())
}
/// Program entry point: run Mercurial and exit with its status.
///
/// The exit code is 0 when `run()` succeeds, otherwise the code carried by
/// its error.
fn main() {
    let exit_code = run().err().unwrap_or(0);
    std::process::exit(exit_code);
}