##// END OF EJS Templates
copies-rust: move is_ancestor caching within the rust code...
copies-rust: move is_ancestor caching within the rust code Now that the OrdMap merging is fast, smaller things start to matters. We move the caching of `is_ancestor` call within the Rust code. This avoid round-trip to Python and help us to shave more time on our slower case: Repo Cases Source-Rev Dest-Rev Old-Time New-Time Difference Factor ------------------------------------------------------------------------------------------------------------------------------------ pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 2.780174 s, 2.137894 s, -0.642280 s, × 0.7690 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 9.843481 s, 8.100385 s, -1.743096 s, × 0.8229 Note: I would happily have used native code for ancestors computation, however I failed (did not tried hard) to created a rust version that goes as fast as the current C version. Below are full tables for: - this change compared to the previous change - this change compared to filelog performance Repo Cases Source-Rev Dest-Rev Old-Time New-Time Difference Factor ------------------------------------------------------------------------------------------------------------------------------------ mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 0.000049 s, 0.000047 s, -0.000002 s, × 0.9592 mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 0.000182 s, 0.000181 s, -0.000001 s, × 0.9945 mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 0.005872 s, 0.005852 s, -0.000020 s, × 0.9966 pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 0.000229 s, 0.000229 s, +0.000000 s, × 1.0000 pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 0.000058 s, 0.000058 s, +0.000000 s, × 1.0000 pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 0.000148 s, 0.000146 s, -0.000002 s, × 0.9865 pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 0.001205 s, 0.001206 s, +0.000001 s, × 1.0008 pypy x_revs_x000_added_x000_copies df6f7a526b60 
a83dc6a2d56f : 0.025662 s, 0.025275 s, -0.000387 s, × 0.9849 pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 0.080113 s, 0.080303 s, +0.000190 s, × 1.0024 pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 0.153030 s, 0.152641 s, -0.000389 s, × 0.9975 pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 0.098774 s, 0.099107 s, +0.000333 s, × 1.0034 pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 2.780174 s, 2.137894 s, -0.642280 s, × 0.7690 pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 0.022218 s, 0.022202 s, -0.000016 s, × 0.9993 pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 0.252125 s, 0.228946 s, -0.023179 s, × 0.9081 netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 0.000186 s, 0.000186 s, +0.000000 s, × 1.0000 netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 0.000133 s, 0.000133 s, +0.000000 s, × 1.0000 netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 0.000320 s, 0.000320 s, +0.000000 s, × 1.0000 netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 0.001336 s, 0.001339 s, +0.000003 s, × 1.0022 netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 0.015573 s, 0.015694 s, +0.000121 s, × 1.0078 netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 0.018667 s, 0.018457 s, -0.000210 s, × 0.9888 netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 0.112534 s, 0.111691 s, -0.000843 s, × 0.9925 netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 1.231869 s, 1.166017 s, -0.065852 s, × 0.9465 mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 0.000197 s, 0.000197 s, +0.000000 s, × 1.0000 mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 0.000637 s, 0.000626 s, -0.000011 s, × 0.9827 mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 0.000303 s, 0.000303 s, +0.000000 s, × 1.0000 mozilla-central 
x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 0.001663 s, 0.001679 s, +0.000016 s, × 1.0096 mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 0.007008 s, 0.006947 s, -0.000061 s, × 0.9913 mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 0.127385 s, 0.133070 s, +0.005685 s, × 1.0446 mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.008740 s, 0.008705 s, -0.000035 s, × 0.9960 mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.005783 s, 0.005913 s, +0.000130 s, × 1.0225 mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 0.102184 s, 0.101373 s, -0.000811 s, × 0.9921 mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 0.046220 s, 0.046526 s, +0.000306 s, × 1.0066 mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 0.315271 s, 0.313954 s, -0.001317 s, × 0.9958 mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 3.478747 s, 3.367395 s, -0.111352 s, × 0.9680 mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 4.766435 s, 4.691820 s, -0.074615 s, × 0.9843 mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 0.001214 s, 0.001199 s, -0.000015 s, × 0.9876 mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 0.001221 s, 0.001216 s, -0.000005 s, × 0.9959 mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 0.000613 s, 0.000613 s, +0.000000 s, × 1.0000 mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 0.001904 s, 0.001906 s, +0.000002 s, × 1.0011 mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 0.093000 s, 0.092766 s, -0.000234 s, × 0.9975 mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 0.132194 s, 0.136074 s, +0.003880 s, × 1.0294 mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.009069 s, 0.009067 s, -0.000002 s, × 
0.9998 mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.006169 s, 0.006243 s, +0.000074 s, × 1.0120 mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 0.115540 s, 0.114463 s, -0.001077 s, × 0.9907 mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 0.435381 s, 0.433683 s, -0.001698 s, × 0.9961 mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 0.415461 s, 0.411278 s, -0.004183 s, × 0.9899 mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 0.155946 s, 0.155133 s, -0.000813 s, × 0.9948 mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 0.048521 s, 0.048933 s, +0.000412 s, × 1.0085 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 9.843481 s, 8.100385 s, -1.743096 s, × 0.8229 mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 1.465128 s, 1.446720 s, -0.018408 s, × 0.9874 mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 1.374283 s, 1.369537 s, -0.004746 s, × 0.9965 mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 5.255158 s, 5.186079 s, -0.069079 s, × 0.9869 Repo Case Source-Rev Dest-Rev filelog sidedata Difference Factor -------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 0.000892 s, 0.000047 s, -0.000845 s, × 0.052691 mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 0.001823 s, 0.000181 s, -0.001642 s, × 0.099287 mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 0.018063 s, 0.005852 s, -0.012211 s, × 0.323977 pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 0.001505 s, 0.000229 s, -0.001276 s, × 0.152159 pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 0.205895 s, 0.000058 s, -0.205837 s, × 0.000282 pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 
0.017021 s, 0.000146 s, -0.016875 s, × 0.008578 pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 0.019422 s, 0.001206 s, -0.018216 s, × 0.062095 pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 0.767740 s, 0.025275 s, -0.742465 s, × 0.032921 pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 1.188515 s, 0.080303 s, -1.108212 s, × 0.067566 pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 1.251968 s, 0.152641 s, -1.099327 s, × 0.121921 pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 1.616799 s, 0.099107 s, -1.517692 s, × 0.061298 pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 0.001057 s, 2.137894 s, +2.136837 s, × 2022.605487 pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 1.069485 s, 0.022202 s, -1.047283 s, × 0.020760 pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 1.350162 s, 0.228946 s, -1.121216 s, × 0.169569 netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 0.028008 s, 0.000186 s, -0.027822 s, × 0.006641 netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 0.132281 s, 0.000133 s, -0.132148 s, × 0.001005 netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 0.025311 s, 0.000320 s, -0.024991 s, × 0.012643 netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 0.052957 s, 0.001339 s, -0.051618 s, × 0.025285 netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 0.038011 s, 0.015694 s, -0.022317 s, × 0.412880 netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 0.198639 s, 0.018457 s, -0.180182 s, × 0.092917 netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 0.955713 s, 0.111691 s, -0.844022 s, × 0.116867 netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 3.838886 s, 1.166017 s, -2.672869 s, × 0.303738 mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 0.024548 s, 0.000197 s, -0.024351 s, × 0.008025 mozilla-central 
x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 0.143394 s, 0.000626 s, -0.142768 s, × 0.004366 mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 0.026046 s, 0.000303 s, -0.025743 s, × 0.011633 mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 0.085440 s, 0.001679 s, -0.083761 s, × 0.019651 mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 0.195656 s, 0.006947 s, -0.188709 s, × 0.035506 mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 2.190874 s, 0.133070 s, -2.057804 s, × 0.060738 mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.090208 s, 0.008705 s, -0.081503 s, × 0.096499 mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.747367 s, 0.005913 s, -0.741454 s, × 0.007912 mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 1.152863 s, 0.101373 s, -1.051490 s, × 0.087932 mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 6.598336 s, 0.046526 s, -6.551810 s, × 0.007051 mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 3.255015 s, 0.313954 s, -2.941061 s, × 0.096452 mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 15.668041 s, 3.367395 s, -12.300646 s, × 0.214921 mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 20.439638 s, 4.691820 s, -15.747818 s, × 0.229545 mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 0.080923 s, 0.001199 s, -0.079724 s, × 0.014817 mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 0.498456 s, 0.001216 s, -0.497240 s, × 0.002440 mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 0.020798 s, 0.000613 s, -0.020185 s, × 0.029474 mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 0.226930 s, 0.001906 s, -0.225024 s, × 0.008399 mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1.113005 s, 
0.092766 s, -1.020239 s, × 0.083347 mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 2.230671 s, 0.136074 s, -2.094597 s, × 0.061001 mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.089672 s, 0.009067 s, -0.080605 s, × 0.101113 mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.740221 s, 0.006243 s, -0.733978 s, × 0.008434 mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 1.185881 s, 0.114463 s, -1.071418 s, × 0.096521 mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 0.086072 s, 0.433683 s, +0.347611 s, × 5.038607 mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 0.081321 s, 0.411278 s, +0.329957 s, × 5.057464 mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 7.528370 s, 0.155133 s, -7.373237 s, × 0.020606 mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 6.757368 s, 0.048933 s, -6.708435 s, × 0.007241 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 7.643752 s, 8.100385 s, +0.456633 s, × 1.059739 mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 9.704242 s, 1.446720 s, -8.257522 s, × 0.149081 mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 0.092845 s, killed mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 26.626870 s, 1.369537 s, -25.257333 s, × 0.051434 mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 0.092953 s, killed mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 0.227131 s, killed mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 18.884666 s, 5.186079 s, -13.698587 s, × 0.274619 mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 21.451622 s, killed mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 25.152558 s, killed Differential Revision: 
https://phab.mercurial-scm.org/D9303

File last commit:

r46109:2d5dfc8f default
r46586:8b99c473 default
Show More
hg_path.rs
773 lines | 24.2 KiB | application/rls-services+xml | RustLexer
// hg_path.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
use std::borrow::Borrow;
use std::convert::TryFrom;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::ops::Deref;
use std::path::{Path, PathBuf};
/// Reasons why a byte sequence or a filesystem path is rejected as an
/// `HgPath`.
///
/// The first variants describe malformed bytes and carry the raw bytes of the
/// invalid `HgPath`; the remaining ones are produced by path auditing and
/// carry the offending path(s).
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
/// The path starts with a `/`. Holds the bytes from the invalid `HgPath`.
LeadingSlash(Vec<u8>),
/// The path contains two `/` in a row.
ConsecutiveSlashes {
// Bytes from the invalid `HgPath`.
bytes: Vec<u8>,
// Index, within `bytes`, of the second slash of the pair.
second_slash_index: usize,
},
/// The path contains a null byte.
ContainsNullByte {
// Bytes from the invalid `HgPath`.
bytes: Vec<u8>,
// Index, within `bytes`, of the null byte.
null_byte_index: usize,
},
/// The bytes could not be decoded. Holds the bytes that failed to decode.
DecodeError(Vec<u8>),
// The rest come from audit errors.
/// The audited path ends with a slash.
EndsWithSlash(HgPathBuf),
/// The audited path contains an illegal component.
ContainsIllegalComponent(HgPathBuf),
/// Path is inside the `.hg` folder
InsideDotHg(HgPathBuf),
/// The audited `path` is inside the nested repository `nested_repo`.
IsInsideNestedRepo {
path: HgPathBuf,
nested_repo: HgPathBuf,
},
/// The audited `path` traverses the symbolic link `symlink`.
TraversesSymbolicLink {
path: HgPathBuf,
symlink: HgPathBuf,
},
/// The audited path cannot be turned into a filesystem path.
NotFsCompliant(HgPathBuf),
/// `path` is not located under `root`.
/// `path` is the smallest invalid path
NotUnderRoot {
path: PathBuf,
root: PathBuf,
},
}
impl ToString for HgPathError {
fn to_string(&self) -> String {
match self {
HgPathError::LeadingSlash(bytes) => {
format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
}
HgPathError::ConsecutiveSlashes {
bytes,
second_slash_index: pos,
} => format!(
"Invalid HgPath '{:?}': consecutive slashes at pos {}.",
bytes, pos
),
HgPathError::ContainsNullByte {
bytes,
null_byte_index: pos,
} => format!(
"Invalid HgPath '{:?}': contains null byte at pos {}.",
bytes, pos
),
HgPathError::DecodeError(bytes) => {
format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
}
HgPathError::EndsWithSlash(path) => {
format!("Audit failed for '{}': ends with a slash.", path)
}
HgPathError::ContainsIllegalComponent(path) => format!(
"Audit failed for '{}': contains an illegal component.",
path
),
HgPathError::InsideDotHg(path) => format!(
"Audit failed for '{}': is inside the '.hg' folder.",
path
),
HgPathError::IsInsideNestedRepo {
path,
nested_repo: nested,
} => format!(
"Audit failed for '{}': is inside a nested repository '{}'.",
path, nested
),
HgPathError::TraversesSymbolicLink { path, symlink } => format!(
"Audit failed for '{}': traverses symbolic link '{}'.",
path, symlink
),
HgPathError::NotFsCompliant(path) => format!(
"Audit failed for '{}': cannot be turned into a \
filesystem path.",
path
),
HgPathError::NotUnderRoot { path, root } => format!(
"Audit failed for '{}': not under root {}.",
path.display(),
root.display()
),
}
}
}
/// Converts an `HgPathError` into an I/O error of kind `InvalidData` whose
/// payload is the textual description of the path error.
impl From<HgPathError> for std::io::Error {
    fn from(e: HgPathError) -> Self {
        use std::io::{Error, ErrorKind};

        let description = e.to_string();
        Error::new(ErrorKind::InvalidData, description)
    }
}
/// This is a repository-relative path (or canonical path):
///     - no null characters
///     - `/` separates directories
///     - no consecutive slashes
///     - no leading slash,
///     - no `.` nor `..` of special meaning
///     - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// NOTE: (adapted from a comment in the stdlib)
// `HgPath::new()` relies on `HgPath` being layout-compatible with `[u8]`,
// since it casts a `*const [u8]` into a `*const HgPath`.
// `#[repr(transparent)]` is what guarantees that compatibility, so the
// attribute must stay for as long as that cast exists.
// Anyway, the representation and layout of `HgPath` are considered an
// implementation detail, are not documented and must not be relied upon
// outside of this module.
#[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
impl HgPath {
    /// Borrows anything that can be viewed as bytes as an `HgPath`, without
    /// copying and without validating the content (see `check_state`).
    pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
        // SAFETY: `HgPath` has `[u8]` as its only field, so the cast keeps
        // the same address and length, and the returned reference borrows
        // from `s`. This relies on `HgPath` being layout-compatible with
        // `[u8]`, as discussed in the note on the struct definition.
        unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
    }

    /// Returns `true` if the path contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the length of the path, in bytes.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Copies the bytes of this path into a new owned `HgPathBuf`.
    fn to_hg_path_buf(&self) -> HgPathBuf {
        HgPathBuf {
            inner: self.inner.to_owned(),
        }
    }

    /// Returns an iterator over the bytes of the path.
    pub fn bytes(&self) -> std::slice::Iter<'_, u8> {
        self.inner.iter()
    }

    /// Returns an owned copy with ASCII letters mapped to upper case.
    pub fn to_ascii_uppercase(&self) -> HgPathBuf {
        HgPathBuf::from(self.inner.to_ascii_uppercase())
    }

    /// Returns an owned copy with ASCII letters mapped to lower case.
    pub fn to_ascii_lowercase(&self) -> HgPathBuf {
        HgPathBuf::from(self.inner.to_ascii_lowercase())
    }

    /// Returns the raw bytes of the path.
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner
    }

    /// Returns `true` if the byte `other` appears anywhere in the path.
    pub fn contains(&self, other: u8) -> bool {
        self.inner.contains(&other)
    }

    /// Returns `true` if the bytes of `needle` are a prefix of this path.
    /// This is a plain byte comparison, not a path-component comparison.
    pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
        self.inner.starts_with(needle.as_ref().as_bytes())
    }

    /// Returns the path without its last byte if that byte is a `/`.
    /// At most one slash is removed.
    pub fn trim_trailing_slash(&self) -> &Self {
        match self.inner.split_last() {
            Some((&b'/', rest)) => Self::new(rest),
            _ => self,
        }
    }

    /// Returns a tuple of slices `(base, filename)` resulting from the split
    /// at the rightmost `/`, if any.
    ///
    /// # Examples:
    ///
    /// ```
    /// use hg::utils::hg_path::HgPath;
    ///
    /// let path = HgPath::new(b"cool/hg/path").split_filename();
    /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
    ///
    /// let path = HgPath::new(b"pathwithoutsep").split_filename();
    /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
    /// ```
    pub fn split_filename(&self) -> (&Self, &Self) {
        match self.inner.iter().rposition(|c| *c == b'/') {
            None => (HgPath::new(""), self),
            Some(slash) => (
                HgPath::new(&self.inner[..slash]),
                HgPath::new(&self.inner[slash + 1..]),
            ),
        }
    }

    /// Returns a new path made of `self` followed by `other`, inserting a
    /// `/` between them unless `self` is empty or already ends with one.
    ///
    /// `other` is appended as is: a leading slash in `other` is not removed,
    /// so the result may contain consecutive slashes.
    pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
        let other = other.as_ref().as_bytes();
        // Build the result in a single allocation: both parts plus the
        // optional separator.
        let mut inner =
            Vec::with_capacity(self.inner.len() + 1 + other.len());
        inner.extend_from_slice(&self.inner);
        if !inner.is_empty() && inner.last() != Some(&b'/') {
            inner.push(b'/');
        }
        inner.extend_from_slice(other);
        HgPathBuf::from(inner)
    }

    /// Returns everything before the rightmost `/`, or an empty path when
    /// there is no `/`.
    pub fn parent(&self) -> &Self {
        self.split_filename().0
    }

    /// Given a base directory, returns the slice of `self` relative to the
    /// base directory. If `base` is not a directory (does not end with a
    /// `b'/'`), returns `None`.
    ///
    /// An empty `base` is accepted and yields `self` unchanged. `None` is
    /// also returned when `self` does not start with `base`.
    pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
        let base = base.as_ref();
        if base.is_empty() {
            return Some(self);
        }
        let is_dir = base.as_bytes().ends_with(b"/");
        if is_dir && self.starts_with(base) {
            Some(Self::new(&self.inner[base.len()..]))
        } else {
            None
        }
    }

    /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
    ///
    /// Split a pathname into drive/UNC sharepoint and relative path
    /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
    /// be empty.
    ///
    /// If you assign
    ///  result = split_drive(p)
    /// It is always true that:
    ///  result[0] + result[1] == p
    ///
    /// If the path contained a drive letter, drive_or_unc will contain
    /// everything up to and including the colon.
    /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
    ///
    /// If the path contained a UNC path, the drive_or_unc will contain the
    /// host name and share up to but not including the fourth directory
    /// separator character.
    /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
    /// "/dir")
    ///
    /// Paths cannot contain both a drive letter and a UNC path.
    #[cfg(windows)]
    pub fn split_drive(&self) -> (&HgPath, &HgPath) {
        let bytes = self.as_bytes();
        let is_sep = |b: u8| std::path::is_separator(b as char);

        if self.len() < 2 {
            (HgPath::new(b""), self)
        } else if is_sep(bytes[0])
            && is_sep(bytes[1])
            && (self.len() == 2 || !is_sep(bytes[2]))
        {
            // Is a UNC path:
            // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
            // \\machine\mountpoint\directory\etc\...
            //           directory ^^^^^^^^^^^^^^^
            let machine_end_index =
                bytes[2..].iter().position(|b| is_sep(*b));
            let mountpoint_start_index = if let Some(i) = machine_end_index
            {
                i + 2
            } else {
                return (HgPath::new(b""), self);
            };

            match bytes[mountpoint_start_index + 1..]
                .iter()
                .position(|b| is_sep(*b))
            {
                // A UNC path can't have two slashes in a row
                // (after the initial two)
                Some(0) => (HgPath::new(b""), self),
                Some(i) => {
                    let (a, b) =
                        bytes.split_at(mountpoint_start_index + 1 + i);
                    (HgPath::new(a), HgPath::new(b))
                }
                None => (self, HgPath::new(b"")),
            }
        } else if bytes[1] == b':' {
            // Drive path c:\directory
            let (a, b) = bytes.split_at(2);
            (HgPath::new(a), HgPath::new(b))
        } else {
            (HgPath::new(b""), self)
        }
    }

    /// Split a pathname into drive and path. On Posix, drive is always empty.
    #[cfg(unix)]
    pub fn split_drive(&self) -> (&HgPath, &HgPath) {
        (HgPath::new(b""), self)
    }

    /// Checks for errors in the path, short-circuiting at the first one.
    /// This generates fine-grained errors useful for debugging.
    /// To simply check if the path is valid during tests, use `is_valid`.
    ///
    /// An empty path is valid. A leading slash is reported before any other
    /// problem; null bytes and consecutive slashes are then reported in the
    /// order in which they appear.
    pub fn check_state(&self) -> Result<(), HgPathError> {
        if self.is_empty() {
            return Ok(());
        }
        let bytes = self.as_bytes();
        if bytes[0] == b'/' {
            return Err(HgPathError::LeadingSlash(bytes.to_vec()));
        }
        let mut previous_byte = None;
        for (index, &byte) in bytes.iter().enumerate() {
            match byte {
                0 => {
                    return Err(HgPathError::ContainsNullByte {
                        bytes: bytes.to_vec(),
                        null_byte_index: index,
                    })
                }
                b'/' if previous_byte == Some(b'/') => {
                    return Err(HgPathError::ConsecutiveSlashes {
                        bytes: bytes.to_vec(),
                        second_slash_index: index,
                    })
                }
                _ => (),
            }
            previous_byte = Some(byte);
        }
        Ok(())
    }

    /// Only usable during tests to force developers to handle invalid states
    #[cfg(test)]
    fn is_valid(&self) -> bool {
        self.check_state().is_ok()
    }
}
/// Debug output wraps the lossily UTF-8 decoded path in `HgPath(...)`, the
/// decoded text being itself shown in its quoted `Debug` form.
impl fmt::Debug for HgPath {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lossy = String::from_utf8_lossy(&self.inner);
        write!(f, "HgPath({:?})", lossy)
    }
}
/// Displays the path as text, decoding it lossily as UTF-8. Formatting
/// options given by the caller (width, padding, ...) are not applied.
impl fmt::Display for HgPath {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lossy = String::from_utf8_lossy(&self.inner);
        f.write_str(&lossy)
    }
}
/// Owned counterpart of `HgPath`, storing the bytes of the path in a
/// `Vec<u8>`. The default value is the empty path.
#[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
pub struct HgPathBuf {
// Raw bytes of the path.
inner: Vec<u8>,
}
impl HgPathBuf {
pub fn new() -> Self {
Default::default()
}
pub fn push(&mut self, byte: u8) {
self.inner.push(byte);
}
pub fn from_bytes(s: &[u8]) -> HgPathBuf {
HgPath::new(s).to_owned()
}
pub fn into_vec(self) -> Vec<u8> {
self.inner
}
}
/// Debug output wraps the lossily UTF-8 decoded path in `HgPathBuf(...)`, the
/// decoded text being itself shown in its quoted `Debug` form.
impl fmt::Debug for HgPathBuf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lossy = String::from_utf8_lossy(&self.inner);
        write!(f, "HgPathBuf({:?})", lossy)
    }
}
/// Displays the path as text, decoding it lossily as UTF-8. Formatting
/// options given by the caller (width, padding, ...) are not applied.
impl fmt::Display for HgPathBuf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lossy = String::from_utf8_lossy(&self.inner);
        f.write_str(&lossy)
    }
}
/// Gives an `HgPathBuf` access to all the methods of the borrowed `HgPath`
/// type by viewing its bytes as an `HgPath`.
impl Deref for HgPathBuf {
    type Target = HgPath;

    #[inline]
    fn deref(&self) -> &Self::Target {
        HgPath::new(self.inner.as_slice())
    }
}
impl From<Vec<u8>> for HgPathBuf {
fn from(vec: Vec<u8>) -> Self {
Self { inner: vec }
}
}
/// Builds an owned path from a reference to anything that can be viewed as
/// an `HgPath`.
impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
    fn from(s: &T) -> HgPathBuf {
        let borrowed: &HgPath = s.as_ref();
        borrowed.to_owned()
    }
}
impl Into<Vec<u8>> for HgPathBuf {
fn into(self) -> Vec<u8> {
self.inner
}
}
/// Borrows the owned path as an `HgPath` viewing the same bytes.
impl Borrow<HgPath> for HgPathBuf {
    fn borrow(&self) -> &HgPath {
        let bytes: &[u8] = &self.inner;
        HgPath::new(bytes)
    }
}
impl ToOwned for HgPath {
type Owned = HgPathBuf;
fn to_owned(&self) -> HgPathBuf {
self.to_hg_path_buf()
}
}
/// Identity conversion, so that APIs taking `AsRef<HgPath>` accept an
/// `HgPath` directly.
impl AsRef<HgPath> for HgPath {
    fn as_ref(&self) -> &Self {
        self
    }
}
impl AsRef<HgPath> for HgPathBuf {
fn as_ref(&self) -> &HgPath {
self
}
}
/// Appends every byte produced by the iterator at the end of the path,
/// without any check.
impl Extend<u8> for HgPathBuf {
    fn extend<I: IntoIterator<Item = u8>>(&mut self, bytes: I) {
        Extend::extend(&mut self.inner, bytes)
    }
}
/// Converts an `HgPath` into an `OsString` after checking it with
/// `check_state`; the first error found by that check is returned as is.
///
/// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
/// implemented, these conversion utils will have to work differently depending
/// on the repository encoding: either `UTF-8` or `MBCS`.
pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
    hg_path: P,
) -> Result<OsString, HgPathError> {
    let path = hg_path.as_ref();
    path.check_state()?;

    // On Unix the bytes are used as they are.
    #[cfg(unix)]
    let converted = {
        use std::os::unix::ffi::OsStrExt;
        std::ffi::OsStr::from_bytes(path.as_bytes()).to_os_string()
    };
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
    Ok(converted)
}
pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
hg_path: P,
) -> Result<PathBuf, HgPathError> {
Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
}
/// Converts an OS string into an `HgPathBuf`, then checks the result with
/// `check_state`; the first error found by that check is returned as is.
pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
    os_string: S,
) -> Result<HgPathBuf, HgPathError> {
    let os_str = os_string.as_ref();

    // On Unix the bytes are used as they are.
    #[cfg(unix)]
    let converted = {
        use std::os::unix::ffi::OsStrExt;
        HgPathBuf::from_bytes(os_str.as_bytes())
    };
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).

    converted.check_state()?;
    Ok(converted)
}
/// Converts a filesystem path into an `HgPathBuf`, then checks the result
/// with `check_state`; the first error found by that check is returned as is.
pub fn path_to_hg_path_buf<P: AsRef<Path>>(
    path: P,
) -> Result<HgPathBuf, HgPathError> {
    let os_str = path.as_ref().as_os_str();

    // On Unix the bytes are used as they are.
    #[cfg(unix)]
    let converted = {
        use std::os::unix::ffi::OsStrExt;
        HgPathBuf::from_bytes(os_str.as_bytes())
    };
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).

    converted.check_state()?;
    Ok(converted)
}
/// Fallible conversion from a filesystem path, delegating to
/// `path_to_hg_path_buf` and forwarding its error, if any.
impl TryFrom<PathBuf> for HgPathBuf {
    type Error = HgPathError;

    fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
        let converted = path_to_hg_path_buf(path)?;
        Ok(converted)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Malformed paths are reported with their bytes and the index of the
    /// problem; unusual but well-formed paths are accepted.
    #[test]
    fn test_path_states() {
        let leading_slash = HgPath::new(b"/");
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            leading_slash.check_state()
        );

        let consecutive_slashes = HgPath::new(b"a/b//c");
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes {
                bytes: b"a/b//c".to_vec(),
                second_slash_index: 4
            }),
            consecutive_slashes.check_state()
        );

        let null_byte = HgPath::new(b"a/b/\0c");
        assert_eq!(
            Err(HgPathError::ContainsNullByte {
                bytes: b"a/b/\0c".to_vec(),
                null_byte_index: 4
            }),
            null_byte.check_state()
        );

        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// The byte iterator can be consumed from both ends.
    #[test]
    fn test_iter() {
        {
            let mut iter = HgPath::new(b"a").bytes();
            assert_eq!(Some(&b'a'), iter.next());
            assert_eq!(None, iter.next_back());
            assert_eq!(None, iter.next());
        }
        {
            let mut iter = HgPath::new(b"a").bytes();
            assert_eq!(Some(&b'a'), iter.next_back());
            assert_eq!(None, iter.next_back());
            assert_eq!(None, iter.next());
        }
        {
            let mut iter = HgPath::new(b"abc").bytes();
            assert_eq!(Some(&b'a'), iter.next());
            assert_eq!(Some(&b'c'), iter.next_back());
            assert_eq!(Some(&b'b'), iter.next_back());
            assert_eq!(None, iter.next_back());
            assert_eq!(None, iter.next());
        }
        {
            let mut iter = HgPath::new(b"abc").bytes();
            assert_eq!(Some(&b'a'), iter.next());
            assert_eq!(Some(&b'b'), iter.next());
            assert_eq!(Some(&b'c'), iter.next());
            assert_eq!(None, iter.next_back());
            assert_eq!(None, iter.next());
        }
        {
            let mut collected = Vec::new();
            collected.extend(HgPath::new(b"abc").bytes());
            assert_eq!(vec![b'a', b'b', b'c'], collected);
        }
        {
            let mut iter = HgPath::new(b"abc").bytes();
            assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));
        }
        {
            let mut iter = HgPath::new(b"abc").bytes();
            assert_eq!(None, iter.rposition(|c| *c == b'd'));
        }
    }

    #[test]
    fn test_join() {
        /// Joins `tail` to an owned copy of `base`.
        fn joined(base: &[u8], tail: &[u8]) -> HgPathBuf {
            HgPathBuf::from_bytes(base).join(HgPath::new(tail))
        }

        let path = joined(b"a", b"b");
        assert_eq!(b"a/b", path.as_bytes());

        let path = joined(b"a/", b"b/c");
        assert_eq!(b"a/b/c", path.as_bytes());

        // No leading slash if empty before join
        let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", path.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let path = joined(b"a/", b"/b");
        assert_eq!(b"a//b", path.as_bytes());

        let path = joined(b"a", b"/b");
        assert_eq!(b"a//b", path.as_bytes());
    }

    #[test]
    fn test_relative_to() {
        /// Checks the result of making `path` relative to `base`.
        fn check(path: &[u8], base: &[u8], expected: Option<&HgPath>) {
            let path = HgPath::new(path);
            let base = HgPath::new(base);
            assert_eq!(expected, path.relative_to(base));
        }

        check(b"", b"", Some(HgPath::new(b"")));
        check(b"path", b"", Some(HgPath::new(b"path")));
        check(b"a", b"b", None);
        check(b"a/b", b"a", None);
        check(b"a/b", b"a/", Some(HgPath::new(b"b")));
        check(
            b"nested/path/to/b",
            b"nested/path/",
            Some(HgPath::new(b"to/b")),
        );
        check(
            b"ends/with/dir/",
            b"ends/",
            Some(HgPath::new(b"with/dir/")),
        );
    }

    #[test]
    #[cfg(unix)]
    fn test_split_drive() {
        /// On Unix the drive is always empty and the path is left untouched.
        fn check_not_split(path: &[u8]) {
            let path = HgPath::new(path);
            assert_eq!(path.split_drive(), (HgPath::new(b""), path));
        }

        // Taken from the Python stdlib's tests
        check_not_split(br"/foo/bar");
        check_not_split(br"foo:bar");
        check_not_split(br":foo:bar");
        // Also try NT paths; should not split them
        check_not_split(br"c:\foo\bar");
        check_not_split(b"c:/foo/bar");
        check_not_split(br"\\conky\mountpoint\foo\bar");
    }

    #[test]
    #[cfg(windows)]
    fn test_split_drive() {
        /// Checks that `path` is split into exactly `(drive, rest)`.
        fn check(path: &[u8], drive: &[u8], rest: &[u8]) {
            assert_eq!(
                HgPath::new(path).split_drive(),
                (HgPath::new(drive), HgPath::new(rest))
            );
        }

        check(br"c:\foo\bar", br"c:", br"\foo\bar");
        check(b"c:/foo/bar", br"c:", br"/foo/bar");
        check(
            br"\\conky\mountpoint\foo\bar",
            br"\\conky\mountpoint",
            br"\foo\bar",
        );
        check(
            br"//conky/mountpoint/foo/bar",
            br"//conky/mountpoint",
            br"/foo/bar",
        );
        check(
            br"\\\conky\mountpoint\foo\bar",
            br"",
            br"\\\conky\mountpoint\foo\bar",
        );
        check(
            br"///conky/mountpoint/foo/bar",
            br"",
            br"///conky/mountpoint/foo/bar",
        );
        check(
            br"\\conky\\mountpoint\foo\bar",
            br"",
            br"\\conky\\mountpoint\foo\bar",
        );
        check(
            br"//conky//mountpoint/foo/bar",
            br"",
            br"//conky//mountpoint/foo/bar",
        );
        // UNC part containing U+0130
        check(
            b"//conky/MOUNTPO\xc4\xb0NT/foo/bar",
            b"//conky/MOUNTPO\xc4\xb0NT",
            br"/foo/bar",
        );
    }

    #[test]
    fn test_parent() {
        /// Checks that the parent of `path` is `expected`.
        fn check(path: &[u8], expected: &[u8]) {
            assert_eq!(HgPath::new(path).parent(), HgPath::new(expected));
        }

        check(b"", b"");
        check(b"a", b"");
        check(b"a/b", b"a");
        check(b"a/other/b", b"a/other");
    }
}