##// END OF EJS Templates
revlog: make `clearcaches()` signature consistent with ManifestRevlog...
revlog: make `clearcaches()` signature consistent with ManifestRevlog I'm not sure if this a newly added bug, because of using a different version of pytype, or if the recent work around avoiding the zope interface types in the type checking phase (see 5eb98ea78fd7 and friends)... but pytype 2023.11.21 started flagging this series since it was last pushed ~6 weeks ago: File "/mnt/c/Users/Matt/hg/mercurial/bundlerepo.py", line 204, in <module>: Overriding method signature mismatch [signature-mismatch] Base signature: 'def mercurial.manifest.ManifestRevlog.clearcaches(self, clear_persisted_data: Any = ...) -> None'. Subclass signature: 'def mercurial.revlog.revlog.clearcaches(self) -> None'. Not enough positional parameters in overriding method. Maybe the multiple inheritance in `bundlerepo.bundlemanifest` is bad, but it seems like a `ManifestRevlog` is-a `revlog`, even though the class hierarchy isn't coded that way. Additionally, it looks like `revlog.clearcaches()` is dealing with some persistent data, so maybe this is useful to have there anyway. Also sprinkle some trivial type hints on the method, because there are other `clearcaches()` definitions in the codebase with these hints, and I don't feel like waiting for another pytype run to see if it cares that specifically about the signature matching.

File last commit:

r52047:aba622c7 default
r52765:5e79783d default
Show More
utils.rs
572 lines | 17.0 KiB | application/rls-services+xml | RustLexer
// utils module
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Contains useful functions, traits, structs, etc. for use in core.
use crate::errors::{HgError, IoErrorContext};
use crate::utils::hg_path::HgPath;
use im_rc::ordmap::DiffItem;
use im_rc::ordmap::OrdMap;
use itertools::EitherOrBoth;
use itertools::Itertools;
use std::cell::Cell;
use std::cmp::Ordering;
use std::fmt;
use std::{io::Write, ops::Deref};
pub mod debug;
pub mod files;
pub mod hg_path;
pub mod path_auditor;
/// Useful until rust/issues/56345 is stable
///
/// # Examples
///
/// ```
/// use crate::hg::utils::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
for<'a> &'a [T]: PartialEq,
{
slice
.windows(needle.len())
.position(|window| window == needle)
}
/// Replaces the `from` slice with the `to` slice inside the `buf` slice.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
/// line,
/// b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
T: Clone + PartialEq,
{
if buf.len() < from.len() || from.len() != to.len() {
return;
}
for i in 0..=buf.len() - from.len() {
if buf[i..].starts_with(from) {
buf[i..(i + from.len())].clone_from_slice(to);
}
}
}
pub trait SliceExt {
fn trim_end(&self) -> &Self;
fn trim_start(&self) -> &Self;
fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
fn trim(&self) -> &Self;
fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
}
impl SliceExt for [u8] {
fn trim_end(&self) -> &[u8] {
self.trim_end_matches(|byte| byte.is_ascii_whitespace())
}
fn trim_start(&self) -> &[u8] {
self.trim_start_matches(|byte| byte.is_ascii_whitespace())
}
fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
&self[..=last]
} else {
&[]
}
}
fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
if let Some(first) = self.iter().position(|&byte| !f(byte)) {
&self[first..]
} else {
&[]
}
}
/// ```
/// use hg::utils::SliceExt;
/// assert_eq!(
/// b" to trim ".trim(),
/// b"to trim"
/// );
/// assert_eq!(
/// b"to trim ".trim(),
/// b"to trim"
/// );
/// assert_eq!(
/// b" to trim".trim(),
/// b"to trim"
/// );
/// ```
fn trim(&self) -> &[u8] {
self.trim_start().trim_end()
}
fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
if self.starts_with(needle) {
Some(&self[needle.len()..])
} else {
None
}
}
fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
let mut iter = self.splitn(2, |&byte| byte == separator);
let a = iter.next()?;
let b = iter.next()?;
Some((a, b))
}
fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
find_slice_in_slice(self, separator)
.map(|pos| (&self[..pos], &self[pos + separator.len()..]))
}
}
pub trait Escaped {
/// Return bytes escaped for display to the user
fn escaped_bytes(&self) -> Vec<u8>;
}
impl Escaped for u8 {
fn escaped_bytes(&self) -> Vec<u8> {
let mut acc = vec![];
match self {
c @ b'\'' | c @ b'\\' => {
acc.push(b'\\');
acc.push(*c);
}
b'\t' => {
acc.extend(br"\\t");
}
b'\n' => {
acc.extend(br"\\n");
}
b'\r' => {
acc.extend(br"\\r");
}
c if (*c < b' ' || *c >= 127) => {
write!(acc, "\\x{:x}", self).unwrap();
}
c => {
acc.push(*c);
}
}
acc
}
}
impl<'a, T: Escaped> Escaped for &'a [T] {
fn escaped_bytes(&self) -> Vec<u8> {
self.iter().flat_map(Escaped::escaped_bytes).collect()
}
}
impl<T: Escaped> Escaped for Vec<T> {
fn escaped_bytes(&self) -> Vec<u8> {
self.deref().escaped_bytes()
}
}
impl<'a> Escaped for &'a HgPath {
fn escaped_bytes(&self) -> Vec<u8> {
self.as_bytes().escaped_bytes()
}
}
#[cfg(unix)]
pub fn shell_quote(value: &[u8]) -> Vec<u8> {
if value.iter().all(|&byte| {
matches!(
byte,
b'a'..=b'z'
| b'A'..=b'Z'
| b'0'..=b'9'
| b'.'
| b'_'
| b'/'
| b'+'
| b'-'
)
}) {
value.to_owned()
} else {
let mut quoted = Vec::with_capacity(value.len() + 2);
quoted.push(b'\'');
for &byte in value {
if byte == b'\'' {
quoted.push(b'\\');
}
quoted.push(byte);
}
quoted.push(b'\'');
quoted
}
}
pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
std::env::current_dir().map_err(|error| HgError::IoError {
error,
context: IoErrorContext::CurrentDir,
})
}
pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
std::env::current_exe().map_err(|error| HgError::IoError {
error,
context: IoErrorContext::CurrentExe,
})
}
/// Expand `$FOO` and `${FOO}` environment variables in the given byte string
pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
lazy_static::lazy_static! {
/// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
/// The `x` makes whitespace ignored.
/// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
static ref VAR_RE: regex::bytes::Regex =
regex::bytes::Regex::new(r"(?x-u)
\$
(?:
(\w+)
|
\{
([^}]*)
\}
)
").unwrap();
}
VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
let var_name = files::get_os_str_from_bytes(
captures
.get(1)
.or_else(|| captures.get(2))
.expect("either side of `|` must participate in match")
.as_bytes(),
);
std::env::var_os(var_name)
.map(files::get_bytes_from_os_str)
.unwrap_or_else(|| {
// Referencing an environment variable that does not exist.
// Leave the $FOO reference as-is.
captures[0].to_owned()
})
})
}
#[test]
fn test_expand_vars() {
// Modifying process-global state in a test isn’t great,
// but hopefully this won’t collide with anything.
std::env::set_var("TEST_EXPAND_VAR", "1");
assert_eq!(
expand_vars(b"before/$TEST_EXPAND_VAR/after"),
&b"before/1/after"[..]
);
assert_eq!(
expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
&b"before111after"[..]
);
let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
assert_eq!(expand_vars(s), &s[..]);
}
pub(crate) enum MergeResult<V> {
Left,
Right,
New(V),
}
/// Return the union of the two given maps,
/// calling `merge(key, left_value, right_value)` to resolve keys that exist in
/// both.
///
/// CC <https://github.com/bodil/im-rs/issues/166>
pub(crate) fn ordmap_union_with_merge<K, V>(
left: OrdMap<K, V>,
right: OrdMap<K, V>,
mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
) -> OrdMap<K, V>
where
K: Clone + Ord,
V: Clone + PartialEq,
{
if left.ptr_eq(&right) {
// One of the two maps is an unmodified clone of the other
left
} else if left.len() / 2 > right.len() {
// When two maps have different sizes,
// their size difference is a lower bound on
// how many keys of the larger map are not also in the smaller map.
// This in turn is a lower bound on the number of differences in
// `OrdMap::diff` and the "amount of work" that would be done
// by `ordmap_union_with_merge_by_diff`.
//
// Here `left` is more than twice the size of `right`,
// so the number of differences is more than the total size of
// `right`. Therefore an algorithm based on iterating `right`
// is more efficient.
//
// This helps a lot when a tiny (or empty) map is merged
// with a large one.
ordmap_union_with_merge_by_iter(left, right, merge)
} else if left.len() < right.len() / 2 {
// Same as above but with `left` and `right` swapped
ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
// Also swapped in `merge` arguments:
match merge(key, b, a) {
MergeResult::New(v) => MergeResult::New(v),
// … and swap back in `merge` result:
MergeResult::Left => MergeResult::Right,
MergeResult::Right => MergeResult::Left,
}
})
} else {
// For maps of similar size, use the algorithm based on `OrdMap::diff`
ordmap_union_with_merge_by_diff(left, right, merge)
}
}
/// Efficient if `right` is much smaller than `left`
fn ordmap_union_with_merge_by_iter<K, V>(
mut left: OrdMap<K, V>,
right: OrdMap<K, V>,
mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
) -> OrdMap<K, V>
where
K: Clone + Ord,
V: Clone,
{
for (key, right_value) in right {
match left.get(&key) {
None => {
left.insert(key, right_value);
}
Some(left_value) => match merge(&key, left_value, &right_value) {
MergeResult::Left => {}
MergeResult::Right => {
left.insert(key, right_value);
}
MergeResult::New(new_value) => {
left.insert(key, new_value);
}
},
}
}
left
}
/// Fallback when both maps are of similar size
fn ordmap_union_with_merge_by_diff<K, V>(
mut left: OrdMap<K, V>,
mut right: OrdMap<K, V>,
mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
) -> OrdMap<K, V>
where
K: Clone + Ord,
V: Clone + PartialEq,
{
// (key, value) pairs that would need to be inserted in either map
// in order to turn it into the union.
//
// TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
// change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
// with `left_updates` only borrowing from `right` and `right_updates` from
// `left`, and with `Cow::Owned` used for `MergeResult::New`.
//
// This would allow moving all `.clone()` calls to after we’ve decided
// which of `right_updates` or `left_updates` to use
// (value ones becoming `Cow::into_owned`),
// and avoid making clones we don’t end up using.
let mut left_updates = Vec::new();
let mut right_updates = Vec::new();
for difference in left.diff(&right) {
match difference {
DiffItem::Add(key, value) => {
left_updates.push((key.clone(), value.clone()))
}
DiffItem::Remove(key, value) => {
right_updates.push((key.clone(), value.clone()))
}
DiffItem::Update {
old: (key, left_value),
new: (_, right_value),
} => match merge(key, left_value, right_value) {
MergeResult::Left => {
right_updates.push((key.clone(), left_value.clone()))
}
MergeResult::Right => {
left_updates.push((key.clone(), right_value.clone()))
}
MergeResult::New(new_value) => {
left_updates.push((key.clone(), new_value.clone()));
right_updates.push((key.clone(), new_value))
}
},
}
}
if left_updates.len() < right_updates.len() {
for (key, value) in left_updates {
left.insert(key, value);
}
left
} else {
for (key, value) in right_updates {
right.insert(key, value);
}
right
}
}
/// Join items of the iterable with the given separator, similar to Python’s
/// `separator.join(iter)`.
///
/// Formatting the return value consumes the iterator.
/// Formatting it again will produce an empty string.
pub fn join_display(
iter: impl IntoIterator<Item = impl fmt::Display>,
separator: impl fmt::Display,
) -> impl fmt::Display {
JoinDisplay {
iter: Cell::new(Some(iter.into_iter())),
separator,
}
}
struct JoinDisplay<I, S> {
iter: Cell<Option<I>>,
separator: S,
}
impl<I, T, S> fmt::Display for JoinDisplay<I, S>
where
I: Iterator<Item = T>,
T: fmt::Display,
S: fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(mut iter) = self.iter.take() {
if let Some(first) = iter.next() {
first.fmt(f)?;
}
for value in iter {
self.separator.fmt(f)?;
value.fmt(f)?;
}
}
Ok(())
}
}
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
///
/// The callback is only called for incoming `Ok` values. Errors are passed
/// through as-is. In order to let it use the `?` operator the callback is
/// expected to return a `Result` of `Option`, instead of an `Option` of
/// `Result`.
pub fn filter_map_results<'a, I, F, A, B, E>(
iter: I,
f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
I: Iterator<Item = Result<A, E>> + 'a,
F: Fn(A) -> Result<Option<B>, E> + 'a,
{
iter.filter_map(move |result| match result {
Ok(node) => f(node).transpose(),
Err(e) => Some(Err(e)),
})
}
/// Like `itertools::merge_join_by`, but merges fallible iterators.
///
/// The callback is only used for Ok values. Errors are passed through as-is.
/// Errors compare less than Ok values, which makes the error handling
/// conservative.
pub fn merge_join_results_by<'a, I1, I2, F, A, B, E>(
iter1: I1,
iter2: I2,
f: F,
) -> impl Iterator<Item = Result<EitherOrBoth<A, B>, E>> + 'a
where
I1: Iterator<Item = Result<A, E>> + 'a,
I2: Iterator<Item = Result<B, E>> + 'a,
F: FnMut(&A, &B) -> Ordering + 'a,
{
let mut g = f;
iter1
.merge_join_by(iter2, move |i1, i2| match i1 {
Err(_) => Ordering::Less,
Ok(i1) => match i2 {
Err(_) => Ordering::Greater,
Ok(i2) => g(i1, i2),
},
})
.map(|result| match result {
EitherOrBoth::Left(Err(e)) => Err(e),
EitherOrBoth::Right(Err(e)) => Err(e),
EitherOrBoth::Both(Err(e), _) => Err(e),
EitherOrBoth::Both(_, Err(e)) => Err(e),
EitherOrBoth::Left(Ok(v)) => Ok(EitherOrBoth::Left(v)),
EitherOrBoth::Right(Ok(v)) => Ok(EitherOrBoth::Right(v)),
EitherOrBoth::Both(Ok(v1), Ok(v2)) => {
Ok(EitherOrBoth::Both(v1, v2))
}
})
}
/// Force the global rayon threadpool to not exceed 16 concurrent threads
/// unless the user has specified a value.
/// This is a stop-gap measure until we figure out why using more than 16
/// threads makes `status` slower for each additional thread.
///
/// TODO find the underlying cause and fix it, then remove this.
///
/// # Errors
///
/// Returns an error if the global threadpool has already been initialized if
/// we try to initialize it.
pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> {
const THREAD_CAP: usize = 16;
if std::env::var("RAYON_NUM_THREADS").is_err() {
let available_parallelism = std::thread::available_parallelism()
.map(usize::from)
.unwrap_or(1);
let new_thread_count = THREAD_CAP.min(available_parallelism);
let res = rayon::ThreadPoolBuilder::new()
.num_threads(new_thread_count)
.build_global();
if res.is_ok() {
log::trace!(
"Capped the rayon threadpool to {new_thread_count} threads",
);
}
return res;
}
Ok(())
}