upstream/mercurial-mirror Commit - r52047:aba622c7

rust: add a utility function to merge ordered fallible iterators...

Arseniy Alekseyev -

r52047:aba622c7 default

parent child

rust/hg-core/src/utils.rs

0 +40 0

              // utils module
              //
              // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Contains useful functions, traits, structs, etc. for use in core.
              use crate::errors::{HgError, IoErrorContext};
              use crate::utils::hg_path::HgPath;
              use im_rc::ordmap::DiffItem;
              use im_rc::ordmap::OrdMap;
+             use itertools::EitherOrBoth;
+             use itertools::Itertools;
              use std::cell::Cell;
+             use std::cmp::Ordering;
              use std::fmt;
              use std::{io::Write, ops::Deref};
              pub mod debug;
              pub mod files;
              pub mod hg_path;
              pub mod path_auditor;
              /// Useful until rust/issues/56345 is stable
              ///
              /// # Examples
              ///
              /// ```
              /// use crate::hg::utils::find_slice_in_slice;
              ///
              /// let haystack = b"This is the haystack".to_vec();
              /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
              /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
              /// ```
              pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
              where
                  for<'a> &'a [T]: PartialEq,
              {
                  slice
                      .windows(needle.len())
                      .position(|window| window == needle)
              }
              /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
              ///
              /// # Examples
              ///
              /// ```
              /// use crate::hg::utils::replace_slice;
              /// let mut line = b"I hate writing tests!".to_vec();
              /// replace_slice(&mut line, b"hate", b"love");
              /// assert_eq!(
              ///     line,
              ///     b"I love writing tests!".to_vec()
              /// );
              /// ```
              pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
              where
                  T: Clone + PartialEq,
              {
                  if buf.len() < from.len() || from.len() != to.len() {
                      return;
                  }
                  for i in 0..=buf.len() - from.len() {
                      if buf[i..].starts_with(from) {
                          buf[i..(i + from.len())].clone_from_slice(to);
                      }
                  }
              }
              pub trait SliceExt {
                  fn trim_end(&self) -> &Self;
                  fn trim_start(&self) -> &Self;
                  fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
                  fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
                  fn trim(&self) -> &Self;
                  fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
                  fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
                  fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
              }
              impl SliceExt for [u8] {
                  fn trim_end(&self) -> &[u8] {
                      self.trim_end_matches(|byte| byte.is_ascii_whitespace())
                  }
                  fn trim_start(&self) -> &[u8] {
                      self.trim_start_matches(|byte| byte.is_ascii_whitespace())
                  }
                  fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
                      if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
                          &self[..=last]
                      } else {
                          &[]
                      }
                  }
                  fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
                      if let Some(first) = self.iter().position(|&byte| !f(byte)) {
                          &self[first..]
                      } else {
                          &[]
                      }
                  }
                  /// ```
                  /// use hg::utils::SliceExt;
                  /// assert_eq!(
                  ///     b"  to trim  ".trim(),
                  ///     b"to trim"
                  /// );
                  /// assert_eq!(
                  ///     b"to trim  ".trim(),
                  ///     b"to trim"
                  /// );
                  /// assert_eq!(
                  ///     b"  to trim".trim(),
                  ///     b"to trim"
                  /// );
                  /// ```
                  fn trim(&self) -> &[u8] {
                      self.trim_start().trim_end()
                  }
                  fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
                      if self.starts_with(needle) {
                          Some(&self[needle.len()..])
                      } else {
                          None
                      }
                  }
                  fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
                      let mut iter = self.splitn(2, |&byte| byte == separator);
                      let a = iter.next()?;
                      let b = iter.next()?;
                      Some((a, b))
                  }
                  fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
                      find_slice_in_slice(self, separator)
                          .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
                  }
              }
              pub trait Escaped {
                  /// Return bytes escaped for display to the user
                  fn escaped_bytes(&self) -> Vec<u8>;
              }
              impl Escaped for u8 {
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let mut acc = vec![];
                      match self {
                          c @ b'\'' | c @ b'\\' => {
                              acc.push(b'\\');
                              acc.push(*c);
                          }
                          b'\t' => {
                              acc.extend(br"\\t");
                          }
                          b'\n' => {
                              acc.extend(br"\\n");
                          }
                          b'\r' => {
                              acc.extend(br"\\r");
                          }
                          c if (*c < b' ' || *c >= 127) => {
                              write!(acc, "\\x{:x}", self).unwrap();
                          }
                          c => {
                              acc.push(*c);
                          }
                      }
                      acc
                  }
              }
              impl<'a, T: Escaped> Escaped for &'a [T] {
                  fn escaped_bytes(&self) -> Vec<u8> {
                      self.iter().flat_map(Escaped::escaped_bytes).collect()
                  }
              }
              impl<T: Escaped> Escaped for Vec<T> {
                  fn escaped_bytes(&self) -> Vec<u8> {
                      self.deref().escaped_bytes()
                  }
              }
              impl<'a> Escaped for &'a HgPath {
                  fn escaped_bytes(&self) -> Vec<u8> {
                      self.as_bytes().escaped_bytes()
                  }
              }
              #[cfg(unix)]
              pub fn shell_quote(value: &[u8]) -> Vec<u8> {
                  if value.iter().all(|&byte| {
                      matches!(
                          byte,
                          b'a'..=b'z'
                          | b'A'..=b'Z'
                          | b'0'..=b'9'
                          | b'.'
                          | b'_'
                          | b'/'
                          | b'+'
                          | b'-'
                      )
                  }) {
                      value.to_owned()
                  } else {
                      let mut quoted = Vec::with_capacity(value.len() + 2);
                      quoted.push(b'\'');
                      for &byte in value {
                          if byte == b'\'' {
                              quoted.push(b'\\');
                          }
                          quoted.push(byte);
                      }
                      quoted.push(b'\'');
                      quoted
                  }
              }
              pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
                  std::env::current_dir().map_err(|error| HgError::IoError {
                      error,
                      context: IoErrorContext::CurrentDir,
                  })
              }
              pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
                  std::env::current_exe().map_err(|error| HgError::IoError {
                      error,
                      context: IoErrorContext::CurrentExe,
                  })
              }
              /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
              pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
                  lazy_static::lazy_static! {
                      /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
                      /// The `x` makes whitespace ignored.
                      /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
                      static ref VAR_RE: regex::bytes::Regex =
                          regex::bytes::Regex::new(r"(?x-u)
                              \$
                              (?:
                                  (\w+)
                                  |
                                  \{
                                      ([^}]*)
                                  \}
                              )
                          ").unwrap();
                  }
                  VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
                      let var_name = files::get_os_str_from_bytes(
                          captures
                              .get(1)
                              .or_else(|| captures.get(2))
                              .expect("either side of `|` must participate in match")
                              .as_bytes(),
                      );
                      std::env::var_os(var_name)
                          .map(files::get_bytes_from_os_str)
                          .unwrap_or_else(|| {
                              // Referencing an environment variable that does not exist.
                              // Leave the $FOO reference as-is.
                              captures[0].to_owned()
                          })
                  })
              }
              #[test]
              fn test_expand_vars() {
                  // Modifying process-global state in a test isn’t great,
                  // but hopefully this won’t collide with anything.
                  std::env::set_var("TEST_EXPAND_VAR", "1");
                  assert_eq!(
                      expand_vars(b"before/$TEST_EXPAND_VAR/after"),
                      &b"before/1/after"[..]
                  );
                  assert_eq!(
                      expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
                      &b"before111after"[..]
                  );
                  let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
                  assert_eq!(expand_vars(s), &s[..]);
              }
              pub(crate) enum MergeResult<V> {
                  Left,
                  Right,
                  New(V),
              }
              /// Return the union of the two given maps,
              /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
              /// both.
              ///
              /// CC <https://github.com/bodil/im-rs/issues/166>
              pub(crate) fn ordmap_union_with_merge<K, V>(
                  left: OrdMap<K, V>,
                  right: OrdMap<K, V>,
                  mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
              ) -> OrdMap<K, V>
              where
                  K: Clone + Ord,
                  V: Clone + PartialEq,
              {
                  if left.ptr_eq(&right) {
                      // One of the two maps is an unmodified clone of the other
                      left
                  } else if left.len() / 2 > right.len() {
                      // When two maps have different sizes,
                      // their size difference is a lower bound on
                      // how many keys of the larger map are not also in the smaller map.
                      // This in turn is a lower bound on the number of differences in
                      // `OrdMap::diff` and the "amount of work" that would be done
                      // by `ordmap_union_with_merge_by_diff`.
                      //
                      // Here `left` is more than twice the size of `right`,
                      // so the number of differences is more than the total size of
                      // `right`. Therefore an algorithm based on iterating `right`
                      // is more efficient.
                      //
                      // This helps a lot when a tiny (or empty) map is merged
                      // with a large one.
                      ordmap_union_with_merge_by_iter(left, right, merge)
                  } else if left.len() < right.len() / 2 {
                      // Same as above but with `left` and `right` swapped
                      ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
                          // Also swapped in `merge` arguments:
                          match merge(key, b, a) {
                              MergeResult::New(v) => MergeResult::New(v),
                              // … and swap back in `merge` result:
                              MergeResult::Left => MergeResult::Right,
                              MergeResult::Right => MergeResult::Left,
                          }
                      })
                  } else {
                      // For maps of similar size, use the algorithm based on `OrdMap::diff`
                      ordmap_union_with_merge_by_diff(left, right, merge)
                  }
              }
              /// Efficient if `right` is much smaller than `left`
              fn ordmap_union_with_merge_by_iter<K, V>(
                  mut left: OrdMap<K, V>,
                  right: OrdMap<K, V>,
                  mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
              ) -> OrdMap<K, V>
              where
                  K: Clone + Ord,
                  V: Clone,
              {
                  for (key, right_value) in right {
                      match left.get(&key) {
                          None => {
                              left.insert(key, right_value);
                          }
                          Some(left_value) => match merge(&key, left_value, &right_value) {
                              MergeResult::Left => {}
                              MergeResult::Right => {
                                  left.insert(key, right_value);
                              }
                              MergeResult::New(new_value) => {
                                  left.insert(key, new_value);
                              }
                          },
                      }
                  }
                  left
              }
              /// Fallback when both maps are of similar size
              fn ordmap_union_with_merge_by_diff<K, V>(
                  mut left: OrdMap<K, V>,
                  mut right: OrdMap<K, V>,
                  mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
              ) -> OrdMap<K, V>
              where
                  K: Clone + Ord,
                  V: Clone + PartialEq,
              {
                  // (key, value) pairs that would need to be inserted in either map
                  // in order to turn it into the union.
                  //
                  // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
                  // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
                  // with `left_updates` only borrowing from `right` and `right_updates` from
                  // `left`, and with `Cow::Owned` used for `MergeResult::New`.
                  //
                  // This would allow moving all `.clone()` calls to after we’ve decided
                  // which of `right_updates` or `left_updates` to use
                  // (value ones becoming `Cow::into_owned`),
                  // and avoid making clones we don’t end up using.
                  let mut left_updates = Vec::new();
                  let mut right_updates = Vec::new();
                  for difference in left.diff(&right) {
                      match difference {
                          DiffItem::Add(key, value) => {
                              left_updates.push((key.clone(), value.clone()))
                          }
                          DiffItem::Remove(key, value) => {
                              right_updates.push((key.clone(), value.clone()))
                          }
                          DiffItem::Update {
                              old: (key, left_value),
                              new: (_, right_value),
                          } => match merge(key, left_value, right_value) {
                              MergeResult::Left => {
                                  right_updates.push((key.clone(), left_value.clone()))
                              }
                              MergeResult::Right => {
                                  left_updates.push((key.clone(), right_value.clone()))
                              }
                              MergeResult::New(new_value) => {
                                  left_updates.push((key.clone(), new_value.clone()));
                                  right_updates.push((key.clone(), new_value))
                              }
                          },
                      }
                  }
                  if left_updates.len() < right_updates.len() {
                      for (key, value) in left_updates {
                          left.insert(key, value);
                      }
                      left
                  } else {
                      for (key, value) in right_updates {
                          right.insert(key, value);
                      }
                      right
                  }
              }
              /// Join items of the iterable with the given separator, similar to Python’s
              /// `separator.join(iter)`.
              ///
              /// Formatting the return value consumes the iterator.
              /// Formatting it again will produce an empty string.
              pub fn join_display(
                  iter: impl IntoIterator<Item = impl fmt::Display>,
                  separator: impl fmt::Display,
              ) -> impl fmt::Display {
                  JoinDisplay {
                      iter: Cell::new(Some(iter.into_iter())),
                      separator,
                  }
              }
              struct JoinDisplay<I, S> {
                  iter: Cell<Option<I>>,
                  separator: S,
              }
              impl<I, T, S> fmt::Display for JoinDisplay<I, S>
              where
                  I: Iterator<Item = T>,
                  T: fmt::Display,
                  S: fmt::Display,
              {
                  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                      if let Some(mut iter) = self.iter.take() {
                          if let Some(first) = iter.next() {
                              first.fmt(f)?;
                          }
                          for value in iter {
                              self.separator.fmt(f)?;
                              value.fmt(f)?;
                          }
                      }
                      Ok(())
                  }
              }
              /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
              ///
              /// The callback is only called for incoming `Ok` values. Errors are passed
              /// through as-is. In order to let it use the `?` operator the callback is
              /// expected to return a `Result` of `Option`, instead of an `Option` of
              /// `Result`.
              pub fn filter_map_results<'a, I, F, A, B, E>(
                  iter: I,
                  f: F,
              ) -> impl Iterator<Item = Result<B, E>> + 'a
              where
                  I: Iterator<Item = Result<A, E>> + 'a,
                  F: Fn(A) -> Result<Option<B>, E> + 'a,
              {
                  iter.filter_map(move |result| match result {
                      Ok(node) => f(node).transpose(),
                      Err(e) => Some(Err(e)),
                  })
              }
+             /// Like `itertools::merge_join_by`, but merges fallible iterators.
+             ///
+             /// The callback is only used for Ok values. Errors are passed through as-is.
+             /// Errors compare less than Ok values, which makes the error handling
+             /// conservative.
+             pub fn merge_join_results_by<'a, I1, I2, F, A, B, E>(
+                 iter1: I1,
+                 iter2: I2,
+                 f: F,
+             ) -> impl Iterator<Item = Result<EitherOrBoth<A, B>, E>> + 'a
+             where
+                 I1: Iterator<Item = Result<A, E>> + 'a,
+                 I2: Iterator<Item = Result<B, E>> + 'a,
+                 F: FnMut(&A, &B) -> Ordering + 'a,
+             {
+                 let mut g = f;
+                 iter1
+                     .merge_join_by(iter2, move |i1, i2| match i1 {
+                         Err(_) => Ordering::Less,
+                         Ok(i1) => match i2 {
+                             Err(_) => Ordering::Greater,
+                             Ok(i2) => g(i1, i2),
+                         },
+                     })
+                     .map(|result| match result {
+                         EitherOrBoth::Left(Err(e)) => Err(e),
+                         EitherOrBoth::Right(Err(e)) => Err(e),
+                         EitherOrBoth::Both(Err(e), _) => Err(e),
+                         EitherOrBoth::Both(_, Err(e)) => Err(e),
+                         EitherOrBoth::Left(Ok(v)) => Ok(EitherOrBoth::Left(v)),
+                         EitherOrBoth::Right(Ok(v)) => Ok(EitherOrBoth::Right(v)),
+                         EitherOrBoth::Both(Ok(v1), Ok(v2)) => {
+                             Ok(EitherOrBoth::Both(v1, v2))
+                         }
+                     })
+             }
              /// Force the global rayon threadpool to not exceed 16 concurrent threads
              /// unless the user has specified a value.
              /// This is a stop-gap measure until we figure out why using more than 16
              /// threads makes `status` slower for each additional thread.
              ///
              /// TODO find the underlying cause and fix it, then remove this.
              ///
              /// # Errors
              ///
              /// Returns an error if the global threadpool has already been initialized if
              /// we try to initialize it.
              pub fn cap_default_rayon_threads() -> Result<(), rayon::ThreadPoolBuildError> {
                  const THREAD_CAP: usize = 16;
                  if std::env::var("RAYON_NUM_THREADS").is_err() {
                      let available_parallelism = std::thread::available_parallelism()
                          .map(usize::from)
                          .unwrap_or(1);
                      let new_thread_count = THREAD_CAP.min(available_parallelism);
                      let res = rayon::ThreadPoolBuilder::new()
                          .num_threads(new_thread_count)
                          .build_global();
                      if res.is_ok() {
                          log::trace!(
                              "Capped the rayon threadpool to {new_thread_count} threads",
                          );
                      }
                      return res;
                  }
                  Ok(())
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages