upstream/mercurial-mirror Commit - r47476:91ab5190

rhg: Add support for environment variables in config include paths...

Simon Sapin -

r47476:91ab5190 default

parent child

rust/hg-core/src/config/layer.rs

0 +1 0

              // layer.rs
              //
              // Copyright 2020
              //      Valentin Gatien-Baron,
              //      Raphaël Gomès <rgomes@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              use crate::errors::HgError;
              use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
              use format_bytes::{format_bytes, write_bytes, DisplayBytes};
              use lazy_static::lazy_static;
              use regex::bytes::Regex;
              use std::collections::HashMap;
              use std::path::{Path, PathBuf};
              // Line-level patterns of the config file syntax, compiled on first use.
              lazy_static! {
                  /// A `[section]` header; capture 1 is the section name.
                  static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
                  /// A `name = value` line; capture 1 is the name and capture 2 the value,
                  /// both without the whitespace around the `=` or at the end of the line.
                  static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
                  /// Continuation whitespace: an indented line; capture 1 is its content
                  /// with the surrounding whitespace removed.
                  static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
                  /// A line that is blank, or that starts with `;` or `#`.
                  static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
                  /// A line that starts with `;` or `#`.
                  static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
                  /// A directive that allows for removing previous entries;
                  /// capture 1 is the name of the item to remove.
                  static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
                  /// A directive that allows for including other config files;
                  /// capture 1 is the path, without surrounding whitespace.
                  static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
              }
              /// One layer of configuration: a set of sections (and their items) that all
              /// share the same origin.
              ///
              /// All config values are separated by layers of precedence.
              /// Each config source may be split in multiple layers if `%include` directives
              /// are used.
              /// TODO detail the general precedence
              #[derive(Clone)]
              pub struct ConfigLayer {
                  /// Mapping of the sections to their items
                  sections: HashMap<Vec<u8>, ConfigItem>,
                  /// All sections (and their items/values) in a layer share the same origin
                  pub origin: ConfigOrigin,
                  /// Whether this layer comes from a trusted user or group
                  pub trusted: bool,
              }
              impl ConfigLayer {
                  pub fn new(origin: ConfigOrigin) -> Self {
                      ConfigLayer {
                          sections: HashMap::new(),
                          trusted: true, // TODO check
                          origin,
                      }
                  }
                  /// Parse `--config` CLI arguments and return a layer if there’s any
                  pub(crate) fn parse_cli_args(
                      cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
                  ) -> Result<Option<Self>, ConfigError> {
                      fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
                          use crate::utils::SliceExt;
                          let (section_and_item, value) = arg.split_2(b'=')?;
                          let (section, item) = section_and_item.trim().split_2(b'.')?;
                          Some((
                              section.to_owned(),
                              item.to_owned(),
                              value.trim().to_owned(),
                          ))
                      }
                      let mut layer = Self::new(ConfigOrigin::CommandLine);
                      for arg in cli_config_args {
                          let arg = arg.as_ref();
                          if let Some((section, item, value)) = parse_one(arg) {
                              layer.add(section, item, value, None);
                          } else {
                              Err(HgError::abort(format!(
                                  "abort: malformed --config option: '{}' \
                                  (use --config section.name=value)",
                                  String::from_utf8_lossy(arg),
                              )))?
                          }
                      }
                      if layer.sections.is_empty() {
                          Ok(None)
                      } else {
                          Ok(Some(layer))
                      }
                  }
                  /// Tells whether the origin of this layer is the `--config` CLI arguments
                  pub(crate) fn is_from_command_line(&self) -> bool {
                      match self.origin {
                          ConfigOrigin::CommandLine => true,
                          ConfigOrigin::File(_) | ConfigOrigin::Environment(_) => false,
                      }
                  }
                  /// Stores `value` under `<section>.<item>`, creating the section on first
                  /// use and replacing any value previously stored under the same name.
                  ///
                  /// `line` is recorded alongside the value as-is.
                  pub fn add(
                      &mut self,
                      section: Vec<u8>,
                      item: Vec<u8>,
                      value: Vec<u8>,
                      line: Option<usize>,
                  ) {
                      let entry = ConfigValue { bytes: value, line };
                      let items = self.sections.entry(section).or_insert_with(HashMap::new);
                      items.insert(item, entry);
                  }
                  /// Looks up `<section>.<item>`; `None` if either the section or the item
                  /// is missing from this layer.
                  pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
                      self.sections
                          .get(section)
                          .and_then(|items| items.get(item))
                  }
                  /// Iterates over the item names defined in `section`; yields nothing if
                  /// this layer does not have that section.
                  pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
                      let items = self.sections.get(section);
                      items
                          .into_iter()
                          .flat_map(|items| items.keys())
                          .map(|name| name.as_slice())
                  }
                  /// Returns whether this layer has no section at all
                  pub fn is_empty(&self) -> bool {
                      self.sections.is_empty()
                  }
                  /// Returns a `Vec` of layers in order of precedence (so, in read order),
                  /// recursively parsing the `%include` directives if any.
                  ///
                  /// `data` is the content read from `src`. Relative `%include` paths are
                  /// resolved against the directory containing `src`. Each `%include`
                  /// closes the layer being built, appends the layers of the included
                  /// file, then starts a fresh layer for the rest of `src`.
                  ///
                  /// # Errors
                  ///
                  /// Fails with a parse error carrying `src` and the 1-based line number
                  /// when an included file cannot be read, or when a line matches none of
                  /// the known forms. Errors from included files are propagated as-is.
                  ///
                  /// # Panics
                  ///
                  /// Panics if `src` has no parent directory.
                  pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
                      let mut layers = vec![];
                      // Discard byte order mark if any
                      let data = if data.starts_with(b"\xef\xbb\xbf") {
                          &data[3..]
                      } else {
                          data
                      };
                      // TODO check if it's trusted
                      let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
                      let mut lines_iter =
                          data.split(|b| *b == b'\n').enumerate().peekable();
                      let mut section = b"".to_vec();
                      while let Some((index, bytes)) = lines_iter.next() {
                          let line = Some(index + 1);
                          if let Some(m) = INCLUDE_RE.captures(bytes) {
                              let filename_bytes = &m[1];
+                             let filename_bytes = crate::utils::expand_vars(filename_bytes);
                              // `Path::parent` only fails for the root directory,
                              // which `src` can’t be since we’ve managed to open it as a
                              // file.
                              let dir = src
                                  .parent()
                                  .expect("Path::parent fail on a file we’ve read");
                              // `Path::join` with an absolute argument correctly ignores the
                              // base path
                              let filename = dir.join(&get_path_from_bytes(&filename_bytes));
                              let data = std::fs::read(&filename).map_err(|io_error| {
                                  ConfigParseError {
                                      origin: ConfigOrigin::File(src.to_owned()),
                                      line,
                                      message: format_bytes!(
                                          b"cannot include {} ({})",
                                          filename_bytes,
                                          format_bytes::Utf8(io_error)
                                      ),
                                  }
                              })?;
                              // What was read so far has lower precedence than the
                              // included file, which in turn has lower precedence than
                              // what follows the directive.
                              layers.push(current_layer);
                              layers.extend(Self::parse(&filename, &data)?);
                              current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
                          } else if EMPTY_RE.is_match(bytes) {
                              // Blank lines and comments carry no data
                          } else if let Some(m) = SECTION_RE.captures(bytes) {
                              section = m[1].to_vec();
                          } else if let Some(m) = ITEM_RE.captures(bytes) {
                              let item = m[1].to_vec();
                              let mut value = m[2].to_vec();
                              // Consume the lines that follow for as long as they are
                              // comments (skipped) or indented continuations of the value.
                              while let Some(&(_, next)) = lines_iter.peek() {
                                  if COMMENT_RE.is_match(next) {
                                      // A comment does not end a multi-line value
                                  } else if let Some(cont) = CONT_RE.captures(next) {
                                      // Append the continuation line's own text (capture 1
                                      // of `CONT_RE`), not a capture of the item line.
                                      value.extend(b"\n");
                                      value.extend(&cont[1]);
                                  } else {
                                      break;
                                  }
                                  lines_iter.next();
                              }
                              current_layer.add(section.clone(), item, value, line);
                          } else if let Some(m) = UNSET_RE.captures(bytes) {
                              // Only entries of the layer being built can be removed here
                              if let Some(map) = current_layer.sections.get_mut(&section) {
                                  map.remove(&m[1]);
                              }
                          } else {
                              let message = if bytes.starts_with(b" ") {
                                  format_bytes!(b"unexpected leading whitespace: {}", bytes)
                              } else {
                                  bytes.to_owned()
                              };
                              return Err(ConfigParseError {
                                  origin: ConfigOrigin::File(src.to_owned()),
                                  line,
                                  message,
                              }
                              .into());
                          }
                      }
                      if !current_layer.is_empty() {
                          layers.push(current_layer);
                      }
                      Ok(layers)
                  }
              }
              impl DisplayBytes for ConfigLayer {
                  /// Writes one `section.item=value # origin` line per entry, ordered by
                  /// section name and then by item name.
                  fn display_bytes(
                      &self,
                      out: &mut dyn std::io::Write,
                  ) -> std::io::Result<()> {
                      let mut by_section: Vec<_> = self.sections.iter().collect();
                      by_section.sort_by(|(left, _), (right, _)| left.cmp(right));
                      for (section, entries) in by_section {
                          let mut by_item: Vec<_> = entries.iter().collect();
                          by_item.sort_by(|(left, _), (right, _)| left.cmp(right));
                          for (item, value) in by_item {
                              write_bytes!(
                                  out,
                                  b"{}.{}={} # {}\n",
                                  section,
                                  item,
                                  &value.bytes,
                                  &self.origin,
                              )?;
                          }
                      }
                      Ok(())
                  }
              }
              /// Mapping of section item to value, for the items of a single section.
              /// In the following:
              /// ```text
              /// [ui]
              /// paginate=no
              /// ```
              /// "paginate" is the section item and "no" the value.
              pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
              /// A single config value, together with the line it was defined on if any.
              #[derive(Clone, Debug, PartialEq)]
              pub struct ConfigValue {
                  /// The raw bytes of the value (be it from the CLI, env or from a file)
                  pub bytes: Vec<u8>,
                  /// Only present if the value comes from a file, 1-indexed.
                  pub line: Option<usize>,
              }
              /// Where the values of a config layer come from.
              #[derive(Clone, Debug)]
              pub enum ConfigOrigin {
                  /// From a configuration file
                  File(PathBuf),
                  /// From a `--config` CLI argument
                  CommandLine,
                  /// From environment variables like `$PAGER` or `$EDITOR`
                  // NOTE(review): the payload is displayed after a `$`, so it is
                  // presumably the variable name; confirm against the code creating it.
                  Environment(Vec<u8>),
                  /* TODO cli
                   * TODO defaults (configitems.py)
                   * TODO extensions
                   * TODO Python resources?
                   * Others? */
              }
              impl DisplayBytes for ConfigOrigin {
                  /// Writes the path for `File`, the literal `--config` for `CommandLine`,
                  /// and `$` followed by the payload for `Environment`.
                  fn display_bytes(
                      &self,
                      out: &mut dyn std::io::Write,
                  ) -> std::io::Result<()> {
                      match self {
                          ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
                          ConfigOrigin::CommandLine => out.write_all(b"--config"),
                          ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
                      }
                  }
              }
              /// A failure to parse a config source.
              #[derive(Debug)]
              pub struct ConfigParseError {
                  /// The source that was being parsed
                  pub origin: ConfigOrigin,
                  /// 1-indexed line the error was found on, if known
                  pub line: Option<usize>,
                  /// Raw bytes describing the problem
                  pub message: Vec<u8>,
              }
              /// Any error that reading the configuration can produce.
              /// Both variants can be built from their payload through `From`/`Into`.
              #[derive(Debug, derive_more::From)]
              pub enum ConfigError {
                  /// A config source could not be parsed
                  Parse(ConfigParseError),
                  /// Any other Mercurial error
                  Other(HgError),
              }
              /// Compiles one of the hard-coded patterns of this module.
              ///
              /// # Panics
              ///
              /// Panics if `pattern` is not a valid regex.
              fn make_regex(pattern: &'static str) -> Regex {
                  match Regex::new(pattern) {
                      Ok(regex) => regex,
                      Err(error) => panic!("expected a valid regex: {:?}", error),
                  }
              }

rust/hg-core/src/utils.rs

0 +53 0

              // utils module
              //
              // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Contains useful functions, traits, structs, etc. for use in core.
              use crate::errors::{HgError, IoErrorContext};
              use crate::utils::hg_path::HgPath;
              use im_rc::ordmap::DiffItem;
              use im_rc::ordmap::OrdMap;
              use std::cell::Cell;
              use std::fmt;
              use std::{io::Write, ops::Deref};
              pub mod files;
              pub mod hg_path;
              pub mod path_auditor;
              /// Useful until rust/issues/56345 is stable
              ///
              /// Returns the index of the first occurrence of `needle` in `slice`, or
              /// `None` if there is none. An empty `needle` is found at index 0 of any
              /// slice.
              ///
              /// # Examples
              ///
              /// ```
              /// use crate::hg::utils::find_slice_in_slice;
              ///
              /// let haystack = b"This is the haystack".to_vec();
              /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
              /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
              /// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
              /// ```
              pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
              where
                  for<'a> &'a [T]: PartialEq,
              {
                  if needle.is_empty() {
                      // `slice::windows` panics when asked for windows of size 0
                      return Some(0);
                  }
                  slice
                      .windows(needle.len())
                      .position(|window| window == needle)
              }
              /// Overwrites, in place, the occurrences of `from` inside `buf` with `to`.
              ///
              /// Nothing happens unless `from` and `to` have the same length and `buf`
              /// is at least that long. Every position of `buf` is examined in turn,
              /// including positions inside a region that was just overwritten.
              ///
              /// # Examples
              ///
              /// ```
              /// use crate::hg::utils::replace_slice;
              /// let mut line = b"I hate writing tests!".to_vec();
              /// replace_slice(&mut line, b"hate", b"love");
              /// assert_eq!(
              ///     line,
              ///     b"I love writing tests!".to_vec()
              /// );
              /// ```
              pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
              where
                  T: Clone + PartialEq,
              {
                  let width = from.len();
                  if width != to.len() || buf.len() < width {
                      return;
                  }
                  let last_start = buf.len() - width;
                  let mut start = 0;
                  while start <= last_start {
                      let window = &mut buf[start..start + width];
                      if *window == *from {
                          window.clone_from_slice(to);
                      }
                      start += 1;
                  }
              }
              /// Trimming and splitting helpers for slices.
              pub trait SliceExt {
                  /// Returns the slice without its trailing `\n`
                  fn trim_end_newlines(&self) -> &Self;
                  /// Returns the slice without its trailing whitespace
                  fn trim_end(&self) -> &Self;
                  /// Returns the slice without its leading whitespace
                  fn trim_start(&self) -> &Self;
                  /// Returns the slice without its leading and trailing whitespace
                  fn trim(&self) -> &Self;
                  /// Returns what follows `needle` if the slice starts with it,
                  /// `None` otherwise
                  fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
                  /// Returns what is before and after the first `separator`,
                  /// or `None` if the slice does not contain it
                  fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
              }
              /// Returns `false` for the ASCII whitespace bytes (space, `\t`, `\n`,
              /// vertical tab, form feed and `\r`) and `true` for every other byte.
              ///
              /// Bytes above 127 are never whitespace: in byte strings they are usually
              /// part of a multi-byte sequence (0xA0 is the last byte of "à" in UTF-8),
              /// so they must not be read as the code points U+0085 or U+00A0.
              ///
              /// Takes a reference so that it can be passed as-is to
              /// `Iterator::position`.
              #[allow(clippy::trivially_copy_pass_by_ref)]
              fn is_not_whitespace(c: &u8) -> bool {
                  match *c {
                      b'\t'..=b'\r' | b' ' => false,
                      _ => true,
                  }
              }
              impl SliceExt for [u8] {
                  /// Cuts the slice after its last byte that is not `\n`.
                  fn trim_end_newlines(&self) -> &[u8] {
                      match self.iter().rposition(|&byte| byte != b'\n') {
                          Some(last) => &self[..=last],
                          None => &[],
                      }
                  }
                  /// Cuts the slice after its last non-whitespace byte.
                  fn trim_end(&self) -> &[u8] {
                      match self.iter().rposition(is_not_whitespace) {
                          Some(last) => &self[..=last],
                          None => &[],
                      }
                  }
                  /// Starts the slice at its first non-whitespace byte.
                  fn trim_start(&self) -> &[u8] {
                      match self.iter().position(is_not_whitespace) {
                          Some(first) => &self[first..],
                          None => &[],
                      }
                  }
                  /// ```
                  /// use hg::utils::SliceExt;
                  /// assert_eq!(
                  ///     b"  to trim  ".trim(),
                  ///     b"to trim"
                  /// );
                  /// assert_eq!(
                  ///     b"to trim  ".trim(),
                  ///     b"to trim"
                  /// );
                  /// assert_eq!(
                  ///     b"  to trim".trim(),
                  ///     b"to trim"
                  /// );
                  /// ```
                  fn trim(&self) -> &[u8] {
                      let without_leading = self.trim_start();
                      without_leading.trim_end()
                  }
                  /// Returns the bytes following `needle` when the slice starts with it.
                  fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
                      if !self.starts_with(needle) {
                          return None;
                      }
                      Some(&self[needle.len()..])
                  }
                  /// Splits around the first `separator` byte, which is in neither half.
                  fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
                      let at = self.iter().position(|&byte| byte == separator)?;
                      Some((&self[..at], &self[at + 1..]))
                  }
              }
              /// Escaping of raw bytes so that they can safely be shown to the user.
              pub trait Escaped {
                  /// Return bytes escaped for display to the user
                  fn escaped_bytes(&self) -> Vec<u8>;
              }
              impl Escaped for u8 {
                  /// Escapes a single byte:
                  ///
                  /// - `'` and `\` are prefixed with a backslash,
                  /// - tab, line feed and carriage return become `\t`, `\n` and `\r`,
                  /// - other bytes below 32 or above 126 become `\x` followed by two
                  ///   lowercase hexadecimal digits,
                  /// - every other byte is returned as-is.
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let mut acc = vec![];
                      match self {
                          c @ b'\'' | c @ b'\\' => {
                              acc.push(b'\\');
                              acc.push(*c);
                          }
                          // These are raw byte strings: a single backslash in the
                          // source is a single backslash in the output.
                          b'\t' => {
                              acc.extend(br"\t");
                          }
                          b'\n' => {
                              acc.extend(br"\n");
                          }
                          b'\r' => {
                              acc.extend(br"\r");
                          }
                          c if (*c < b' ' || *c >= 127) => {
                              // Always two digits, so that the end of the escape is
                              // unambiguous. Writing to a `Vec` cannot fail.
                              write!(acc, "\\x{:02x}", c).unwrap();
                          }
                          c => {
                              acc.push(*c);
                          }
                      }
                      acc
                  }
              }
              impl<'a, T: Escaped> Escaped for &'a [T] {
                  fn escaped_bytes(&self) -> Vec<u8> {
                      self.iter().flat_map(Escaped::escaped_bytes).collect()
                  }
              }
              impl<T: Escaped> Escaped for Vec<T> {
                  /// Escapes the vector the same way as the slice of its elements.
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let elements: &[T] = self.deref();
                      elements.escaped_bytes()
                  }
              }
              impl<'a> Escaped for &'a HgPath {
                  /// Escapes the raw bytes of the path.
                  fn escaped_bytes(&self) -> Vec<u8> {
                      let raw = self.as_bytes();
                      raw.escaped_bytes()
                  }
              }
              // TODO: use the str method when we require Rust 1.45
              /// Returns `s` without `suffix` at its end, or `None` if `s` does not end
              /// with `suffix`.
              pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
                  if !s.ends_with(suffix) {
                      return None;
                  }
                  let kept = s.len() - suffix.len();
                  Some(&s[..kept])
              }
              /// Quotes `value` so that a POSIX shell reads it back as a single word.
              ///
              /// A non-empty value made only of ASCII letters, digits and `._/+-` is
              /// returned unchanged. Anything else is wrapped in single quotes, with
              /// each `'` of the value written as `'\''`. Nothing can be escaped inside
              /// single quotes, so the quoting is closed, an escaped quote is emitted,
              /// and the quoting is opened again. The empty value becomes `''`, since
              /// it would otherwise not be an argument at all.
              #[cfg(unix)]
              pub fn shell_quote(value: &[u8]) -> Vec<u8> {
                  // TODO: Use the `matches!` macro when we require Rust 1.42+
                  let is_safe = |byte: &u8| match *byte {
                      b'a'..=b'z'
                      | b'A'..=b'Z'
                      | b'0'..=b'9'
                      | b'.'
                      | b'_'
                      | b'/'
                      | b'+'
                      | b'-' => true,
                      _ => false,
                  };
                  if !value.is_empty() && value.iter().all(is_safe) {
                      value.to_owned()
                  } else {
                      let mut quoted = Vec::with_capacity(value.len() + 2);
                      quoted.push(b'\'');
                      for &byte in value {
                          if byte == b'\'' {
                              quoted.extend_from_slice(br"'\''");
                          } else {
                              quoted.push(byte);
                          }
                      }
                      quoted.push(b'\'');
                      quoted
                  }
              }
              pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
                  std::env::current_dir().map_err(|error| HgError::IoError {
                      error,
                      context: IoErrorContext::CurrentDir,
                  })
              }
              pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
                  std::env::current_exe().map_err(|error| HgError::IoError {
                      error,
                      context: IoErrorContext::CurrentExe,
                  })
              }
+             /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
+             ///
+             /// A reference to a variable that is not set is left untouched.
+             pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
+                 lazy_static::lazy_static! {
+                     /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
+                     /// The `x` makes whitespace ignored.
+                     /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
+                     static ref VAR_RE: regex::bytes::Regex =
+                         regex::bytes::Regex::new(r"(?x-u)
+                             \$
+                             (?:
+                                 (\w+)
+                                 |
+                                 \{
+                                     ([^}]*)
+                                 \}
+                             )
+                         ").unwrap();
+                 }
+                 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
+                     // Group 1 is the name of a `$FOO` reference,
+                     // group 2 the name of a `${FOO}` one.
+                     let name = match captures.get(1) {
+                         Some(bare) => bare,
+                         None => captures
+                             .get(2)
+                             .expect("either side of `|` must participate in match"),
+                     };
+                     let var_name = files::get_os_str_from_bytes(name.as_bytes());
+                     match std::env::var_os(var_name) {
+                         Some(value) => files::get_bytes_from_os_str(value),
+                         // Referencing an environment variable that does not exist.
+                         // Leave the $FOO reference as-is.
+                         None => captures[0].to_owned(),
+                     }
+                 })
+             }
+             #[test]
+             fn test_expand_vars() {
+                 // Modifying process-global state in a test isn’t great,
+                 // but hopefully this won’t collide with anything.
+                 std::env::set_var("TEST_EXPAND_VAR", "1");
+                 // `$NAME` form
+                 let bare = expand_vars(b"before/$TEST_EXPAND_VAR/after");
+                 assert_eq!(bare, &b"before/1/after"[..]);
+                 // `${NAME}` form, several references in a row
+                 let braced = expand_vars(
+                     b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after",
+                 );
+                 assert_eq!(braced, &b"before111after"[..]);
+                 // A variable that is not set is left as-is
+                 let unset = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
+                 assert_eq!(expand_vars(unset), &unset[..]);
+             }
              /// What the `merge` callback of `ordmap_union_with_merge` decides for a key
              /// that exists in both maps.
              pub(crate) enum MergeResult<V> {
                  /// Keep the value of the left map
                  UseLeftValue,
                  /// Keep the value of the right map
                  UseRightValue,
                  /// Use this value instead of either existing one
                  UseNewValue(V),
              }
              /// Return the union of the two given maps,
              /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
              /// both.
              ///
              /// The work is delegated to one of two algorithms depending on the relative
              /// sizes of the maps.
              ///
              /// CC https://github.com/bodil/im-rs/issues/166
              pub(crate) fn ordmap_union_with_merge<K, V>(
                  left: OrdMap<K, V>,
                  right: OrdMap<K, V>,
                  mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
              ) -> OrdMap<K, V>
              where
                  K: Clone + Ord,
                  V: Clone + PartialEq,
              {
                  if left.ptr_eq(&right) {
                      // One of the two maps is an unmodified clone of the other
                      return left;
                  }
                  let (left_len, right_len) = (left.len(), right.len());
                  if left_len / 2 > right_len {
                      // The size difference between two maps is a lower bound on how
                      // many keys of the larger one are missing from the smaller one,
                      // hence on the number of differences in `OrdMap::diff` and on the
                      // "amount of work" of `ordmap_union_with_merge_by_diff`.
                      //
                      // `left` being more than twice the size of `right`, there are more
                      // differences than entries in `right`: iterating `right` is more
                      // efficient.
                      //
                      // This helps a lot when a tiny (or empty) map is merged
                      // with a large one.
                      return ordmap_union_with_merge_by_iter(left, right, merge);
                  }
                  if left_len < right_len / 2 {
                      // Same as above with the roles of `left` and `right` swapped.
                      // The callee gives a value of `right` first and a value of `left`
                      // second, so both the arguments and the outcome of `merge` have to
                      // be swapped back.
                      return ordmap_union_with_merge_by_iter(
                          right,
                          left,
                          |key, from_right, from_left| match merge(
                              key, from_left, from_right,
                          ) {
                              MergeResult::UseLeftValue => MergeResult::UseRightValue,
                              MergeResult::UseRightValue => MergeResult::UseLeftValue,
                              MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
                          },
                      );
                  }
                  // For maps of similar size, use the algorithm based on `OrdMap::diff`
                  ordmap_union_with_merge_by_diff(left, right, merge)
              }
              /// Efficient if `right` is much smaller than `left`
              ///
              /// Every entry of `right` is inserted into `left`, unless `merge` decides
              /// otherwise for a key that `left` already has.
              fn ordmap_union_with_merge_by_iter<K, V>(
                  mut left: OrdMap<K, V>,
                  right: OrdMap<K, V>,
                  mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
              ) -> OrdMap<K, V>
              where
                  K: Clone + Ord,
                  V: Clone,
              {
                  for (key, right_value) in right {
                      // First decide what to store (if anything), then store it
                      let replacement = match left.get(&key) {
                          None => Some(right_value),
                          Some(left_value) => match merge(&key, left_value, &right_value) {
                              MergeResult::UseLeftValue => None,
                              MergeResult::UseRightValue => Some(right_value),
                              MergeResult::UseNewValue(new_value) => Some(new_value),
                          },
                      };
                      if let Some(value) = replacement {
                          left.insert(key, value);
                      }
                  }
                  left
              }
              /// Fallback when both maps are of similar size
              ///
              /// Walks the differences between `left` and `right` once, recording what
              /// would have to be inserted into each of them to obtain the union, then
              /// applies one of the two lists of insertions and returns the map it was
              /// applied to.
              fn ordmap_union_with_merge_by_diff<K, V>(
                  mut left: OrdMap<K, V>,
                  mut right: OrdMap<K, V>,
                  mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
              ) -> OrdMap<K, V>
              where
                  K: Clone + Ord,
                  V: Clone + PartialEq,
              {
                  // (key, value) pairs that would need to be inserted in either map
                  // in order to turn it into the union.
                  //
                  // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
                  // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
                  // with `left_updates` only borrowing from `right` and `right_updates` from
                  // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
                  //
                  // This would allow moving all `.clone()` calls to after we’ve decided
                  // which of `right_updates` or `left_updates` to use
                  // (value ones becoming `Cow::into_owned`),
                  // and avoid making clones we don’t end up using.
                  let mut left_updates = Vec::new();
                  let mut right_updates = Vec::new();
                  for difference in left.diff(&right) {
                      match difference {
                          // The entry goes to the updates of `left`
                          DiffItem::Add(key, value) => {
                              left_updates.push((key.clone(), value.clone()))
                          }
                          // The entry goes to the updates of `right`
                          DiffItem::Remove(key, value) => {
                              right_updates.push((key.clone(), value.clone()))
                          }
                          // Both maps have the key, with different values: the map
                          // that does not already hold the chosen value needs an update
                          DiffItem::Update {
                              old: (key, left_value),
                              new: (_, right_value),
                          } => match merge(key, left_value, right_value) {
                              MergeResult::UseLeftValue => {
                                  right_updates.push((key.clone(), left_value.clone()))
                              }
                              MergeResult::UseRightValue => {
                                  left_updates.push((key.clone(), right_value.clone()))
                              }
                              // Neither map holds the new value yet
                              MergeResult::UseNewValue(new_value) => {
                                  left_updates.push((key.clone(), new_value.clone()));
                                  right_updates.push((key.clone(), new_value))
                              }
                          },
                      }
                  }
                  // Apply whichever list of updates is the shortest
                  if left_updates.len() < right_updates.len() {
                      for (key, value) in left_updates {
                          left.insert(key, value);
                      }
                      left
                  } else {
                      for (key, value) in right_updates {
                          right.insert(key, value);
                      }
                      right
                  }
              }
              /// Build a value whose `Display` output is every item of `iter` with
              /// `separator` written between consecutive items, like Python’s
              /// `separator.join(iter)`.
              ///
              /// The iterator is consumed the first time the returned value is formatted;
              /// formatting it a second time writes nothing.
              pub fn join_display(
                  iter: impl IntoIterator<Item = impl fmt::Display>,
                  separator: impl fmt::Display,
              ) -> impl fmt::Display {
                  let pending = Cell::new(Some(iter.into_iter()));
                  JoinDisplay {
                      iter: pending,
                      separator,
                  }
              }
              /// Backing type of [`join_display`].
              ///
              /// `Display::fmt` only receives `&self`, so the iterator sits in a `Cell`
              /// from which it can be taken out.
              struct JoinDisplay<I, S> {
                  iter: Cell<Option<I>>,
                  separator: S,
              }
              impl<I, T, S> fmt::Display for JoinDisplay<I, S>
              where
                  I: Iterator<Item = T>,
                  T: fmt::Display,
                  S: fmt::Display,
              {
                  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                      // Taking the iterator leaves `None` in the cell, which is what
                      // makes later formatting passes produce no output.
                      let mut items = match self.iter.take() {
                          Some(items) => items,
                          None => return Ok(()),
                      };
                      // The first item is written without a leading separator.
                      if let Some(head) = items.next() {
                          head.fmt(f)?;
                      }
                      items.try_for_each(|item| {
                          self.separator.fmt(f)?;
                          item.fmt(f)
                      })
                  }
              }

rust/hg-core/src/utils/files.rs

0 +5 -2

              // files.rs
              //
              // Copyright 2019
              // Raphaël Gomès <rgomes@octobus.net>,
              // Yuya Nishihara <yuya@tcha.org>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Functions for fiddling with files.
              use crate::utils::{
                  hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
                  path_auditor::PathAuditor,
                  replace_slice,
              };
              use lazy_static::lazy_static;
              use same_file::is_same_file;
              use std::borrow::{Cow, ToOwned};
              use std::ffi::OsStr;
              use std::fs::Metadata;
              use std::iter::FusedIterator;
              use std::ops::Deref;
              use std::path::{Path, PathBuf};
-             pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
+             pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
                  let os_str;
                  #[cfg(unix)]
                  {
                      // `OsStrExt` is the Unix-only extension trait that provides
                      // `OsStr::from_bytes`; the result borrows from `bytes`.
                      use std::os::unix::ffi::OsStrExt;
                      os_str = std::ffi::OsStr::from_bytes(bytes);
                  }
                  // NOTE(review): `os_str` is only assigned in the `#[cfg(unix)]` block
                  // above, so this presumably does not build on other targets yet.
                  // TODO Handle other platforms
                  // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
                  // Perhaps, the return type would have to be Result<PathBuf>.
+                 os_str
+             }
-                 Path::new(os_str)
+             pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
+                 Path::new(get_os_str_from_bytes(bytes))
              }
              /// Returns an owned copy of the bytes that make up `path`.
              // TODO: on Windows this needs a WTF8 to MBCS conversion, which is why an
              // owned `Vec<u8>` is returned instead of a borrowed slice.
              #[cfg(unix)]
              pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
                  let path: &Path = path.as_ref();
                  get_bytes_from_os_str(path.as_os_str())
              }
              /// Returns an owned copy of the bytes that make up `str`.
              #[cfg(unix)]
              pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
                  use std::os::unix::ffi::OsStrExt;
                  let raw: &[u8] = str.as_ref().as_bytes();
                  Vec::from(raw)
              }
              /// Iterator over a repository path followed by each of its ancestor
              /// directories, deepest first, ending with the empty path.
              #[derive(Copy, Clone, Debug)]
              pub struct Ancestors<'a> {
                  next: Option<&'a HgPath>,
              }
              impl<'a> Iterator for Ancestors<'a> {
                  type Item = &'a HgPath;
                  fn next(&mut self) -> Option<Self::Item> {
                      // `take()` leaves `None` behind: unless a parent is stored below,
                      // the iterator is now exhausted.
                      let current = self.next.take()?;
                      if !current.is_empty() {
                          // The parent is everything before the last '/', or the empty
                          // path when there is no '/' at all.
                          let cut =
                              current.bytes().rposition(|c| *c == b'/').unwrap_or(0);
                          self.next = Some(HgPath::new(&current.as_bytes()[..cut]));
                      }
                      Some(current)
                  }
              }
              impl<'a> FusedIterator for Ancestors<'a> {}
              /// Iterator over `(path, base)` pairs: each step replaces the current pair
              /// with the result of `split_filename()` on its first element, until an
              /// empty first element has been yielded.
              #[derive(Copy, Clone, Debug)]
              pub(crate) struct AncestorsWithBase<'a> {
                  next: Option<(&'a HgPath, &'a HgPath)>,
              }
              impl<'a> Iterator for AncestorsWithBase<'a> {
                  type Item = (&'a HgPath, &'a HgPath);
                  fn next(&mut self) -> Option<Self::Item> {
                      // `take()` leaves `None` behind: unless a new pair is stored
                      // below, the iterator is now exhausted.
                      let current = self.next.take()?;
                      let (dir, _) = current;
                      if !dir.is_empty() {
                          self.next = Some(dir.split_filename());
                      }
                      Some(current)
                  }
              }
              impl<'a> FusedIterator for AncestorsWithBase<'a> {}
              /// Iterates over the ancestor directories of a repository path, from the
              /// closest one up to the root (the empty path).
              ///
              /// `path` uses '/' as its separator and must not start with '/'.
              ///
              /// `path` itself is not part of the output, except when it is b"" (the
              /// root directory), in which case it is the only item.
              pub fn find_dirs(path: &HgPath) -> Ancestors {
                  let mut ancestors = Ancestors { next: Some(path) };
                  if path.is_empty() {
                      return ancestors;
                  }
                  // The first item would be `path` itself: discard it.
                  ancestors.next();
                  ancestors
              }
              /// Like `find_dirs`, but each ancestor directory comes paired with the
              /// path component that was split off to reach it.
              ///
              /// `path` uses '/' as its separator and must not start with '/'.
              ///
              /// `path` itself is not part of the output, except when it is b"" (the
              /// root directory), in which case the only item is `(b"", b"")`.
              pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
                  let no_base = HgPath::new(b"");
                  let mut ancestors = AncestorsWithBase {
                      next: Some((path, no_base)),
                  };
                  if path.is_empty() {
                      return ancestors;
                  }
                  // The first item would be `path` itself: discard it.
                  ancestors.next();
                  ancestors
              }
              /// Case-folds `path`: ASCII upper case on Windows, ASCII lower case on
              /// Unix.
              ///
              /// TODO more than ASCII?
              pub fn normalize_case(path: &HgPath) -> HgPathBuf {
                  #[cfg(windows)]
                  {
                      // NTFS compares via upper()
                      path.to_ascii_uppercase()
                  }
                  #[cfg(unix)]
                  {
                      path.to_ascii_lowercase()
                  }
              }
              lazy_static! {
                  /// UTF-8 encodings of the code points that `hfs_ignore_clean` strips
                  /// from its input.
                  static ref IGNORED_CHARS: Vec<Vec<u8>> = {
                      [
                          0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
                          0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
                      ]
                      .iter()
                      .map(|code| {
                          // None of the values above is a surrogate, so `from_u32`
                          // cannot fail; all of them lie in U+0800..=U+FFFF, so a
                          // 3-byte buffer is exactly large enough for `encode_utf8`.
                          std::char::from_u32(*code)
                              .unwrap()
                              .encode_utf8(&mut [0; 3])
                              .bytes()
                              .collect()
                      })
                      .collect()
                  };
              }
              /// Returns a copy of `bytes` with every occurrence of the byte sequences
              /// listed in `IGNORED_CHARS` removed.
              fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
                  let mut cleaned = bytes.to_vec();
                  // Cheap pre-check: the removal passes only run when the input
                  // contains a 0xE2 or 0xEF byte.
                  let may_contain_ignored =
                      bytes.iter().any(|&b| b == b'\xe2' || b == b'\xef');
                  if may_contain_ignored {
                      for ignored in IGNORED_CHARS.iter() {
                          replace_slice(&mut cleaned, ignored, &[]);
                      }
                  }
                  cleaned
              }
              /// ASCII-lowercases `bytes`, then passes the result through
              /// `hfs_ignore_clean`.
              pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
                  let lowered = bytes.to_ascii_lowercase();
                  hfs_ignore_clean(&lowered)
              }
              /// Plain-value copy of a few fields of a file's `Metadata`.
              #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
              pub struct HgMetadata {
                  pub st_dev: u64,
                  pub st_mode: u32,
                  pub st_nlink: u64,
                  pub st_size: u64,
                  pub st_mtime: i64,
                  pub st_ctime: i64,
              }
              // TODO support other platforms
              #[cfg(unix)]
              impl HgMetadata {
                  /// Copies the device, mode, link count, size, mtime and ctime out of
                  /// `metadata`, using the Unix-specific accessors.
                  pub fn from_metadata(metadata: Metadata) -> Self {
                      use std::os::unix::fs::MetadataExt;
                      let stat = &metadata;
                      let st_dev = stat.dev();
                      let st_mode = stat.mode();
                      let st_nlink = stat.nlink();
                      let st_size = stat.size();
                      let st_mtime = stat.mtime();
                      let st_ctime = stat.ctime();
                      Self {
                          st_dev,
                          st_mode,
                          st_nlink,
                          st_size,
                          st_mtime,
                          st_ctime,
                      }
                  }
              }
              /// Returns the canonical path of `name`, given `cwd` and `root`
              ///
              /// A relative `name` is first resolved as `root.join(cwd).join(name)`; an
              /// absolute `name` is used as is. On success the returned path is
              /// relative to `root`, and empty when `name` designates `root` itself.
              ///
              /// # Errors
              ///
              /// Returns `HgPathError::NotUnderRoot` when neither the resolved path nor
              /// any of its ancestors is found to be the same file as `root`. Errors
              /// from `path_to_hg_path_buf` and `PathAuditor::audit_path` are
              /// propagated.
              pub fn canonical_path(
                  root: impl AsRef<Path>,
                  cwd: impl AsRef<Path>,
                  name: impl AsRef<Path>,
              ) -> Result<PathBuf, HgPathError> {
                  // TODO add missing normalization for other platforms
                  let root = root.as_ref();
                  let cwd = cwd.as_ref();
                  let name = name.as_ref();
                  let name = if !name.is_absolute() {
                      root.join(&cwd).join(&name)
                  } else {
                      name.to_owned()
                  };
                  let auditor = PathAuditor::new(&root);
                  // First try a purely lexical comparison of path components.
                  if name != root && name.starts_with(&root) {
                      // Cannot fail: `starts_with` was checked just above.
                      let name = name.strip_prefix(&root).unwrap();
                      auditor.audit_path(path_to_hg_path_buf(name)?)?;
                      Ok(name.to_owned())
                  } else if name == root {
                      Ok("".into())
                  } else {
                      // Determine whether `name' is in the hierarchy at or beneath `root',
                      // by iterating name=name.parent() until it returns `None` (can't
                      // check name == '/', because that doesn't work on windows).
                      let mut name = name.deref();
                      let original_name = name.to_owned();
                      loop {
                          // Any error from `is_same_file` is treated as "not the same
                          // file".
                          let same = is_same_file(&name, &root).unwrap_or(false);
                          if same {
                              if name == original_name {
                                  // `name` was actually the same as root (maybe a symlink)
                                  return Ok("".into());
                              }
                              // `name` is a symlink to root, so `original_name` is under
                              // root
                              // Cannot fail: `name` was reached from `original_name`
                              // through successive `parent()` calls.
                              let rel_path = original_name.strip_prefix(&name).unwrap();
                              auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
                              return Ok(rel_path.to_owned());
                          }
                          name = match name.parent() {
                              None => break,
                              Some(p) => p,
                          };
                      }
                      // TODO hint to the user about using --cwd
                      // Bubble up the responsibility to Python for now
                      Err(HgPathError::NotUnderRoot {
                          path: original_name.to_owned(),
                          root: root.to_owned(),
                      })
                  }
              }
              /// Expresses `path` relative to the current working directory, for
              /// display purposes.
              ///
              /// `cwd` is a `HgPath`, so it is considered relative to the root directory
              /// of the repository.
              ///
              /// # Examples
              ///
              /// ```
              /// use hg::utils::hg_path::HgPath;
              /// use hg::utils::files::relativize_path;
              /// use std::borrow::Cow;
              ///
              /// let file = HgPath::new(b"nested/file");
              /// let cwd = HgPath::new(b"");
              /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file"));
              ///
              /// let cwd = HgPath::new(b"nested");
              /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file"));
              ///
              /// let cwd = HgPath::new(b"other");
              /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file"));
              /// ```
              pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> {
                  let cwd = cwd.as_ref();
                  if cwd.is_empty() {
                      // Already relative to the root: no allocation needed.
                      return Cow::Borrowed(path.as_bytes());
                  }
                  let mut path_parts = path.as_bytes().split(|b| *b == b'/').peekable();
                  let mut cwd_parts = cwd.as_bytes().split(|b| *b == b'/').peekable();
                  // Skip the leading components that `path` and `cwd` have in common.
                  loop {
                      let shared = match (path_parts.peek(), cwd_parts.peek()) {
                          (Some(ours), Some(theirs)) => ours == theirs,
                          _ => false,
                      };
                      if !shared {
                          break;
                      }
                      path_parts.next();
                      cwd_parts.next();
                  }
                  // One ".." per remaining `cwd` component, then what is left of
                  // `path`, all joined with '/'.
                  let climb = cwd_parts.map(|_| &b".."[..]);
                  let mut relative: Vec<u8> = Vec::new();
                  for (index, component) in climb.chain(path_parts).enumerate() {
                      if index != 0 {
                          relative.push(b'/');
                      }
                      relative.extend_from_slice(component);
                  }
                  Cow::Owned(relative)
              }
              // Unit tests for `find_dirs`, `find_dirs_with_base` and `canonical_path`.
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use pretty_assertions::assert_eq;
                  #[test]
                  fn find_dirs_some() {
                      // The path itself is skipped; ancestors come deepest first and end
                      // with the root (b"").
                      let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
                      assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
                      assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
                      assert_eq!(dirs.next(), Some(HgPath::new(b"")));
                      // Once exhausted, the iterator keeps returning `None`.
                      assert_eq!(dirs.next(), None);
                      assert_eq!(dirs.next(), None);
                  }
                  #[test]
                  fn find_dirs_empty() {
                      // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
                      let mut dirs = super::find_dirs(HgPath::new(b""));
                      assert_eq!(dirs.next(), Some(HgPath::new(b"")));
                      assert_eq!(dirs.next(), None);
                      assert_eq!(dirs.next(), None);
                  }
                  #[test]
                  fn test_find_dirs_with_base_some() {
                      // Each item pairs an ancestor directory with the component that
                      // was split off to reach it.
                      let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
                      assert_eq!(
                          dirs.next(),
                          Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
                      );
                      assert_eq!(
                          dirs.next(),
                          Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
                      );
                      assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
                      assert_eq!(dirs.next(), None);
                      assert_eq!(dirs.next(), None);
                  }
                  #[test]
                  fn test_find_dirs_with_base_empty() {
                      // The empty path yields a single pair of empty paths.
                      let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
                      assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
                      assert_eq!(dirs.next(), None);
                      assert_eq!(dirs.next(), None);
                  }
                  #[test]
                  fn test_canonical_path() {
                      // Absolute `cwd` outside of the root: `/dir/filename` is not under
                      // `/repo`.
                      let root = Path::new("/repo");
                      let cwd = Path::new("/dir");
                      let name = Path::new("filename");
                      assert_eq!(
                          canonical_path(root, cwd, name),
                          Err(HgPathError::NotUnderRoot {
                              path: PathBuf::from("/dir/filename"),
                              root: root.to_path_buf()
                          })
                      );
                      // `cwd` is the filesystem root: `/filename` is not under `/repo`.
                      let root = Path::new("/repo");
                      let cwd = Path::new("/");
                      let name = Path::new("filename");
                      assert_eq!(
                          canonical_path(root, cwd, name),
                          Err(HgPathError::NotUnderRoot {
                              path: PathBuf::from("/filename"),
                              root: root.to_path_buf()
                          })
                      );
                      // A relative `name` that leads back into the root from `/`.
                      let root = Path::new("/repo");
                      let cwd = Path::new("/");
                      let name = Path::new("repo/filename");
                      assert_eq!(
                          canonical_path(root, cwd, name),
                          Ok(PathBuf::from("filename"))
                      );
                      // `cwd` is the root itself.
                      let root = Path::new("/repo");
                      let cwd = Path::new("/repo");
                      let name = Path::new("filename");
                      assert_eq!(
                          canonical_path(root, cwd, name),
                          Ok(PathBuf::from("filename"))
                      );
                      // `cwd` is a sub-directory of the root: it shows up in the result.
                      let root = Path::new("/repo");
                      let cwd = Path::new("/repo/subdir");
                      let name = Path::new("filename");
                      assert_eq!(
                          canonical_path(root, cwd, name),
                          Ok(PathBuf::from("subdir/filename"))
                      );
                  }
                  #[test]
                  fn test_canonical_path_not_rooted() {
                      use std::fs::create_dir;
                      use tempfile::tempdir;
                      // Layout inside the temporary directory:
                      //   a/    parent directory of the repository root
                      //   a/b/  the repository root
                      //   c     symlink pointing to a/b
                      let base_dir = tempdir().unwrap();
                      let base_dir_path = base_dir.path();
                      let beneath_repo = base_dir_path.join("a");
                      let root = base_dir_path.join("a/b");
                      let out_of_repo = base_dir_path.join("c");
                      let under_repo_symlink = out_of_repo.join("d");
                      create_dir(&beneath_repo).unwrap();
                      create_dir(&root).unwrap();
                      // TODO make portable
                      std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
                      // The symlink to the root resolves to the root itself.
                      assert_eq!(
                          canonical_path(&root, Path::new(""), out_of_repo),
                          Ok(PathBuf::from(""))
                      );
                      // `a` is the parent of the root `a/b`, so it is outside of the
                      // repository.
                      assert_eq!(
                          canonical_path(&root, Path::new(""), &beneath_repo),
                          Err(HgPathError::NotUnderRoot {
                              path: beneath_repo.to_owned(),
                              root: root.to_owned()
                          })
                      );
                      // A path below the symlink is reported relative to the root.
                      assert_eq!(
                          canonical_path(&root, Path::new(""), &under_repo_symlink),
                          Ok(PathBuf::from("d"))
                      );
                  }
              }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages